From 972d876ca96d2459ae61e426b54b15539ad7b3da Mon Sep 17 00:00:00 2001
From: Klaus Post
Date: Sat, 20 Jun 2020 06:36:44 -0700
Subject: [PATCH] Do not select zones with <5% free after upload (#9877)

Looking into full-disk errors on a zoned setup: we were not taking the
5% free-space requirement into account when selecting a zone.

The complication is that, even with this accounted for, we do not know
the size of the object the user wants to upload when they do multipart
uploads.

Always sending multipart uploads to the zone with the most free space
would be overly defensive, since it would direct all load to one part
of the cluster. Instead we make sure the zone can hold at least a 1GiB
file, and we disadvantage fuller zones more by subtracting the expected
size before weighing.
---
 cmd/erasure-zones.go | 55 +++++++++++++++++++++++++++++++++++---------
 cmd/globals.go       |  3 +++
 cmd/xl-storage.go    |  6 ++---
 3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/cmd/erasure-zones.go b/cmd/erasure-zones.go
index bc16aba0b..3fe5dd7ac 100644
--- a/cmd/erasure-zones.go
+++ b/cmd/erasure-zones.go
@@ -103,12 +103,13 @@ func (p zonesAvailableSpace) TotalAvailable() uint64 {
 	return total
 }
 
-func (z *erasureZones) getAvailableZoneIdx(ctx context.Context) int {
-	zones := z.getZonesAvailableSpace(ctx)
+// getAvailableZoneIdx returns the index of a zone that can hold size bytes.
+// -1 is returned if no zone has enough available space for the given size.
+func (z *erasureZones) getAvailableZoneIdx(ctx context.Context, size int64) int {
+	zones := z.getZonesAvailableSpace(ctx, size)
 	total := zones.TotalAvailable()
 	if total == 0 {
-		// Houston, we have a problem, maybe panic??
-		return zones[0].Index
+		return -1
 	}
 	// choose when we reach this many
 	choose := rand.Uint64() % total
@@ -120,10 +121,17 @@
 		}
 	}
 	// Should not happen, but print values just in case.
-	panic(fmt.Errorf("reached end of zones (total: %v, atTotal: %v, choose: %v)", total, atTotal, choose))
+	logger.LogIf(ctx, fmt.Errorf("reached end of zones (total: %v, atTotal: %v, choose: %v)", total, atTotal, choose))
+	return -1
 }
 
-func (z *erasureZones) getZonesAvailableSpace(ctx context.Context) zonesAvailableSpace {
+// getZonesAvailableSpace returns the available space of each zone after storing the content.
+// Zones without enough space for the content report 0 bytes available.
+// Negative sizes are treated as 0 bytes.
+func (z *erasureZones) getZonesAvailableSpace(ctx context.Context, size int64) zonesAvailableSpace {
+	if size < 0 {
+		size = 0
+	}
 	var zones = make(zonesAvailableSpace, len(z.zones))
 	storageInfos := make([]StorageInfo, len(z.zones))
@@ -144,6 +152,24 @@
 		for _, davailable := range zinfo.Available {
 			available += davailable
 		}
+		var total uint64
+		for _, dtotal := range zinfo.Total {
+			total += dtotal
+		}
+		// Make sure we can fit "size" onto the disk without going above the diskFillFraction.
+		if available < uint64(size) {
+			available = 0
+		}
+		if available > 0 {
+			// How much will be left after adding the file.
+			available -= uint64(size)
+
+			// wantLeft is the minimum amount of space that must remain available.
+			wantLeft := uint64(float64(total) * (1.0 - diskFillFraction))
+			if available <= wantLeft {
+				available = 0
+			}
+		}
 		zones[i] = zoneAvailableSpace{
 			Index:     i,
 			Available: available,
@@ -154,7 +180,7 @@
 
 // getZoneIdx returns the found previous object and its corresponding zone idx,
 // if none are found falls back to most available space zone.
-func (z *erasureZones) getZoneIdx(ctx context.Context, bucket, object string, opts ObjectOptions) (idx int, err error) {
+func (z *erasureZones) getZoneIdx(ctx context.Context, bucket, object string, opts ObjectOptions, size int64) (idx int, err error) {
 	if z.SingleZone() {
 		return 0, nil
 	}
@@ -179,7 +205,13 @@
 		// Success case and when DeleteMarker is true return.
 		return i, nil
 	}
-	return z.getAvailableZoneIdx(ctx), nil
+
+	// We multiply the size by 2 to account for erasure coding.
+	idx = z.getAvailableZoneIdx(ctx, size*2)
+	if idx < 0 {
+		return -1, toObjectErr(errDiskFull)
+	}
+	return idx, nil
 }
 
 func (z *erasureZones) Shutdown(ctx context.Context) error {
@@ -508,7 +540,7 @@ func (z *erasureZones) PutObject(ctx context.Context, bucket string, object stri
 		return z.zones[0].PutObject(ctx, bucket, object, data, opts)
 	}
 
-	idx, err := z.getZoneIdx(ctx, bucket, object, opts)
+	idx, err := z.getZoneIdx(ctx, bucket, object, opts, data.Size())
 	if err != nil {
 		return ObjectInfo{}, err
 	}
@@ -590,7 +622,7 @@ func (z *erasureZones) CopyObject(ctx context.Context, srcBucket, srcObject, dst
 		return z.zones[0].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 	}
 
-	zoneIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts)
+	zoneIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts, srcInfo.Size)
 	if err != nil {
 		return objInfo, err
 	}
@@ -1310,7 +1342,8 @@ func (z *erasureZones) NewMultipartUpload(ctx context.Context, bucket, object st
 		return z.zones[0].NewMultipartUpload(ctx, bucket, object, opts)
 	}
 
-	idx, err := z.getZoneIdx(ctx, bucket, object, opts)
+	// We don't know the exact size, so we ask for enough space for at least a 1GiB file.
+	idx, err := z.getZoneIdx(ctx, bucket, object, opts, 1<<30)
 	if err != nil {
 		return "", err
 	}
diff --git a/cmd/globals.go b/cmd/globals.go
index 585380222..6fa21d237 100644
--- a/cmd/globals.go
+++ b/cmd/globals.go
@@ -96,6 +96,9 @@ const (
 
 	// Maximum size of default bucket encryption configuration allowed
 	maxBucketSSEConfigSize = 1 * humanize.MiByte
+
+	// diskFillFraction is the fraction of a disk we allow to be filled.
+	diskFillFraction = 0.95
 )
 
 var globalCLIContext = struct {
diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go
index eca8f6a95..f4e7bb816 100644
--- a/cmd/xl-storage.go
+++ b/cmd/xl-storage.go
@@ -288,7 +288,7 @@ var ignoreDiskFreeOS = []string{
 func checkDiskMinTotal(di disk.Info) (err error) {
 	// Remove 5% from total space for cumulative disk space
 	// used for journalling, inodes etc.
-	totalDiskSpace := float64(di.Total) * 0.95
+	totalDiskSpace := float64(di.Total) * diskFillFraction
 	if int64(totalDiskSpace) <= diskMinTotalSpace {
 		return errMinDiskSize
 	}
@@ -298,7 +298,7 @@
 // check if disk free has minimum required size.
 func checkDiskMinFree(di disk.Info) error {
 	// Remove 5% from free space for cumulative disk space used for journalling, inodes etc.
-	availableDiskSpace := float64(di.Free) * 0.95
+	availableDiskSpace := float64(di.Free) * diskFillFraction
 	if int64(availableDiskSpace) <= diskMinFreeSpace {
 		return errDiskFull
 	}
@@ -328,7 +328,7 @@ func checkDiskFree(diskPath string, neededSpace int64) (err error) {
 	}
 
 	// Check if we have enough space to store data
-	if neededSpace > int64(float64(di.Free)*0.95) {
+	if neededSpace > int64(float64(di.Free)*diskFillFraction) {
 		return errDiskFull
 	}
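
For reference, the selection behavior this patch introduces can be read as
the standalone Go sketch below. The names zoneSpace, usableAfter and
pickZone are hypothetical stand-ins for illustration, not MinIO code: a
zone that cannot hold the object while keeping the diskFillFraction
headroom gets a weight of zero, and the remaining zones are picked at
random with probability proportional to the space each would have left
after the upload.

package main

import (
	"fmt"
	"math/rand"
)

// diskFillFraction mirrors the constant added to cmd/globals.go:
// the fraction of a disk we allow to be filled.
const diskFillFraction = 0.95

// zoneSpace is a simplified, hypothetical stand-in for the per-zone
// totals that getZonesAvailableSpace aggregates across disks.
type zoneSpace struct {
	Index     int
	Available uint64 // free bytes in the zone
	Total     uint64 // total bytes in the zone
}

// usableAfter mirrors the headroom check added to getZonesAvailableSpace:
// it returns the bytes left after storing size bytes, or 0 if storing the
// object would push the zone past diskFillFraction.
func usableAfter(z zoneSpace, size uint64) uint64 {
	if z.Available < size {
		return 0
	}
	left := z.Available - size
	// wantLeft is the minimum amount of space that must remain available.
	wantLeft := uint64(float64(z.Total) * (1.0 - diskFillFraction))
	if left <= wantLeft {
		return 0
	}
	return left
}

// pickZone mirrors the weighted random choice in getAvailableZoneIdx.
// It returns -1 when no zone can hold the object, which callers map
// to errDiskFull.
func pickZone(zones []zoneSpace, size uint64) int {
	weights := make([]uint64, len(zones))
	var total uint64
	for i, z := range zones {
		weights[i] = usableAfter(z, size)
		total += weights[i]
	}
	if total == 0 {
		return -1
	}
	choose := rand.Uint64() % total
	atTotal := uint64(0)
	for i, w := range weights {
		atTotal += w
		if atTotal > choose && w > 0 {
			return zones[i].Index
		}
	}
	return -1 // unreachable while total > 0
}

func main() {
	zones := []zoneSpace{
		{Index: 0, Available: 10 << 30, Total: 100 << 30}, // fairly full
		{Index: 1, Available: 80 << 30, Total: 100 << 30}, // mostly empty
	}
	// getZoneIdx doubles the size to account for erasure-coding
	// overhead; do the same for a 1GiB object here.
	fmt.Println("picked zone:", pickZone(zones, 2*(1<<30)))
}

With these example numbers the weights work out to 8GiB and 78GiB, so the
emptier zone is picked roughly 90% of the time. That is the "disadvantage
fuller zones" effect described in the commit message: the nearly full zone
still receives occasional writes rather than none at all, so load is not
funneled entirely to one part of the cluster.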