diff --git a/cmd/auth-handler.go b/cmd/auth-handler.go index 570b03a98..260699a84 100644 --- a/cmd/auth-handler.go +++ b/cmd/auth-handler.go @@ -459,8 +459,7 @@ func isReqAuthenticated(ctx context.Context, r *http.Request, region string, sty // Verify 'Content-Md5' and/or 'X-Amz-Content-Sha256' if present. // The verification happens implicit during reading. - reader, err := hash.NewReader(r.Body, -1, hex.EncodeToString(contentMD5), - hex.EncodeToString(contentSHA256), -1, globalCLIContext.StrictS3Compat) + reader, err := hash.NewReader(r.Body, -1, hex.EncodeToString(contentMD5), hex.EncodeToString(contentSHA256), -1) if err != nil { return toAPIErrorCode(ctx, err) } diff --git a/cmd/bucket-handlers.go b/cmd/bucket-handlers.go index 649f67b8f..7ef1948e3 100644 --- a/cmd/bucket-handlers.go +++ b/cmd/bucket-handlers.go @@ -917,7 +917,7 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h return } - hashReader, err := hash.NewReader(fileBody, fileSize, "", "", fileSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(fileBody, fileSize, "", "", fileSize) if err != nil { logger.LogIf(ctx, err) writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) @@ -964,7 +964,7 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h } info := ObjectInfo{Size: fileSize} // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", fileSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", fileSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return diff --git a/cmd/bucket-lifecycle.go b/cmd/bucket-lifecycle.go index 788fe8235..db74658b0 100644 --- a/cmd/bucket-lifecycle.go +++ b/cmd/bucket-lifecycle.go @@ -692,7 +692,7 @@ func restoreTransitionedObject(ctx context.Context, bucket, object string, objAP return err } defer gr.Close() - hashReader, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size) if err != nil { return err } diff --git a/cmd/config-common.go b/cmd/config-common.go index e8fa13f1d..b2cf3b624 100644 --- a/cmd/config-common.go +++ b/cmd/config-common.go @@ -64,7 +64,7 @@ func deleteConfig(ctx context.Context, objAPI objectDeleter, configFile string) } func saveConfig(ctx context.Context, objAPI ObjectLayer, configFile string, data []byte) error { - hashReader, err := hash.NewReader(bytes.NewReader(data), int64(len(data)), "", getSHA256Hash(data), int64(len(data)), globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(bytes.NewReader(data), int64(len(data)), "", getSHA256Hash(data), int64(len(data))) if err != nil { return err } diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go index 953d837ba..848ef317b 100644 --- a/cmd/data-scanner.go +++ b/cmd/data-scanner.go @@ -128,7 +128,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) { nextBloomCycle++ var tmp [8]byte binary.LittleEndian.PutUint64(tmp[:], nextBloomCycle) - r, err := hash.NewReader(bytes.NewReader(tmp[:]), int64(len(tmp)), "", "", int64(len(tmp)), false) + r, err := hash.NewReader(bytes.NewReader(tmp[:]), int64(len(tmp)), "", "", int64(len(tmp))) if err != nil { logger.LogIf(ctx, err) continue diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go index 664e95f80..b0a854b1a 100644 --- a/cmd/data-usage-cache.go +++ 
b/cmd/data-usage-cache.go @@ -513,7 +513,7 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string) }() defer pr.Close() - r, err := hash.NewReader(pr, -1, "", "", -1, false) + r, err := hash.NewReader(pr, -1, "", "", -1) if err != nil { return err } diff --git a/cmd/data-usage.go b/cmd/data-usage.go index d4464829f..b08f191d3 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -47,7 +47,7 @@ func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, dui <-chan continue } size := int64(len(dataUsageJSON)) - r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false) + r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size) if err != nil { logger.LogIf(ctx, err) continue diff --git a/cmd/disk-cache.go b/cmd/disk-cache.go index 049de51b1..58681dbfc 100644 --- a/cmd/disk-cache.go +++ b/cmd/disk-cache.go @@ -705,7 +705,7 @@ func (c *cacheObjects) uploadObject(ctx context.Context, oi ObjectInfo) { if st == CommitComplete || st.String() == "" { return } - hashReader, err := hash.NewReader(cReader, oi.Size, "", "", oi.Size, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(cReader, oi.Size, "", "", oi.Size) if err != nil { return } diff --git a/cmd/gateway/s3/gateway-s3-metadata.go b/cmd/gateway/s3/gateway-s3-metadata.go index 61e547a45..a4f0ff4ec 100644 --- a/cmd/gateway/s3/gateway-s3-metadata.go +++ b/cmd/gateway/s3/gateway-s3-metadata.go @@ -168,7 +168,7 @@ func getGWMetadata(ctx context.Context, bucket, prefix string, gwMeta gwMetaV1) logger.LogIf(ctx, err) return nil, err } - hashReader, err := hash.NewReader(bytes.NewReader(metadataBytes), int64(len(metadataBytes)), "", "", int64(len(metadataBytes)), false) + hashReader, err := hash.NewReader(bytes.NewReader(metadataBytes), int64(len(metadataBytes)), "", "", int64(len(metadataBytes))) if err != nil { return nil, err } diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go index 77fc55b42..44d00ae2a 100644 --- a/cmd/metacache-bucket.go +++ b/cmd/metacache-bucket.go @@ -176,7 +176,7 @@ func (b *bucketMetacache) save(ctx context.Context) error { b.updated = false b.mu.Unlock() - hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len()), false) + hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len())) if err != nil { return err } diff --git a/cmd/metacache-set.go b/cmd/metacache-set.go index 82bb3220c..4f42e42cd 100644 --- a/cmd/metacache-set.go +++ b/cmd/metacache-set.go @@ -672,7 +672,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr return nil } o.debugln(color.Green("listPath:")+" saving block", b.n, "to", o.objectPath(b.n)) - r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false) + r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data))) logger.LogIf(ctx, err) custom := b.headerKV() _, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r), ObjectOptions{ diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index dc5b45118..115c92c9b 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -48,6 +48,7 @@ import ( objectlock "github.com/minio/minio/pkg/bucket/object/lock" "github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/replication" + "github.com/minio/minio/pkg/etag" "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/handlers" "github.com/minio/minio/pkg/hash" @@ -1025,7 +1026,7 @@ 
func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re srcInfo.metadataOnly = false } - var reader io.Reader + var reader io.Reader = gr // Set the actual size to the compressed/decrypted size if encrypted. actualSize, err := srcInfo.GetActualSize() @@ -1057,15 +1058,15 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re // avoid copying them in target object. crypto.RemoveInternalEntries(srcInfo.UserDefined) - s2c := newS2CompressReader(gr, actualSize) + s2c := newS2CompressReader(reader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, reader) length = -1 } else { reader = gr } - srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1162,11 +1163,13 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re } if isTargetEncrypted { - reader, objEncKey, err = newEncryptReader(srcInfo.Reader, newKey, dstBucket, dstObject, encMetadata, sseS3) + var encReader io.Reader + encReader, objEncKey, err = newEncryptReader(srcInfo.Reader, newKey, dstBucket, dstObject, encMetadata, sseS3) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return } + reader = etag.Wrap(encReader, srcInfo.Reader) } if isSourceEncrypted { @@ -1176,7 +1179,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re } // do not try to verify encrypted content - srcInfo.Reader, err = hash.NewReader(reader, targetSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, targetSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1457,13 +1460,12 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } var ( - md5hex = hex.EncodeToString(md5Bytes) - sha256hex = "" - reader io.Reader + md5hex = hex.EncodeToString(md5Bytes) + sha256hex = "" + reader io.Reader = r.Body s3Err APIErrorCode putObject = objectAPI.PutObject ) - reader = r.Body // Check if put is allowed if s3Err = isPutActionAllowed(ctx, rAuthType, bucket, object, r, iampolicy.PutObjectAction); s3Err != ErrNone { @@ -1509,13 +1511,12 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } actualSize := size - if objectAPI.IsCompressionSupported() && isCompressible(r.Header, object) && size > 0 { // Storing the compression metadata. metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2 metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(size, 10) - actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1524,13 +1525,13 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req // Set compression metrics. s2c := newS2CompressReader(actualReader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, actualReader) size = -1 // Since compressed size is un-predictable. md5hex = "" // Do not try to verify the content. 
sha256hex = "" } - hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1601,7 +1602,7 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, wantSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(etag.Wrap(reader, hashReader), wantSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1999,8 +2000,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt return } - partInfo, err := core.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, - gr, length, "", "", dstOpts.ServerSideEncryption) + partInfo, err := core.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, gr, length, "", "", dstOpts.ServerSideEncryption) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2015,7 +2015,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt } actualPartSize = length - var reader io.Reader + var reader io.Reader = etag.NewReader(gr, nil) mi, err := objectAPI.GetMultipartInfo(ctx, dstBucket, dstObject, uploadID, dstOpts) if err != nil { @@ -2027,15 +2027,13 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt _, isCompressed := mi.UserDefined[ReservedMetadataPrefix+"compression"] // Compress only if the compression is enabled during initial multipart. 
if isCompressed { - s2c := newS2CompressReader(gr, actualPartSize) + s2c := newS2CompressReader(reader, actualPartSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, reader) length = -1 - } else { - reader = gr } - srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualPartSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualPartSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2077,11 +2075,12 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt copy(objectEncryptionKey[:], key) partEncryptionKey := objectEncryptionKey.DerivePartKey(uint32(partID)) - reader, err = sio.EncryptReader(reader, sio.Config{Key: partEncryptionKey[:]}) + encReader, err := sio.EncryptReader(reader, sio.Config{Key: partEncryptionKey[:]}) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return } + reader = etag.Wrap(encReader, reader) wantSize := int64(-1) if length >= 0 { @@ -2089,7 +2088,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt wantSize = info.EncryptedSize() } - srcInfo.Reader, err = hash.NewReader(reader, wantSize, "", "", actualPartSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, wantSize, "", "", actualPartSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2209,12 +2208,11 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http } var ( - md5hex = hex.EncodeToString(md5Bytes) - sha256hex = "" - reader io.Reader + md5hex = hex.EncodeToString(md5Bytes) + sha256hex = "" + reader io.Reader = r.Body s3Error APIErrorCode ) - reader = r.Body if s3Error = isPutActionAllowed(ctx, rAuthType, bucket, object, r, iampolicy.PutObjectAction); s3Error != ErrNone { writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL, guessIsBrowserReq(r)) return @@ -2271,7 +2269,7 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http _, isCompressed := mi.UserDefined[ReservedMetadataPrefix+"compression"] if objectAPI.IsCompressionSupported() && isCompressed { - actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2280,13 +2278,13 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http // Set compression metrics. s2c := newS2CompressReader(actualReader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, actualReader) size = -1 // Since compressed size is un-predictable. md5hex = "" // Do not try to verify the content. 
sha256hex = "" } - hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2343,7 +2341,7 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http wantSize = info.EncryptedSize() } // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, wantSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(etag.Wrap(reader, hashReader), wantSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go index 623f80956..6e90e5ca0 100644 --- a/cmd/test-utils_test.go +++ b/cmd/test-utils_test.go @@ -160,7 +160,7 @@ func calculateSignedChunkLength(chunkDataSize int64) int64 { } func mustGetPutObjReader(t TestErrHandler, data io.Reader, size int64, md5hex, sha256hex string) *PutObjReader { - hr, err := hash.NewReader(data, size, md5hex, sha256hex, size, globalCLIContext.StrictS3Compat) + hr, err := hash.NewReader(data, size, md5hex, sha256hex, size) if err != nil { t.Fatal(err) } diff --git a/cmd/web-handlers.go b/cmd/web-handlers.go index e7004814c..bab5ce6c4 100644 --- a/cmd/web-handlers.go +++ b/cmd/web-handlers.go @@ -50,6 +50,7 @@ import ( objectlock "github.com/minio/minio/pkg/bucket/object/lock" "github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/replication" + "github.com/minio/minio/pkg/etag" "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/handlers" "github.com/minio/minio/pkg/hash" @@ -1233,7 +1234,7 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { var reader io.Reader = r.Body actualSize := size - hashReader, err := hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, "", "", actualSize) if err != nil { writeWebErrorResponse(w, err) return @@ -1244,7 +1245,7 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2 metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(actualSize, 10) - actualReader, err := hash.NewReader(reader, actualSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, actualSize, "", "", actualSize) if err != nil { writeWebErrorResponse(w, err) return @@ -1254,8 +1255,8 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { size = -1 // Since compressed size is un-predictable. 
 			s2c := newS2CompressReader(actualReader, actualSize)
 			defer s2c.Close()
-			reader = s2c
-			hashReader, err = hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat)
+			reader = etag.Wrap(s2c, actualReader)
+			hashReader, err = hash.NewReader(reader, size, "", "", actualSize)
 			if err != nil {
 				writeWebErrorResponse(w, err)
 				return
@@ -1276,15 +1277,18 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) {
 	if objectAPI.IsEncryptionSupported() {
 		if _, ok := crypto.IsRequested(r.Header); ok && !HasSuffix(object, SlashSeparator) { // handle SSE requests
-			var objectEncryptionKey crypto.ObjectKey
-			reader, objectEncryptionKey, err = EncryptRequest(hashReader, r, bucket, object, metadata)
+			var (
+				objectEncryptionKey crypto.ObjectKey
+				encReader           io.Reader
+			)
+			encReader, objectEncryptionKey, err = EncryptRequest(hashReader, r, bucket, object, metadata)
 			if err != nil {
 				writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r))
 				return
 			}
 			info := ObjectInfo{Size: size}
 
 			// do not try to verify encrypted content
-			hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", size, globalCLIContext.StrictS3Compat)
+			hashReader, err = hash.NewReader(etag.Wrap(encReader, hashReader), info.EncryptedSize(), "", "", size)
 			if err != nil {
 				writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r))
 				return
diff --git a/pkg/etag/etag.go b/pkg/etag/etag.go
new file mode 100644
index 000000000..8c8085afe
--- /dev/null
+++ b/pkg/etag/etag.go
@@ -0,0 +1,284 @@
+// MinIO Cloud Storage, (C) 2021 MinIO, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package etag provides an implementation of S3 ETags.
+//
+// Each S3 object has an associated ETag that can be
+// used to e.g. quickly compare objects or check whether
+// the content of an object has changed.
+//
+// In general, an S3 ETag is an MD5 checksum of the object
+// content. However, there are many exceptions to this rule.
+//
+//
+// Single-part Upload
+//
+// In case of a basic single-part PUT operation - without server
+// side encryption or object compression - the ETag of an object
+// is its content MD5.
+//
+//
+// Multi-part Upload
+//
+// The ETag of an object does not correspond to its content MD5
+// when the object is uploaded in multiple parts via the S3
+// multipart API. Instead, S3 first computes an MD5 of each part:
+//   e1 := MD5(part-1)
+//   e2 := MD5(part-2)
+//   ...
+//   eN := MD5(part-N)
+//
+// Then, the ETag of the object is computed as the MD5 of all
+// individual part checksums. S3 also encodes the number of parts
+// into the ETag by appending a -N at the end:
+//   ETag := MD5(e1 || e2 || e3 ... || eN) || -N
+//
+// For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
+//
+// However, this scheme is only used for multipart objects that are
+// not encrypted.
+//
+// Server-side Encryption
+//
+// S3 specifies three types of server-side encryption - SSE-C, SSE-S3
+// and SSE-KMS - with different semantics w.r.t. ETags.
+// In case of SSE-S3, the ETag of an object is computed the same
+// way as for singlepart resp. multipart plaintext objects. In
+// particular, the ETag of a singlepart SSE-S3 object is its
+// content MD5.
+//
+// In case of SSE-C and SSE-KMS, the ETag of an object is computed
+// differently. For singlepart uploads the ETag is not the content
+// MD5 of the object. For multipart uploads the ETag is also not
+// the MD5 of the individual part checksums but it still contains
+// the number of parts as a suffix.
+//
+// Instead, the ETag is effectively unpredictable for S3 clients
+// when an object is encrypted using SSE-C or SSE-KMS. AWS S3 may
+// compute the ETag as the MD5 of the encrypted content but there
+// is no way to verify this assumption since the encryption happens
+// inside AWS S3.
+// Therefore, S3 clients must not make any assumption about ETags
+// in case of SSE-C or SSE-KMS except that the ETag is well-formed.
+//
+// To put all of this into a simple rule:
+//   SSE-S3 : ETag == MD5
+//   SSE-C  : ETag != MD5
+//   SSE-KMS: ETag != MD5
+//
+//
+// Encrypted ETags
+//
+// An S3 implementation has to remember the content MD5 of objects
+// in case of SSE-S3. However, storing the ETag of an encrypted
+// object in plaintext may reveal some information about the object.
+// For example, two objects with the same ETag are identical with
+// a very high probability.
+//
+// Therefore, an S3 implementation may encrypt an ETag before storing
+// it. In this case, the stored ETag may not be a well-formed S3 ETag.
+// For example, it can be larger due to a checksum added by authenticated
+// encryption schemes. Such an ETag must be decrypted before being sent
+// to an S3 client.
+//
+//
+// S3 Clients
+//
+// There are many different S3 client implementations. Most of them
+// access the ETag by looking for the HTTP response header key "Etag".
+// However, some of them assume that the header key has to be "ETag"
+// (case-sensitive) and will fail otherwise.
+// Further, some clients require that the ETag value is a double-quoted
+// string. Therefore, this package provides dedicated functions for
+// adding and extracting the ETag to/from HTTP headers.
+package etag
+
+import (
+	"bytes"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"net/http"
+	"strconv"
+	"strings"
+)
+
+// ETag is a single S3 ETag.
+//
+// An S3 ETag sometimes corresponds to the MD5 of
+// the S3 object content. However, when an object
+// is encrypted, compressed or uploaded using
+// the S3 multipart API then its ETag is not
+// necessarily the MD5 of the object content.
+//
+// For a more detailed description of S3 ETags
+// take a look at the package documentation.
+type ETag []byte
+
+// String returns the string representation of the ETag.
+//
+// The returned string is a hex representation of the
+// binary ETag with an optional '-<part-number>' suffix.
+func (e ETag) String() string {
+	if e.IsMultipart() {
+		return hex.EncodeToString(e[:16]) + string(e[16:])
+	}
+	return hex.EncodeToString(e)
+}
+
+// IsEncrypted reports whether the ETag is encrypted.
+func (e ETag) IsEncrypted() bool {
+	return len(e) > 16 && !bytes.ContainsRune(e, '-')
+}
+
+// IsMultipart reports whether the ETag belongs to an
+// object that has been uploaded using the S3 multipart
+// API.
+// An S3 multipart ETag has a '-<part-number>' suffix.
+func (e ETag) IsMultipart() bool {
+	return len(e) > 16 && bytes.ContainsRune(e, '-')
+}
+
+// Parts returns the number of object parts that are
+// referenced by this ETag. It returns 1 if the object
+// has been uploaded using the S3 singlepart API.
+//
+// Parts may panic if the ETag is an invalid multipart
+// ETag.
+func (e ETag) Parts() int {
+	if !e.IsMultipart() {
+		return 1
+	}
+
+	n := bytes.IndexRune(e, '-')
+	parts, err := strconv.Atoi(string(e[n+1:]))
+	if err != nil {
+		panic(err) // malformed ETag
+	}
+	return parts
+}
+
+var _ Tagger = ETag{} // compiler check
+
+// ETag returns the ETag itself.
+//
+// By providing this method ETag implements
+// the Tagger interface.
+func (e ETag) ETag() ETag { return e }
+
+// Set adds the ETag to the HTTP headers. It overwrites any
+// existing ETag entry.
+//
+// Due to legacy S3 clients that make incorrect assumptions
+// about HTTP headers, Set should be used instead of
+// http.Header.Set(...). Otherwise, some S3 clients will not
+// be able to extract the ETag.
+func Set(etag ETag, h http.Header) {
+	// Some (broken) S3 clients expect the ETag header key to
+	// be literally "ETag" - not "Etag". Further, some clients
+	// expect an ETag in double quotes. Therefore, we set the
+	// ETag directly as a map entry instead of using http.Header.Set
+	h["ETag"] = []string{`"` + etag.String() + `"`}
+}
+
+// Get extracts and parses an ETag from the given HTTP headers.
+// It returns an error when the HTTP headers do not contain
+// an ETag entry or when the ETag is malformed.
+//
+// Get only accepts AWS S3 compatible ETags - i.e. no
+// encrypted ETags - and therefore is stricter than Parse.
+func Get(h http.Header) (ETag, error) {
+	const strict = true
+	if v := h.Get("Etag"); v != "" {
+		return parse(v, strict)
+	}
+	v, ok := h["ETag"]
+	if !ok || len(v) == 0 {
+		return nil, errors.New("etag: HTTP header does not contain an ETag")
+	}
+	return parse(v[0], strict)
+}
+
+// Equal returns true if and only if the two ETags are
+// identical.
+func Equal(a, b ETag) bool { return bytes.Equal(a, b) }
+
+// Parse parses s as an S3 ETag, returning the result.
+// The string can be an encrypted, singlepart
+// or multipart S3 ETag. It returns an error if s is
+// not a valid textual representation of an ETag.
+func Parse(s string) (ETag, error) {
+	const strict = false
+	return parse(s, strict)
+}
+
+// parse parses s as an S3 ETag, returning the result.
+// It operates in one of two modes:
+//  - strict
+//  - non-strict
+//
+// In strict mode, parse only accepts ETags that
+// are AWS S3 compatible. In particular, an AWS
+// S3 ETag always consists of a 128 bit checksum
+// value and an optional '-<part-number>' suffix.
+// Therefore, s must have the following form in
+// strict mode: <32-hex-characters>[-<part-number>]
+//
+// In non-strict mode, parse also accepts ETags
+// that are not AWS S3 compatible - e.g. encrypted
+// ETags.
+func parse(s string, strict bool) (ETag, error) {
+	// An S3 ETag may be a double-quoted string.
+	// Therefore, we remove double quotes at the
+	// start and end, if any.
+	if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) {
+		s = s[1 : len(s)-1]
+	}
+
+	// An S3 ETag may be a multipart ETag that
+	// contains a '-' followed by a number.
+	// If the ETag does not contain a '-' it is
+	// either a singlepart or encrypted ETag.
+ n := strings.IndexRune(s, '-') + if n == -1 { + etag, err := hex.DecodeString(s) + if err != nil { + return nil, err + } + if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long + return nil, fmt.Errorf("etag: invalid length %d", len(etag)) + } + return ETag(etag), nil + } + + prefix, suffix := s[:n], s[n:] + if len(prefix) != 32 { + return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix)) + } + if len(suffix) <= 1 { + return nil, errors.New("etag: suffix is not a part number") + } + + etag, err := hex.DecodeString(prefix) + if err != nil { + return nil, err + } + partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1] + if err != nil { + return nil, err + } + if strict && (partNumber == 0 || partNumber > 10000) { + return nil, fmt.Errorf("etag: invalid part number %d", partNumber) + } + return ETag(append(etag, suffix...)), nil +} diff --git a/pkg/etag/etag_test.go b/pkg/etag/etag_test.go new file mode 100644 index 000000000..281584016 --- /dev/null +++ b/pkg/etag/etag_test.go @@ -0,0 +1,139 @@ +// MinIO Cloud Storage, (C) 2021 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etag + +import ( + "io" + "io/ioutil" + "strings" + "testing" +) + +var parseTests = []struct { + String string + ETag ETag + ShouldFail bool +}{ + {String: "3b83ef96387f1465", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}}, // 0 + {String: "3b83ef96387f14655fc854ddc3c6bd57", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 1 + {String: `"3b83ef96387f14655fc854ddc3c6bd57"`, ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 2 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-1", ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}}, // 3 + {String: `"ceb8853ddc5086cc4ab9e149f8f09c88-2"`, ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}}, // 4 + { // 5 + String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941", + ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65}, + }, + + {String: `"3b83ef96387f14655fc854ddc3c6bd57`, ShouldFail: true}, // 6 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-", ShouldFail: true}, // 7 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-2a", ShouldFail: true}, // 8 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-2-1", ShouldFail: true}, // 9 + {String: "90402c78d2dccddee1e9e86222ce2c-1", ShouldFail: true}, // 10 + {String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941-1", ShouldFail: true}, // 11 +} + +func TestParse(t *testing.T) { + for i, test := range parseTests { + etag, err := Parse(test.String) + if err == nil && test.ShouldFail { + t.Fatalf("Test %d: 
parse should have failed but succeeded", i) + } + if err != nil && !test.ShouldFail { + t.Fatalf("Test %d: failed to parse ETag %q: %v", i, test.String, err) + } + if !Equal(etag, test.ETag) { + t.Log([]byte(etag)) + t.Fatalf("Test %d: ETags don't match", i) + } + } +} + +var stringTests = []struct { + ETag ETag + String string +}{ + {ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}, String: "3b83ef96387f1465"}, // 0 + {ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}, String: "3b83ef96387f14655fc854ddc3c6bd57"}, // 1 + {ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-1"}, // 2 + {ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-2"}, // 3 + { // 4 + ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65}, + String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941", + }, +} + +func TestString(t *testing.T) { + for i, test := range stringTests { + s := test.ETag.String() + if s != test.String { + t.Fatalf("Test %d: got %s - want %s", i, s, test.String) + } + } +} + +var equalTests = []struct { + A string + B string + Equal bool +}{ + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57", Equal: true}, // 0 + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: `"3b83ef96387f14655fc854ddc3c6bd57"`, Equal: true}, // 1 + + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57-2", Equal: false}, // 2 + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "ceb8853ddc5086cc4ab9e149f8f09c88", Equal: false}, // 3 +} + +func TestEqual(t *testing.T) { + for i, test := range equalTests { + A, err := Parse(test.A) + if err != nil { + t.Fatalf("Test %d: %v", i, err) + } + B, err := Parse(test.B) + if err != nil { + t.Fatalf("Test %d: %v", i, err) + } + if equal := Equal(A, B); equal != test.Equal { + t.Fatalf("Test %d: got %v - want %v", i, equal, test.Equal) + } + } +} + +var readerTests = []struct { // Reference values computed by: echo | md5sum + Content string + ETag ETag +}{ + { + Content: "", ETag: ETag{212, 29, 140, 217, 143, 0, 178, 4, 233, 128, 9, 152, 236, 248, 66, 126}, + }, + { + Content: " ", ETag: ETag{114, 21, 238, 156, 125, 157, 194, 41, 210, 146, 26, 64, 232, 153, 236, 95}, + }, + { + Content: "Hello World", ETag: ETag{177, 10, 141, 177, 100, 224, 117, 65, 5, 183, 169, 155, 231, 46, 63, 229}, + }, +} + +func TestReader(t *testing.T) { + for i, test := range readerTests { + reader := NewReader(strings.NewReader(test.Content), test.ETag) + if _, err := io.Copy(ioutil.Discard, reader); err != nil { + t.Fatalf("Test %d: read failed: %v", i, err) + } + if ETag := reader.ETag(); !Equal(ETag, test.ETag) { + t.Fatalf("Test %d: ETag mismatch: got %q - want %q", i, ETag, test.ETag) + } + } +} diff --git a/pkg/etag/reader.go b/pkg/etag/reader.go new file mode 100644 index 000000000..9ff80963b --- /dev/null +++ b/pkg/etag/reader.go @@ -0,0 +1,151 @@ +// MinIO Cloud Storage, (C) 2021 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etag
+
+import (
+	"crypto/md5"
+	"fmt"
+	"hash"
+	"io"
+)
+
+// Tagger is the interface that wraps the basic ETag method.
+type Tagger interface {
+	ETag() ETag
+}
+
+type wrapReader struct {
+	io.Reader
+	Tagger
+}
+
+// ETag returns the ETag of the underlying Tagger.
+func (r wrapReader) ETag() ETag {
+	if r.Tagger == nil {
+		return nil
+	}
+	return r.Tagger.ETag()
+}
+
+// Wrap returns an io.Reader that reads from the wrapped
+// io.Reader and implements the Tagger interface.
+//
+// If content implements Tagger then the returned reader
+// returns the ETag of the content. Otherwise, it returns
+// nil as ETag.
+//
+// Wrap provides an adapter for io.Reader implementations
+// that don't implement the Tagger interface.
+// It is mainly used to provide a high-level io.Reader
+// access to the ETag computed by a low-level io.Reader:
+//
+//   content := etag.NewReader(r.Body, nil)
+//
+//   compressedContent := Compress(content)
+//   encryptedContent := Encrypt(compressedContent)
+//
+//   // Now, we need an io.Reader that can access
+//   // the ETag computed over the content.
+//   reader := etag.Wrap(encryptedContent, content)
+//
+func Wrap(wrapped, content io.Reader) io.Reader {
+	if t, ok := content.(Tagger); ok {
+		return wrapReader{
+			Reader: wrapped,
+			Tagger: t,
+		}
+	}
+	return wrapReader{
+		Reader: wrapped,
+	}
+}
+
+// A Reader wraps an io.Reader and computes the
+// MD5 checksum of the read content as ETag.
+//
+// Optionally, a Reader can also verify that
+// the computed ETag matches an expected value.
+// Therefore, it compares both ETags once the
+// underlying io.Reader returns io.EOF.
+// If the computed ETag does not match the
+// expected ETag then Read returns a VerifyError.
+//
+// Reader implements the Tagger interface.
+type Reader struct {
+	src io.Reader
+
+	md5      hash.Hash
+	checksum ETag
+
+	readN int64
+}
+
+// NewReader returns a new Reader that computes the
+// MD5 checksum of the content read from r as ETag.
+//
+// If the provided etag is not nil the returned
+// Reader compares the etag with the computed
+// MD5 sum once r returns io.EOF.
+func NewReader(r io.Reader, etag ETag) *Reader {
+	if er, ok := r.(*Reader); ok {
+		if er.readN == 0 && Equal(etag, er.checksum) {
+			return er
+		}
+	}
+	return &Reader{
+		src:      r,
+		md5:      md5.New(),
+		checksum: etag,
+	}
+}
+
+// Read reads up to len(p) bytes from the underlying
+// io.Reader as specified by the io.Reader interface.
+func (r *Reader) Read(p []byte) (int, error) {
+	n, err := r.src.Read(p)
+	r.readN += int64(n)
+	r.md5.Write(p[:n])
+
+	if err == io.EOF && len(r.checksum) != 0 {
+		if etag := r.ETag(); !Equal(etag, r.checksum) {
+			return n, VerifyError{
+				Expected: r.checksum,
+				Computed: etag,
+			}
+		}
+	}
+	return n, err
+}
+
+// ETag returns the ETag of all the content read
+// so far. Reading more content changes the MD5
+// checksum. Therefore, calling ETag multiple
+// times may return different results.
+func (r *Reader) ETag() ETag {
+	sum := r.md5.Sum(nil)
+	return ETag(sum)
+}
+
+// VerifyError is an error signaling that a
+// computed ETag does not match an expected
+// ETag.
+type VerifyError struct {
+	Expected ETag
+	Computed ETag
+}
+
+func (v VerifyError) Error() string {
+	return fmt.Sprintf("etag: expected ETag %q does not match computed ETag %q", v.Expected, v.Computed)
+}
diff --git a/pkg/hash/reader.go b/pkg/hash/reader.go
index 8f3e5d7b1..ab9d84fdd 100644
--- a/pkg/hash/reader.go
+++ b/pkg/hash/reader.go
@@ -18,61 +18,141 @@ package hash
 
 import (
 	"bytes"
-	"crypto/md5"
 	"encoding/base64"
 	"encoding/hex"
 	"errors"
 	"hash"
 	"io"
 
+	"github.com/minio/minio/pkg/etag"
 	sha256 "github.com/minio/sha256-simd"
 )
 
-// Reader writes what it reads from an io.Reader to an MD5 and SHA256 hash.Hash.
-// Reader verifies that the content of the io.Reader matches the expected checksums.
+// A Reader wraps an io.Reader and computes the MD5 checksum
+// of the read content as ETag. Optionally, it also computes
+// the SHA256 checksum of the content.
+//
+// If the reference values for the ETag and content SHA256
+// are not empty then it checks whether the computed values
+// match the reference values.
 type Reader struct {
-	src        io.Reader
+	src       io.Reader
+	bytesRead int64
+
 	size       int64
 	actualSize int64
-	bytesRead  int64
 
-	md5sum, sha256sum   []byte // Byte values of md5sum, sha256sum of client sent values.
-	md5Hash, sha256Hash hash.Hash
+	checksum      etag.ETag
+	contentSHA256 []byte
+
+	sha256 hash.Hash
 }
 
-// NewReader returns a new hash Reader which computes the MD5 sum and
-// SHA256 sum (if set) of the provided io.Reader at EOF.
-func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) {
+// NewReader returns a new Reader that wraps src and computes
+// the MD5 checksum of everything it reads as ETag.
+//
+// It also computes the SHA256 checksum of everything it reads
+// if sha256Hex is not the empty string.
+//
+// If size resp. actualSize is unknown at the time of calling
+// NewReader then it should be set to -1.
+//
+// NewReader may try to merge the given size, MD5 and SHA256
+// values into src - if src is a Reader - to avoid computing
+// the same checksums multiple times.
+func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) {
+	MD5, err := hex.DecodeString(md5Hex)
+	if err != nil {
+		return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified
+			ExpectedMD5:   md5Hex,
+			CalculatedMD5: "",
+		}
+	}
+	SHA256, err := hex.DecodeString(sha256Hex)
+	if err != nil {
+		return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified
+			ExpectedSHA256:   sha256Hex,
+			CalculatedSHA256: "",
+		}
+	}
+
+	// Merge the size, MD5 and SHA256 values if src is a Reader.
+	// The size may be set to -1 by callers if unknown.
 	if r, ok := src.(*Reader); ok {
-		// Merge expectations and return parent.
- return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat) + if r.bytesRead > 0 { + return nil, errors.New("hash: already read from hash reader") + } + if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, etag.ETag(MD5)) { + return nil, BadDigest{ + ExpectedMD5: r.checksum.String(), + CalculatedMD5: md5Hex, + } + } + if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) { + return nil, SHA256Mismatch{ + ExpectedSHA256: hex.EncodeToString(r.contentSHA256), + CalculatedSHA256: sha256Hex, + } + } + if r.size >= 0 && size >= 0 && r.size != size { + return nil, ErrSizeMismatch{Want: r.size, Got: size} + } + + r.checksum = etag.ETag(MD5) + r.contentSHA256 = SHA256 + if r.size < 0 && size >= 0 { + r.src = etag.Wrap(io.LimitReader(r.src, size), r.src) + r.size = size + } + if r.actualSize <= 0 && actualSize >= 0 { + r.actualSize = actualSize + } + return r, nil } - // Create empty reader and merge into that. - r := Reader{src: src, size: -1, actualSize: -1} - return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat) + var hash hash.Hash + if size >= 0 { + src = io.LimitReader(src, size) + } + if len(SHA256) != 0 { + hash = sha256.New() + } + return &Reader{ + src: etag.NewReader(src, etag.ETag(MD5)), + size: size, + actualSize: actualSize, + checksum: etag.ETag(MD5), + contentSHA256: SHA256, + sha256: hash, + }, nil } -func (r *Reader) Read(p []byte) (n int, err error) { - n, err = r.src.Read(p) - if n > 0 { - if r.md5Hash != nil { - r.md5Hash.Write(p[:n]) - } - if r.sha256Hash != nil { - r.sha256Hash.Write(p[:n]) - } - } +func (r *Reader) Read(p []byte) (int, error) { + n, err := r.src.Read(p) r.bytesRead += int64(n) - - // At io.EOF verify if the checksums are right. - if err == io.EOF { - if cerr := r.verify(); cerr != nil { - return 0, cerr - } + if r.sha256 != nil { + r.sha256.Write(p[:n]) } - return + if err == io.EOF { // Verify content SHA256, if set. + if r.sha256 != nil { + if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) { + return n, SHA256Mismatch{ + ExpectedSHA256: hex.EncodeToString(r.contentSHA256), + CalculatedSHA256: hex.EncodeToString(sum), + } + } + } + } + if err != nil && err != io.EOF { + if v, ok := err.(etag.VerifyError); ok { + return n, BadDigest{ + ExpectedMD5: v.Expected.String(), + CalculatedMD5: v.Computed.String(), + } + } + } + return n, err } // Size returns the absolute number of bytes the Reader @@ -84,114 +164,58 @@ func (r *Reader) Size() int64 { return r.size } // DecompressedSize - For compressed objects. func (r *Reader) ActualSize() int64 { return r.actualSize } -// MD5 - returns byte md5 value +// ETag returns the ETag computed by an underlying etag.Tagger. +// If the underlying io.Reader does not implement etag.Tagger +// it returns nil. +func (r *Reader) ETag() etag.ETag { + if t, ok := r.src.(etag.Tagger); ok { + return t.ETag() + } + return nil +} + +// MD5 returns the MD5 checksum set as reference value. +// +// It corresponds to the checksum that is expected and +// not the actual MD5 checksum of the content. +// Therefore, refer to MD5Current. func (r *Reader) MD5() []byte { - return r.md5sum + return r.checksum } -// MD5Current - returns byte md5 value of the current state -// of the md5 hash after reading the incoming content. -// NOTE: Calling this function multiple times might yield -// different results if they are intermixed with Reader. +// MD5Current returns the MD5 checksum of the content +// that has been read so far. 
+//
+// Calling MD5Current again after reading more data may
+// result in a different checksum.
 func (r *Reader) MD5Current() []byte {
-	if r.md5Hash != nil {
-		return r.md5Hash.Sum(nil)
-	}
-	return nil
+	return r.ETag()[:]
 }
 
-// SHA256 - returns byte sha256 value
+// SHA256 returns the SHA256 checksum set as reference value.
+//
+// It corresponds to the checksum that is expected and
+// not the actual SHA256 checksum of the content.
 func (r *Reader) SHA256() []byte {
-	return r.sha256sum
+	return r.contentSHA256
 }
 
-// MD5HexString returns hex md5 value.
+// MD5HexString returns a hex representation of the MD5.
 func (r *Reader) MD5HexString() string {
-	return hex.EncodeToString(r.md5sum)
+	return hex.EncodeToString(r.checksum)
}
 
-// MD5Base64String returns base64 encoded MD5sum value.
+// MD5Base64String returns a base64 representation of the MD5.
 func (r *Reader) MD5Base64String() string {
-	return base64.StdEncoding.EncodeToString(r.md5sum)
+	return base64.StdEncoding.EncodeToString(r.checksum)
 }
 
-// SHA256HexString returns hex sha256 value.
+// SHA256HexString returns a hex representation of the SHA256.
 func (r *Reader) SHA256HexString() string {
-	return hex.EncodeToString(r.sha256sum)
+	return hex.EncodeToString(r.contentSHA256)
 }
 
-// verify verifies if the computed MD5 sum and SHA256 sum are
-// equal to the ones specified when creating the Reader.
-func (r *Reader) verify() error {
-	if r.sha256Hash != nil && len(r.sha256sum) > 0 {
-		if sum := r.sha256Hash.Sum(nil); !bytes.Equal(r.sha256sum, sum) {
-			return SHA256Mismatch{hex.EncodeToString(r.sha256sum), hex.EncodeToString(sum)}
-		}
-	}
-	if r.md5Hash != nil && len(r.md5sum) > 0 {
-		if sum := r.md5Hash.Sum(nil); !bytes.Equal(r.md5sum, sum) {
-			return BadDigest{hex.EncodeToString(r.md5sum), hex.EncodeToString(sum)}
-		}
-	}
-	return nil
-}
-
-// merge another hash into this one.
-// There cannot be conflicting information given.
-func (r *Reader) merge(size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) {
-	if r.bytesRead > 0 {
-		return nil, errors.New("internal error: Already read from hash reader")
-	}
-	// Merge sizes.
-	// If not set before, just add it.
-	if r.size < 0 && size >= 0 {
-		r.src = io.LimitReader(r.src, size)
-		r.size = size
-	}
-	// If set before and set now they must match.
-	if r.size >= 0 && size >= 0 && r.size != size {
-		return nil, ErrSizeMismatch{Want: r.size, Got: size}
-	}
-
-	if r.actualSize <= 0 && actualSize >= 0 {
-		r.actualSize = actualSize
-	}
-
-	// Merge SHA256.
-	sha256sum, err := hex.DecodeString(sha256Hex)
-	if err != nil {
-		return nil, SHA256Mismatch{}
-	}
-
-	// If both are set, they must be the same.
-	if r.sha256Hash != nil && len(sha256sum) > 0 {
-		if !bytes.Equal(r.sha256sum, sha256sum) {
-			return nil, SHA256Mismatch{}
-		}
-	} else if len(sha256sum) > 0 {
-		r.sha256Hash = sha256.New()
-		r.sha256sum = sha256sum
-	}
-
-	// Merge MD5 Sum.
-	md5sum, err := hex.DecodeString(md5Hex)
-	if err != nil {
-		return nil, BadDigest{}
-	}
-	// If both are set, they must expect the same.
-	if r.md5Hash != nil && len(md5sum) > 0 {
-		if !bytes.Equal(r.md5sum, md5sum) {
-			return nil, BadDigest{}
-		}
-	} else if len(md5sum) > 0 || (r.md5Hash == nil && strictCompat) {
-		r.md5Hash = md5.New()
-		r.md5sum = md5sum
-	}
-	return r, nil
-}
+
+var _ io.Closer = (*Reader)(nil) // compiler check
 
 // Close and release resources.
-func (r *Reader) Close() error {
-	// Support the io.Closer interface.
- return nil -} +func (r *Reader) Close() error { return nil } diff --git a/pkg/hash/reader_test.go b/pkg/hash/reader_test.go index 1942b377a..14d3e6c93 100644 --- a/pkg/hash/reader_test.go +++ b/pkg/hash/reader_test.go @@ -27,7 +27,7 @@ import ( // Tests functions like Size(), MD5*(), SHA256*() func TestHashReaderHelperMethods(t *testing.T) { - r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false) + r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4) if err != nil { t.Fatal(err) } @@ -109,13 +109,13 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Nested hash reader NewReader() should merge.", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, }, { desc: "Incorrect sha256, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", @@ -126,28 +126,28 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct sha256, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Correct sha256, nested, truncated", - src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4), size: 4, actualSize: -1, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Correct sha256, nested, truncated, swapped", - src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1), size: 4, actualSize: -1, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Incorrect MD5, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, md5hex: "0773da587b322af3a8718cb418a715ce", @@ -165,7 +165,7 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct MD5, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -180,7 +180,7 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct MD5, nested, truncated", - src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -188,7 +188,7 @@ func TestHashReaderVerification(t *testing.T) { } for i, testCase := range testCases { t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) { - r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, false) + 
r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize) if err != nil { t.Fatalf("Test %q: Initializing reader failed %s", testCase.desc, err) } @@ -202,8 +202,8 @@ func TestHashReaderVerification(t *testing.T) { } } -func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) *Reader { - r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize, strictCompat) +func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) *Reader { + r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize) if err != nil { t.Fatal(err) } @@ -219,63 +219,57 @@ func TestHashReaderInvalidArguments(t *testing.T) { actualSize int64 md5hex, sha256hex string success bool - expectedErr error - strict bool }{ { - desc: "Invalid md5sum NewReader() will fail.", - src: bytes.NewReader([]byte("abcd")), - size: 4, - actualSize: 4, - md5hex: "invalid-md5", - success: false, - expectedErr: BadDigest{}, + desc: "Invalid md5sum NewReader() will fail.", + src: bytes.NewReader([]byte("abcd")), + size: 4, + actualSize: 4, + md5hex: "invalid-md5", + success: false, }, { - desc: "Invalid sha256 NewReader() will fail.", - src: bytes.NewReader([]byte("abcd")), - size: 4, - actualSize: 4, - sha256hex: "invalid-sha256", - success: false, - expectedErr: SHA256Mismatch{}, + desc: "Invalid sha256 NewReader() will fail.", + src: bytes.NewReader([]byte("abcd")), + size: 4, + actualSize: 4, + sha256hex: "invalid-sha256", + success: false, }, { desc: "Nested hash reader NewReader() should merge.", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, success: true, }, { - desc: "Mismatching sha256", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false), - size: 4, - actualSize: 4, - sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", - success: false, - expectedErr: SHA256Mismatch{}, + desc: "Mismatching sha256", + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4), + size: 4, + actualSize: 4, + sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", + success: false, }, { desc: "Correct sha256", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4), size: 4, actualSize: 4, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", success: true, }, { - desc: "Mismatching MD5", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false), - size: 4, - actualSize: 4, - md5hex: "0773da587b322af3a8718cb418a715ce", - success: false, - expectedErr: BadDigest{}, + desc: "Mismatching MD5", + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4), + size: 4, + actualSize: 4, + md5hex: "0773da587b322af3a8718cb418a715ce", + success: false, }, { desc: "Correct MD5", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, 
"e2fc714c4727ee9395f324cd2e7f331f", "", 4), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -289,29 +283,23 @@ func TestHashReaderInvalidArguments(t *testing.T) { success: true, }, { - desc: "Nested, size mismatch", - src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1, false), - size: 2, - actualSize: -1, - success: false, - expectedErr: ErrSizeMismatch{Want: 4, Got: 2}, + desc: "Nested, size mismatch", + src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1), + size: 2, + actualSize: -1, + success: false, }, } for i, testCase := range testCases { t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) { - _, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, testCase.strict) + _, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize) if err != nil && testCase.success { t.Errorf("Test %q: Expected success, but got error %s instead", testCase.desc, err) } if err == nil && !testCase.success { t.Errorf("Test %q: Expected error, but got success", testCase.desc) } - if !testCase.success { - if err != testCase.expectedErr { - t.Errorf("Test %q: Expected error %v, but got %v", testCase.desc, testCase.expectedErr, err) - } - } }) } }