ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c Gist of improvements: * Cross-server caching and listing will use the same data across servers and requests. * Lists can be arbitrarily resumed at a constant speed. * Metadata for all files scanned is stored for streaming retrieval. * The existing bloom filters controlled by the crawler is used for validating caches. * Concurrent requests for the same data (or parts of it) will not spawn additional walkers. * Listing a subdirectory of an existing recursive cache will use the cache. * All listing operations are fully streamable so the number of objects in a bucket no longer dictates the amount of memory. * Listings can be handled by any server within the cluster. * Caches are cleaned up when out of date or superseded by a more recent one.
2026-02-05 02:10:14 -05:00 · 2020-10-28 09:18:35 -07:00
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions
--- a/cmd/object-api-listobjects_test.go
+++ b/cmd/object-api-listobjects_test.go
@@ -578,6 +578,7 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
 	for i, testCase := range testCases {
 		testCase := testCase
 		t.Run(fmt.Sprintf("%s-Test%d", instanceType, i+1), func(t *testing.T) {
+			t.Log("ListObjects, bucket:", testCase.bucketName, "prefix:", testCase.prefix, "marker:", testCase.marker, "delimiter:", testCase.delimiter, "maxkeys:", testCase.maxKeys)
 			result, err := obj.ListObjects(context.Background(), testCase.bucketName,
 				testCase.prefix, testCase.marker, testCase.delimiter, int(testCase.maxKeys))
 			if err != nil && testCase.shouldPass {
@@ -602,9 +603,15 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
 				// otherwise it may lead to index out of range error in
 				// assertion following this.
 				if len(testCase.result.Objects) != len(result.Objects) {
-					t.Fatalf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects))
+					t.Logf("want: %v", objInfoNames(testCase.result.Objects))
+					t.Logf("got: %v", objInfoNames(result.Objects))
+					t.Errorf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects))
 				}
 				for j := 0; j < len(testCase.result.Objects); j++ {
+					if j >= len(result.Objects) {
+						t.Errorf("Test %d: %s: Expected object name to be \"%s\", but not nothing instead", i+1, instanceType, testCase.result.Objects[j].Name)
+						continue
+					}
 					if testCase.result.Objects[j].Name != result.Objects[j].Name {
 						t.Errorf("Test %d: %s: Expected object name to be \"%s\", but found \"%s\" instead", i+1, instanceType, testCase.result.Objects[j].Name, result.Objects[j].Name)
 					}
@@ -616,16 +623,25 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
 				}

 				if len(testCase.result.Prefixes) != len(result.Prefixes) {
-					t.Fatalf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
+					t.Logf("want: %v", testCase.result.Prefixes)
+					t.Logf("got: %v", result.Prefixes)
+					t.Errorf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
 				}
 				for j := 0; j < len(testCase.result.Prefixes); j++ {
+					if j >= len(result.Prefixes) {
+						t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found no result", i+1, instanceType, testCase.result.Prefixes[j])
+						continue
+					}
 					if testCase.result.Prefixes[j] != result.Prefixes[j] {
 						t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found \"%s\" instead", i+1, instanceType, testCase.result.Prefixes[j], result.Prefixes[j])
 					}
 				}

 				if testCase.result.IsTruncated != result.IsTruncated {
-					t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated)
+					// Allow an extra continuation token.
+					if !result.IsTruncated || len(result.Objects) == 0 {
+						t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated)
+					}
 				}

 				if testCase.result.IsTruncated && result.NextMarker == "" {
@@ -633,22 +649,35 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
 				}

 				if !testCase.result.IsTruncated && result.NextMarker != "" {
-					t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker)
+					if !result.IsTruncated || len(result.Objects) == 0 {
+						t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker)
+					}
 				}

 			}
 			// Take ListObject treeWalk go-routine to completion, if available in the treewalk pool.
-			if result.IsTruncated {
-				_, err = obj.ListObjects(context.Background(), testCase.bucketName,
+			for result.IsTruncated {
+				result, err = obj.ListObjects(context.Background(), testCase.bucketName,
 					testCase.prefix, result.NextMarker, testCase.delimiter, 1000)
 				if err != nil {
 					t.Fatal(err)
 				}
+				if !testCase.result.IsTruncated && len(result.Objects) > 0 {
+					t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects))
+				}
 			}
 		})
 	}
 }

+func objInfoNames(o []ObjectInfo) []string {
+	var res = make([]string, len(o))
+	for i := range o {
+		res[i] = o[i].Name
+	}
+	return res
+}
+
 // Wrapper for calling ListObjectVersions tests for both Erasure multiple disks and single node setup.
 func TestListObjectVersions(t *testing.T) {
 	ExecObjectLayerTest(t, testListObjectVersions)
@@ -1240,7 +1269,7 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
 				}

 				if len(testCase.result.Prefixes) != len(result.Prefixes) {
-					fmt.Println(testCase, testCase.result.Prefixes, result.Prefixes)
+					t.Log(testCase, testCase.result.Prefixes, result.Prefixes)
 					t.Fatalf("%s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
 				}
 				for j := 0; j < len(testCase.result.Prefixes); j++ {
@@ -1250,7 +1279,10 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
 				}

 				if testCase.result.IsTruncated != result.IsTruncated {
-					t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated)
+					// Allow an extra continuation token.
+					if !result.IsTruncated || len(result.Objects) == 0 {
+						t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated)
+					}
 				}

 				if testCase.result.IsTruncated && result.NextMarker == "" {
@@ -1258,17 +1290,22 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
 				}

 				if !testCase.result.IsTruncated && result.NextMarker != "" {
-					t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker)
+					if !result.IsTruncated || len(result.Objects) == 0 {
+						t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker)
+					}
 				}

 			}
 			// Take ListObject treeWalk go-routine to completion, if available in the treewalk pool.
-			if result.IsTruncated {
-				_, err = obj.ListObjectVersions(context.Background(), testCase.bucketName,
+			for result.IsTruncated {
+				result, err = obj.ListObjectVersions(context.Background(), testCase.bucketName,
 					testCase.prefix, result.NextMarker, "", testCase.delimiter, 1000)
 				if err != nil {
 					t.Fatal(err)
 				}
+				if !testCase.result.IsTruncated && len(result.Objects) > 0 {
+					t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects))
+				}
 			}
 		})
 	}