Copilot commented on code in PR #60414:
URL: https://github.com/apache/doris/pull/60414#discussion_r2751942814
##########
fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java:
##########
@@ -248,5 +251,108 @@ public void testExtendGlobs() {
String result = S3Util.extendGlobs(input);
Assert.assertEquals(expected, result);
}
+
+ // Tests for isDeterministicPattern
+
+ @Test
+ public void testIsDeterministicPattern_simpleFile() {
+ // Simple file path without any patterns
+ Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withBraces() {
+ // Path with brace pattern (deterministic - can be expanded)
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1,2,3}.csv"));
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1..3}.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withAsterisk() {
+ // Path with asterisk wildcard (not deterministic)
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/to/*.csv"));
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/*/file.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withQuestionMark() {
+ // Path with question mark wildcard (not deterministic)
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file?.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withBrackets() {
+ // Path with bracket pattern (not deterministic)
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[0-9].csv"));
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[abc].csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withEscape() {
+ // Path with escape character (not deterministic - complex pattern)
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file\\*.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_mixed() {
+ // Path with both braces and wildcards
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file{1,2}/*.csv"));
+ }
+
+ // Tests for expandBracePatterns
+
+ @Test
+ public void testExpandBracePatterns_noBraces() {
+ // No braces - returns single path
+ List<String> result = S3Util.expandBracePatterns("path/to/file.csv");
+ Assert.assertEquals(Arrays.asList("path/to/file.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_simpleBrace() {
+ // Simple brace expansion
+ List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv");
+ Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv",
"file3.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_multipleBraces() {
+ // Multiple brace expansions
+ List<String> result =
S3Util.expandBracePatterns("dir{a,b}/file{1,2}.csv");
+ Assert.assertEquals(Arrays.asList(
+ "dira/file1.csv", "dira/file2.csv",
+ "dirb/file1.csv", "dirb/file2.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_emptyBrace() {
+ // Empty brace content
+ List<String> result = S3Util.expandBracePatterns("file{}.csv");
+ Assert.assertEquals(Arrays.asList("file.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_singleValue() {
+ // Single value in brace
+ List<String> result = S3Util.expandBracePatterns("file{1}.csv");
+ Assert.assertEquals(Arrays.asList("file1.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_withPath() {
+ // Full path with braces
+ List<String> result =
S3Util.expandBracePatterns("data/year{2023,2024}/month{01,02}/file.csv");
+ Assert.assertEquals(8, result.size());
Review Comment:
The expected result size is incorrect. The pattern
`"data/year{2023,2024}/month{01,02}/file.csv"` should expand to 4 paths (2
years × 2 months), not 8. The correct expansion should produce:
1. `data/year2023/month01/file.csv`
2. `data/year2023/month02/file.csv`
3. `data/year2024/month01/file.csv`
4. `data/year2024/month02/file.csv`
Change the expected size from 8 to 4.
```suggestion
Assert.assertEquals(4, result.size());
```
##########
fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java:
##########
@@ -578,6 +579,23 @@ private GlobListResult globListInternal(String remotePath,
List<RemoteFile> resu
}
bucket = uri.getBucket();
+
+ // Optimization: For deterministic paths (no wildcards like *, ?,
[...]),
+ // use HEAD requests instead of listing to avoid requiring
ListBucket permission.
+ // This is useful when only GetObject permission is granted.
+ // Controlled by config: s3_skip_list_for_deterministic_path
+ String keyPattern = uri.getKey();
+ if (Config.s3_skip_list_for_deterministic_path
+ && S3Util.isDeterministicPattern(keyPattern)
Review Comment:
No limit on the number of expanded paths. A pattern like
`file{{1..1000}}/dir{{1..1000}}/data.csv` would expand to 1 million paths and
trigger 1 million HEAD requests, which could overwhelm the S3 service or cause
performance issues. Consider adding a reasonable upper limit (e.g., 1000 or
10000 paths) and either throwing an error or falling back to the LIST-based
approach when this limit is exceeded.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]