This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch optimize_s3_skip_list_for_deterministic_paths
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8fd43e3fc87de39a8d04bf3e28f875ce45e5e24b
Author: Yongqiang YANG <[email protected]>
AuthorDate: Sun Feb 1 12:44:21 2026 -0800

    [opt](s3) Skip S3 listing for deterministic file paths using HEAD requests
    
    For S3 paths without wildcards (*, ?, [...]), use HEAD requests instead
    of ListObjectsV2 to avoid requiring s3:ListBucket permission. This is
    useful when only s3:GetObject permission is granted.
    
    Brace patterns like {1..10} are expanded to concrete file paths and
    verified individually with HEAD requests.
---
 .../java/org/apache/doris/common/util/S3Util.java  | 111 +++++++++++++++++++++
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java |  64 ++++++++++++
 .../org/apache/doris/common/util/S3UtilTest.java   | 106 ++++++++++++++++++++
 3 files changed, 281 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index e537d1f47b0..08e907e53de 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -54,6 +54,8 @@ import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -433,4 +435,113 @@ public class S3Util {
             SecurityChecker.getInstance().stopSSRFChecking();
         }
     }
+
+    /**
+     * Tell whether every file path matched by a pattern can be worked out without listing.
+     * That is the case when the pattern has no wildcard characters (*, ?, [...]) and no
+     * backslash escapes; brace patterns ({...}) are allowed because they can be expanded.
+     *
+     * Such patterns make it possible to skip S3 ListBucket operations when only GetObject
+     * permission is available.
+     *
+     * @param pathPattern Path that may contain glob patterns
+     * @return true if the pattern contains none of '*', '?', '[' and '\'
+     */
+    public static boolean isDeterministicPattern(String pathPattern) {
+        // '{' is intentionally not in this set: braces are expanded rather than listed.
+        // '\' is in it because an escape indicates a complex pattern.
+        String listingRequired = "*?[\\";
+        for (int i = 0; i < pathPattern.length(); i++) {
+            if (listingRequired.indexOf(pathPattern.charAt(i)) != -1) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Expand brace patterns in a path to generate all concrete file paths.
+     * Handles nested and multiple brace patterns; a path generated more than once
+     * (e.g. by "file{1,1}.csv") is returned a single time, in first-seen order.
+     *
+     * Examples:
+     *   - "file{1,2,3}.csv" => ["file1.csv", "file2.csv", "file3.csv"]
+     *   - "file.csv" => ["file.csv"] (no braces)
+     *   - "part{1..3}" => ["part1..3"]: only commas separate alternatives here, so ranges
+     *     have to be rewritten by extendGlobs before calling this method
+     *
+     * @param pathPattern Path with optional brace patterns (already processed by extendGlobs)
+     * @return List of expanded concrete paths, without duplicates
+     */
+    public static List<String> expandBracePatterns(String pathPattern) {
+        List<String> expanded = new ArrayList<>();
+        expandBracePatternsRecursive(pathPattern, expanded);
+        // LinkedHashSet drops repeated paths while preserving the expansion order
+        return new ArrayList<>(new java.util.LinkedHashSet<>(expanded));
+    }
+
+    /**
+     * Expand the leftmost brace group of {@code pattern} and recurse on each resulting string
+     * until no '{' is left, appending every finished string to {@code result}.
+     * A pattern whose first '{' is never closed is appended unchanged.
+     */
+    private static void expandBracePatternsRecursive(String pattern, List<String> result) {
+        int open = pattern.indexOf('{');
+        // No '{' at all, or an unclosed one (findMatchingBrace gives -1): keep the text as a literal
+        int close = open == -1 ? -1 : findMatchingBrace(pattern, open);
+        if (close == -1) {
+            result.add(pattern);
+            return;
+        }
+
+        String head = pattern.substring(0, open);
+        String tail = pattern.substring(close + 1);
+        // Substitute each top-level alternative; nested groups and the tail are left to the recursion
+        for (String choice : splitBraceContent(pattern.substring(open + 1, close))) {
+            expandBracePatternsRecursive(head + choice + tail, result);
+        }
+    }
+
+    // Scan from start (expected to be the index of a '{') and return the index of the '}'
+    // that brings the nesting depth back to zero, or -1 when the group is never closed.
+    private static int findMatchingBrace(String pattern, int start) {
+        int open = 0;
+        int pos = start;
+        while (pos < pattern.length()) {
+            char ch = pattern.charAt(pos);
+            if (ch == '{') {
+                open++;
+            } else if (ch == '}' && --open == 0) {
+                return pos;
+            }
+            pos++;
+        }
+        return -1;
+    }
+
+    // Split brace content into alternatives at commas that sit outside any nested {...} group.
+    private static List<String> splitBraceContent(String content) {
+        List<String> pieces = new ArrayList<>();
+        int nesting = 0;
+        int pieceStart = 0;
+        for (int pos = 0; pos < content.length(); pos++) {
+            char ch = content.charAt(pos);
+            if (ch == ',' && nesting == 0) {
+                pieces.add(content.substring(pieceStart, pos));
+                pieceStart = pos + 1;
+            } else if (ch == '{') {
+                nesting++;
+            } else if (ch == '}') {
+                nesting--;
+            }
+        }
+        // Whatever follows the last top-level comma (possibly "") is the final alternative
+        pieces.add(content.substring(pieceStart));
+        return pieces;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 9cd0c6ea9e5..9c062f6d6e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -578,6 +578,70 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
             }
 
             bucket = uri.getBucket();
+
+            // Optimization: For deterministic paths (no wildcards like *, ?, 
[...]),
+            // use HEAD requests instead of listing to avoid requiring 
ListBucket permission.
+            // This is useful when only GetObject permission is granted.
+            String keyPattern = uri.getKey();
+            if (S3Util.isDeterministicPattern(keyPattern) && !hasLimits && 
startFile == null) {
+                // First expand any {..} patterns, then use HEAD requests
+                String expandedPattern = S3Util.extendGlobs(keyPattern);
+                List<String> expandedPaths = 
S3Util.expandBracePatterns(expandedPattern);
+
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Using HEAD requests for deterministic path 
pattern: {}, expanded to {} paths",
+                            remotePath, expandedPaths.size());
+                }
+
+                for (String key : expandedPaths) {
+                    String fullPath = "s3://" + bucket + "/" + key;
+                    try {
+                        HeadObjectResponse headResponse = getClient()
+                                .headObject(HeadObjectRequest.builder()
+                                        .bucket(bucket)
+                                        .key(key)
+                                        .build());
+
+                        matchCnt++;
+                        matchFileSize += headResponse.contentLength();
+                        RemoteFile remoteFile = new RemoteFile(
+                                fileNameOnly ? 
Paths.get(key).getFileName().toString() : fullPath,
+                                true, // isFile
+                                headResponse.contentLength(),
+                                headResponse.contentLength(),
+                                headResponse.lastModified() != null
+                                        ? 
headResponse.lastModified().toEpochMilli() : 0
+                        );
+                        result.add(remoteFile);
+
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("HEAD success for {}: size={}", 
fullPath, headResponse.contentLength());
+                        }
+                    } catch (NoSuchKeyException e) {
+                        // File does not exist, skip it (this is expected for 
some expanded patterns)
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("File does not exist (skipped): {}", 
fullPath);
+                        }
+                    } catch (S3Exception e) {
+                        if (e.statusCode() == HttpStatus.SC_NOT_FOUND) {
+                            if (LOG.isDebugEnabled()) {
+                                LOG.debug("File does not exist (skipped): {}", 
fullPath);
+                            }
+                        } else {
+                            throw e;
+                        }
+                    }
+                }
+
+                if (LOG.isDebugEnabled()) {
+                    long duration = System.nanoTime() - startTime;
+                    LOG.debug("Deterministic path HEAD requests: checked {} 
paths, found {} files, took {} ms",
+                            expandedPaths.size(), matchCnt, duration / 1000 / 
1000);
+                }
+
+                return new GlobListResult(Status.OK, currentMaxFile, bucket, 
"");
+            }
+
             String globPath = S3Util.extendGlobs(uri.getKey());
 
             if (LOG.isDebugEnabled()) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
index 23715440e8c..914434aea30 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -20,6 +20,9 @@ package org.apache.doris.common.util;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.Arrays;
+import java.util.List;
+
 public class S3UtilTest {
 
     @Test
@@ -248,5 +251,108 @@ public class S3UtilTest {
         String result = S3Util.extendGlobs(input);
         Assert.assertEquals(expected, result);
     }
+
+    // Tests for isDeterministicPattern
+
+    @Test
+    public void testIsDeterministicPattern_simpleFile() {
+        // A plain path carries no pattern syntax at all
+        String plainPath = "path/to/file.csv";
+        Assert.assertTrue(S3Util.isDeterministicPattern(plainPath));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withBraces() {
+        // Braces can be expanded up front, in list form as well as in range form
+        for (String key : new String[] {"path/to/file{1,2,3}.csv", "path/to/file{1..3}.csv"}) {
+            Assert.assertTrue(S3Util.isDeterministicPattern(key));
+        }
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withAsterisk() {
+        // An asterisk is a wildcard whether it sits in the file name or in a directory level
+        String[] starPatterns = {"path/to/*.csv", "path/*/file.csv"};
+        for (String key : starPatterns) {
+            Assert.assertFalse(S3Util.isDeterministicPattern(key));
+        }
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withQuestionMark() {
+        // '?' is a wildcard, so the pattern is not deterministic
+        String questionMarkPattern = "path/to/file?.csv";
+        Assert.assertFalse(S3Util.isDeterministicPattern(questionMarkPattern));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withBrackets() {
+        // Neither a bracket range nor a bracket set may be reported as deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[0-9].csv")
+                || S3Util.isDeterministicPattern("path/to/file[abc].csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withEscape() {
+        // Path with escape character (not deterministic - complex pattern)
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file\\*.csv"));
+        // The pattern above is already rejected because of its '*'. This one has the backslash
+        // as its only disqualifying character, so the escape check itself is exercised.
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file\\{1,2}.csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_mixed() {
+        // Braces alone would be fine, but the wildcard next to them is not
+        String bracesPlusWildcard = "path/to/file{1,2}/*.csv";
+        Assert.assertFalse(S3Util.isDeterministicPattern(bracesPlusWildcard));
+    }
+
+    // Tests for expandBracePatterns
+
+    @Test
+    public void testExpandBracePatterns_noBraces() {
+        // Without any brace group the input comes back as the only element
+        String plainPath = "path/to/file.csv";
+        Assert.assertEquals(Arrays.asList(plainPath), S3Util.expandBracePatterns(plainPath));
+    }
+
+    @Test
+    public void testExpandBracePatterns_simpleBrace() {
+        // One group with three alternatives yields three paths, in the order written
+        List<String> expected = Arrays.asList("file1.csv", "file2.csv", "file3.csv");
+        Assert.assertEquals(expected, S3Util.expandBracePatterns("file{1,2,3}.csv"));
+    }
+
+    @Test
+    public void testExpandBracePatterns_multipleBraces() {
+        // Two groups are combined as a cross product; the leftmost group varies slowest
+        List<String> expected = Arrays.asList(
+                "dira/file1.csv", "dira/file2.csv",
+                "dirb/file1.csv", "dirb/file2.csv");
+        Assert.assertEquals(expected, S3Util.expandBracePatterns("dir{a,b}/file{1,2}.csv"));
+    }
+
+    @Test
+    public void testExpandBracePatterns_emptyBrace() {
+        // "{}" holds a single empty alternative, so the group simply disappears
+        List<String> expected = Arrays.asList("file.csv");
+        Assert.assertEquals(expected, S3Util.expandBracePatterns("file{}.csv"));
+    }
+
+    @Test
+    public void testExpandBracePatterns_singleValue() {
+        // A group with one alternative is replaced by that alternative
+        List<String> expected = Arrays.asList("file1.csv");
+        Assert.assertEquals(expected, S3Util.expandBracePatterns("file{1}.csv"));
+    }
+
+    @Test
+    public void testExpandBracePatterns_withPath() {
+        // Full path with braces: two groups of two alternatives expand to 2 x 2 = 4 paths
+        List<String> result = S3Util.expandBracePatterns("data/year{2023,2024}/month{01,02}/file.csv");
+        Assert.assertEquals(Arrays.asList(
+                "data/year2023/month01/file.csv", "data/year2023/month02/file.csv",
+                "data/year2024/month01/file.csv", "data/year2024/month02/file.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracePatterns_extendedRange() {
+        // The {1..3} range goes through extendGlobs first and is then expanded like any group
+        List<String> actual = S3Util.expandBracePatterns(S3Util.extendGlobs("file{1..3}.csv"));
+        Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), actual);
+    }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to