This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch fix/s3-glob-expansion-safety in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4d9b83be4d1657329b89cacceb96912f210fc3c7 Author: Yongqiang YANG <[email protected]> AuthorDate: Fri Mar 27 23:51:57 2026 -0700 [fix](s3) Add limit-aware brace expansion and fix misleading glob metrics Prevent OOM from unbounded brace expansion by adding early-stop to expandBracePatterns when the expansion exceeds s3_head_request_max_paths. Also skip LIST-path metrics logging when the HEAD/getProperties optimization was used, avoiding misleading "process 0 elements" logs. Found via review of #61775. Co-Authored-By: Claude Opus 4.6 <[email protected]> --- .../java/org/apache/doris/common/util/S3Util.java | 34 ++++++++++++++++-- .../org/apache/doris/fs/obj/AzureObjStorage.java | 28 ++++++++------- .../java/org/apache/doris/fs/obj/S3ObjStorage.java | 18 +++++----- .../org/apache/doris/common/util/S3UtilTest.java | 41 ++++++++++++++++++++++ 4 files changed, 98 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java index 3e4f4e7a62f..6bb5b7ce029 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java @@ -583,6 +583,15 @@ public class S3Util { return chars; } + /** + * Exception thrown when brace expansion exceeds the specified limit. + */ + public static class BraceExpansionTooLargeException extends RuntimeException { + public BraceExpansionTooLargeException(int limit) { + super("Brace expansion exceeded limit of " + limit + " paths"); + } + } + /** * Expand brace patterns in a path to generate all concrete file paths. * Handles nested and multiple brace patterns. @@ -597,11 +606,30 @@ public class S3Util { */ public static List<String> expandBracePatterns(String pathPattern) { List<String> result = new ArrayList<>(); - expandBracePatternsRecursive(pathPattern, result); + expandBracePatternsRecursive(pathPattern, result, 0); return result; } - private static void expandBracePatternsRecursive(String pattern, List<String> result) { + /** + * Expand brace patterns with a limit on the number of expanded paths. + * Stops expansion early if the limit is exceeded, avoiding large allocations. + * + * @param pathPattern Path with optional brace patterns + * @param maxPaths Maximum number of expanded paths allowed (must be > 0) + * @return List of expanded concrete paths + * @throws BraceExpansionTooLargeException if expansion exceeds maxPaths + */ + public static List<String> expandBracePatterns(String pathPattern, int maxPaths) { + List<String> result = new ArrayList<>(); + expandBracePatternsRecursive(pathPattern, result, maxPaths); + return result; + } + + private static void expandBracePatternsRecursive(String pattern, List<String> result, int maxPaths) { + if (maxPaths > 0 && result.size() >= maxPaths) { + throw new BraceExpansionTooLargeException(maxPaths); + } + int braceStart = pattern.indexOf('{'); if (braceStart == -1) { // No more braces, add the pattern as-is @@ -626,7 +654,7 @@ public class S3Util { for (String alt : alternatives) { // Recursively expand any remaining braces in the suffix - expandBracePatternsRecursive(prefix + alt + suffix, result); + expandBracePatternsRecursive(prefix + alt + suffix, result, maxPaths); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java index 4929e34e7f5..08f83ea8f60 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java @@ -354,6 +354,7 @@ public class AzureObjStorage implements ObjStorage<BlobServiceClient> { long elementCnt = 0; long matchCnt = 0; long startTime = System.nanoTime(); + boolean usedHeadPath = false; Status st = Status.OK; try { remotePath = AzurePropertyUtils.validateAndNormalizeUri(remotePath); @@ -370,6 +371,7 @@ public class AzureObjStorage implements ObjStorage<BlobServiceClient> { && S3Util.isDeterministicPattern(keyPattern)) { Status headStatus = globListByGetProperties(bucket, keyPattern, result, fileNameOnly, startTime); if (headStatus != null) { + usedHeadPath = true; return headStatus; } // If headStatus is null, fall through to use listing @@ -444,11 +446,13 @@ public class AzureObjStorage implements ObjStorage<BlobServiceClient> { st = new Status(Status.ErrCode.COMMON_ERROR, "errors while glob file " + remotePath + ": " + e.getMessage()); } finally { - long endTime = System.nanoTime(); - long duration = endTime - startTime; - LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", - remotePath, elementCnt, roundCnt, matchCnt, - duration / 1000); + if (!usedHeadPath) { + long endTime = System.nanoTime(); + long duration = endTime - startTime; + LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", + remotePath, elementCnt, roundCnt, matchCnt, + duration / 1000); + } } return st; } @@ -468,15 +472,15 @@ public class AzureObjStorage implements ObjStorage<BlobServiceClient> { List<RemoteFile> result, boolean fileNameOnly, long startTime) { try { // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces + // Use limit-aware expansion to avoid large allocations before checking the limit String expandedPattern = S3Util.expandBracketPatterns(keyPattern); expandedPattern = S3Util.extendGlobs(expandedPattern); - List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern); - - // Fall back to listing if too many paths to avoid overwhelming Azure with requests - // Controlled by config: s3_head_request_max_paths - if (expandedPaths.size() > Config.s3_head_request_max_paths) { - LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST", - expandedPaths.size(), Config.s3_head_request_max_paths); + List<String> expandedPaths; + try { + expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths); + } catch (S3Util.BraceExpansionTooLargeException e) { + LOG.info("Brace expansion exceeded limit {}, falling back to LIST", + Config.s3_head_request_max_paths); return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java index ec827c785a5..bb7cf945a42 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java @@ -575,6 +575,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> { long startTime = System.nanoTime(); String currentMaxFile = ""; boolean hasLimits = fileSizeLimit > 0 || fileNumLimit > 0; + boolean usedHeadPath = false; String bucket = ""; String finalPrefix = ""; try { @@ -602,6 +603,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> { GlobListResult headResult = globListByHeadRequests( bucket, keyPattern, result, fileNameOnly, startTime); if (headResult != null) { + usedHeadPath = true; return headResult; } // If headResult is null, fall through to use listing @@ -733,7 +735,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> { } finally { long endTime = System.nanoTime(); long duration = endTime - startTime; - if (LOG.isDebugEnabled()) { + if (!usedHeadPath && LOG.isDebugEnabled()) { LOG.debug("process {} elements under prefix {} for {} round, match {} elements, take {} ms", elementCnt, remotePath, roundCnt, matchCnt, duration / 1000 / 1000); @@ -756,15 +758,15 @@ public class S3ObjStorage implements ObjStorage<S3Client> { List<RemoteFile> result, boolean fileNameOnly, long startTime) { try { // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces + // Use limit-aware expansion to avoid large allocations before checking the limit String expandedPattern = S3Util.expandBracketPatterns(keyPattern); expandedPattern = S3Util.extendGlobs(expandedPattern); - List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern); - - // Fall back to listing if too many paths to avoid overwhelming S3 with HEAD requests - // Controlled by config: s3_head_request_max_paths - if (expandedPaths.size() > Config.s3_head_request_max_paths) { - LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST", - expandedPaths.size(), Config.s3_head_request_max_paths); + List<String> expandedPaths; + try { + expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths); + } catch (S3Util.BraceExpansionTooLargeException e) { + LOG.info("Brace expansion exceeded limit {}, falling back to LIST", + Config.s3_head_request_max_paths); return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java index 4b976ed86cd..a17f443f080 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java @@ -456,5 +456,46 @@ public class S3UtilTest { // Malformed bracket (no closing ]) - [ kept as literal Assert.assertEquals("file[abc.csv", S3Util.expandBracketPatterns("file[abc.csv")); } + + // Tests for limit-aware expandBracePatterns + + @Test + public void testExpandBracePatterns_withinLimit() { + // Expansion within the limit should succeed + List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv", 10); + Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result); + } + + @Test + public void testExpandBracePatterns_exactlyAtLimit() { + // Expansion exactly at the limit should succeed + List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv", 3); + Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_exceedsLimit() { + // Expansion exceeding the limit should throw + S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 3); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_oneOverLimit() { + // maxPaths+1 items must also throw (boundary case) + S3Util.expandBracePatterns("file{1,2,3,4}.csv", 3); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_cartesianExceedsLimit() { + // Cartesian product {a,b,c} x {1,2,3} = 9 paths, limit = 5 + S3Util.expandBracePatterns("dir{a,b,c}/file{1,2,3}.csv", 5); + } + + @Test + public void testExpandBracePatterns_zeroLimitMeansUnlimited() { + // maxPaths=0 means no limit (backward compatibility) + List<String> result = S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0); + Assert.assertEquals(5, result.size()); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
