This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push: new bb58a66775 DRILL-8283: Add a configurable recursive file listing size limit (#2632) bb58a66775 is described below commit bb58a6677528d9833979248a506b87041eb4bb89 Author: James Turton <9107319+jntur...@users.noreply.github.com> AuthorDate: Thu Sep 8 07:12:19 2022 +0200 DRILL-8283: Add a configurable recursive file listing size limit (#2632) --- .../src/main/resources/core-site-example.xml | 16 ++++- .../org/apache/drill/exec/util/FileSystemUtil.java | 71 ++++++++++++++++++++-- .../apache/drill/exec/util/FileSystemUtilTest.java | 18 ++++++ 3 files changed, 99 insertions(+), 6 deletions(-) diff --git a/distribution/src/main/resources/core-site-example.xml b/distribution/src/main/resources/core-site-example.xml index 0261658c01..d870172d76 100644 --- a/distribution/src/main/resources/core-site-example.xml +++ b/distribution/src/main/resources/core-site-example.xml @@ -172,5 +172,19 @@ </description> </property> --> - + <!-- + Use the next property to set a limit on the number of files that Drill + will list by recursing into a DFS directory tree. When the limit is + encountered the initiating operation will fail with an error. The + intended application of this limit is to allow admins to protect their + Drillbits from an errant or malicious SELECT * FROM dfs.huge_workspace + LIMIT 10 query, which will cause an OOM given a big enough workspace of + files. Defaults to 0 which means no limit. 
+ --> + <!-- + <property> + <name>drill.exec.recursive_file_listing_max_size</name> + <value>10000</value> + </property> + --> </configuration> diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java index 82500da30f..488f3fb35a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java @@ -18,6 +18,7 @@ package org.apache.drill.exec.util; import org.apache.drill.common.exceptions.ErrorHelper; +import org.apache.drill.common.exceptions.UserException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -28,9 +29,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.concurrent.CancellationException; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinTask; import java.util.concurrent.RecursiveTask; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -41,6 +44,7 @@ public class FileSystemUtil { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSystemUtil.class); + public static final String RECURSIVE_FILE_LISTING_MAX_SIZE = "drill.exec.recursive_file_listing_max_size"; /** * Filter that will accept all files and directories. 
@@ -248,9 +252,24 @@ public class FileSystemUtil { */ private static List<FileStatus> listRecursive(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) { ForkJoinPool pool = new ForkJoinPool(); + AtomicInteger fileCounter = new AtomicInteger(0); + int recursiveListingMaxSize = fs.getConf().getInt(RECURSIVE_FILE_LISTING_MAX_SIZE, 0); + try { - RecursiveListing task = new RecursiveListing(fs, path, scope, suppressExceptions, filter); + RecursiveListing task = new RecursiveListing( + fs, + path, + scope, + suppressExceptions, + filter, + fileCounter, + recursiveListingMaxSize, + pool + ); return pool.invoke(task); + } catch (CancellationException ex) { + logger.debug("RecursiveListing task to list {} was cancelled.", path); + return Collections.<FileStatus>emptyList(); } finally { pool.shutdown(); } @@ -287,13 +306,29 @@ public class FileSystemUtil { private final Scope scope; private final boolean suppressExceptions; private final PathFilter filter; + // Running count of files for comparison with RECURSIVE_FILE_LISTING_MAX_SIZE + private final AtomicInteger fileCounter; + private final int recursiveListingMaxSize; + private final ForkJoinPool pool; - RecursiveListing(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) { + RecursiveListing( + FileSystem fs, + Path path, + Scope scope, + boolean suppressExceptions, + PathFilter filter, + AtomicInteger fileCounter, + int recursiveListingMaxSize, + ForkJoinPool pool + ) { this.fs = fs; this.path = path; this.scope = scope; this.suppressExceptions = suppressExceptions; this.filter = filter; + this.fileCounter = fileCounter; + this.recursiveListingMaxSize = recursiveListingMaxSize; + this.pool = pool; } @Override @@ -302,12 +337,39 @@ public class FileSystemUtil { List<RecursiveListing> tasks = new ArrayList<>(); try { - for (FileStatus status : fs.listStatus(path, filter)) { + FileStatus[] dirFs = fs.listStatus(path, filter); + if 
(fileCounter.addAndGet(dirFs.length) > recursiveListingMaxSize && recursiveListingMaxSize > 0 ) { + try { + throw UserException + .resourceError() + .message( + "File listing size limit of %d exceeded recursing through path %s, see JVM system property %s", + recursiveListingMaxSize, + path, + RECURSIVE_FILE_LISTING_MAX_SIZE + ) + .build(logger); + } finally { + // Attempt to abort all tasks + this.pool.shutdownNow(); + } + } + + for (FileStatus status : dirFs) { if (isStatusApplicable(status, scope)) { statuses.add(status); } if (status.isDirectory()) { - RecursiveListing task = new RecursiveListing(fs, status.getPath(), scope, suppressExceptions, filter); + RecursiveListing task = new RecursiveListing( + fs, + status.getPath(), + scope, + suppressExceptions, + filter, + fileCounter, + recursiveListingMaxSize, + pool + ); task.fork(); tasks.add(task); } @@ -328,5 +390,4 @@ public class FileSystemUtil { return statuses; } } - } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java index 299f1cc65e..48ee441ab6 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java @@ -17,6 +17,8 @@ */ package org.apache.drill.exec.util; +import org.apache.drill.common.exceptions.UserException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; @@ -26,8 +28,10 @@ import java.io.IOException; import java.util.Collections; import java.util.List; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; public class FileSystemUtilTest extends FileSystemUtilTestBase { @@ 
-199,4 +203,18 @@ public class FileSystemUtilTest extends FileSystemUtilTestBase { assertTrue("Should return empty result", fileStatuses.isEmpty()); } + @Test // DRILL-8283 + public void testRecursiveListingMaxSize() throws IOException { + Configuration conf = fs.getConf(); + int oldSize = conf.getInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 0); + conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 5); + + try { + FileSystemUtil.listAll(fs, new Path(base, "a"), true); + } catch (UserException ex) { + assertThat(ex.getMessage(), containsString("RESOURCE ERROR: File listing size limit")); + } finally { + conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, oldSize); + } + } }