Github user xuanyuanking commented on a diff in the pull request: https://github.com/apache/spark/pull/21618#discussion_r201007556 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -656,6 +656,25 @@ object SQLConf { .intConf .createWithDefault(10000) + val PARALLEL_GET_GLOBBED_PATH_THRESHOLD = + buildConf("spark.sql.sources.parallelGetGlobbedPath.threshold") + .doc("The maximum number of subfiles or directories allowed after a globbed path " + + "expansion. If the number of paths exceeds this value during expansion, it tries to " + + "expand the globbed in parallel with multi-thread.") + .intConf + .checkValue(threshlod => threshlod >= 0, "The maximum number of subfiles or directories " + + "must not be negative") + .createWithDefault(32) + + val PARALLEL_GET_GLOBBED_PATH_NUM_THREADS = + buildConf("spark.sql.sources.parallelGetGlobbedPath.numThreads") + .doc("The number of threads to get a collection of path in parallel. Set the " + + "number to avoid generating too many threads.") + .intConf + .checkValue(parallel => parallel >= 0, "The maximum number of threads allowed for getting " + --- End diff -- Thanks for catching this. When this value is set to 0, we'll get an IllegalArgumentException during `new ThreadPoolExecutor`. So I use 0 here as the default value for controlling this feature, as we discussed in https://github.com/apache/spark/pull/21618#discussion_r200465855
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org