Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/21618#discussion_r197657738 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala --- @@ -724,4 +726,35 @@ object DataSource extends Logging { """.stripMargin) } } + + /** + * Return all paths represented by the wildcard string. + * Use a local thread pool to do this when there are too many paths. + */ + private def getGlobbedPaths( + sparkSession: SparkSession, + fs: FileSystem, + hadoopConf: Configuration, + qualified: Path): Seq[Path] = { + val getGlobbedPathThreshold = sparkSession.sessionState.conf.parallelGetGlobbedPathThreshold + val paths = SparkHadoopUtil.get.expandGlobPath(fs, qualified, getGlobbedPathThreshold) --- End diff -- If the code has any unexpected side effect, is it possible to turn it off and restore the previous code path?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org