GitHub user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20072#discussion_r158632706 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -261,6 +261,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val HADOOPFSRELATION_SIZE_FACTOR = buildConf( + "org.apache.spark.sql.execution.datasources.sizeFactor") + .internal() + .doc("The result of multiplying this factor with the size of data source files is propagated" + + " to serve as the stats to choose the best execution plan. In the case where the " + + " the in-disk and in-memory size of data is significantly different, users can adjust this" + + " factor for a better choice of the execution plan. The default value is 1.0.") + .doubleConf + .createWithDefault(1.0) --- End diff -- Please add a `checkValue` validation (between `.doubleConf` and `.createWithDefault(1.0)`) so that only factors greater than 0.0 are accepted, i.e. checkValue > 0.0.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org