Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22152#discussion_r211599957

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala ---
    @@ -69,10 +70,17 @@ private[sql] object JsonInferSchema {
         }.reduceOption(typeMerger).toIterator
       }

    -  // Here we get RDD local iterator then fold, instead of calling `RDD.fold` directly, because
    -  // `RDD.fold` will run the fold function in DAGScheduler event loop thread, which may not have
    -  // active SparkSession and `SQLConf.get` may point to the wrong configs.
    -  val rootType = mergedTypesFromPartitions.toLocalIterator.fold(StructType(Nil))(typeMerger)
    +  // Here we manually submit a fold-like Spark job, so that we can set the SQLConf when running
    +  // the fold functions in the scheduler event loop thread.
    +  val existingConf = SQLConf.get
    +  var rootType: DataType = StructType(Nil)
    +  val foldPartition = (iter: Iterator[DataType]) => iter.fold(StructType(Nil))(typeMerger)
    --- End diff --

    Yeah, agreed.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org