Github user caneGuy commented on a diff in the pull request: https://github.com/apache/spark/pull/21526#discussion_r201963160 --- Diff: core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala --- @@ -1053,7 +1053,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) // users that they may loss data if they are using a direct output committer. val speculationEnabled = self.conf.getBoolean("spark.speculation", false) val outputCommitterClass = hadoopConf.get("mapred.output.committer.class", "") - if (speculationEnabled && outputCommitterClass.contains("Direct")) { + val outputCommitCoordinationEnabled = self.conf.getBoolean( + "spark.hadoop.outputCommitCoordination.enabled", true) + if (speculationEnabled && outputCommitterClass.contains("Direct") + && !outputCommitCoordinationEnabled) { val warningMessage = --- End diff -- Also modify `HiveFileFormat`. @cloud-fan @jiangxb1987 The reason I did not refactor this check into a common function is that I can't find a good place to put such a function. Any suggestions?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org