Repository: spark Updated Branches: refs/heads/master 050c20cc9 -> 22d4aae8b
[SPARK-10063] Follow-up: remove dead code related to an old output committer. ## What changes were proposed in this pull request? DirectParquetOutputCommitter was removed from Spark as it was deemed unsafe to use. However, we still have some code that generates a warning about it. This patch removes that code as well. ## How was this patch tested? N/A Author: Reynold Xin <r...@databricks.com> Closes #16796 from rxin/remove-direct. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22d4aae8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22d4aae8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22d4aae8 Branch: refs/heads/master Commit: 22d4aae8be338051f6652cdf54dd593023751189 Parents: 050c20c Author: Reynold Xin <r...@databricks.com> Authored: Fri Feb 3 21:12:20 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Fri Feb 3 21:12:20 2017 -0800 ---------------------------------------------------------------------- .../internal/io/HadoopMapReduceCommitProtocol.scala | 3 --- .../internal/io/SparkHadoopMapReduceWriter.scala | 15 --------------- 2 files changed, 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/22d4aae8/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 2f33f2e..2c1b563 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -163,7 +163,4 @@ class HadoopMapReduceCommitProtocol(jobId: String, path: String) 
tmp.getFileSystem(taskContext.getConfiguration).delete(tmp, false) } } - - /** Whether we are using a direct output committer */ - def isDirectOutput(): Boolean = committer.getClass.getSimpleName.contains("Direct") } http://git-wip-us.apache.org/repos/asf/spark/blob/22d4aae8/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala index 6de1fc0..63918ef 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala @@ -83,17 +83,6 @@ object SparkHadoopMapReduceWriter extends Logging { isAppend = false).asInstanceOf[HadoopMapReduceCommitProtocol] committer.setupJob(jobContext) - // When speculation is on and output committer class name contains "Direct", we should warn - // users that they may loss data if they are using a direct output committer. - if (SparkHadoopWriterUtils.isSpeculationEnabled(sparkConf) && committer.isDirectOutput) { - val warningMessage = - s"$committer may be an output committer that writes data directly to " + - "the final location. Because speculation is enabled, this output committer may " + - "cause data loss (see the case in SPARK-10063). If possible, please use an output " + - "committer that does not have this behavior (e.g. FileOutputCommitter)." - logWarning(warningMessage) - } - // Try to write all RDD partitions as a Hadoop OutputFormat. 
try { val ret = sparkContext.runJob(rdd, (context: TaskContext, iter: Iterator[(K, V)]) => { @@ -230,10 +219,6 @@ object SparkHadoopWriterUtils { enabledInConf && !validationDisabled } - def isSpeculationEnabled(conf: SparkConf): Boolean = { - conf.getBoolean("spark.speculation", false) - } - // TODO: these don't seem like the right abstractions. // We should abstract the duplicate code in a less awkward way. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org