This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b508eab [SPARK-21882][CORE] OutputMetrics doesn't count written bytes correctly in the saveAsHadoopDataset function b508eab is described below commit b508eab9858b94f14b29e023812448e3d0c97712 Author: Sean Owen <sean.o...@databricks.com> AuthorDate: Fri Jun 14 12:44:43 2019 -0500 [SPARK-21882][CORE] OutputMetrics doesn't count written bytes correctly in the saveAsHadoopDataset function ## What changes were proposed in this pull request? (Continuation of https://github.com/apache/spark/pull/19118 ; see for details) ## How was this patch tested? Existing tests. Closes #24863 from srowen/SPARK-21882.2. Authored-by: Sean Owen <sean.o...@databricks.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala index 3a58ea8..a619f10 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala @@ -116,12 +116,14 @@ object SparkHadoopWriter extends Logging { jobTrackerId, commitJobId, sparkPartitionId, sparkAttemptNumber) committer.setupTask(taskContext) - val (outputMetrics, callback) = initHadoopOutputMetrics(context) - // Initiate the writer. config.initWriter(taskContext, sparkPartitionId) var recordsWritten = 0L + // We must initialize the callback for calculating bytes written after the statistic table + // is initialized in FileSystem, which happens in initWriter. + val (outputMetrics, callback) = initHadoopOutputMetrics(context) + // Write all rows in RDD partition. 
try { val ret = Utils.tryWithSafeFinallyAndFailureCallbacks { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org