Repository: spark Updated Branches: refs/heads/master 6b425874d -> cee230160
[SPARK-25871][STREAMING] Don't use EC for streaming WAL The write ahead log expects to be able to call hflush, but that is a no-op when writing to a file with hdfs erasure coding. So ensure that file is always written with replication instead, regardless of filesystem defaults. None yet. I'm posting this mostly to make it visible. Closes #22882 from squito/SPARK-25871. Authored-by: Imran Rashid <iras...@cloudera.com> Signed-off-by: Marcelo Vanzin <van...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cee23016 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cee23016 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cee23016 Branch: refs/heads/master Commit: cee230160ba2c3a210892f71e019190b02e34071 Parents: 6b42587 Author: Imran Rashid <iras...@cloudera.com> Authored: Tue Nov 6 10:52:33 2018 -0800 Committer: Marcelo Vanzin <van...@cloudera.com> Committed: Tue Nov 6 10:52:42 2018 -0800 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/streaming/util/HdfsUtils.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/cee23016/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index a699735..8cb68b2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -21,6 +21,8 @@ import java.io.{FileNotFoundException, IOException} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ +import 
org.apache.spark.deploy.SparkHadoopUtil + private[streaming] object HdfsUtils { def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = { @@ -37,7 +39,8 @@ private[streaming] object HdfsUtils { throw new IllegalStateException("File exists and there is no append support!") } } else { - dfs.create(dfsPath) + // we don't want to use hdfs erasure coding, as that lacks support for append and hflush + SparkHadoopUtil.createNonECFile(dfs, dfsPath) } } stream --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org