Github user vanzin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22881#discussion_r229802904

    --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala ---
    @@ -471,4 +472,33 @@ object SparkHadoopUtil {
           hadoopConf.set(key.substring("spark.hadoop.".length), value)
         }
       }
    +
    +  // scalastyle:off line.size.limit
    +  /**
    +   * Create a path that uses replication instead of erasure coding (EC), regardless of the default
    +   * configuration in HDFS for the given path. This can be helpful as HDFS EC doesn't support
    +   * hflush(), hsync(), or append():
    +   * https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html#Limitations
    +   */
    +  // scalastyle:on line.size.limit
    +  def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = {
    +    try {
    +      // Use reflection as this uses APIs only available in Hadoop 3
    +      val builderMethod = fs.getClass().getMethod("createFile", classOf[Path])
    +      val builder = builderMethod.invoke(fs, path)
    +      val builderCls = builder.getClass()
    +      // this may throw a NoSuchMethodException if the path is not on HDFS
    +      val replicateMethod = builderCls.getMethod("replicate")
    +      val buildMethod = builderCls.getMethod("build")
    +      val b2 = replicateMethod.invoke(builder)
    +      buildMethod.invoke(b2).asInstanceOf[FSDataOutputStream]
    +    } catch {
    +      case _: NoSuchMethodException =>
    --- End diff --

    If you get that exception, it's a proper error that shouldn't fall back to this code path.
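
For reference, here is a minimal sketch of one way to narrow that handling, assuming the intent is that only a missing createFile(Path) (i.e. Hadoop 2.x, where the builder API doesn't exist) triggers the fallback, while a missing replicate()/build() on the returned builder propagates as a real error. The object name and the plain fs.create(path) fallback are assumptions for illustration (the elided fallback body isn't shown in the diff); this is not the code merged in the PR.

    import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path}

    object NonECFileSketch {
      // Sketch: only the absence of createFile(Path) (pre-Hadoop-3 releases)
      // leads to the fallback; later reflection failures surface to the caller.
      def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = {
        val createFileMethod =
          try {
            Some(fs.getClass().getMethod("createFile", classOf[Path]))
          } catch {
            // Builder API not present at all, so there is nothing to override.
            case _: NoSuchMethodException => None
          }
        createFileMethod match {
          case Some(m) =>
            val builder = m.invoke(fs, path)
            val builderCls = builder.getClass()
            // If these lookups fail, let the exception propagate instead of
            // silently writing a file that might still be erasure coded.
            val replicateMethod = builderCls.getMethod("replicate")
            val buildMethod = builderCls.getMethod("build")
            buildMethod.invoke(replicateMethod.invoke(builder))
              .asInstanceOf[FSDataOutputStream]
          case None =>
            // Assumed fallback: an ordinary create() on older Hadoop versions.
            fs.create(path)
        }
      }
    }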