Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/22881#discussion_r229102457 --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala --- @@ -471,4 +473,42 @@ object SparkHadoopUtil { hadoopConf.set(key.substring("spark.hadoop.".length), value) } } + + + lazy val builderReflection: Option[(Class[_], Method, Method)] = Try { + val cls = Utils.classForName( + "org.apache.hadoop.hdfs.DistributedFileSystem$HdfsDataOutputStreamBuilder") + (cls, cls.getMethod("replicate"), cls.getMethod("build")) + }.toOption + + // scalastyle:off line.size.limit + /** + * Create a path that uses replication instead of erasure coding, regardless of the default + * configuration in hdfs for the given path. This can be helpful as hdfs ec doesn't support + * hflush(), hsync(), or append() + * https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html#Limitations + */ + // scalastyle:on line.size.limit + def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = { + try { + // Use reflection as this uses APIs only available in Hadoop 3 + val builderMethod = fs.getClass().getMethod("createFile", classOf[Path]) + val builder = builderMethod.invoke(fs, path) + builderReflection match { --- End diff -- Not sure `builderReflection` is helping here. Using `builder.getClass().getMethod("replicate")` would achieve the same, no? And keep all related code here. BTW it's not optimal to have to poke into `DistributedFileSystem` for this. That's marked as "limited private" and "unstable" in the hadoop libs. But there doesn't seem to be an alternative...
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org