This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new a7e4318 [SPARK-33089][SQL] make avro format propagate Hadoop config from DS options to underlying HDFS file system a7e4318 is described below commit a7e43185715549f14decc018f7a58e2119c99aae Author: Yuning Zhang <yuning.zh...@databricks.com> AuthorDate: Thu Oct 8 12:18:06 2020 +0900 [SPARK-33089][SQL] make avro format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? In `AvroUtils`'s `inferSchema()`, propagate Hadoop config from DS options to underlying HDFS file system. ### Why are the changes needed? There is a bug: when running ```scala spark.read.format("avro").options(conf).load(path) ``` the underlying file system does not receive the `conf` options. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? A unit test was added. Closes #29971 from yuningzh-db/avro_options. Authored-by: Yuning Zhang <yuning.zh...@databricks.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit bbc887bf73233b8c65ace05929290c0de4f63de8) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 9ff89f6..a9f34bb 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -42,7 +42,7 @@ private[sql] object AvroUtils extends Logging { spark: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { - val conf = spark.sessionState.newHadoopConf() + val conf = 
spark.sessionState.newHadoopConfWithOptions(options) val parsedOptions = new AvroOptions(options, conf) if (parsedOptions.parameters.contains(ignoreExtensionKey)) { diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index e2ae489..d2f49ae 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1799,6 +1799,16 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { assert(version === SPARK_VERSION_SHORT) } } + + test("SPARK-33089: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + val conf = Map("ds_option" -> "value") + val path = "file:" + testAvro.stripPrefix("file:") + spark.read.format("avro").options(conf).load(path) + } + } } class AvroV1Suite extends AvroSuite { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org