This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f378b506bf1 [SPARK-45470][SQL] Avoid paste string value of hive orc compression kind f378b506bf1 is described below commit f378b506bf1fc116e5dc4786d786e50d4a56574a Author: Jiaan Geng <belie...@163.com> AuthorDate: Mon Oct 9 23:04:15 2023 -0700 [SPARK-45470][SQL] Avoid paste string value of hive orc compression kind ### What changes were proposed in this pull request? Currently, Hive supports the ORC format with several compression codecs (please refer to [ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java)). Spark pasted many string literals of these compression codecs. This makes it easy to introduce mistakes and reduces development efficiency. ### Why are the changes needed? Avoid pasting string values of the Hive ORC compression kinds. ### Does this PR introduce _any_ user-facing change? 'No'. It just updates the inner implementation. ### How was this patch tested? Existing test cases. ### Was this patch authored or co-authored using generative AI tooling? 'No'. Closes #43296 from beliefer/SPARK-45470. 
Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../org/apache/spark/sql/hive/CompressionCodecSuite.scala | 14 ++++++++++---- .../spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala | 5 +++-- .../org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala | 4 +++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala index 6669fbdfbde..a5d11f6e0e1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala @@ -23,6 +23,7 @@ import java.util.Locale import scala.jdk.CollectionConverters._ import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.io.orc.CompressionKind import org.apache.orc.OrcConf.COMPRESS import org.apache.parquet.hadoop.ParquetOutputFormat import org.scalatest.BeforeAndAfterAll @@ -291,8 +292,10 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo tableCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"), sessionCompressCodecs = List("SNAPPY", "GZIP", "SNAPPY")) checkForTableWithCompressProp("orc", - tableCompressCodecs = List("NONE", "SNAPPY", "ZLIB"), - sessionCompressCodecs = List("SNAPPY", "ZLIB", "SNAPPY")) + tableCompressCodecs = + List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name, CompressionKind.ZLIB.name), + sessionCompressCodecs = + List(CompressionKind.SNAPPY.name, CompressionKind.ZLIB.name, CompressionKind.SNAPPY.name)) } test("table-level compression is not set but session-level compressions is set ") { @@ -301,7 +304,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo sessionCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP")) checkForTableWithCompressProp("orc", tableCompressCodecs = List.empty, - sessionCompressCodecs = 
List("NONE", "SNAPPY", "ZLIB")) + sessionCompressCodecs = + List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name, CompressionKind.ZLIB.name)) } def checkTableWriteWithCompressionCodecs(format: String, compressCodecs: List[String]): Unit = { @@ -336,6 +340,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo test("test table containing mixed compression codec") { checkTableWriteWithCompressionCodecs("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP")) - checkTableWriteWithCompressionCodecs("orc", List("NONE", "SNAPPY", "ZLIB")) + checkTableWriteWithCompressionCodecs( + "orc", + List(CompressionKind.NONE.name, CompressionKind.SNAPPY.name, CompressionKind.ZLIB.name)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala index 3b82a6c458c..e9b6bd28823 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.orc import java.io.File import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.io.orc.CompressionKind import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.catalog.CatalogUtils @@ -98,7 +99,7 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { val orcFilePath = maybeOrcFile.get.toPath.toString val expectedCompressionKind = OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + assert(CompressionKind.ZLIB.name() === expectedCompressionKind.name()) val copyDf = spark .read @@ -113,7 +114,7 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { .orc(file.getCanonicalPath) val expectedCompressionKind = OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("SNAPPY" === 
expectedCompressionKind.name()) + assert(CompressionKind.SNAPPY.name() === expectedCompressionKind.name()) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index 9ee9ebc2282..43bcee5348a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -21,6 +21,8 @@ import java.io.File import scala.util.Random +import org.apache.hadoop.hive.ql.io.orc.CompressionKind + import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{DataFrame, SparkSession} @@ -46,7 +48,7 @@ object OrcReadBenchmark extends SqlBasedBenchmark { override def getSparkSession: SparkSession = { val conf = new SparkConf() - conf.set("orc.compression", "snappy") + conf.set("orc.compression", CompressionKind.SNAPPY.name()) val sparkSession = SparkSession.builder() .master("local[1]") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org