This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-1.9 in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.9 by this push: new 7480d8563 ORC-1699: Fix SparkBenchmark in Parquet format according to SPARK-40918 7480d8563 is described below commit 7480d8563c9d5b0caf5e98bda301f3b847316c1f Author: sychen <syc...@ctrip.com> AuthorDate: Wed Apr 24 08:48:50 2024 -0700 ORC-1699: Fix SparkBenchmark in Parquet format according to SPARK-40918 ### What changes were proposed in this pull request? This PR aims to fix SparkBenchmark in Parquet format according to SPARK-40918. ### Why are the changes needed? As with [ORC-1578](https://issues.apache.org/jira/browse/ORC-1578), SparkBenchmark hits the same problem when reading Parquet format files. ```java java.lang.IllegalArgumentException: OPTION_RETURNING_BATCH should always be set for ParquetFileFormat. To workaround this issue, set spark.sql.parquet.enableVectorizedReader=false. at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$1(ParquetFileFormat.scala:192) at scala.collection.immutable.Map$EmptyMap$.getOrElse(Map.scala:110) at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.buildReaderWithPartitionValues(ParquetFileFormat.scala:191) at org.apache.orc.bench.spark.SparkBenchmark.pushDown(SparkBenchmark.java:314) at org.apache.orc.bench.spark.jmh_generated.SparkBenchmark_pushDown_jmhTest.pushDown_avgt_jmhStub(SparkBenchmark_pushDown_jmhTest.java:219) ``` ### How was this patch tested? local test ### Was this patch authored or co-authored using generative AI tooling? No Closes #1908 from cxzl25/ORC-1699. 
Authored-by: sychen <syc...@ctrip.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit fe4eee0af3ca205deb76cd792b64a202c565ea2b) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../src/java/org/apache/orc/bench/spark/SparkBenchmark.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java index 1285875dc..90eceee98 100644 --- a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java +++ b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java @@ -195,6 +195,9 @@ public class SparkBenchmark implements OrcBenchmark { case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; } @@ -228,6 +231,9 @@ public class SparkBenchmark implements OrcBenchmark { case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; } @@ -303,6 +309,9 @@ public class SparkBenchmark implements OrcBenchmark { case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; }