Repository: spark
Updated Branches:
  refs/heads/master 0e36ba621 -> 6b80ce4fb
[SPARK-19809][SQL][TEST][FOLLOWUP] Move the test case to HiveOrcQuerySuite ## What changes were proposed in this pull request? As a follow-up of #19948 , this PR moves the test case and adds comments. ## How was this patch tested? Pass the Jenkins. Author: Dongjoon Hyun <dongj...@apache.org> Closes #19960 from dongjoon-hyun/SPARK-19809-2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b80ce4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b80ce4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b80ce4f Branch: refs/heads/master Commit: 6b80ce4fb20da57c9513b94ab02b53a5fd7571d0 Parents: 0e36ba6 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Tue Dec 12 22:41:38 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Tue Dec 12 22:41:38 2017 -0800 ---------------------------------------------------------------------- .../sql/hive/execution/SQLQuerySuite.scala | 36 --------------- .../spark/sql/hive/orc/HiveOrcQuerySuite.scala | 48 +++++++++++++++++++- 2 files changed, 47 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/6b80ce4f/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 93c91d3..c11e37a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2153,40 +2153,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } - - test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { - 
Seq("native", "hive").foreach { orcImpl => - withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { - withTempPath { f => - val path = f.getCanonicalPath - Seq(1 -> 2).toDF("c1", "c2").write.orc(path) - checkAnswer(spark.read.orc(path), Row(1, 2)) - - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - withTable("t") { - sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") - checkAnswer(spark.table("t"), Row(2, 1)) - } - } - } - } - } - } - - test("SPARK-19809 NullPointerException on zero-size ORC file") { - Seq("native", "hive").foreach { orcImpl => - withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { - withTempPath { dir => - withTable("spark_19809") { - sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") - Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) - - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - checkAnswer(sql("SELECT * FROM spark_19809"), Seq.empty) - } - } - } - } - } - } } http://git-wip-us.apache.org/repos/asf/spark/blob/6b80ce4f/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 7244c36..92b2f06 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -17,7 +17,11 @@ package org.apache.spark.sql.hive.orc -import org.apache.spark.sql.AnalysisException +import java.io.File + +import com.google.common.io.Files + +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import 
org.apache.spark.sql.execution.datasources.orc.OrcQueryTest @@ -162,4 +166,46 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } } + + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. + test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { f => + val path = f.getCanonicalPath + Seq(1 -> 2).toDF("c1", "c2").write.orc(path) + checkAnswer(spark.read.orc(path), Row(1, 2)) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + withTable("t") { + sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") + checkAnswer(spark.table("t"), Row(2, 1)) + } + } + } + } + } + } + + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. + test("SPARK-19809 NullPointerException on zero-size ORC file") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { dir => + withTable("spark_19809") { + sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") + Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + checkAnswer(spark.table("spark_19809"), Seq.empty) + } + } + } + } + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org