Repository: spark
Updated Branches:
  refs/heads/master c95200048 -> 5414abca4


[SPARK-23553][TESTS] Tests should not assume the default value of `spark.sql.sources.default`

## What changes were proposed in this pull request?

Currently, some tests assume that `spark.sql.sources.default=parquet`. That 
assumption is correct today, but it makes it difficult to test new data source 
formats.

This PR aims to
- Make the test suites more robust and make it easy to test new data sources in 
the future.
- Test the new native ORC data source with the full existing Apache Spark test 
coverage.

As an example, this PR uses `spark.sql.sources.default=orc` during review. The 
value should be set back to `parquet` when this PR is accepted.
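
As a minimal sketch of the pattern applied throughout these suites (assuming the standard Spark SQL test scaffolding such as `SQLTestUtils`; the table name `tbl` and the surrounding test body are illustrative, not taken from a specific test), a test reads the configured default provider instead of hard-coding `parquet`, and pins a concrete format only when it genuinely depends on it:

```scala
import org.apache.spark.sql.internal.SQLConf

// Before: the test silently assumed the default data source.
//   sql("CREATE TABLE tbl(i INT, j STRING) USING parquet")

// After: use whatever `spark.sql.sources.default` is set to.
val provider = spark.sessionState.conf.defaultDataSourceName
sql(s"CREATE TABLE tbl(i INT, j STRING) USING $provider")

// A test that really depends on a specific format (e.g. parquet statistics)
// pins it explicitly for the duration of the test only.
withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "parquet") {
  // format-specific assertions go here
}
```

With this pattern, switching `spark.sql.sources.default` to a new data source (such as the native ORC reader) exercises the existing test coverage without further test changes.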

## How was this patch tested?

Pass Jenkins with the updated tests.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #20705 from dongjoon-hyun/SPARK-23553.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5414abca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5414abca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5414abca

Branch: refs/heads/master
Commit: 5414abca4fec6a68174c34d22d071c20027e959d
Parents: c952000
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Fri Mar 16 09:36:30 2018 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Fri Mar 16 09:36:30 2018 -0700

----------------------------------------------------------------------
 python/pyspark/sql/readwriter.py                |  4 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala    |  9 +--
 .../columnar/InMemoryColumnarQuerySuite.scala   |  5 +-
 .../spark/sql/execution/command/DDLSuite.scala  | 11 ++-
 .../ParquetPartitionDiscoverySuite.scala        | 10 +++
 .../sql/test/DataFrameReaderWriterSuite.scala   |  3 +-
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 72 +++++++++-----------
 .../PartitionProviderCompatibilitySuite.scala   |  6 +-
 .../hive/PartitionedTablePerfStatsSuite.scala   |  2 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 11 +--
 .../sql/hive/execution/SQLQuerySuite.scala      | 19 ++----
 11 files changed, 81 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 803f561..facc16b 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -147,8 +147,8 @@ class DataFrameReader(OptionUtils):
                        or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
         :param options: all other string options
 
-        >>> df = spark.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
-        ...     opt2=1, opt3='str')
+        >>> df = spark.read.format("parquet").load('python/test_support/sql/parquet_partitioned',
+        ...     opt1=True, opt2=1, opt3='str')
         >>> df.dtypes
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 8f14575..640affc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2150,7 +2150,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
 
   test("data source table created in InMemoryCatalog should be able to 
read/write") {
     withTable("tbl") {
-      sql("CREATE TABLE tbl(i INT, j STRING) USING parquet")
+      val provider = spark.sessionState.conf.defaultDataSourceName
+      sql(s"CREATE TABLE tbl(i INT, j STRING) USING $provider")
       checkAnswer(sql("SELECT i, j FROM tbl"), Nil)
 
       Seq(1 -> "a", 2 -> "b").toDF("i", 
"j").write.mode("overwrite").insertInto("tbl")
@@ -2474,9 +2475,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
 
   test("SPARK-16975: Column-partition path starting '_' should be handled 
correctly") {
     withTempDir { dir =>
-      val parquetDir = new File(dir, "parquet").getCanonicalPath
-      spark.range(10).withColumn("_col", $"id").write.partitionBy("_col").save(parquetDir)
-      spark.read.parquet(parquetDir)
+      val dataDir = new File(dir, "data").getCanonicalPath
+      spark.range(10).withColumn("_col", $"id").write.partitionBy("_col").save(dataDir)
+      spark.read.load(dataDir)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
index dc1766f..26b63e8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
@@ -487,7 +487,10 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-22673: InMemoryRelation should utilize existing stats of the 
plan to be cached") {
-    withSQLConf("spark.sql.cbo.enabled" -> "true") {
+    // This test case depends on the size of parquet in statistics.
+    withSQLConf(
+        SQLConf.CBO_ENABLED.key -> "true",
+        SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "parquet") {
       withTempPath { workDir =>
         withTable("table1") {
           val workDirPath = workDir.getAbsolutePath

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 4041176..4df8fbf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -154,10 +154,15 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo
       Seq(4 -> "d").toDF("i", "j").write.saveAsTable("t1")
 
       val e = intercept[AnalysisException] {
-        Seq(5 -> "e").toDF("i", 
"j").write.mode("append").format("json").saveAsTable("t1")
+        val format = if (spark.sessionState.conf.defaultDataSourceName.equalsIgnoreCase("json")) {
+          "orc"
+        } else {
+          "json"
+        }
+        Seq(5 -> "e").toDF("i", 
"j").write.mode("append").format(format).saveAsTable("t1")
       }
-      assert(e.message.contains("The format of the existing table default.t1 is " +
-        "`ParquetFileFormat`. It doesn't match the specified format `JsonFileFormat`."))
+      assert(e.message.contains("The format of the existing table default.t1 is "))
+      assert(e.message.contains("It doesn't match the specified format"))
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index edb3da9..e887c97 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -57,6 +57,16 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
   val timeZone = TimeZone.getDefault()
   val timeZoneId = timeZone.getID
 
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set(SQLConf.DEFAULT_DATA_SOURCE_NAME.key, "parquet")
+  }
+
+  protected override def afterAll(): Unit = {
+    spark.conf.unset(SQLConf.DEFAULT_DATA_SOURCE_NAME.key)
+    super.afterAll()
+  }
+
   test("column type inference") {
     def check(raw: String, literal: Literal, timeZone: TimeZone = timeZone): Unit = {
       assert(inferPartitionColumnValue(raw, true, timeZone) === literal)

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index a707a88..14b1feb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -562,7 +562,8 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
     "and a same-name temp view exist") {
     withTable("same_name") {
       withTempView("same_name") {
-        sql("CREATE TABLE same_name(id LONG) USING parquet")
+        val format = spark.sessionState.conf.defaultDataSourceName
+        sql(s"CREATE TABLE same_name(id LONG) USING $format")
         spark.range(10).createTempView("same_name")
         spark.range(20).write.mode(SaveMode.Append).saveAsTable("same_name")
         checkAnswer(spark.table("same_name"), spark.range(10).toDF())

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 859099a..d93215f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -591,7 +591,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
   }
 
   test("Pre insert nullability check (ArrayType)") {
-    withTable("arrayInParquet") {
+    withTable("array") {
       {
         val df = (Tuple1(Seq(Int.box(1), null: Integer)) :: Nil).toDF("a")
         val expectedSchema =
@@ -604,9 +604,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         assert(df.schema === expectedSchema)
 
         df.write
-          .format("parquet")
           .mode(SaveMode.Overwrite)
-          .saveAsTable("arrayInParquet")
+          .saveAsTable("array")
       }
 
       {
@@ -621,25 +620,24 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         assert(df.schema === expectedSchema)
 
         df.write
-          .format("parquet")
           .mode(SaveMode.Append)
-          .insertInto("arrayInParquet")
+          .insertInto("array")
       }
 
       (Tuple1(Seq(4, 5)) :: Nil).toDF("a")
         .write
         .mode(SaveMode.Append)
-        .saveAsTable("arrayInParquet") // This one internally calls 
df2.insertInto.
+        .saveAsTable("array") // This one internally calls df2.insertInto.
 
       (Tuple1(Seq(Int.box(6), null: Integer)) :: Nil).toDF("a")
         .write
         .mode(SaveMode.Append)
-        .saveAsTable("arrayInParquet")
+        .saveAsTable("array")
 
-      sparkSession.catalog.refreshTable("arrayInParquet")
+      sparkSession.catalog.refreshTable("array")
 
       checkAnswer(
-        sql("SELECT a FROM arrayInParquet"),
+        sql("SELECT a FROM array"),
         Row(ArrayBuffer(1, null)) ::
           Row(ArrayBuffer(2, 3)) ::
           Row(ArrayBuffer(4, 5)) ::
@@ -648,7 +646,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
   }
 
   test("Pre insert nullability check (MapType)") {
-    withTable("mapInParquet") {
+    withTable("map") {
       {
         val df = (Tuple1(Map(1 -> (null: Integer))) :: Nil).toDF("a")
         val expectedSchema =
@@ -661,9 +659,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         assert(df.schema === expectedSchema)
 
         df.write
-          .format("parquet")
           .mode(SaveMode.Overwrite)
-          .saveAsTable("mapInParquet")
+          .saveAsTable("map")
       }
 
       {
@@ -678,27 +675,24 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         assert(df.schema === expectedSchema)
 
         df.write
-          .format("parquet")
           .mode(SaveMode.Append)
-          .insertInto("mapInParquet")
+          .insertInto("map")
       }
 
       (Tuple1(Map(4 -> 5)) :: Nil).toDF("a")
         .write
-        .format("parquet")
         .mode(SaveMode.Append)
-        .saveAsTable("mapInParquet") // This one internally calls 
df2.insertInto.
+        .saveAsTable("map") // This one internally calls df2.insertInto.
 
       (Tuple1(Map(6 -> null.asInstanceOf[Integer])) :: Nil).toDF("a")
         .write
-        .format("parquet")
         .mode(SaveMode.Append)
-        .saveAsTable("mapInParquet")
+        .saveAsTable("map")
 
-      sparkSession.catalog.refreshTable("mapInParquet")
+      sparkSession.catalog.refreshTable("map")
 
       checkAnswer(
-        sql("SELECT a FROM mapInParquet"),
+        sql("SELECT a FROM map"),
         Row(Map(1 -> null)) ::
           Row(Map(2 -> 3)) ::
           Row(Map(4 -> 5)) ::
@@ -852,52 +846,52 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       (from to to).map(i => i -> s"str$i").toDF("c1", "c2")
     }
 
-    withTable("insertParquet") {
-      createDF(0, 9).write.format("parquet").saveAsTable("insertParquet")
+    withTable("t") {
+      createDF(0, 9).write.saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"),
+        sql("SELECT p.c1, p.c2 FROM t p WHERE p.c1 > 5"),
         (6 to 9).map(i => Row(i, s"str$i")))
 
       intercept[AnalysisException] {
-        createDF(10, 19).write.format("parquet").saveAsTable("insertParquet")
+        createDF(10, 19).write.saveAsTable("t")
       }
 
-      createDF(10, 19).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet")
+      createDF(10, 19).write.mode(SaveMode.Append).saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"),
+        sql("SELECT p.c1, p.c2 FROM t p WHERE p.c1 > 5"),
         (6 to 19).map(i => Row(i, s"str$i")))
 
-      createDF(20, 29).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet")
+      createDF(20, 29).write.mode(SaveMode.Append).saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 
25"),
+        sql("SELECT p.c1, c2 FROM t p WHERE p.c1 > 5 AND p.c1 < 25"),
         (6 to 24).map(i => Row(i, s"str$i")))
 
       intercept[AnalysisException] {
-        createDF(30, 39).write.saveAsTable("insertParquet")
+        createDF(30, 39).write.saveAsTable("t")
       }
 
-      createDF(30, 39).write.mode(SaveMode.Append).saveAsTable("insertParquet")
+      createDF(30, 39).write.mode(SaveMode.Append).saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 
35"),
+        sql("SELECT p.c1, c2 FROM t p WHERE p.c1 > 5 AND p.c1 < 35"),
         (6 to 34).map(i => Row(i, s"str$i")))
 
-      createDF(40, 49).write.mode(SaveMode.Append).insertInto("insertParquet")
+      createDF(40, 49).write.mode(SaveMode.Append).insertInto("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 
45"),
+        sql("SELECT p.c1, c2 FROM t p WHERE p.c1 > 5 AND p.c1 < 45"),
         (6 to 44).map(i => Row(i, s"str$i")))
 
-      createDF(50, 59).write.mode(SaveMode.Overwrite).saveAsTable("insertParquet")
+      createDF(50, 59).write.mode(SaveMode.Overwrite).saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 51 AND p.c1 < 
55"),
+        sql("SELECT p.c1, c2 FROM t p WHERE p.c1 > 51 AND p.c1 < 55"),
         (52 to 54).map(i => Row(i, s"str$i")))
-      createDF(60, 69).write.mode(SaveMode.Ignore).saveAsTable("insertParquet")
+      createDF(60, 69).write.mode(SaveMode.Ignore).saveAsTable("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p"),
+        sql("SELECT p.c1, c2 FROM t p"),
         (50 to 59).map(i => Row(i, s"str$i")))
 
-      createDF(70, 79).write.mode(SaveMode.Overwrite).insertInto("insertParquet")
+      createDF(70, 79).write.mode(SaveMode.Overwrite).insertInto("t")
       checkAnswer(
-        sql("SELECT p.c1, c2 FROM insertParquet p"),
+        sql("SELECT p.c1, c2 FROM t p"),
         (70 to 79).map(i => Row(i, s"str$i")))
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
index 9440a17..80afc9d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
@@ -37,11 +37,11 @@ class PartitionProviderCompatibilitySuite
     spark.range(5).selectExpr("id as fieldOne", "id as partCol").write
       .partitionBy("partCol")
       .mode("overwrite")
-      .parquet(dir.getAbsolutePath)
+      .save(dir.getAbsolutePath)
 
     spark.sql(s"""
       |create table $tableName (fieldOne long, partCol int)
-      |using parquet
+      |using ${spark.sessionState.conf.defaultDataSourceName}
       |options (path "${dir.toURI}")
       |partitioned by (partCol)""".stripMargin)
   }
@@ -358,7 +358,7 @@ class PartitionProviderCompatibilitySuite
     try {
       spark.sql(s"""
         |create table test (id long, P1 int, P2 int)
-        |using parquet
+        |using ${spark.sessionState.conf.defaultDataSourceName}
         |options (path "${base.toURI}")
         |partitioned by (P1, P2)""".stripMargin)
       spark.sql(s"alter table test add partition (P1=0, P2=0) location 
'${a.toURI}'")

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
index 54d3962..1a86c60 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
@@ -417,7 +417,7 @@ class PartitionedTablePerfStatsSuite
       import spark.implicits._
       Seq(1).toDF("a").write.mode("overwrite").save(dir.getAbsolutePath)
       HiveCatalogMetrics.reset()
-      spark.read.parquet(dir.getAbsolutePath)
+      spark.read.load(dir.getAbsolutePath)
       assert(HiveCatalogMetrics.METRIC_FILES_DISCOVERED.getCount() == 1)
       assert(HiveCatalogMetrics.METRIC_FILE_CACHE_HITS.getCount() == 1)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 65be244..db76ec9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -1658,8 +1658,8 @@ class HiveDDLSuite
         Seq(5 -> "e").toDF("i", "j")
           .write.format("hive").mode("append").saveAsTable("t1")
       }
-      assert(e.message.contains("The format of the existing table default.t1 is " +
-        "`ParquetFileFormat`. It doesn't match the specified format `HiveFileFormat`."))
+      assert(e.message.contains("The format of the existing table default.t1 is "))
+      assert(e.message.contains("It doesn't match the specified format `HiveFileFormat`."))
     }
   }
 
@@ -1709,11 +1709,12 @@ class HiveDDLSuite
       spark.sessionState.catalog.getTableMetadata(TableIdentifier(tblName)).schema.map(_.name)
     }
 
+    val provider = spark.sessionState.conf.defaultDataSourceName
     withTable("t", "t1", "t2", "t3", "t4", "t5", "t6") {
-      sql("CREATE TABLE t(a int, b int, c int, d int) USING parquet 
PARTITIONED BY (d, b)")
+      sql(s"CREATE TABLE t(a int, b int, c int, d int) USING $provider 
PARTITIONED BY (d, b)")
       assert(getTableColumns("t") == Seq("a", "c", "d", "b"))
 
-      sql("CREATE TABLE t1 USING parquet PARTITIONED BY (d, b) AS SELECT 1 a, 
1 b, 1 c, 1 d")
+      sql(s"CREATE TABLE t1 USING $provider PARTITIONED BY (d, b) AS SELECT 1 
a, 1 b, 1 c, 1 d")
       assert(getTableColumns("t1") == Seq("a", "c", "d", "b"))
 
       Seq((1, 1, 1, 1)).toDF("a", "b", "c", "d").write.partitionBy("d", "b").saveAsTable("t2")
@@ -1723,7 +1724,7 @@ class HiveDDLSuite
         val dataPath = new File(new File(path, "d=1"), "b=1").getCanonicalPath
         Seq(1 -> 1).toDF("a", "c").write.save(dataPath)
 
-        sql(s"CREATE TABLE t3 USING parquet LOCATION '${path.toURI}'")
+        sql(s"CREATE TABLE t3 USING $provider LOCATION '${path.toURI}'")
         assert(getTableColumns("t3") == Seq("a", "c", "d", "b"))
       }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5414abca/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index baabc4a..73f83d5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -516,24 +516,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   test("CTAS with default fileformat") {
     val table = "ctas1"
    val ctas = s"CREATE TABLE IF NOT EXISTS $table SELECT key k, value FROM src"
-    withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
-      withSQLConf("hive.default.fileformat" -> "textfile") {
+    Seq("orc", "parquet").foreach { dataSourceFormat =>
+      withSQLConf(
+        SQLConf.CONVERT_CTAS.key -> "true",
+        SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> dataSourceFormat,
+        "hive.default.fileformat" -> "textfile") {
         withTable(table) {
           sql(ctas)
-          // We should use parquet here as that is the default datasource fileformat. The default
-          // datasource file format is controlled by `spark.sql.sources.default` configuration.
+          // The default datasource file format is controlled by `spark.sql.sources.default`.
           // This testcase verifies that setting `hive.default.fileformat` has no impact on
           // the target table's fileformat in case of CTAS.
-          assert(sessionState.conf.defaultDataSourceName === "parquet")
-          checkRelation(tableName = table, isDataSourceTable = true, format = "parquet")
+          checkRelation(tableName = table, isDataSourceTable = true, format = dataSourceFormat)
         }
       }
-      withSQLConf("spark.sql.sources.default" -> "orc") {
-        withTable(table) {
-          sql(ctas)
-          checkRelation(tableName = table, isDataSourceTable = true, format = "orc")
-         }
-      }
     }
   }
 

