[GitHub] spark pull request #16626: [SPARK-19261][SQL] Alter add columns for Hive ser...

xwu0226 Mon, 20 Mar 2017 17:44:59 -0700

Github user xwu0226 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16626#discussion_r107051308
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
---
    @@ -2178,4 +2178,138 @@ abstract class DDLSuite extends QueryTest with 
SQLTestUtils {
           }
         }
       }
    +
    +  val supportedNativeFileFormatsForAlterTableAddColumns = Seq("parquet", 
"json", "csv")
    +
    +  supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
    +    test(s"alter datasource table add columns - $provider") {
    +      withTable("t1") {
    +        sql(s"CREATE TABLE t1 (c1 int) USING $provider")
    +        sql("INSERT INTO t1 VALUES (1)")
    +        sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
    +        checkAnswer(
    +          spark.table("t1"),
    +          Seq(Row(1, null))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 is null"),
    +          Seq(Row(1, null))
    +        )
    +
    +        sql("INSERT INTO t1 VALUES (3, 2)")
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 = 2"),
    +          Seq(Row(3, 2))
    +        )
    +      }
    +    }
    +  }
    +
    +  supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
    +    test(s"alter datasource table add columns - partitioned - $provider") {
    +      withTable("t1") {
    +        sql(s"CREATE TABLE t1 (c1 int, c2 int) USING $provider PARTITIONED 
BY (c2)")
    +        sql("INSERT INTO t1 PARTITION(c2 = 2) VALUES (1)")
    +        sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
    +        checkAnswer(
    +          spark.table("t1"),
    +          Seq(Row(1, null, 2))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c3 is null"),
    +          Seq(Row(1, null, 2))
    +        )
    +        sql("INSERT INTO t1 PARTITION(c2 =1) VALUES (2, 3)")
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c3 = 3"),
    +          Seq(Row(2, 3, 1))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 = 1"),
    +          Seq(Row(2, 3, 1))
    +        )
    +      }
    +    }
    +  }
    +
    +  test("alter datasource table add columns - text format not supported") {
    +    withTable("t1") {
    +      sql("CREATE TABLE t1 (c1 int) USING text")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
    +      }.getMessage
    +      assert(e.contains("ALTER ADD COLUMNS does not support datasource 
table with type"))
    +    }
    +  }
    +
    +  test("alter table add columns -- not support temp view") {
    +    withTempView("tmp_v") {
    +      sql("CREATE TEMPORARY VIEW tmp_v AS SELECT 1 AS c1, 2 AS c2")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE tmp_v ADD COLUMNS (c3 INT)")
    +      }
    +      assert(e.message.contains("ALTER ADD COLUMNS does not support 
views"))
    +    }
    +  }
    +
    +  test("alter table add columns -- not support view") {
    +    withView("v1") {
    +      sql("CREATE VIEW v1 AS SELECT 1 AS c1, 2 AS c2")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE v1 ADD COLUMNS (c3 INT)")
    +      }
    +      assert(e.message.contains("ALTER ADD COLUMNS does not support 
views"))
    +    }
    +  }
    +
    +  test("alter table add columns with existing column name") {
    +    withTable("t1") {
    +      sql("CREATE TABLE t1 (c1 int) USING PARQUET")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE t1 ADD COLUMNS (c1 string)")
    +      }.getMessage
    +      assert(e.contains("Found duplicate column(s)"))
    +    }
    +  }
    +
    +  test("alter table add columns to table referenced by a view") {
    +    withTable("t1") {
    +      withView("v1") {
    +        sql("CREATE TABLE t1 (c1 int, c2 int) USING PARQUET")
    +        sql("CREATE VIEW v1 AS SELECT * FROM t1")
    +        val originViewSchema = sql("SELECT * FROM v1").schema
    +        sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
    +        assert(sql("SELECT * FROM v1").schema == originViewSchema)
    +      }
    +    }
    +  }
    +
    +  Seq("true", "false").foreach { caseSensitive =>
    +    test(s"alter table add columns with existing column name - 
caseSensitive $caseSensitive") {
    +      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) {
    +        withTable("t1") {
    +          sql("CREATE TABLE t1 (c1 int) USING PARQUET")
    +          if (caseSensitive == "false") {
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE t1 ADD COLUMNS (C1 string)")
    +            }.getMessage
    +            assert(e.contains("Found duplicate column(s)"))
    +          } else {
    +            if (isUsingHiveMetastore) {
    +              // hive catalog will still complains that c1 is duplicate 
column name because hive
    +              // identifiers are case insensitive.
    --- End diff --
    
    It was the `hive.ql` error when I passed a column name that differs only in 
case from an existing column name. The error was thrown from the Hive client, 
complaining that it is a duplicate name, even though Spark SQL thinks it is a 
different name. Can the schema stored in table properties influence how Hive 
sees the case sensitivity of the column name?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #16626: [SPARK-19261][SQL] Alter add columns for Hive ser...

Reply via email to