Repository: carbondata
Updated Branches:
  refs/heads/master 43285bbd1 -> 45960f4a8


[CARBONDATA-2682][32K] fix create table with long_string_columns properties bugs

This PR fixes create table with long_string_columns bugs which are:
1.create table with columns both in long_string_columns and partition or 
no_inverted_index property should be blocked.
2.create table with duplicate columns in long_string_column property should be 
blocked.

This closes #2506


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/45960f4a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/45960f4a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/45960f4a

Branch: refs/heads/master
Commit: 45960f4a8d45c99d917ea9aec3579dc3b2345af4
Parents: 43285bb
Author: Sssan520 <liangap2...@aliyun.com>
Authored: Thu Jul 12 19:58:23 2018 +0800
Committer: xuchuanyin <xuchuan...@hust.edu.cn>
Committed: Mon Jul 23 14:53:27 2018 +0800

----------------------------------------------------------------------
 .../VarcharDataTypesBasicTestCase.scala         | 66 +++++++++++++--
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 88 +++++++++++++++++---
 2 files changed, 137 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/45960f4a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index cb7cd81..4b8187f 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.test.util.QueryTest
 import org.apache.spark.sql.types.{IntegerType, StringType, StructField, 
StructType}
 import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
 
+import 
org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.core.metadata.datatype.DataTypes
@@ -79,7 +80,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
   }
 
   test("long string columns cannot be dictionary include") {
-    val exceptionCaught = intercept[Exception] {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
       sql(
         s"""
            | CREATE TABLE if not exists $longStringTable(
@@ -93,7 +94,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
   }
 
   test("long string columns cannot be dictionay exclude") {
-    val exceptionCaught = intercept[Exception] {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
       sql(
         s"""
            | CREATE TABLE if not exists $longStringTable(
@@ -107,7 +108,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
   }
 
   test("long string columns cannot be sort_columns") {
-    val exceptionCaught = intercept[Exception] {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
       sql(
         s"""
            | CREATE TABLE if not exists $longStringTable(
@@ -121,20 +122,73 @@ class VarcharDataTypesBasicTestCase extends QueryTest 
with BeforeAndAfterEach wi
   }
 
   test("long string columns can only be string columns") {
-    val exceptionCaught = intercept[Exception] {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
       sql(
         s"""
            | CREATE TABLE if not exists $longStringTable(
            | id INT, name STRING, description STRING, address STRING, note 
STRING
            | ) STORED BY 'carbondata'
            | TBLPROPERTIES('LONG_STRING_COLUMNS'='id, note')
-           |""".
-          stripMargin)
+           |""".stripMargin)
     }
     assert(exceptionCaught.getMessage.contains("long_string_columns: id"))
     assert(exceptionCaught.getMessage.contains("its data type is not string"))
   }
 
+  test("long string columns cannot contain duplicate columns") {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, name STRING, description STRING, address STRING, note 
STRING
+           | ) STORED BY 'carbondata'
+           | TBLPROPERTIES('LONG_STRING_COLUMNS'='address, note, Note')
+           |""".stripMargin)
+    }
+    assert(exceptionCaught.getMessage.contains("long_string_columns"))
+    assert(exceptionCaught.getMessage.contains("Duplicate columns are not 
allowed"))
+  }
+
+  test("long_string_columns: column does not exist in table ") {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, name STRING, description STRING, address STRING, note 
STRING
+           | ) STORED BY 'carbondata'
+           | TBLPROPERTIES('LONG_STRING_COLUMNS'='address, note, NoteS')
+           |""".stripMargin)
+    }
+    assert(exceptionCaught.getMessage.contains("long_string_columns"))
+    assert(exceptionCaught.getMessage.contains("does not exist in table"))
+  }
+
+  test("long_string_columns: columns cannot exist in patitions columns") {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, name STRING, description STRING, address STRING
+           | ) partitioned by (note string) STORED BY 'carbondata'
+           | TBLPROPERTIES('LONG_STRING_COLUMNS'='note')
+           |""".stripMargin)
+    }
+    assert(exceptionCaught.getMessage.contains("both in partition and 
long_string_columns"))
+  }
+
+  test("long_string_columns: columns cannot exist in no_inverted_index 
columns") {
+    val exceptionCaught = intercept[MalformedCarbonCommandException] {
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, name STRING, description STRING, address STRING, note 
STRING
+           | ) STORED BY 'carbondata'
+           | TBLPROPERTIES('no_inverted_index'='note', 
'LONG_STRING_COLUMNS'='address, note')
+           |""".stripMargin)
+    }
+    assert(exceptionCaught.getMessage.contains("both in no_inverted_index and 
long_string_columns"))
+  }
+
   private def prepareTable(): Unit = {
     sql(
       s"""

http://git-wip-us.apache.org/repos/asf/carbondata/blob/45960f4a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
 
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 97fe51a..6f8ab63 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -384,6 +384,25 @@ abstract class CarbonDDLSqlParser extends 
AbstractCarbonSparkSQLParser {
         tableProperties.get(CarbonCommonConstants.CACHE_LEVEL).get,
         tableProperties)
     }
+    // long_string_columns columns cannot be in no_inverted_index columns
+    var longStringColumns = varcharColumns.map(_.toUpperCase)
+    var noInvColIntersecLongStrCols = longStringColumns
+      .intersect(noInvertedIdxCols.map(_.toUpperCase))
+    if (!noInvColIntersecLongStrCols.isEmpty) {
+      throw new MalformedCarbonCommandException(
+        s"Column(s): ${
+          noInvColIntersecLongStrCols.mkString(",")
+        } both in no_inverted_index and long_string_columns which is not 
allowed.")
+    }
+    // long_string_columns columns cannot be in partition columns
+    var partitionColIntersecLongStrCols = longStringColumns
+      .intersect(partitionCols.map(col => col.partitionColumn.toUpperCase))
+    if (!partitionColIntersecLongStrCols.isEmpty) {
+      throw new MalformedCarbonCommandException(
+        s"Column(s): ${
+          partitionColIntersecLongStrCols.mkString(",")
+        } both in partition and long_string_columns which is not allowed.")
+    }
     // validate the tableBlockSize from table properties
     CommonUtil.validateTableBlockSize(tableProperties)
     // validate table level properties for compaction
@@ -480,6 +499,63 @@ abstract class CarbonDDLSqlParser extends 
AbstractCarbonSparkSQLParser {
   }
 
   /**
+   * This method validates the long string columns, will check:
+   * 1.the column in tblproperty long_string_columns must be in table fields.
+   * 2.the column datatype in tblproperty long_string_columns should be string.
+   * 3.the columns in tblproperty long_string_columns cannot be duplicate
+   *
+   * @param fields table fields
+   * @param varcharCols the columns in tblproperty long_string_columns
+   * @return
+   */
+  private def validateLongStringColumns(fields: Seq[Field],
+      varcharCols: Seq[String]): Unit = {
+    var longStringColumnsMap: Map[String, Field] = Map[String, Field]()
+    fields.foreach(field =>
+      longStringColumnsMap.put(field.column.toUpperCase, field)
+    )
+    var dataTypeErr: Set[String] = Set[String]()
+    var duplicateColumnErr: Map[String, Int] = Map[String, Int]()
+    var nullColumnErr: Set[String] = Set[String]()
+    var tmpStr: String = ""
+    varcharCols.foreach {
+      column =>
+        tmpStr = column.toUpperCase
+        duplicateColumnErr.get(tmpStr) match {
+          case None => duplicateColumnErr.put(tmpStr, 1)
+          case Some(count) => duplicateColumnErr.put(tmpStr, count + 1)
+        }
+        longStringColumnsMap.get(tmpStr) match {
+          case None => nullColumnErr += column
+          case Some(field) => if 
(!DataTypes.STRING.getName.equalsIgnoreCase(field.dataType.get)) {
+            dataTypeErr += column
+          }
+        }
+    }
+    if (!nullColumnErr.isEmpty) {
+      val errMsg = s"long_string_columns: ${
+        nullColumnErr.mkString(",")
+      } does not exist in table. Please check create table statement."
+      throw new MalformedCarbonCommandException(errMsg)
+    }
+
+    var duplicateColumns = duplicateColumnErr.filter(kv => kv._2 != 1).keySet
+    if (!duplicateColumns.isEmpty) {
+      val errMsg = s"Column ambiguity as duplicate column(s):${
+        duplicateColumns.mkString(",")
+      } is present in long_string_columns. Duplicate columns are not allowed."
+      throw new MalformedCarbonCommandException(errMsg)
+    }
+
+    if (!dataTypeErr.isEmpty) {
+      val errMsg = s"long_string_columns: ${
+        dataTypeErr.mkString(",")
+      } ,its data type is not string. Please check create table statement."
+      throw new MalformedCarbonCommandException(errMsg)
+    }
+  }
+
+  /**
    * @param partitionCols
    * @param tableProperties
    */
@@ -652,17 +728,7 @@ abstract class CarbonDDLSqlParser extends 
AbstractCarbonSparkSQLParser {
     if 
(tableProperties.get(CarbonCommonConstants.LONG_STRING_COLUMNS).isDefined) {
       varcharCols =
         
tableProperties(CarbonCommonConstants.LONG_STRING_COLUMNS).split(",").map(_.trim)
-      varcharCols.foreach { varcharCol =>
-        val exists = fields.exists(f => f.column.equalsIgnoreCase(varcharCol) 
&&
-                                        
DataTypes.STRING.getName.equalsIgnoreCase(f.dataType.get))
-        if (!exists) {
-          throw new MalformedCarbonCommandException(
-            s"""
-               |${CarbonCommonConstants.LONG_STRING_COLUMNS}: $varcharCol does 
not exist in table
-               | or its data type is not string. Please check create table 
statement.
-             """.stripMargin)
-        }
-      }
+      validateLongStringColumns(fields, varcharCols)
     }
 
     // All columns in sortkey should be there in create table cols

Reply via email to