Repository: carbondata Updated Branches: refs/heads/master 43285bbd1 -> 45960f4a8
[CARBONDATA-2682][32K] fix create table with long_string_columns properties bugs This PR fixes create table with long_string_columns bugs which are: 1.create table with columns both in long_string_columns and partition or no_inverted_index property should be blocked. 2.create table with duplicate columns in long_string_column property should be blocked. This closes #2506 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/45960f4a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/45960f4a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/45960f4a Branch: refs/heads/master Commit: 45960f4a8d45c99d917ea9aec3579dc3b2345af4 Parents: 43285bb Author: Sssan520 <liangap2...@aliyun.com> Authored: Thu Jul 12 19:58:23 2018 +0800 Committer: xuchuanyin <xuchuan...@hust.edu.cn> Committed: Mon Jul 23 14:53:27 2018 +0800 ---------------------------------------------------------------------- .../VarcharDataTypesBasicTestCase.scala | 66 +++++++++++++-- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 88 +++++++++++++++++--- 2 files changed, 137 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/45960f4a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala index cb7cd81..4b8187f 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.test.util.QueryTest import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} +import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.metadata.CarbonMetadata import org.apache.carbondata.core.metadata.datatype.DataTypes @@ -79,7 +80,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi } test("long string columns cannot be dictionary include") { - val exceptionCaught = intercept[Exception] { + val exceptionCaught = intercept[MalformedCarbonCommandException] { sql( s""" | CREATE TABLE if not exists $longStringTable( @@ -93,7 +94,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi } test("long string columns cannot be dictionay exclude") { - val exceptionCaught = intercept[Exception] { + val exceptionCaught = intercept[MalformedCarbonCommandException] { sql( s""" | CREATE TABLE if not exists $longStringTable( @@ -107,7 +108,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi } test("long string columns cannot be sort_columns") { - val exceptionCaught = intercept[Exception] { + val exceptionCaught = intercept[MalformedCarbonCommandException] { sql( s""" | CREATE TABLE if not exists $longStringTable( @@ -121,20 +122,73 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi } test("long string columns can only be string columns") { - val exceptionCaught = intercept[Exception] { + val exceptionCaught = intercept[MalformedCarbonCommandException] { sql( s""" | CREATE TABLE if not exists $longStringTable( | id INT, name STRING, description STRING, address STRING, note STRING | ) STORED BY 'carbondata' | TBLPROPERTIES('LONG_STRING_COLUMNS'='id, note') - |""". - stripMargin) + |""".stripMargin) } assert(exceptionCaught.getMessage.contains("long_string_columns: id")) assert(exceptionCaught.getMessage.contains("its data type is not string")) } + test("long string columns cannot contain duplicate columns") { + val exceptionCaught = intercept[MalformedCarbonCommandException] { + sql( + s""" + | CREATE TABLE if not exists $longStringTable( + | id INT, name STRING, description STRING, address STRING, note STRING + | ) STORED BY 'carbondata' + | TBLPROPERTIES('LONG_STRING_COLUMNS'='address, note, Note') + |""".stripMargin) + } + assert(exceptionCaught.getMessage.contains("long_string_columns")) + assert(exceptionCaught.getMessage.contains("Duplicate columns are not allowed")) + } + + test("long_string_columns: column does not exist in table ") { + val exceptionCaught = intercept[MalformedCarbonCommandException] { + sql( + s""" + | CREATE TABLE if not exists $longStringTable( + | id INT, name STRING, description STRING, address STRING, note STRING + | ) STORED BY 'carbondata' + | TBLPROPERTIES('LONG_STRING_COLUMNS'='address, note, NoteS') + |""".stripMargin) + } + assert(exceptionCaught.getMessage.contains("long_string_columns")) + assert(exceptionCaught.getMessage.contains("does not exist in table")) + } + + test("long_string_columns: columns cannot exist in patitions columns") { + val exceptionCaught = intercept[MalformedCarbonCommandException] { + sql( + s""" + | CREATE TABLE if not exists $longStringTable( + | id INT, name STRING, description STRING, address STRING + | ) partitioned by (note string) STORED BY 'carbondata' + | TBLPROPERTIES('LONG_STRING_COLUMNS'='note') + |""".stripMargin) + } + assert(exceptionCaught.getMessage.contains("both in partition and long_string_columns")) + } + + test("long_string_columns: columns cannot exist in no_inverted_index columns") { + val exceptionCaught = intercept[MalformedCarbonCommandException] { + sql( + s""" + | CREATE TABLE if not exists $longStringTable( + | id INT, name STRING, description STRING, address STRING, note STRING + | ) STORED BY 'carbondata' + | TBLPROPERTIES('no_inverted_index'='note', 'LONG_STRING_COLUMNS'='address, note') + |""".stripMargin) + } + assert(exceptionCaught.getMessage.contains("both in no_inverted_index and long_string_columns")) + } + private def prepareTable(): Unit = { sql( s""" http://git-wip-us.apache.org/repos/asf/carbondata/blob/45960f4a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala index 97fe51a..6f8ab63 100644 --- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala +++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala @@ -384,6 +384,25 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { tableProperties.get(CarbonCommonConstants.CACHE_LEVEL).get, tableProperties) } + // long_string_columns columns cannot be in no_inverted_index columns + var longStringColumns = varcharColumns.map(_.toUpperCase) + var noInvColIntersecLongStrCols = longStringColumns + .intersect(noInvertedIdxCols.map(_.toUpperCase)) + if (!noInvColIntersecLongStrCols.isEmpty) { + throw new MalformedCarbonCommandException( + s"Column(s): ${ + noInvColIntersecLongStrCols.mkString(",") + } both in no_inverted_index and long_string_columns which is not allowed.") + } + // long_string_columns columns cannot be in partition columns + var partitionColIntersecLongStrCols = longStringColumns + .intersect(partitionCols.map(col => col.partitionColumn.toUpperCase)) + if (!partitionColIntersecLongStrCols.isEmpty) { + throw new MalformedCarbonCommandException( + s"Column(s): ${ + partitionColIntersecLongStrCols.mkString(",") + } both in partition and long_string_columns which is not allowed.") + } // validate the tableBlockSize from table properties CommonUtil.validateTableBlockSize(tableProperties) // validate table level properties for compaction @@ -480,6 +499,63 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { } /** + * This method validates the long string columns, will check: + * 1.the column in tblproperty long_string_columns must be in table fields. + * 2.the column datatype in tblproperty long_string_columns should be string. + * 3.the columns in tblproperty long_string_columns cannot be duplicate + * + * @param fields table fields + * @param varcharCols the columns in tblproperty long_string_columns + * @return + */ + private def validateLongStringColumns(fields: Seq[Field], + varcharCols: Seq[String]): Unit = { + var longStringColumnsMap: Map[String, Field] = Map[String, Field]() + fields.foreach(field => + longStringColumnsMap.put(field.column.toUpperCase, field) + ) + var dataTypeErr: Set[String] = Set[String]() + var duplicateColumnErr: Map[String, Int] = Map[String, Int]() + var nullColumnErr: Set[String] = Set[String]() + var tmpStr: String = "" + varcharCols.foreach { + column => + tmpStr = column.toUpperCase + duplicateColumnErr.get(tmpStr) match { + case None => duplicateColumnErr.put(tmpStr, 1) + case Some(count) => duplicateColumnErr.put(tmpStr, count + 1) + } + longStringColumnsMap.get(tmpStr) match { + case None => nullColumnErr += column + case Some(field) => if (!DataTypes.STRING.getName.equalsIgnoreCase(field.dataType.get)) { + dataTypeErr += column + } + } + } + if (!nullColumnErr.isEmpty) { + val errMsg = s"long_string_columns: ${ + nullColumnErr.mkString(",") + } does not exist in table. Please check create table statement." + throw new MalformedCarbonCommandException(errMsg) + } + + var duplicateColumns = duplicateColumnErr.filter(kv => kv._2 != 1).keySet + if (!duplicateColumns.isEmpty) { + val errMsg = s"Column ambiguity as duplicate column(s):${ + duplicateColumns.mkString(",") + } is present in long_string_columns. Duplicate columns are not allowed." + throw new MalformedCarbonCommandException(errMsg) + } + + if (!dataTypeErr.isEmpty) { + val errMsg = s"long_string_columns: ${ + dataTypeErr.mkString(",") + } ,its data type is not string. Please check create table statement." + throw new MalformedCarbonCommandException(errMsg) + } + } + + /** * @param partitionCols * @param tableProperties */ @@ -652,17 +728,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { if (tableProperties.get(CarbonCommonConstants.LONG_STRING_COLUMNS).isDefined) { varcharCols = tableProperties(CarbonCommonConstants.LONG_STRING_COLUMNS).split(",").map(_.trim) - varcharCols.foreach { varcharCol => - val exists = fields.exists(f => f.column.equalsIgnoreCase(varcharCol) && - DataTypes.STRING.getName.equalsIgnoreCase(f.dataType.get)) - if (!exists) { - throw new MalformedCarbonCommandException( - s""" - |${CarbonCommonConstants.LONG_STRING_COLUMNS}: $varcharCol does not exist in table - | or its data type is not string. Please check create table statement. - """.stripMargin) - } - } + validateLongStringColumns(fields, varcharCols) } // All columns in sortkey should be there in create table cols