[CARBONDATA-1077] ColumnDict and ALL_DICTIONARY_PATH must be used with SINGLE_PASS='true'
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fcb20924 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fcb20924 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fcb20924 Branch: refs/heads/branch-1.1 Commit: fcb20924fab8086e224439a9bb1e5be8af44b26b Parents: 5b66732 Author: mohammadshahidkhan <mohdshahidkhan1...@gmail.com> Authored: Mon May 22 18:34:14 2017 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu Jun 15 13:26:29 2017 +0530 ---------------------------------------------------------------------- .../dataload/TestLoadDataUseAllDictionary.scala | 2 +- .../predefdic/TestPreDefDictionary.scala | 44 +++++++++++++++++++- .../execution/command/carbonTableSchema.scala | 7 +++- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- .../execution/command/carbonTableSchema.scala | 7 +++- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- 6 files changed, 58 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala index 22cf8f7..d6deb89 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala @@ -38,7 +38,7 @@ class TestLoadDataUseAllDictionary extends QueryTest with BeforeAndAfterAll{ sql(s""" LOAD DATA LOCAL INPATH '$resourcesPath/source_without_header.csv' into table t3 options('FILEHEADER'='id,date,country,name,phonetype,serialname,salary', - 'All_DICTIONARY_PATH'='$resourcesPath/dict.txt') + 'All_DICTIONARY_PATH'='$resourcesPath/dict.txt','single_pass'='true') """) assert(false) } catch { http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/predefdic/TestPreDefDictionary.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/predefdic/TestPreDefDictionary.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/predefdic/TestPreDefDictionary.scala index 69af708..ca117c2 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/predefdic/TestPreDefDictionary.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/predefdic/TestPreDefDictionary.scala @@ -49,7 +49,7 @@ class TestPreDefDictionary extends QueryTest with BeforeAndAfterAll { STORED BY 'carbondata'""") sql( s"""LOAD DATA LOCAL INPATH '$testData' into table predefdictable - options('ALL_DICTIONARY_PATH'='$allDictFile')""") + options('ALL_DICTIONARY_PATH'='$allDictFile','single_pass'='true')""") checkAnswer( sql("select phonetype from predefdictable where phonetype='phone197'"), Seq(Row("phone197")) @@ -89,9 +89,51 @@ class TestPreDefDictionary extends QueryTest with BeforeAndAfterAll { Seq(Row("phone197")) ) } + + test("validation test columndict with single_pass= false.") { + val csvFilePath = s"$resourcesPath/nullvalueserialization.csv" + val testData = s"$resourcesPath/predefdic/data3.csv" + val csvHeader = "ID,phonetype" + val dicFilePath = s"$resourcesPath/predefdic/dicfilepath.csv" + sql( + """CREATE TABLE IF NOT EXISTS columndicValidationTable (ID Int, phonetype String) + STORED BY 'carbondata'""") + try { + sql( + s"""LOAD DATA LOCAL INPATH '$testData' into table columndicValidationTable + options('COLUMNDICT'='phonetype:$dicFilePath', 'SINGLE_PASS'='false')""") + } catch { + case x: Throwable => + val failMess: String = "Can not use all_dictionary_path or columndict without single_pass." + assert(failMess.equals(x.getMessage)) + } + } + + test("validation test ALL_DICTIONARY_PATH with single_pass= false.") { + val csvFilePath = s"$resourcesPath/nullvalueserialization.csv" + val testData = s"$resourcesPath/predefdic/data3.csv" + val csvHeader = "ID,phonetype" + val allDictFile = s"$resourcesPath/predefdic/allpredefdictionary.csv" + sql( + """CREATE TABLE IF NOT EXISTS predefdictableval (ID Int, phonetype String) + STORED BY 'carbondata'""") + try { + sql( + s"""LOAD DATA LOCAL INPATH '$testData' into table predefdictableval + options('ALL_DICTIONARY_PATH'='$allDictFile', 'SINGLE_PASS'='false')""") + } catch { + case x: Throwable => + val failMess: String = "Can not use all_dictionary_path or columndict without single_pass." + assert(failMess.equals(x.getMessage)) + } + } + override def afterAll { sql("DROP TABLE IF EXISTS predefdictable") sql("DROP TABLE IF EXISTS predefdictable1") sql("DROP TABLE IF EXISTS columndicTable") + sql("DROP TABLE IF EXISTS columndicValidationTable") + sql("DROP TABLE IF EXISTS predefdictableval") + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 7258511..1c1adc1 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -438,8 +438,11 @@ case class LoadTable( case "true" => true case "false" => - if (!StringUtils.isEmpty(allDictionaryPath)) { - true + // when single_pass = false and if either alldictionary + // or columnDict is configured the do not allow load + if (StringUtils.isNotEmpty(allDictionaryPath) || StringUtils.isNotEmpty(columnDict)) { + throw new MalformedCarbonCommandException( + "Can not use all_dictionary_path or columndict without single_pass.") } else { false } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 05b94ee..4505429 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -204,7 +204,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll try { sql(s""" LOAD DATA LOCAL INPATH "$complexFilePath1" INTO TABLE loadSqlTest - OPTIONS('FILEHEADER'='$header', 'COLUMNDICT'='$extColDictFilePath1') + OPTIONS('single_pass'='true','FILEHEADER'='$header', 'COLUMNDICT'='$extColDictFilePath1') """) } catch { case ex: Exception => @@ -234,7 +234,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll try { sql(s""" LOAD DATA LOCAL INPATH "$complexFilePath1" INTO TABLE loadSqlTest - OPTIONS('FILEHEADER'='$header', 'COLUMNDICT'='gamePointId:$filePath') + OPTIONS('single_pass'='true','FILEHEADER'='$header', 'COLUMNDICT'='gamePointId:$filePath') """) assert(false) } catch { http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 8818c6b..530c4cb 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -448,8 +448,11 @@ case class LoadTable( case "true" => true case "false" => - if (!StringUtils.isEmpty(allDictionaryPath)) { - true + // when single_pass = false and if either alldictionarypath + // or columnDict is configured the do not allow load + if (StringUtils.isNotEmpty(allDictionaryPath) || StringUtils.isNotEmpty(columnDict)) { + throw new MalformedCarbonCommandException( + "Can not use all_dictionary_path or columndict without single_pass.") } else { false } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fcb20924/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 10f99b7..1c16ea4 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -231,7 +231,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll sql( s""" LOAD DATA LOCAL INPATH "$complexFilePath1" INTO TABLE loadSqlTest - OPTIONS('FILEHEADER'='$header', 'COLUMNDICT'='$extColDictFilePath1') + OPTIONS('FILEHEADER'='$header', 'COLUMNDICT'='$extColDictFilePath1', 'single_pass'='true') """) } catch { case ex: Exception => @@ -264,7 +264,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll sql( s""" LOAD DATA LOCAL INPATH "$complexFilePath1" INTO TABLE loadSqlTest - OPTIONS('FILEHEADER'='$header', 'COLUMNDICT'='gamePointId:$filePath') + OPTIONS('single_pass'='true','FILEHEADER'='$header', 'COLUMNDICT'='gamePointId:$filePath') """) assert(false) } catch {