Fixed NullPointerException for DictionaryBasedVectorResultCollector during alter table
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/22be3450 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/22be3450 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/22be3450 Branch: refs/heads/12-dev Commit: 22be34503df33016da4820bf91fb403fa975ff1c Parents: 413cb93 Author: kunal642 <kunal.kap...@knoldus.in> Authored: Mon Apr 3 22:54:18 2017 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu Apr 6 16:44:38 2017 +0530 ---------------------------------------------------------------------- .../RestructureBasedRawResultCollector.java | 13 +++-- .../RestructureBasedVectorResultCollector.java | 7 ++- .../src/test/resources/restructure/data1.csv | 2 +- .../vectorreader/AddColumnTestCases.scala | 61 ++++++++++++++++++++ 4 files changed, 75 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/22be3450/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java index fef5cc9..aa5802d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java @@ -183,9 +183,13 @@ public class RestructureBasedRawResultCollector extends RawBasedResultCollector */ private byte[] fillDictionaryKeyArrayWithLatestSchema(byte[] dictionaryKeyArray) { QueryDimension[] actualQueryDimensions = 
tableBlockExecutionInfos.getActualQueryDimensions(); - long[] keyArray = updatedCurrentBlockKeyGenerator.getKeyArray(dictionaryKeyArray); - long[] keyArrayWithNewAddedColumns = - new long[keyArray.length + dimensionInfo.getNewDictionaryColumnCount()]; + int newKeyArrayLength = dimensionInfo.getNewDictionaryColumnCount(); + long[] keyArray = null; + if (null != updatedCurrentBlockKeyGenerator) { + keyArray = updatedCurrentBlockKeyGenerator.getKeyArray(dictionaryKeyArray); + newKeyArrayLength += keyArray.length; + } + long[] keyArrayWithNewAddedColumns = new long[newKeyArrayLength]; int existingColumnKeyArrayIndex = 0; int newKeyArrayIndex = 0; for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) { @@ -228,7 +232,8 @@ public class RestructureBasedRawResultCollector extends RawBasedResultCollector int existingColumnValueIndex = 0; int newKeyArrayIndex = 0; for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) { - if (!actualQueryDimensions[i].getDimension().hasEncoding(Encoding.DICTIONARY)) { + if (!actualQueryDimensions[i].getDimension().hasEncoding(Encoding.DICTIONARY) + && !actualQueryDimensions[i].getDimension().hasEncoding(Encoding.IMPLICIT)) { // if dimension exists then add the byte array value else add the default value if (dimensionInfo.getDimensionExists()[i]) { noDictionaryKeyArrayWithNewlyAddedColumns[newKeyArrayIndex++] = http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/22be3450/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java index dd84e6f..99b7f01 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java @@ -107,19 +107,20 @@ public class RestructureBasedVectorResultCollector extends DictionaryBasedVector private void fillDataForNonExistingDimensions() { for (int i = 0; i < tableBlockExecutionInfos.getActualQueryDimensions().length; i++) { if (!dimensionInfo.getDimensionExists()[i]) { + int queryOrder = tableBlockExecutionInfos.getActualQueryDimensions()[i].getQueryOrder(); CarbonDimension dimension = tableBlockExecutionInfos.getActualQueryDimensions()[i].getDimension(); if (dimension.hasEncoding(Encoding.DIRECT_DICTIONARY)) { // fill direct dictionary column data - fillDirectDictionaryData(allColumnInfo[i].vector, allColumnInfo[i], + fillDirectDictionaryData(allColumnInfo[queryOrder].vector, allColumnInfo[queryOrder], dimensionInfo.getDefaultValues()[i]); } else if (dimension.hasEncoding(Encoding.DICTIONARY)) { // fill dictionary column data - fillDictionaryData(allColumnInfo[i].vector, allColumnInfo[i], + fillDictionaryData(allColumnInfo[queryOrder].vector, allColumnInfo[queryOrder], dimensionInfo.getDefaultValues()[i]); } else { // fill no dictionary data - fillNoDictionaryData(allColumnInfo[i].vector, allColumnInfo[i], + fillNoDictionaryData(allColumnInfo[queryOrder].vector, allColumnInfo[queryOrder], dimension.getDefaultValue()); } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/22be3450/integration/spark-common-test/src/test/resources/restructure/data1.csv ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/restructure/data1.csv b/integration/spark-common-test/src/test/resources/restructure/data1.csv index f5ee3dd..eef3a6b 100644 --- a/integration/spark-common-test/src/test/resources/restructure/data1.csv +++ 
b/integration/spark-common-test/src/test/resources/restructure/data1.csv @@ -1 +1 @@ -100,spark,abc,2015-04-23 12:01:01,21.23 +100,spark,abc,23-04-2015,21.23 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/22be3450/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala index 13003c7..36c9d95 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala @@ -1,6 +1,7 @@ package org.apache.spark.carbondata.restructure.vectorreader import java.math.{BigDecimal, RoundingMode} +import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.common.util.QueryTest @@ -106,6 +107,66 @@ class AddColumnTestCases extends QueryTest with BeforeAndAfterAll { sql("DROP TABLE IF EXISTS carbon_table") } + test("test to check if select * works for new added column") { + sql("DROP TABLE IF EXISTS carbon_new") + sql( + "CREATE TABLE carbon_new(intField int,stringField string,charField string,timestampField " + + "timestamp,decimalField decimal(6,2))STORED BY 'carbondata' TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='charField')") + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data1.csv' INTO TABLE carbon_new " + + s"options('FILEHEADER'='intField,stringField,charField,timestampField,decimalField')") + sql( + "Alter table carbon_new add columns(newField string) TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='newField','DEFAULT.VALUE.newField'='def')") + checkAnswer(sql("select * from carbon_new limit 1"), + 
Row(new Integer(100), + "spark", + "abc", + Timestamp.valueOf("2015-04-23 00:00:00.0"), + new BigDecimal(21.23).setScale(2, RoundingMode.HALF_UP), + "def")) + sql("drop table carbon_new") + } + + test("test to check data if all columns are provided in select") { + sql("DROP TABLE IF EXISTS carbon_new") + sql( + "CREATE TABLE carbon_new(intField int,stringField string,charField string,timestampField " + + "timestamp,decimalField decimal(6,2))STORED BY 'carbondata' TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='charField')") + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data1.csv' INTO TABLE carbon_new " + + s"options('FILEHEADER'='intField,stringField,charField,timestampField,decimalField')") + sql( + "Alter table carbon_new add columns(newField string) TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='newField')") + assert(sql( + "select intField,stringField,charField,timestampField,decimalField, newField from " + + "carbon_new limit 1").count().equals(1L)) + sql("drop table carbon_new") + } + + test("test to check data if new column query order is different from schema order") { + sql("DROP TABLE IF EXISTS carbon_new") + sql( + "CREATE TABLE carbon_new(intField int,stringField string,charField string,timestampField " + + "timestamp,decimalField decimal(6,2))STORED BY 'carbondata' TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='charField')") + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data1.csv' INTO TABLE carbon_new " + + s"options('FILEHEADER'='intField,stringField,charField,timestampField,decimalField')") + sql( + "Alter table carbon_new add columns(newField string) TBLPROPERTIES" + + "('DICTIONARY_EXCLUDE'='newField','DEFAULT.VALUE.newField'='def')") + checkAnswer(sql( + "select intField,stringField,charField,newField,timestampField,decimalField from " + + "carbon_new limit 1"), Row(new Integer(100), + "spark", + "abc", + "def", + Timestamp.valueOf("2015-04-23 00:00:00.0"), + new BigDecimal(21.23).setScale(2, RoundingMode.HALF_UP))) + sql("drop 
table carbon_new") + } + override def afterAll { sql("DROP TABLE IF EXISTS addcolumntest") sql("drop table if exists hivetable")