Repository: carbondata Updated Branches: refs/heads/branch-1.3 1997ca235 -> b51d8186a
[CARBONDATA-2199] Fixed Dimension column after restructure getting wrong block datatype Problem: Changing the datatype of a measure column that is part of sort_columns triggers a restructure. After the restructure, the block datatype is set to the column's current (changed) datatype instead of the datatype of the block, so reading the block data with the changed datatype throws an incorrect-length exception. Solution: Store the block's datatype in DimensionInfo while restructuring, and use that stored datatype when setting the block data type. This closes #1993 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b51d8186 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b51d8186 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b51d8186 Branch: refs/heads/branch-1.3 Commit: b51d8186a82818672067dfd0387af6ff505f940c Parents: 1997ca2 Author: Jatin <jatin.de...@knoldus.in> Authored: Fri Feb 23 16:56:17 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Wed Feb 28 09:47:17 2018 +0530 ---------------------------------------------------------------------- .../DictionaryBasedVectorResultCollector.java | 2 +- .../executor/impl/AbstractQueryExecutor.java | 2 +- .../core/scan/executor/infos/DimensionInfo.java | 6 ++ .../scan/executor/util/RestructureUtil.java | 10 ++- .../scan/executor/util/RestructureUtilTest.java | 7 +- .../DBLocationCarbonTableTestCase.scala | 82 ++++++++++++++++++++ .../vectorreader/ChangeDataTypeTestCases.scala | 8 ++ 7 files changed, 113 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java 
b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java index 5e6c99a..e9d6740 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java @@ -183,7 +183,7 @@ public class DictionaryBasedVectorResultCollector extends AbstractScannedResultC allColumnInfo[i].vector = columnarBatch.columnVectors[i]; if (null != allColumnInfo[i].dimension) { allColumnInfo[i].vector - .setBlockDataType(allColumnInfo[i].dimension.getDimension().getDataType()); + .setBlockDataType(dimensionInfo.dataType[i]); } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index cc2e513..69f5ceb 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -303,7 +303,7 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { List<QueryDimension> currentBlockQueryDimensions = RestructureUtil .createDimensionInfoAndGetCurrentBlockQueryDimension(blockExecutionInfo, queryModel.getQueryDimension(), tableBlockDimensions, - segmentProperties.getComplexDimensions()); + segmentProperties.getComplexDimensions(), queryModel.getQueryMeasures().size()); blockExecutionInfo.setBlockId( CarbonUtil.getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, segmentId)); blockExecutionInfo.setDeleteDeltaFilePath(deleteDeltaFiles); 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java index dd0c549..b41de82 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java @@ -17,6 +17,8 @@ package org.apache.carbondata.core.scan.executor.infos; +import org.apache.carbondata.core.metadata.datatype.DataType; + /** * This method will information about the query dimensions whether they exist in particular block * and their default value @@ -54,6 +56,10 @@ public class DimensionInfo { * count of no dictionary columns not existing in the current block */ private int newNoDictionaryColumnCount; + /** + * maintains the block datatype + */ + public DataType[] dataType; /** * @param dimensionExists http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java index 6500dd7..a029986 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java @@ -63,13 +63,15 @@ public class RestructureUtil { */ public static List<QueryDimension> createDimensionInfoAndGetCurrentBlockQueryDimension( BlockExecutionInfo blockExecutionInfo, List<QueryDimension> queryDimensions, - 
List<CarbonDimension> tableBlockDimensions, List<CarbonDimension> tableComplexDimension) { + List<CarbonDimension> tableBlockDimensions, List<CarbonDimension> tableComplexDimension, + int measureCount) { List<QueryDimension> presentDimension = new ArrayList<QueryDimension>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); boolean[] isDimensionExists = new boolean[queryDimensions.size()]; Object[] defaultValues = new Object[queryDimensions.size()]; // create dimension information instance DimensionInfo dimensionInfo = new DimensionInfo(isDimensionExists, defaultValues); + dimensionInfo.dataType = new DataType[queryDimensions.size() + measureCount]; int newDictionaryColumnCount = 0; int newNoDictionaryColumnCount = 0; // selecting only those dimension which is present in the query @@ -78,6 +80,8 @@ public class RestructureUtil { if (queryDimension.getDimension().hasEncoding(Encoding.IMPLICIT)) { presentDimension.add(queryDimension); isDimensionExists[dimIndex] = true; + dimensionInfo.dataType[queryDimension.getQueryOrder()] = + queryDimension.getDimension().getDataType(); } else { for (CarbonDimension tableDimension : tableBlockDimensions) { if (tableDimension.getColumnId().equals(queryDimension.getDimension().getColumnId())) { @@ -92,6 +96,8 @@ public class RestructureUtil { currentBlockDimension.setQueryOrder(queryDimension.getQueryOrder()); presentDimension.add(currentBlockDimension); isDimensionExists[dimIndex] = true; + dimensionInfo.dataType[currentBlockDimension.getQueryOrder()] = + currentBlockDimension.getDimension().getDataType(); break; } } @@ -109,6 +115,8 @@ public class RestructureUtil { currentBlockDimension.setQueryOrder(queryDimension.getQueryOrder()); presentDimension.add(currentBlockDimension); isDimensionExists[dimIndex] = true; + dimensionInfo.dataType[currentBlockDimension.getQueryOrder()] = + currentBlockDimension.getDimension().getDataType(); break; } } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java b/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java index 172a53e..90fcb74 100644 --- a/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java +++ b/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java @@ -85,6 +85,11 @@ public class RestructureUtilTest { queryDimension2.setDimension(tableComplexDimension2); QueryDimension queryDimension3 = new QueryDimension("Address"); queryDimension3.setDimension(new CarbonDimension(columnSchema5, 3, 3, 3, 3)); + QueryMeasure queryMeasure1 = new QueryMeasure("Age"); + QueryMeasure queryMeasure2 = new QueryMeasure("Salary"); + queryMeasure1.setMeasure(new CarbonMeasure(columnSchema3, 2)); + queryMeasure2.setMeasure(new CarbonMeasure(columnSchema4, 4)); + List<QueryMeasure> queryMeasures = Arrays.asList(queryMeasure1, queryMeasure2); List<QueryDimension> queryDimensions = Arrays.asList(queryDimension1, queryDimension2, queryDimension3); @@ -92,7 +97,7 @@ public class RestructureUtilTest { List<QueryDimension> result = null; result = RestructureUtil .createDimensionInfoAndGetCurrentBlockQueryDimension(blockExecutionInfo, queryDimensions, - tableBlockDimensions, tableComplexDimensions); + tableBlockDimensions, tableComplexDimensions, queryMeasures.size()); List<CarbonDimension> resultDimension = new ArrayList<>(result.size()); for (QueryDimension queryDimension : result) { resultDimension.add(queryDimension.getDimension()); 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dblocation/DBLocationCarbonTableTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dblocation/DBLocationCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dblocation/DBLocationCarbonTableTestCase.scala index eb26276..e0e7d63 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dblocation/DBLocationCarbonTableTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dblocation/DBLocationCarbonTableTestCase.scala @@ -175,6 +175,88 @@ class DBLocationCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table carbontable") } + test("Alter table change dataType with sort column after adding measure column test"){ + sql("drop database if exists carbon cascade") + sql(s"create database carbon location '$dblocation'") + sql("use carbon") + sql( + """create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) + |STORED BY 'org.apache.carbondata.format' + |TBLPROPERTIES('SORT_COLUMNS' = 'c2') + |""".stripMargin) + sql("insert into carbontable select 'a',1,'aa','aaa'") + sql("insert into carbontable select 'b',1,'bb','bbb'") + sql("Alter table carbontable add columns (c6 int)") + sql("Alter table carbontable change c2 c2 bigint") + checkAnswer( + sql("""select c1,c2,c3,c5 from carbon.carbontable"""), + Seq(Row("a",1,"aa","aaa"), Row("b",1,"bb","bbb")) + ) + sql("drop table carbontable") + } + + test("Alter table change dataType with sort column after adding date datatype with default value test"){ + sql("drop database if exists carbon cascade") + sql(s"create database carbon location '$dblocation'") + sql("use carbon") + sql( 
+ """create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) + |STORED BY 'org.apache.carbondata.format' + |TBLPROPERTIES('SORT_COLUMNS' = 'c2') + |""".stripMargin) + sql("insert into carbontable select 'a',1,'aa','aaa'") + sql("insert into carbontable select 'b',1,'bb','bbb'") + sql("Alter table carbontable add columns (dateData date) TBLPROPERTIES('DEFAULT.VALUE.dateData' = '1999-01-01')") + sql("Alter table carbontable change c2 c2 bigint") + checkAnswer( + sql("""select c1,c2,c3,c5 from carbon.carbontable"""), + Seq(Row("a",1,"aa","aaa"), Row("b",1,"bb","bbb")) + ) + sql("drop table carbontable") + } + + test("Alter table change dataType with sort column after adding dimension column with default value test"){ + sql("drop database if exists carbon cascade") + sql(s"create database carbon location '$dblocation'") + sql("use carbon") + sql( + """create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) + |STORED BY 'org.apache.carbondata.format' + |TBLPROPERTIES('SORT_COLUMNS' = 'c2') + |""".stripMargin) + sql("insert into carbontable select 'a',1,'aa','aaa'") + sql("insert into carbontable select 'b',1,'bb','bbb'") + sql("Alter table carbontable add columns (name String) TBLPROPERTIES('DEFAULT.VALUE.name' = 'hello')") + sql("Alter table carbontable change c2 c2 bigint") + checkAnswer( + sql("""select c1,c2,c3,c5,name from carbon.carbontable"""), + Seq(Row("a",1,"aa","aaa","hello"), Row("b",1,"bb","bbb","hello")) + ) + sql("drop table carbontable") + } + + test("Alter table change dataType with sort column after rename test"){ + sql("drop database if exists carbon cascade") + sql(s"create database carbon location '$dblocation'") + sql("use carbon") + sql( + """create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) + |STORED BY 'org.apache.carbondata.format' + |TBLPROPERTIES('SORT_COLUMNS' = 'c2') + |""".stripMargin) + sql("insert into carbontable select 'a',1,'aa','aaa'") + sql("insert into carbontable select 
'b',1,'bb','bbb'") + sql("Alter table carbontable add columns (name String) TBLPROPERTIES('DEFAULT.VALUE.name' = 'hello')") + sql("Alter table carbontable rename to carbontable1") + sql("Alter table carbontable1 change c2 c2 bigint") + checkAnswer( + sql("""select c1,c2,c3,c5,name from carbon.carbontable1"""), + Seq(Row("a",1,"aa","aaa","hello"), Row("b",1,"bb","bbb","hello")) + ) + sql("drop table if exists carbontable") + sql("drop table if exists carbontable1") + } + test("Alter table drop column test") { sql("drop database if exists carbon cascade") sql(s"create database carbon location '$dblocation'") http://git-wip-us.apache.org/repos/asf/carbondata/blob/b51d8186/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala index f92d613..04740de 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala @@ -167,6 +167,8 @@ class ChangeDataTypeTestCases extends Spark2QueryTest with BeforeAndAfterAll { } test("test data type change for dictionary exclude INT type column") { + def test_change_data_type() = { + beforeAll sql("drop table if exists table_sort") sql("CREATE TABLE table_sort (imei int,age int,mac string) STORED BY 'carbondata' TBLPROPERTIES('DICTIONARY_EXCLUDE'='imei,age','SORT_COLUMNS'='imei,age')") sql("insert into table_sort select 32674,32794,'MAC1'") @@ -179,6 +181,12 @@ class ChangeDataTypeTestCases extends Spark2QueryTest with BeforeAndAfterAll { } finally { 
sqlContext.setConf("carbon.enable.vector.reader", "true") } + afterAll + } + sqlContext.setConf("carbon.enable.vector.reader", "true") + test_change_data_type() + sqlContext.setConf("carbon.enable.vector.reader", "false") + test_change_data_type() } override def afterAll {