[CARBONDATA-2606][Complex DataType Enhancements] Fix null result when the projection contains a null primitive column together with a struct column
Problem: Since PR#2489, when a struct column is present in the projection, all null values in a collected row are moved to the end of the row without considering the data type of the column, so a primitive column whose actual value is null is also displaced. Solution: Move a null to the end of the output row only if it belongs to a child column of a complex type; a null in a primitive column stays in its original position. This closes #2559 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/766f5ea5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/766f5ea5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/766f5ea5 Branch: refs/heads/branch-1.4 Commit: 766f5ea527bcfcc186fb4030dd2baa33fc61c46b Parents: 2d00114 Author: ajantha-bhat <ajanthab...@gmail.com> Authored: Wed Jul 25 19:21:02 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu Aug 9 23:38:51 2018 +0530 ---------------------------------------------------------------------- .../impl/DictionaryBasedResultCollector.java | 57 +++++++++++++++----- .../complexType/TestComplexDataType.scala | 8 +++ 2 files changed, 53 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/766f5ea5/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java index 3184d80..1faf2fd 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionary import 
org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.complextypes.StructQueryType; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; import org.apache.carbondata.core.scan.filter.GenericQueryType; import org.apache.carbondata.core.scan.model.ProjectionDimension; @@ -112,11 +113,27 @@ public class DictionaryBasedResultCollector extends AbstractScannedResultCollect */ @Override public List<Object[]> collectResultInRow(BlockletScannedResult scannedResult, int batchSize) { - // scan the record and add to list List<Object[]> listBasedResult = new ArrayList<>(batchSize); int rowCounter = 0; - + boolean isStructQueryType = false; + for (Object obj : scannedResult.complexParentIndexToQueryMap.values()) { + if (obj instanceof StructQueryType) { + //if any one of the map elements contains struct,need to shift rows if contains null. 
+ isStructQueryType = true; + break; + } + } + boolean[] isComplexChildColumn = null; + if (isStructQueryType) { + // need to identify complex child columns for shifting rows if contains null + isComplexChildColumn = new boolean[queryDimensions.length + queryMeasures.length]; + for (ProjectionDimension dimension : queryDimensions) { + if (null != dimension.getDimension().getComplexParentDimension()) { + isComplexChildColumn[dimension.getOrdinal()] = true; + } + } + } while (scannedResult.hasNext() && rowCounter < batchSize) { Object[] row = new Object[queryDimensions.length + queryMeasures.length]; if (isDimensionExists) { @@ -140,16 +157,8 @@ public class DictionaryBasedResultCollector extends AbstractScannedResultCollect continue; } fillMeasureData(scannedResult, row); - if (scannedResult.complexParentIndexToQueryMap.toString().contains("StructQueryType")) { - // If a : <b,c> and d : <e,f> are two struct and if a.b,a.c,d.e is given in the - // projection list,then object array will contain a,null,d as result, because for a.b, - // a will be filled and for a.c null will be placed. - // Instead place null in the end of object array and send a,d,null as result. 
- int count = 0; - for (int j = 0; j < row.length; j++) { - if (row[j] != null) row[count++] = row[j]; - } - while (count < row.length) row[count++] = null; + if (isStructQueryType) { + shiftNullForStruct(row, isComplexChildColumn); } listBasedResult.add(row); rowCounter++; @@ -157,6 +166,30 @@ public class DictionaryBasedResultCollector extends AbstractScannedResultCollect return listBasedResult; } + /** + * shift the complex column null to the end + * + * @param row + * @param isComplexChildColumn + */ + private void shiftNullForStruct(Object[] row, boolean[] isComplexChildColumn) { + int count = 0; + // If a : <b,c> and d : <e,f> are two struct and if a.b,a.c,d.e is given in the + // projection list,then object array will contain a,null,d as result, because for a.b, + // a will be filled and for a.c null will be placed. + // Instead place null in the end of object array and send a,d,null as result. + for (int j = 0; j < row.length; j++) { + if (null == row[j] && !isComplexChildColumn[j]) { + // if it is a primitive column, don't shift the null to the end. 
+ row[count++] = null; + } else if (null != row[j]) { + row[count++] = row[j]; + } + } + // fill the skipped content + while (count < row.length) row[count++] = null; + } + private void fillComplexColumnDataBufferForThisRow() { mergedComplexDimensionDataMap.clear(); int noDictionaryComplexColumnIndex = 0; http://git-wip-us.apache.org/repos/asf/carbondata/blob/766f5ea5/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala index 2b3cfc0..1451f7b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala @@ -1009,4 +1009,12 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll { .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) } + test("test null values in primitive data type and select all data types including complex data type") { + sql("DROP TABLE IF EXISTS table1") + sql( + "create table table1 (id int, name string, structField struct<intval:int, stringval:string>) stored by 'carbondata'") + sql("insert into table1 values(null,'aaa','23$bb')") + checkAnswer(sql("select * from table1"),Seq(Row(null,"aaa", Row(23,"bb")))) + } + }