[GitHub] [carbondata] kunal642 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI

GitBox Tue, 01 Sep 2020 00:35:16 -0700


kunal642 commented on a change in pull request #3778:
URL: https://github.com/apache/carbondata/pull/3778#discussion_r480922354




##########
File path: 
integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java
##########
@@ -243,46 +258,115 @@ private void 
processResult(List<CarbonIterator<RowBatch>> detailQueryResultItera
   /**
    * This method will prepare the data from raw object that will take part in 
sorting
    */
-  private Object[] prepareRowObjectForSorting(Object[] row) {
+  private Object[] prepareRowObjectForSorting(Object[] row,
+      Map<Integer, GenericQueryType> complexDimensionInfoMap, int[] 
complexColumnParentBlockIndexes)
+      throws SecondaryIndexException {
     ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0];
-    // ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount];
+    byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray();
 
     List<CarbonDimension> dimensions = segmentProperties.getDimensions();
     Object[] preparedRow = new Object[dimensions.size() + measureCount];
+    Map<Integer, Object[]> complexDataMap = new HashMap<>();
 
     int noDictionaryIndex = 0;
     int dictionaryIndex = 0;
+    int complexIndex = 0;
     int i = 0;
     // loop excluding last dimension as last one is implicit column.
     for (; i < dimensions.size() - 1; i++) {
       CarbonDimension dims = dimensions.get(i);
-      if (dims.hasEncoding(Encoding.DICTIONARY)) {
+      boolean isComplexColumn = false;
+      // check if dimension is a complex data type
+      if (!complexDimensionInfoMap.isEmpty() && 
complexColumnParentBlockIndexes.length > 0) {
+        for (GenericQueryType queryType : complexDimensionInfoMap.values()) {
+          if (queryType.getName().equalsIgnoreCase(dims.getColName())) {
+            isComplexColumn = true;
+            break;
+          }
+        }
+      }
+      if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) {
         // dictionary
         preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++);
       } else {
-        // no dictionary dims
-        byte[] noDictionaryKeyByIndex = 
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
-        // no dictionary primitive columns are expected to be in original data 
while loading,
-        // so convert it to original data
-        if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
-          Object dataFromBytes = 
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(
-              noDictionaryKeyByIndex, dims.getDataType());
-          if (null != dataFromBytes && dims.getDataType() == 
DataTypes.TIMESTAMP) {
-            dataFromBytes = (long) dataFromBytes / 1000L;
+        if (isComplexColumn) {
+          byte[] complexKeyByIndex = 
wrapper.getComplexKeyByIndex(complexIndex);
+          ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex);
+          GenericQueryType genericQueryType =
+              
complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]);
+          int complexDataLength = byteArrayInput.getShort(2);
+          // In case, if array is empty
+          if (complexDataLength == 0) {
+            complexDataLength = complexDataLength + 1;
+          }
+          // get flattened array data
+          Object[] complexFlattenedData = new Object[complexDataLength];
+          Object[] data = 
genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput);
+          for (int index = 0; index < complexDataLength; index++) {
+            complexFlattenedData[index] =
+                getData(data, index, dims.getColumnSchema().getDataType());
           }
-          preparedRow[i] = dataFromBytes;
+          complexDataMap.put(i, complexFlattenedData);
         } else {
-          preparedRow[i] = noDictionaryKeyByIndex;
+          // no dictionary dims
+          byte[] noDictionaryKeyByIndex = 
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
+          // no dictionary primitive columns are expected to be in original 
data while loading,
+          // so convert it to original data
+          if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
+            Object dataFromBytes = DataTypeUtil
+                
.getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex,
+                    dims.getDataType());
+            if (null != dataFromBytes && dims.getDataType() == 
DataTypes.TIMESTAMP) {
+              dataFromBytes = (long) dataFromBytes / 1000L;
+            }
+            preparedRow[i] = dataFromBytes;
+          } else {
+            preparedRow[i] = noDictionaryKeyByIndex;
+          }
         }
       }
     }
 
     // at last add implicit column position reference(PID)
+    preparedRow[i] = implicitColumnByteArray;
 
-    preparedRow[i] = wrapper.getImplicitColumnByteArray();
+    // In case of complex array type, flatten the data and add for sorting
+    // TODO: Handle for nested array and other complex types

Review comment:
       Please mention in the documentation that nested complex types are not supported.

##########
File path: 
integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java
##########
@@ -243,46 +258,115 @@ private void 
processResult(List<CarbonIterator<RowBatch>> detailQueryResultItera
   /**
    * This method will prepare the data from raw object that will take part in 
sorting
    */
-  private Object[] prepareRowObjectForSorting(Object[] row) {
+  private Object[] prepareRowObjectForSorting(Object[] row,
+      Map<Integer, GenericQueryType> complexDimensionInfoMap, int[] 
complexColumnParentBlockIndexes)
+      throws SecondaryIndexException {
     ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0];
-    // ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount];
+    byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray();
 
     List<CarbonDimension> dimensions = segmentProperties.getDimensions();
     Object[] preparedRow = new Object[dimensions.size() + measureCount];
+    Map<Integer, Object[]> complexDataMap = new HashMap<>();
 
     int noDictionaryIndex = 0;
     int dictionaryIndex = 0;
+    int complexIndex = 0;
     int i = 0;
     // loop excluding last dimension as last one is implicit column.
     for (; i < dimensions.size() - 1; i++) {
       CarbonDimension dims = dimensions.get(i);
-      if (dims.hasEncoding(Encoding.DICTIONARY)) {
+      boolean isComplexColumn = false;
+      // check if dimension is a complex data type
+      if (!complexDimensionInfoMap.isEmpty() && 
complexColumnParentBlockIndexes.length > 0) {
+        for (GenericQueryType queryType : complexDimensionInfoMap.values()) {
+          if (queryType.getName().equalsIgnoreCase(dims.getColName())) {
+            isComplexColumn = true;
+            break;
+          }
+        }
+      }
+      if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) {
         // dictionary
         preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++);
       } else {
-        // no dictionary dims
-        byte[] noDictionaryKeyByIndex = 
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
-        // no dictionary primitive columns are expected to be in original data 
while loading,
-        // so convert it to original data
-        if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
-          Object dataFromBytes = 
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(
-              noDictionaryKeyByIndex, dims.getDataType());
-          if (null != dataFromBytes && dims.getDataType() == 
DataTypes.TIMESTAMP) {
-            dataFromBytes = (long) dataFromBytes / 1000L;
+        if (isComplexColumn) {
+          byte[] complexKeyByIndex = 
wrapper.getComplexKeyByIndex(complexIndex);
+          ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex);
+          GenericQueryType genericQueryType =
+              
complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]);
+          int complexDataLength = byteArrayInput.getShort(2);
+          // In case, if array is empty
+          if (complexDataLength == 0) {
+            complexDataLength = complexDataLength + 1;
+          }
+          // get flattened array data
+          Object[] complexFlattenedData = new Object[complexDataLength];
+          Object[] data = 
genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput);
+          for (int index = 0; index < complexDataLength; index++) {
+            complexFlattenedData[index] =
+                getData(data, index, dims.getColumnSchema().getDataType());
           }
-          preparedRow[i] = dataFromBytes;
+          complexDataMap.put(i, complexFlattenedData);
         } else {
-          preparedRow[i] = noDictionaryKeyByIndex;
+          // no dictionary dims
+          byte[] noDictionaryKeyByIndex = 
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
+          // no dictionary primitive columns are expected to be in original 
data while loading,
+          // so convert it to original data
+          if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
+            Object dataFromBytes = DataTypeUtil
+                
.getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex,
+                    dims.getDataType());
+            if (null != dataFromBytes && dims.getDataType() == 
DataTypes.TIMESTAMP) {
+              dataFromBytes = (long) dataFromBytes / 1000L;
+            }
+            preparedRow[i] = dataFromBytes;
+          } else {
+            preparedRow[i] = noDictionaryKeyByIndex;
+          }
         }
       }
     }
 
     // at last add implicit column position reference(PID)
+    preparedRow[i] = implicitColumnByteArray;
 
-    preparedRow[i] = wrapper.getImplicitColumnByteArray();
+    // In case of complex array type, flatten the data and add for sorting
+    // TODO: Handle for nested array and other complex types

Review comment:
       Please mention in the documentation that nested complex types are not supported.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [carbondata] kunal642 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI

Reply via email to