Jackie-Jiang commented on code in PR #17722:
URL: https://github.com/apache/pinot/pull/17722#discussion_r2824724657


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -273,26 +312,64 @@ private AbstractColumnStatisticsCollector 
createKeyStatsCollector(String key, Ob
         return new DoubleColumnPreIndexStatsCollector(key, config);
       case BIG_DECIMAL:
         return new BigDecimalColumnPreIndexStatsCollector(key, config);
+      case BYTES:
+        return new BytesColumnPredIndexStatsCollector(key, config);
       case BOOLEAN:
       case STRING:
       case MAP:
-      case OBJECT:
         return new StringColumnPreIndexStatsCollector(key, config);
       default:
-        LOGGER.warn("Unknown data type {} for key {} and value type {}", type, 
key, value.getClass().getName());
+        LOGGER.warn("Unknown data type {} for key {}", dataType, key);
         return new StringColumnPreIndexStatsCollector(key, config);
+      }
+  }
+
+  private AbstractColumnStatisticsCollector 
promoteNumericKeyStatsToStringCollector(String key,
+      AbstractColumnStatisticsCollector keyStats, Object value) {
+    AbstractColumnStatisticsCollector stringKeyStats = 
createKeyStatsCollector(key, FieldSpec.DataType.STRING);
+    int previousTotalNumberOfEntries = keyStats.getTotalNumberOfEntries();
+    int previousMaxNumberOfMultiValues = keyStats.getMaxNumberOfMultiValues();
+    if (keyStats instanceof IntColumnPreIndexStatsCollector) {
+      for (int intValue : ((IntColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(intValue));
+      }
+    } else if (keyStats instanceof LongColumnPreIndexStatsCollector) {
+      for (long longValue : ((LongColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(longValue));
+      }
+    } else if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
+      for (float floatValue : ((FloatColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(floatValue));
+      }
+    } else if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
+      for (double doubleValue : ((DoubleColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(doubleValue));
+      }
+    } else if (keyStats instanceof BigDecimalColumnPreIndexStatsCollector) {
+      for (BigDecimal decimalValue : ((BigDecimalColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(decimalValue));
+      }
     }
+    _keyStats.put(key, stringKeyStats);
+    stringKeyStats._maxNumberOfMultiValues = previousMaxNumberOfMultiValues;
+    stringKeyStats._totalNumberOfEntries = previousTotalNumberOfEntries;
+    stringKeyStats.collect(String.valueOf(value));
+    return stringKeyStats;
   }
 
   /**
    * Convert Map value data type to stored field type.
    * Note that all unknown types are automatically converted to String type.
    */
   private static FieldSpec.DataType convertToDataType(PinotDataType ty) {
-    switch (ty) {
+      switch (ty) {

Review Comment:
   (minor) Reformat. Suggest changing the argumant name



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -156,23 +161,52 @@ public void collect(Object entry) {
           }
         }
         if (keyStats instanceof IntColumnPreIndexStatsCollector) {
-          keyStats.collect(Integer.parseInt(value.toString()));
+          try {
+            keyStats.collect(Integer.parseInt(value.toString()));
+          } catch (NumberFormatException e) {
+            LOGGER.warn(
+                "Could not parse map key '{}' value '{}' as INT; promoting to 
STRING collector",

Review Comment:
   (minor) Reformat, same for other places



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -254,17 +288,22 @@ public void seal() {
   private AbstractColumnStatisticsCollector createKeyStatsCollector(String 
key, Object value) {
     // Get the type of the value
     PinotDataType type = PinotDataType.getSingleValueType(value.getClass());
+    return createKeyStatsCollector(key, convertToDataType(type));
+  }
+
+  private AbstractColumnStatisticsCollector createKeyStatsCollector(String 
key, FieldSpec.DataType dataType) {
     TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(key).build();
     Schema keySchema = new Schema.SchemaBuilder().setSchemaName(key)
-        .addField(new DimensionFieldSpec(key, convertToDataType(type), 
false)).build();
+        .addField(new DimensionFieldSpec(key, dataType, false)).build();
     StatsCollectorConfig config = new StatsCollectorConfig(tableConfig, 
keySchema, null);
 
     if (_createNoDictCollectorsForKeys) {
       return new NoDictColumnStatisticsCollector(key, config);
     }
-    switch (type) {
-      case INTEGER:
+    switch (dataType.getStoredType()) {
+      case INT:
         return new IntColumnPreIndexStatsCollector(key, config);
+      case TIMESTAMP:

Review Comment:
   TIMESTAMP, BOOLEAN is not stored type. No need to have them here



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -273,26 +312,64 @@ private AbstractColumnStatisticsCollector 
createKeyStatsCollector(String key, Ob
         return new DoubleColumnPreIndexStatsCollector(key, config);
       case BIG_DECIMAL:
         return new BigDecimalColumnPreIndexStatsCollector(key, config);
+      case BYTES:
+        return new BytesColumnPredIndexStatsCollector(key, config);
       case BOOLEAN:

Review Comment:
   BOOLEAN should be handled as `IntColumnPreIndexStatsCollector`



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########


Review Comment:
   BOOLEAN should be handled as BOOLEAN



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -273,26 +312,64 @@ private AbstractColumnStatisticsCollector 
createKeyStatsCollector(String key, Ob
         return new DoubleColumnPreIndexStatsCollector(key, config);
       case BIG_DECIMAL:
         return new BigDecimalColumnPreIndexStatsCollector(key, config);
+      case BYTES:
+        return new BytesColumnPredIndexStatsCollector(key, config);
       case BOOLEAN:
       case STRING:
       case MAP:
-      case OBJECT:
         return new StringColumnPreIndexStatsCollector(key, config);
       default:
-        LOGGER.warn("Unknown data type {} for key {} and value type {}", type, 
key, value.getClass().getName());
+        LOGGER.warn("Unknown data type {} for key {}", dataType, key);
         return new StringColumnPreIndexStatsCollector(key, config);
+      }
+  }
+
+  private AbstractColumnStatisticsCollector 
promoteNumericKeyStatsToStringCollector(String key,
+      AbstractColumnStatisticsCollector keyStats, Object value) {
+    AbstractColumnStatisticsCollector stringKeyStats = 
createKeyStatsCollector(key, FieldSpec.DataType.STRING);
+    int previousTotalNumberOfEntries = keyStats.getTotalNumberOfEntries();
+    int previousMaxNumberOfMultiValues = keyStats.getMaxNumberOfMultiValues();
+    if (keyStats instanceof IntColumnPreIndexStatsCollector) {
+      for (int intValue : ((IntColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(intValue));
+      }
+    } else if (keyStats instanceof LongColumnPreIndexStatsCollector) {
+      for (long longValue : ((LongColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(longValue));
+      }
+    } else if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
+      for (float floatValue : ((FloatColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(floatValue));
+      }
+    } else if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
+      for (double doubleValue : ((DoubleColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(doubleValue));
+      }
+    } else if (keyStats instanceof BigDecimalColumnPreIndexStatsCollector) {
+      for (BigDecimal decimalValue : ((BigDecimalColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(decimalValue));
+      }
     }
+    _keyStats.put(key, stringKeyStats);
+    stringKeyStats._maxNumberOfMultiValues = previousMaxNumberOfMultiValues;
+    stringKeyStats._totalNumberOfEntries = previousTotalNumberOfEntries;
+    stringKeyStats.collect(String.valueOf(value));
+    return stringKeyStats;
   }
 
   /**
    * Convert Map value data type to stored field type.
    * Note that all unknown types are automatically converted to String type.
    */
   private static FieldSpec.DataType convertToDataType(PinotDataType ty) {
-    switch (ty) {
+      switch (ty) {
+      case BYTE:
+      case CHARACTER:
       case SHORT:
       case INTEGER:
         return FieldSpec.DataType.INT;
+      case BYTES:

Review Comment:
   (minor) Move BYTES below



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to