Copilot commented on code in PR #17722:
URL: https://github.com/apache/pinot/pull/17722#discussion_r2823357141


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java:
##########
@@ -276,12 +310,46 @@ private AbstractColumnStatisticsCollector 
createKeyStatsCollector(String key, Ob
       case BOOLEAN:
       case STRING:
       case MAP:
-      case OBJECT:
         return new StringColumnPreIndexStatsCollector(key, config);
       default:
-        LOGGER.warn("Unknown data type {} for key {} and value type {}", type, 
key, value.getClass().getName());
+        LOGGER.warn("Unknown data type {} for key {}", dataType, key);
         return new StringColumnPreIndexStatsCollector(key, config);
+      }
+  }
+
+  private AbstractColumnStatisticsCollector 
promoteNumericKeyStatsToStringCollector(String key,
+      AbstractColumnStatisticsCollector keyStats, Object value) {
+    AbstractColumnStatisticsCollector stringKeyStats = 
createKeyStatsCollector(key, FieldSpec.DataType.STRING);
+    int previousTotalNumberOfEntries = keyStats.getTotalNumberOfEntries();
+    int previousMaxNumberOfMultiValues = keyStats.getMaxNumberOfMultiValues();
+    boolean previousSorted = keyStats.isSorted();
+    if (keyStats instanceof IntColumnPreIndexStatsCollector) {
+      for (int intValue : ((IntColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(intValue));
+      }
+    } else if (keyStats instanceof LongColumnPreIndexStatsCollector) {
+      for (long longValue : ((LongColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(longValue));
+      }
+    } else if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
+      for (float floatValue : ((FloatColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(floatValue));
+      }
+    } else if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
+      for (double doubleValue : ((DoubleColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(doubleValue));
+      }
+    } else if (keyStats instanceof BigDecimalColumnPreIndexStatsCollector) {
+      for (BigDecimal decimalValue : ((BigDecimalColumnPreIndexStatsCollector) 
keyStats).getValues()) {
+        stringKeyStats.collect(String.valueOf(decimalValue));
+      }
     }
+    _keyStats.put(key, stringKeyStats);
+    stringKeyStats._maxNumberOfMultiValues = previousMaxNumberOfMultiValues;
+    stringKeyStats._sorted = previousSorted;
+    stringKeyStats._totalNumberOfEntries = previousTotalNumberOfEntries;
+    stringKeyStats.collect(String.valueOf(value));

Review Comment:
   In promoteNumericKeyStatsToStringCollector(), copying `previousSorted` from 
the numeric collector into the new STRING collector can produce an incorrect 
`isSorted()` result because the sort order changes when values are stringified 
(e.g., numeric sequence 2, 10 is sorted numerically but not lexicographically 
as "2", "10"). This can also desynchronize the internal `_previousValue` 
tracking used by `addressSorted()`, leaving the collector marked sorted when it 
is not. Avoid overriding `_sorted` here; instead let the STRING collector 
compute sortedness while replaying values (or explicitly recompute based on 
string comparisons).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to