Jackie-Jiang commented on code in PR #10891:
URL: https://github.com/apache/pinot/pull/10891#discussion_r1237916717
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -119,65 +121,126 @@ private List<String> getColumnsToAddMinMaxValue() {
private boolean needAddColumnMinMaxValueForColumn(String columnName) {
ColumnMetadata columnMetadata =
_segmentMetadata.getColumnMetadataFor(columnName);
- return columnMetadata.hasDictionary() && columnMetadata.getMinValue() ==
null
+ return columnMetadata.getMinValue() == null
&& columnMetadata.getMaxValue() == null &&
!columnMetadata.isMinMaxValueInvalid();
}
private void addColumnMinMaxValueForColumn(String columnName)
throws Exception {
// Skip column without dictionary or with min/max value already set
ColumnMetadata columnMetadata =
_segmentMetadata.getColumnMetadataFor(columnName);
- if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null
- || columnMetadata.getMaxValue() != null) {
+ if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue()
!= null) {
return;
}
- PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName,
StandardIndexes.dictionary());
DataType dataType = columnMetadata.getDataType().getStoredType();
- int length = columnMetadata.getCardinality();
- switch (dataType) {
- case INT:
- try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer,
length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- intDictionary.getStringValue(0),
intDictionary.getStringValue(length - 1));
- }
- break;
- case LONG:
- try (LongDictionary longDictionary = new
LongDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- longDictionary.getStringValue(0),
longDictionary.getStringValue(length - 1));
- }
- break;
- case FLOAT:
- try (FloatDictionary floatDictionary = new
FloatDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- floatDictionary.getStringValue(0),
floatDictionary.getStringValue(length - 1));
- }
- break;
- case DOUBLE:
- try (DoubleDictionary doubleDictionary = new
DoubleDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- doubleDictionary.getStringValue(0),
doubleDictionary.getStringValue(length - 1));
- }
- break;
- case STRING:
- try (StringDictionary stringDictionary = new
StringDictionary(dictionaryBuffer, length,
- columnMetadata.getColumnMaxLength())) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- stringDictionary.getStringValue(0),
stringDictionary.getStringValue(length - 1));
- }
- break;
- case BYTES:
- try (BytesDictionary bytesDictionary = new
BytesDictionary(dictionaryBuffer, length,
- columnMetadata.getColumnMaxLength())) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- bytesDictionary.getStringValue(0),
bytesDictionary.getStringValue(length - 1));
- }
- break;
- default:
- throw new IllegalStateException("Unsupported data type: " + dataType +
" for column: " + columnName);
+ if (columnMetadata.hasDictionary()) {
+ PinotDataBuffer dictionaryBuffer =
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
+ int length = columnMetadata.getCardinality();
+ switch (dataType) {
+ case INT:
+ try (IntDictionary intDictionary = new
IntDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ intDictionary.getStringValue(0),
intDictionary.getStringValue(length - 1));
+ }
+ break;
+ case LONG:
+ try (LongDictionary longDictionary = new
LongDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ longDictionary.getStringValue(0),
longDictionary.getStringValue(length - 1));
+ }
+ break;
+ case FLOAT:
+ try (FloatDictionary floatDictionary = new
FloatDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ floatDictionary.getStringValue(0),
floatDictionary.getStringValue(length - 1));
+ }
+ break;
+ case DOUBLE:
+ try (DoubleDictionary doubleDictionary = new
DoubleDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ doubleDictionary.getStringValue(0),
doubleDictionary.getStringValue(length - 1));
+ }
+ break;
+ case STRING:
+ try (StringDictionary stringDictionary = new
StringDictionary(dictionaryBuffer, length,
+ columnMetadata.getColumnMaxLength())) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ stringDictionary.getStringValue(0),
stringDictionary.getStringValue(length - 1));
+ }
+ break;
+ case BYTES:
+ try (BytesDictionary bytesDictionary = new
BytesDictionary(dictionaryBuffer, length,
+ columnMetadata.getColumnMaxLength())) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ bytesDictionary.getStringValue(0),
bytesDictionary.getStringValue(length - 1));
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported data type: " + dataType
+ " for column: " + columnName);
+ }
+ } else if
(_segmentMetadata.getSchema().getFieldSpecFor(columnName).getFieldType() ==
FieldSpec.FieldType.METRIC) {
Review Comment:
Why metric only?
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -119,65 +121,126 @@ private List<String> getColumnsToAddMinMaxValue() {
private boolean needAddColumnMinMaxValueForColumn(String columnName) {
ColumnMetadata columnMetadata =
_segmentMetadata.getColumnMetadataFor(columnName);
- return columnMetadata.hasDictionary() && columnMetadata.getMinValue() ==
null
+ return columnMetadata.getMinValue() == null
&& columnMetadata.getMaxValue() == null &&
!columnMetadata.isMinMaxValueInvalid();
}
private void addColumnMinMaxValueForColumn(String columnName)
throws Exception {
// Skip column without dictionary or with min/max value already set
ColumnMetadata columnMetadata =
_segmentMetadata.getColumnMetadataFor(columnName);
- if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null
- || columnMetadata.getMaxValue() != null) {
+ if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue()
!= null) {
return;
}
- PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName,
StandardIndexes.dictionary());
DataType dataType = columnMetadata.getDataType().getStoredType();
- int length = columnMetadata.getCardinality();
- switch (dataType) {
- case INT:
- try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer,
length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- intDictionary.getStringValue(0),
intDictionary.getStringValue(length - 1));
- }
- break;
- case LONG:
- try (LongDictionary longDictionary = new
LongDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- longDictionary.getStringValue(0),
longDictionary.getStringValue(length - 1));
- }
- break;
- case FLOAT:
- try (FloatDictionary floatDictionary = new
FloatDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- floatDictionary.getStringValue(0),
floatDictionary.getStringValue(length - 1));
- }
- break;
- case DOUBLE:
- try (DoubleDictionary doubleDictionary = new
DoubleDictionary(dictionaryBuffer, length)) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- doubleDictionary.getStringValue(0),
doubleDictionary.getStringValue(length - 1));
- }
- break;
- case STRING:
- try (StringDictionary stringDictionary = new
StringDictionary(dictionaryBuffer, length,
- columnMetadata.getColumnMaxLength())) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- stringDictionary.getStringValue(0),
stringDictionary.getStringValue(length - 1));
- }
- break;
- case BYTES:
- try (BytesDictionary bytesDictionary = new
BytesDictionary(dictionaryBuffer, length,
- columnMetadata.getColumnMaxLength())) {
-
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
- bytesDictionary.getStringValue(0),
bytesDictionary.getStringValue(length - 1));
- }
- break;
- default:
- throw new IllegalStateException("Unsupported data type: " + dataType +
" for column: " + columnName);
+ if (columnMetadata.hasDictionary()) {
+ PinotDataBuffer dictionaryBuffer =
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
+ int length = columnMetadata.getCardinality();
+ switch (dataType) {
+ case INT:
+ try (IntDictionary intDictionary = new
IntDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ intDictionary.getStringValue(0),
intDictionary.getStringValue(length - 1));
+ }
+ break;
+ case LONG:
+ try (LongDictionary longDictionary = new
LongDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ longDictionary.getStringValue(0),
longDictionary.getStringValue(length - 1));
+ }
+ break;
+ case FLOAT:
+ try (FloatDictionary floatDictionary = new
FloatDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ floatDictionary.getStringValue(0),
floatDictionary.getStringValue(length - 1));
+ }
+ break;
+ case DOUBLE:
+ try (DoubleDictionary doubleDictionary = new
DoubleDictionary(dictionaryBuffer, length)) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ doubleDictionary.getStringValue(0),
doubleDictionary.getStringValue(length - 1));
+ }
+ break;
+ case STRING:
+ try (StringDictionary stringDictionary = new
StringDictionary(dictionaryBuffer, length,
+ columnMetadata.getColumnMaxLength())) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ stringDictionary.getStringValue(0),
stringDictionary.getStringValue(length - 1));
+ }
+ break;
+ case BYTES:
+ try (BytesDictionary bytesDictionary = new
BytesDictionary(dictionaryBuffer, length,
+ columnMetadata.getColumnMaxLength())) {
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ bytesDictionary.getStringValue(0),
bytesDictionary.getStringValue(length - 1));
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported data type: " + dataType
+ " for column: " + columnName);
+ }
+ } else if
(_segmentMetadata.getSchema().getFieldSpecFor(columnName).getFieldType() ==
FieldSpec.FieldType.METRIC) {
+ // setting min/max for non-dictionary columns.
+ PinotDataBuffer forwardBuffer = _segmentWriter.getIndexFor(columnName,
StandardIndexes.forward());
+ switch (dataType) {
+ case INT:
+ try (FixedByteChunkSVForwardIndexReader rawIndexReader = new
FixedByteChunkSVForwardIndexReader(forwardBuffer,
+ DataType.INT); ChunkReaderContext readerContext =
rawIndexReader.createContext()) {
+ Integer[] minMaxValue = {Integer.MAX_VALUE, Integer.MIN_VALUE};
+ for (int docs = 0; docs < columnMetadata.getTotalDocs(); docs++) {
+ minMaxValue = getMinMaxValue(minMaxValue,
rawIndexReader.getInt(docs, readerContext));
+ }
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ String.valueOf(minMaxValue[0]),
String.valueOf(minMaxValue[1]));
+ }
+ break;
+ case LONG:
+ try (FixedByteChunkSVForwardIndexReader rawIndexReader = new
FixedByteChunkSVForwardIndexReader(forwardBuffer,
+ DataType.LONG); ChunkReaderContext readerContext =
rawIndexReader.createContext()) {
+ Long[] minMaxValue = {Long.MAX_VALUE, Long.MIN_VALUE};
+ for (int docs = 0; docs < columnMetadata.getTotalDocs(); docs++) {
+ minMaxValue = getMinMaxValue(minMaxValue,
rawIndexReader.getLong(docs, readerContext));
+ }
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ String.valueOf(minMaxValue[0]),
String.valueOf(minMaxValue[1]));
+ }
+ break;
+ case FLOAT:
+ try (FixedByteChunkSVForwardIndexReader rawIndexReader = new
FixedByteChunkSVForwardIndexReader(forwardBuffer,
+ DataType.FLOAT); ChunkReaderContext readerContext =
rawIndexReader.createContext()) {
+ Float[] minMaxValue = {Float.MAX_VALUE, Float.MIN_VALUE};
+ for (int docs = 0; docs < columnMetadata.getTotalDocs(); docs++) {
+ minMaxValue = getMinMaxValue(minMaxValue,
rawIndexReader.getFloat(docs, readerContext));
+ }
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ String.valueOf(minMaxValue[0]),
String.valueOf(minMaxValue[1]));
+ }
+ break;
+ case DOUBLE:
+ try (FixedByteChunkSVForwardIndexReader rawIndexReader = new
FixedByteChunkSVForwardIndexReader(forwardBuffer,
+ DataType.DOUBLE); ChunkReaderContext readerContext =
rawIndexReader.createContext()) {
+ Double[] minMaxValue = {Double.MAX_VALUE, Double.MIN_VALUE};
+ for (int docs = 0; docs < columnMetadata.getTotalDocs(); docs++) {
+ minMaxValue = getMinMaxValue(minMaxValue,
rawIndexReader.getDouble(docs, readerContext));
+ }
+
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties,
columnName,
+ String.valueOf(minMaxValue[0]),
String.valueOf(minMaxValue[1]));
+ }
+ break;
+ default:
Review Comment:
I think we also want to support `STRING` and `BYTES`
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -570,12 +570,14 @@ public static void
addColumnMetadataInfo(PropertiesConfiguration properties, Str
public static void addColumnMinMaxValueInfo(PropertiesConfiguration
properties, String column, String minValue,
String maxValue) {
if (isValidPropertyValue(minValue)) {
- properties.setProperty(getKeyFor(column, MIN_VALUE), minValue);
+ properties.setProperty(getKeyFor(column, MIN_VALUE),
+ minValue.substring(0, Math.min(minValue.length(),
METADATA_PROPERTY_LENGTH_LIMIT)));
} else {
properties.setProperty(getKeyFor(column, MIN_MAX_VALUE_INVALID), true);
}
if (isValidPropertyValue(maxValue)) {
- properties.setProperty(getKeyFor(column, MAX_VALUE), maxValue);
+ properties.setProperty(getKeyFor(column, MAX_VALUE),
+ maxValue.substring(0, Math.min(maxValue.length(),
METADATA_PROPERTY_LENGTH_LIMIT)));
Review Comment:
Seems we still have invalid values. In that case, that's not fix the length
issue now
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]