This is an automated email from the ASF dual-hosted git repository.
kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 16704a3cb1 Add null value index for default column (#9777)
16704a3cb1 is described below
commit 16704a3cb1fc04958483fb565cef44f69e51cb17
Author: Kartik Khare <[email protected]>
AuthorDate: Thu Nov 17 21:01:10 2022 +0530
Add null value index for default column (#9777)
* Add null value index for default column
* Add check that the null value index doesn't already exist before creating it
* Add test for null value vector for default values
* Only create null index for default cols when nullHandlingEnabled: true
Co-authored-by: Kartik Khare <[email protected]>
---
.../loader/defaultcolumn/BaseDefaultColumnHandler.java | 16 ++++++++++++++++
.../loader/defaultcolumn/V3DefaultColumnHandler.java | 5 +++++
.../segment/index/loader/SegmentPreProcessorTest.java | 11 ++++++++++-
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index ac3555d783..1c9646b97b 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -43,6 +43,7 @@ import
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueUnsorte
import
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator;
+import
org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.DoubleColumnPreIndexStatsCollector;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.FloatColumnPreIndexStatsCollector;
@@ -552,6 +553,21 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
}
+ if (_indexLoadingConfig.getTableConfig() != null
+ && _indexLoadingConfig.getTableConfig().getIndexingConfig() != null
+ &&
_indexLoadingConfig.getTableConfig().getIndexingConfig().isNullHandlingEnabled())
{
+ if (!_segmentWriter.hasIndexFor(column,
ColumnIndexType.NULLVALUE_VECTOR)) {
+ try (NullValueVectorCreator nullValueVectorCreator =
+ new NullValueVectorCreator(_indexDir, fieldSpec.getName())) {
+ for (int docId = 0; docId < totalDocs; docId++) {
+ nullValueVectorCreator.setNull(docId);
+ }
+
+ nullValueVectorCreator.seal();
+ }
+ }
+ }
+
// Add the column metadata information to the metadata properties.
SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties,
column, columnIndexCreationInfo, totalDocs,
fieldSpec, true/*hasDictionary*/, dictionaryElementSize);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
index d444e65bc3..89a92b337d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
@@ -74,6 +74,11 @@ public class V3DefaultColumnHandler extends
BaseDefaultColumnHandler {
LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile,
ColumnIndexType.FORWARD_INDEX);
File dictionaryFile = new File(_indexDir, column +
V1Constants.Dict.FILE_EXTENSION);
LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile,
ColumnIndexType.DICTIONARY);
+
+ File nullValueVectorFile = new File(_indexDir, column +
V1Constants.Indexes.NULLVALUE_VECTOR_FILE_EXTENSION);
+ if (nullValueVectorFile.exists()) {
+ LoaderUtils.writeIndexToV3Format(_segmentWriter, column,
nullValueVectorFile, ColumnIndexType.NULLVALUE_VECTOR);
+ }
return true;
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index fbaa85e926..7c9c02ab2c 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -163,7 +163,7 @@ public class SegmentPreProcessorTest {
ingestionConfig.setSegmentTimeValueCheck(false);
_tableConfig =
new
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName("daysSinceEpoch")
- .setIngestionConfig(ingestionConfig).build();
+
.setIngestionConfig(ingestionConfig).setNullHandlingEnabled(true).build();
_indexLoadingConfig = getDefaultIndexLoadingConfig();
// We specify two columns without inverted index ('column1', 'column13'),
one non-existing column ('noSuchColumn')
@@ -219,6 +219,8 @@ public class SegmentPreProcessorTest {
indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW);
indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV);
indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW);
+
+ indexLoadingConfig.setTableConfig(_tableConfig);
return indexLoadingConfig;
}
@@ -1097,6 +1099,13 @@ public class SegmentPreProcessorTest {
assertTrue(reader.hasIndexFor(NEW_INT_SV_DIMENSION_COLUMN_NAME,
ColumnIndexType.FORWARD_INDEX));
assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME,
ColumnIndexType.DICTIONARY));
assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME,
ColumnIndexType.FORWARD_INDEX));
+
+ assertTrue(reader.hasIndexFor(NEW_INT_METRIC_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
+ assertTrue(reader.hasIndexFor(NEW_LONG_METRIC_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
+ assertTrue(reader.hasIndexFor(NEW_FLOAT_METRIC_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
+ assertTrue(reader.hasIndexFor(NEW_DOUBLE_METRIC_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
+ assertTrue(reader.hasIndexFor(NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
+ assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME,
ColumnIndexType.NULLVALUE_VECTOR));
}
// Use the second schema and update default value again.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]