This is an automated email from the ASF dual-hosted git repository.
saurabhd336 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 873992f93d Post build index creation (#11711)
873992f93d is described below
commit 873992f93d350ded958e20856ae455da89f990be
Author: Saurabh Dubey <[email protected]>
AuthorDate: Wed Oct 4 09:55:12 2023 +0530
Post build index creation (#11711)
* Allow creating indexes that depend on completed segment
---------
Co-authored-by: Saurabh Dubey <[email protected]>
Co-authored-by: Saurabh Dubey
<[email protected]>
---
.../creator/impl/SegmentColumnarIndexCreator.java | 12 +-----
.../impl/SegmentIndexCreationDriverImpl.java | 45 ++++++++++++++++++++++
.../index/dictionary/DictionaryIndexType.java | 4 ++
.../index/nullvalue/NullValueIndexType.java | 4 ++
.../apache/pinot/segment/spi/index/IndexType.java | 25 ++++++++++++
5 files changed, 80 insertions(+), 10 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 5ec40796d6..a84c275299 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -94,7 +94,7 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
/**
* Contains, indexed by column name, the creator associated with each index
type.
*
- * Indexes that {@link #hasSpecialLifecycle(IndexType) have a special
lyfecycle} are not included here.
+ * Indexes whose build lifecycle is not DURING_SEGMENT_CREATION are not
included here.
*/
private Map<String, Map<IndexType<?, ?, ?>, IndexCreator>>
_creatorsByColAndIndex = new HashMap<>();
private final Map<String, NullValueVectorCreator> _nullValueVectorCreatorMap
= new HashMap<>();
@@ -195,7 +195,7 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex =
Maps.newHashMapWithExpectedSize(IndexService.getInstance().getAllIndexes().size());
for (IndexType<?, ?, ?> index :
IndexService.getInstance().getAllIndexes()) {
- if (hasSpecialLifecycle(index)) {
+ if (index.getIndexBuildLifecycle() !=
IndexType.BuildLifecycle.DURING_SEGMENT_CREATION) {
continue;
}
tryCreateIndexCreator(creatorsByIndex, index, context, config);
@@ -243,14 +243,6 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
return builder.build();
}
- /**
- * Returns true if the given index type has their own construction lifecycle
and therefore should not be instantiated
- * in the general index loop and shouldn't be notified of each new column.
- */
- private boolean hasSpecialLifecycle(IndexType<?, ?, ?> indexType) {
- return indexType == StandardIndexes.nullValueVector() || indexType ==
StandardIndexes.dictionary();
- }
-
/**
* Creates the {@link IndexCreator} in a type safe way.
*
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
index 15f1611c04..0293e644c4 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
@@ -23,11 +23,14 @@ import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
+import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
@@ -37,6 +40,7 @@ import
org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreatio
import org.apache.pinot.segment.local.segment.creator.TransformPipeline;
import
org.apache.pinot.segment.local.segment.index.converter.SegmentFormatConverterFactory;
import
org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
import org.apache.pinot.segment.local.startree.v2.builder.MultipleTreesBuilder;
import org.apache.pinot.segment.local.utils.CrcUtils;
@@ -52,7 +56,13 @@ import
org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
import org.apache.pinot.segment.spi.creator.SegmentPreIndexStatsContainer;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.index.IndexHandler;
+import org.apache.pinot.segment.spi.index.IndexService;
+import org.apache.pinot.segment.spi.index.IndexType;
import org.apache.pinot.segment.spi.index.creator.SegmentIndexCreationInfo;
+import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderContext;
+import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderRegistry;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.config.table.TableConfig;
@@ -65,7 +75,9 @@ import org.apache.pinot.spi.data.readers.FileFormat;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.data.readers.RecordReader;
import org.apache.pinot.spi.data.readers.RecordReaderFactory;
+import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.ReadMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -312,6 +324,7 @@ public class SegmentIndexCreationDriverImpl implements
SegmentIndexCreationDrive
if (_totalDocs > 0) {
buildStarTreeV2IfNecessary(segmentOutputDir);
}
+ updatePostSegmentCreationIndexes(segmentOutputDir);
// Compute CRC and creation time
long crc = CrcUtils.forAllFilesInFolder(segmentOutputDir).computeCrc();
@@ -336,6 +349,38 @@ public class SegmentIndexCreationDriverImpl implements
SegmentIndexCreationDrive
LOGGER.info("Driver, indexing time : {}", _totalIndexTime);
}
+ private void updatePostSegmentCreationIndexes(File indexDir) throws
Exception {
+ Set<IndexType> postSegCreationIndexes =
IndexService.getInstance().getAllIndexes().stream()
+ .filter(indexType -> indexType.getIndexBuildLifecycle() ==
IndexType.BuildLifecycle.POST_SEGMENT_CREATION)
+ .collect(Collectors.toSet());
+
+ if (postSegCreationIndexes.size() > 0) {
+ // Build other indexes
+ Map<String, Object> props = new HashMap<>();
+ props.put(IndexLoadingConfig.READ_MODE_KEY, ReadMode.mmap);
+ PinotConfiguration segmentDirectoryConfigs = new
PinotConfiguration(props);
+
+ SegmentDirectoryLoaderContext segmentLoaderContext =
+ new
SegmentDirectoryLoaderContext.Builder().setTableConfig(_config.getTableConfig())
+ .setSchema(_config.getSchema()).setSegmentName(_segmentName)
+ .setSegmentDirectoryConfigs(segmentDirectoryConfigs).build();
+
+ IndexLoadingConfig indexLoadingConfig =
+ new IndexLoadingConfig(null, _config.getTableConfig(),
_config.getSchema());
+
+ try (SegmentDirectory segmentDirectory =
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
+ .load(indexDir.toURI(), segmentLoaderContext);
+ SegmentDirectory.Writer segmentWriter =
segmentDirectory.createWriter()) {
+ for (IndexType indexType : postSegCreationIndexes) {
+ IndexHandler handler =
+ indexType.createIndexHandler(segmentDirectory,
indexLoadingConfig.getFieldIndexConfigByColName(),
+ _config.getSchema(), _config.getTableConfig());
+ handler.updateIndices(segmentWriter);
+ }
+ }
+ }
+ }
+
private void buildStarTreeV2IfNecessary(File indexDir)
throws Exception {
List<StarTreeIndexConfig> starTreeIndexConfigs =
_config.getStarTreeIndexConfigs();
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
index bba174d732..102361a733 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
@@ -449,4 +449,8 @@ public class DictionaryIndexType
return MutableDictionaryFactory.getMutableDictionary(storedType,
context.isOffHeap(), context.getMemoryManager(),
dictionaryColumnSize, Math.min(estimatedCardinality,
context.getCapacity()), dictionaryAllocationContext);
}
+
+ public BuildLifecycle getIndexBuildLifecycle() {
+ return BuildLifecycle.CUSTOM;
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
index 9280b7ce92..316b72ef0b 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
@@ -130,4 +130,8 @@ public class NullValueIndexType extends
AbstractIndexType<IndexConfig, NullValue
@Override
public void convertToNewFormat(TableConfig tableConfig, Schema schema) {
}
+
+ public BuildLifecycle getIndexBuildLifecycle() {
+ return BuildLifecycle.CUSTOM;
+ }
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
index 94ef6a96b7..10a6a416b2 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
@@ -38,6 +38,12 @@ import org.apache.pinot.spi.data.Schema;
* @param <IC> the {@link IndexCreator} subclass that should be used to create
indexes of this type.
*/
public interface IndexType<C extends IndexConfig, IR extends IndexReader, IC
extends IndexCreator> {
+ /**
+ * Returns the {@link BuildLifecycle} for this index type. This is used to
determine when the index should be built.
+ */
+ default BuildLifecycle getIndexBuildLifecycle() {
+ return BuildLifecycle.DURING_SEGMENT_CREATION;
+ }
/**
* The unique id that identifies this index type.
@@ -127,4 +133,23 @@ public interface IndexType<C extends IndexConfig, IR
extends IndexReader, IC ext
default MutableIndex createMutableIndex(MutableIndexContext context, C
config) {
return null;
}
+
+ enum BuildLifecycle {
+ /**
+ * The index will be built during segment creation, using the {@link
IndexCreator#add} call for each of the column
+ * values being added.
+ */
+ DURING_SEGMENT_CREATION,
+
+ /**
+ * The index will be built after the segment file has been created, using
the {@link IndexHandler#updateIndices} call.
+ * This is useful for indexes that may need the entire prebuilt segment to
be available before they can be built.
+ */
+ POST_SEGMENT_CREATION,
+
+ /**
+ * The index's build lifecycle is managed in a custom manner.
+ */
+ CUSTOM
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]