[GitHub] [pinot] klsince commented on a diff in pull request #10184: Complete index spi

via GitHub Fri, 24 Mar 2023 11:06:57 -0700


klsince commented on code in PR #10184:
URL: https://github.com/apache/pinot/pull/10184#discussion_r1147885247



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -133,183 +122,144 @@ public void init(SegmentGeneratorConfig 
segmentCreationSpec, SegmentIndexCreatio
       return;
     }
 
-    Collection<FieldSpec> fieldSpecs = schema.getAllFieldSpecs();
-    Set<String> invertedIndexColumns = new HashSet<>();
-    for (String columnName : _config.getInvertedIndexCreationColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create inverted index for column: %s because it is not in 
schema", columnName);
-      invertedIndexColumns.add(columnName);
-    }
+    Map<String, FieldIndexConfigs> indexConfigs = 
segmentCreationSpec.getIndexConfigsByColName();
 
-    Set<String> bloomFilterColumns = new HashSet<>();
-    for (String columnName : _config.getBloomFilterCreationColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create bloom filter for column: %s because it is not in 
schema", columnName);
-      bloomFilterColumns.add(columnName);
-    }
-
-    Set<String> rangeIndexColumns = new HashSet<>();
-    for (String columnName : _config.getRangeIndexCreationColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create range index for column: %s because it is not in 
schema", columnName);
-      rangeIndexColumns.add(columnName);
-    }
-
-    Set<String> textIndexColumns = new HashSet<>();
-    for (String columnName : _config.getTextIndexCreationColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create text index for column: %s because it is not in 
schema", columnName);
-      textIndexColumns.add(columnName);
-    }
-
-    Set<String> fstIndexColumns = new HashSet<>();
-    for (String columnName : _config.getFSTIndexCreationColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create FST index for column: %s because it is not in 
schema", columnName);
-      fstIndexColumns.add(columnName);
-    }
-
-    Map<String, JsonIndexConfig> jsonIndexConfigs = 
_config.getJsonIndexConfigs();
-    for (String columnName : jsonIndexConfigs.keySet()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create json index for column: %s because it is not in 
schema", columnName);
-    }
-
-    Set<String> forwardIndexDisabledColumns = new HashSet<>();
-    for (String columnName : _config.getForwardIndexDisabledColumns()) {
-      Preconditions.checkState(schema.hasColumn(columnName), 
String.format("Invalid config. Can't disable "
-          + "forward index creation for a column: %s that does not exist in 
schema", columnName));
-      forwardIndexDisabledColumns.add(columnName);
-    }
-
-    Map<String, H3IndexConfig> h3IndexConfigs = _config.getH3IndexConfigs();
-    for (String columnName : h3IndexConfigs.keySet()) {
-      Preconditions.checkState(schema.hasColumn(columnName),
-          "Cannot create H3 index for column: %s because it is not in schema", 
columnName);
-    }
+    _creatorsByColAndIndex = 
Maps.newHashMapWithExpectedSize(indexConfigs.keySet().size());
 
-    // Initialize creators for dictionary, forward index and inverted index
-    IndexingConfig indexingConfig = 
_config.getTableConfig().getIndexingConfig();
-    int rangeIndexVersion = indexingConfig.getRangeIndexVersion();
-    for (FieldSpec fieldSpec : fieldSpecs) {
-      // Ignore virtual columns
+    for (String columnName : indexConfigs.keySet()) {
+      FieldSpec fieldSpec = schema.getFieldSpecFor(columnName);
+      if (fieldSpec == null) {
+        Preconditions.checkState(schema.hasColumn(columnName),
+            "Cannot create inverted index for column: %s because it is not in 
schema", columnName);

Review Comment:
   Adjust the error message? I don't see why it is specific to the inverted index here.



##########
pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java:
##########
@@ -1542,7 +1556,7 @@ public void testLuceneRealtimeWithoutSearcherManager()
     indexReader3.close();
   }
 
-  @Test
+  //@Test

Review Comment:
   Uncomment the test?



##########
pinot-compatibility-verifier/pom.xml:
##########
@@ -92,6 +92,11 @@
       <version>${project.version}</version>
       <type>test-jar</type>
     </dependency>
+    <dependency>
+      <groupId>org.testng</groupId>
+      <artifactId>testng</artifactId>

Review Comment:
   nit: it looks like TestUtils.java, this pom.xml, and StreamOp.java could be 
merged first in a separate change, to trim this PR a little bit.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java:
##########
@@ -75,9 +81,8 @@ public void build(@Nullable SegmentVersion segmentVersion, 
ServerMetrics serverM
     // realtime segment generation
     genConfig.setSegmentTimeValueCheck(false);
     if (_columnIndicesForRealtimeTable.getInvertedIndexColumns() != null) {
-      for (String column : 
_columnIndicesForRealtimeTable.getInvertedIndexColumns()) {
-        genConfig.createInvertedIndexForColumn(column);
-      }
+      genConfig.setIndexOn(InvertedIndexType.INSTANCE, IndexConfig.ENABLED,

Review Comment:
   Q: when should one use StandardIndexes.inverted() vs. 
InvertedIndexType.INSTANCE? I see both methods mixed in this class. I assume 
StandardIndexes.inverted() is preferred.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java:
##########
@@ -166,6 +165,8 @@ public static ImmutableSegment load(SegmentDirectory 
segmentDirectory, IndexLoad
           segmentMetadata.removeColumn(column);
         }
       }
+    } else {
+      indexLoadingConfig.addKnownColumns(columnMetadataMap.keySet());

Review Comment:
   Curious: why do we need to track `_knownColumns` in indexLoadingConfig now?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentDictionaryCreator.java:
##########
@@ -62,9 +64,20 @@ public class SegmentDictionaryCreator implements Closeable {
   public SegmentDictionaryCreator(FieldSpec fieldSpec, File indexDir, boolean 
useVarLengthDictionary) {
     _columnName = fieldSpec.getName();
     _storedType = fieldSpec.getDataType().getStoredType();
-    _dictionaryFile = new File(indexDir, _columnName + 
V1Constants.Dict.FILE_EXTENSION);
+    _dictionaryFile = new File(indexDir, _columnName + 
DictionaryIndexType.INSTANCE.getFileExtension());
     _useVarLengthDictionary = useVarLengthDictionary;
   }
+  @Override
+  public void add(@Nonnull Object value, int dictId)
+      throws IOException {
+    throw new UnsupportedOperationException("Dictionary indexes should not be 
build as a normal index");

Review Comment:
   s/Dictionary indexes/Dictionary
   
   It would help to add a comment here explaining why/how the dictionary is built 
differently, i.e. without using add(). 
   
   I vaguely remember this was discussed in the PEP design doc, but it is still 
better to leave some comments here, as code iterates much faster than docs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [pinot] klsince commented on a diff in pull request #10184: Complete index spi

Reply via email to