This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 83ed397210 add multi-value support for native text index (#11204)
83ed397210 is described below

commit 83ed3972103379709d773d8ae96d479e126c19fd
Author: Christopher Peck <[email protected]>
AuthorDate: Tue Aug 1 22:30:44 2023 -0700

    add multi-value support for native text index (#11204)
---
 .../queries/NativeAndLuceneComparisonTest.java     | 122 +++++++++++++++------
 .../impl/invertedindex/NativeMutableTextIndex.java |  23 ++--
 .../creator/impl/text/NativeTextIndexCreator.java  |  41 ++++---
 .../index/readers/text/NativeTextIndexReader.java  |   2 +-
 .../local/segment/index/text/TextIndexType.java    |   3 -
 .../NativeAndLuceneMutableTextIndexTest.java       |  61 ++++++++---
 6 files changed, 176 insertions(+), 76 deletions(-)

diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
index 53db2f7abc..a066a45dc4 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
@@ -57,8 +57,10 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
   private static final String TABLE_NAME = "MyTable";
   private static final String SEGMENT_NAME_LUCENE = "testSegmentLucene";
   private static final String SEGMENT_NAME_NATIVE = "testSegmentNative";
-  private static final String DOMAIN_NAMES_COL_LUCENE = "DOMAIN_NAMES_LUCENE";
-  private static final String DOMAIN_NAMES_COL_NATIVE = "DOMAIN_NAMES_NATIVE";
+  private static final String QUOTES_COL_LUCENE = "QUOTES_LUCENE";
+  private static final String QUOTES_COL_NATIVE = "QUOTES_NATIVE";
+  private static final String QUOTES_COL_LUCENE_MV = "QUOTES_LUCENE_MV";
+  private static final String QUOTES_COL_NATIVE_MV = "QUOTES_NATIVE_MV";
   private static final Integer NUM_ROWS = 1024;
 
   private IndexSegment _indexSegment;
@@ -107,8 +109,8 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
     FileUtils.deleteQuietly(INDEX_DIR);
   }
 
-  private List<String> getDomainNames() {
-    return Arrays.asList("Prince Andrew kept looking with an amused smile from 
Pierre",
+  private String[] getTextData() {
+    return new String[]{"Prince Andrew kept looking with an amused smile from 
Pierre",
         "vicomte and from the vicomte to their hostess. In the first moment 
of",
         "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
         "horror-struck. But when she saw that Pierre’s sacrilegious words",
@@ -116,17 +118,34 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
         "impossible to stop him, she rallied her forces and joined the vicomte 
in", "a vigorous attack on the orator",
         "horror-struck. But when she", "she rallied her forces and joined", 
"outburst Anna Pávlovna",
         "she rallied her forces and", "despite her social experience", "had 
not exasperated the vicomte",
-        " despite her social experience", "impossible to stop him", "despite 
her social experience");
+        " despite her social experience", "impossible to stop him", "despite 
her social experience"};
+  }
+
+  private String[][] getMVTextData() {
+    return new String[][]{
+        {"Prince Andrew kept", "looking with an"}, {"amused smile", "from 
Pierre"}, {"vicomte and from the"}, {
+          "vicomte to", "their hostess."}, {"In the first moment of"}, 
{"Pierre’s outburst Anna Pávlovna,"}, {
+          "despite her", "social", "experience, was"}, {"horror-struck.", "But 
when she"}, {"saw that Pierre’s"}, {
+          "sacrilegious words"}, {"had not exasperated the vicomte, and had 
convinced herself that it was"}, {
+          "impossible to stop him,", "she rallied her"}, {"forces and joined 
the vicomte in", "a vigorous attack on "
+        + "the orator"}, {"horror-struck. But when she", "she rallied her 
forces and joined", "outburst Anna "
+        + "Pávlovna"}, {"she rallied her forces and", "despite her social 
experience", "had not exasperated the "
+        + "vicomte"}, {"despite her social experience", "impossible to stop 
him", "despite her social experience"}
+    };
   }
 
   private List<GenericRow> createTestData(int numRows) {
     List<GenericRow> rows = new ArrayList<>();
-    List<String> domainNames = getDomainNames();
+    String[] textData = getTextData();
+    String[][] mvTextData = getMVTextData();
     for (int i = 0; i < numRows; i++) {
-      String domain = domainNames.get(i % domainNames.size());
+      String doc = textData[i % textData.length];
+      String[] mvDoc = mvTextData[i % mvTextData.length];
       GenericRow row = new GenericRow();
-      row.putField(DOMAIN_NAMES_COL_LUCENE, domain);
-      row.putField(DOMAIN_NAMES_COL_NATIVE, domain);
+      row.putValue(QUOTES_COL_LUCENE, doc);
+      row.putValue(QUOTES_COL_NATIVE, doc);
+      row.putValue(QUOTES_COL_LUCENE_MV, mvDoc);
+      row.putValue(QUOTES_COL_NATIVE_MV, mvDoc);
       rows.add(row);
     }
 
@@ -139,13 +158,18 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
     List<FieldConfig> fieldConfigs = new ArrayList<>();
 
     fieldConfigs.add(
-        new FieldConfig(DOMAIN_NAMES_COL_LUCENE, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+        new FieldConfig(QUOTES_COL_LUCENE, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+            null));
+    fieldConfigs.add(
+        new FieldConfig(QUOTES_COL_LUCENE_MV, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
             null));
 
     TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-        
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL_LUCENE)).setFieldConfigList(fieldConfigs).build();
+        .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_LUCENE, 
QUOTES_COL_LUCENE_MV))
+        .setFieldConfigList(fieldConfigs).build();
     Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
-        .addSingleValueDimension(DOMAIN_NAMES_COL_LUCENE, 
FieldSpec.DataType.STRING).build();
+        .addSingleValueDimension(QUOTES_COL_LUCENE, FieldSpec.DataType.STRING)
+        .addMultiValueDimension(QUOTES_COL_LUCENE_MV, 
FieldSpec.DataType.STRING).build();
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, 
schema);
     config.setOutDir(INDEX_DIR.getPath());
     config.setTableName(TABLE_NAME);
@@ -168,13 +192,18 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
     propertiesMap.put(FieldConfig.TEXT_FST_TYPE, 
FieldConfig.TEXT_NATIVE_FST_LITERAL);
 
     fieldConfigs.add(
-        new FieldConfig(DOMAIN_NAMES_COL_NATIVE, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+        new FieldConfig(QUOTES_COL_NATIVE, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+            propertiesMap));
+    fieldConfigs.add(
+        new FieldConfig(QUOTES_COL_NATIVE_MV, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
             propertiesMap));
 
     TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-        
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL_NATIVE)).setFieldConfigList(fieldConfigs).build();
+        .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_NATIVE, 
QUOTES_COL_NATIVE_MV))
+        .setFieldConfigList(fieldConfigs).build();
     Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
-        .addSingleValueDimension(DOMAIN_NAMES_COL_NATIVE, 
FieldSpec.DataType.STRING).build();
+        .addSingleValueDimension(QUOTES_COL_NATIVE, FieldSpec.DataType.STRING)
+        .addMultiValueDimension(QUOTES_COL_NATIVE_MV, 
FieldSpec.DataType.STRING).build();
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, 
schema);
     config.setOutDir(INDEX_DIR.getPath());
     config.setTableName(TABLE_NAME);
@@ -192,10 +221,12 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
       throws Exception {
     IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
     Set<String> textIndexCols = new HashSet<>();
-    textIndexCols.add(DOMAIN_NAMES_COL_LUCENE);
+    textIndexCols.add(QUOTES_COL_LUCENE);
+    textIndexCols.add(QUOTES_COL_LUCENE_MV);
     indexLoadingConfig.setTextIndexColumns(textIndexCols);
     Set<String> invertedIndexCols = new HashSet<>();
-    invertedIndexCols.add(DOMAIN_NAMES_COL_LUCENE);
+    invertedIndexCols.add(QUOTES_COL_LUCENE);
+    invertedIndexCols.add(QUOTES_COL_LUCENE_MV);
     indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
     return ImmutableSegmentLoader.load(new File(INDEX_DIR, 
SEGMENT_NAME_LUCENE), indexLoadingConfig);
   }
@@ -209,13 +240,16 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
 
     Map<String, Map<String, String>> columnPropertiesParentMap = new 
HashMap<>();
     Set<String> textIndexCols = new HashSet<>();
-    textIndexCols.add(DOMAIN_NAMES_COL_NATIVE);
+    textIndexCols.add(QUOTES_COL_NATIVE);
+    textIndexCols.add(QUOTES_COL_NATIVE_MV);
     indexLoadingConfig.setTextIndexColumns(textIndexCols);
     indexLoadingConfig.setFSTIndexType(fstType);
     Set<String> invertedIndexCols = new HashSet<>();
-    invertedIndexCols.add(DOMAIN_NAMES_COL_NATIVE);
+    invertedIndexCols.add(QUOTES_COL_NATIVE);
+    invertedIndexCols.add(QUOTES_COL_NATIVE_MV);
     indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
-    columnPropertiesParentMap.put(DOMAIN_NAMES_COL_NATIVE, propertiesMap);
+    columnPropertiesParentMap.put(QUOTES_COL_NATIVE, propertiesMap);
+    columnPropertiesParentMap.put(QUOTES_COL_NATIVE_MV, propertiesMap);
     indexLoadingConfig.setColumnProperties(columnPropertiesParentMap);
     return ImmutableSegmentLoader.load(new File(INDEX_DIR, 
SEGMENT_NAME_NATIVE), indexLoadingConfig);
   }
@@ -247,29 +281,51 @@ public class NativeAndLuceneComparisonTest extends 
BaseQueriesTest {
       }
     }
   }
-
   @Test
   public void testQueries() {
-    String nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vico.*') LIMIT 50000";
-    String luceneQuery = "SELECT * FROM MyTable WHERE 
TEXT_MATCH(DOMAIN_NAMES_LUCENE, 'vico*') LIMIT 50000";
+
+    String nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE, 'vico.*') LIMIT 50000";
+    String luceneQuery = "SELECT * FROM MyTable WHERE 
TEXT_MATCH(QUOTES_LUCENE, 'vico*') LIMIT 50000";
     testSelectionResults(nativeQuery, luceneQuery);
 
-    nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'convi.*ced') LIMIT 50000";
-    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE, 
'convi*ced') LIMIT 50000";
+    nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE, 
'convi.*ced') LIMIT 50000";
+    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE, 
'convi*ced') LIMIT 50000";
     testSelectionResults(nativeQuery, luceneQuery);
 
-    nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vicomte') AND "
-        + "TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'hos.*') LIMIT 50000";
-    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE, 
'vicomte AND hos*') LIMIT 50000";
+    nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE, 
'vicomte') AND "
+        + "TEXT_CONTAINS(QUOTES_NATIVE, 'hos.*') LIMIT 50000";
+    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE, 
'vicomte AND hos*') LIMIT 50000";
     testSelectionResults(nativeQuery, luceneQuery);
 
-    nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'sac.*') OR "
-        + "TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'herself') LIMIT 50000";
-    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE, 
'sac* OR herself') LIMIT 50000";
+    nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE, 
'sac.*') OR "
+        + "TEXT_CONTAINS(QUOTES_NATIVE, 'herself') LIMIT 50000";
+    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE, 'sac* 
OR herself') LIMIT 50000";
     testSelectionResults(nativeQuery, luceneQuery);
 
-    nativeQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vicomte') LIMIT 50000";
-    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE, 
'vicomte') LIMIT 50000";
+    nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE, 
'vicomte') LIMIT 50000";
+    luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE, 
'vicomte') LIMIT 50000";
     testSelectionResults(nativeQuery, luceneQuery);
+
+    String nativeMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vico.*') LIMIT 50000";
+    String luceneMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_MATCH(QUOTES_LUCENE_MV, 'vico*') LIMIT 50000";
+    testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+    nativeMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'convi.*ced') LIMIT 50000";
+    luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV, 
'convi*ced') LIMIT 50000";
+    testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+    nativeMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vicomte') AND "
+        + "TEXT_CONTAINS(QUOTES_NATIVE_MV, 'hos.*') LIMIT 50000";
+    luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV, 
'vicomte AND hos*') LIMIT 50000";
+    testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+    nativeMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'sac.*') OR "
+        + "TEXT_CONTAINS(QUOTES_NATIVE_MV, 'herself') LIMIT 50000";
+    luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV, 
'sac* OR herself') LIMIT 50000";
+    testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+    nativeMVQuery = "SELECT * FROM MyTable WHERE 
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vicomte') LIMIT 50000";
+    luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV, 
'vicomte') LIMIT 50000";
+    testSelectionResults(nativeMVQuery, luceneMVQuery);
   }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
index b78f2dc296..1e56c57c87 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
@@ -63,9 +63,20 @@ public class NativeMutableTextIndex implements 
MutableTextIndex {
 
   @Override
   public void add(String document) {
-    Iterable<String> tokens;
+    addHelper(document);
+    _nextDocId++;
+  }
+
+  @Override
+  public void add(String[] documents) {
+    for (String document : documents) {
+      addHelper(document);
+    }
+    _nextDocId++;
+  }
 
-    tokens = analyze(document);
+  private void addHelper(String document) {
+    Iterable<String> tokens = analyze(document);
     _writeLock.lock();
     try {
       for (String token : tokens) {
@@ -76,17 +87,11 @@ public class NativeMutableTextIndex implements 
MutableTextIndex {
         });
         _invertedIndex.add(currentDictId, _nextDocId);
       }
-      _nextDocId++;
     } finally {
       _writeLock.unlock();
     }
   }
 
-  @Override
-  public void add(String[] documents) {
-    throw new UnsupportedOperationException("Mutable native text indexes are 
not supported for multi-valued columns");
-  }
-
   @Override
   public ImmutableRoaringBitmap getDictIds(String searchQuery) {
     throw new UnsupportedOperationException();
@@ -114,8 +119,8 @@ public class NativeMutableTextIndex implements 
MutableTextIndex {
   private List<String> analyze(String document) {
     List<String> tokens = new ArrayList<>();
     try (TokenStream tokenStream = _analyzer.tokenStream(_column, document)) {
-      tokenStream.reset();
       CharTermAttribute attribute = 
tokenStream.getAttribute(CharTermAttribute.class);
+      tokenStream.reset();
       while (tokenStream.incrementToken()) {
         tokens.add(attribute.toString());
       }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
index d455c1a789..832801883d 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
@@ -67,6 +67,7 @@ public class NativeTextIndexCreator extends 
AbstractTextIndexCreator {
   private final File _tempDir;
   private final File _fstIndexFile;
   private final File _invertedIndexFile;
+  private final Analyzer _analyzer;
   private final Map<String, RoaringBitmapWriter<RoaringBitmap>> 
_postingListMap = new TreeMap<>();
   private final RoaringBitmapWriter.Wizard<Container, RoaringBitmap> 
_bitmapWriterWizard = RoaringBitmapWriter.writer();
   private int _nextDocId = 0;
@@ -86,13 +87,27 @@ public class NativeTextIndexCreator extends 
AbstractTextIndexCreator {
     }
     _fstIndexFile = new File(_tempDir, FST_FILE_NAME);
     _invertedIndexFile = new File(_tempDir, INVERTED_INDEX_FILE_NAME);
+    _analyzer = new 
StandardAnalyzer(LuceneTextIndexCreator.ENGLISH_STOP_WORDS_SET);
   }
 
   @Override
   public void add(String document) {
+    addHelper(document);
+    _nextDocId++;
+  }
+
+  @Override
+  public void add(String[] documents, int length) {
+    for (int i = 0; i < length; i++) {
+      addHelper(documents[i]);
+    }
+    _nextDocId++;
+  }
+
+  private void addHelper(String document) {
     List<String> tokens;
     try {
-      tokens = analyze(document, new 
StandardAnalyzer(LuceneTextIndexCreator.ENGLISH_STOP_WORDS_SET));
+      tokens = analyze(document);
     } catch (IOException e) {
       throw new RuntimeException(e.getMessage());
     }
@@ -100,13 +115,6 @@ public class NativeTextIndexCreator extends 
AbstractTextIndexCreator {
     for (String token : tokens) {
       addToPostingList(token);
     }
-
-    _nextDocId++;
-  }
-
-  @Override
-  public void add(String[] documents, int length) {
-    throw new UnsupportedOperationException("Native text index is not 
supported on MV column: " + _columnName);
   }
 
   @Override
@@ -132,17 +140,22 @@ public class NativeTextIndexCreator extends 
AbstractTextIndexCreator {
   @Override
   public void close()
       throws IOException {
+    _analyzer.close();
     FileUtils.deleteDirectory(_tempDir);
   }
 
-  public List<String> analyze(String text, Analyzer analyzer)
+  public List<String> analyze(String text)
       throws IOException {
     List<String> result = new ArrayList<>();
-    TokenStream tokenStream = analyzer.tokenStream(_columnName, text);
-    CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
-    tokenStream.reset();
-    while (tokenStream.incrementToken()) {
-      result.add(attr.toString());
+    try (TokenStream tokenStream = _analyzer.tokenStream(_columnName, text)) {
+      CharTermAttribute attr = 
tokenStream.addAttribute(CharTermAttribute.class);
+      tokenStream.reset();
+      while (tokenStream.incrementToken()) {
+        result.add(attr.toString());
+      }
+      tokenStream.end();
+    } catch (IOException e) {
+      throw new RuntimeException("Caught exception while tokenizing the 
document for column: " + _columnName, e);
     }
     return result;
   }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
index 3650e3531f..a9cd64c91b 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
@@ -59,7 +59,7 @@ public class NativeTextIndexReader implements TextIndexReader 
{
           PinotDataBuffer.mapFile(indexFile, /* readOnly */ true, 0, 
indexFile.length(), ByteOrder.BIG_ENDIAN, desc);
       populateIndexes();
     } catch (Exception e) {
-      LOGGER.error("Failed to instantiate Lucene text index reader for column 
{}, exception {}", column,
+      LOGGER.error("Failed to instantiate native text index reader for column 
{}, exception {}", column,
           e.getMessage());
       throw new RuntimeException(e);
     }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
index a23d2f550e..54cd746708 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
@@ -184,9 +184,6 @@ public class TextIndexType extends 
AbstractIndexType<TextIndexConfig, TextIndexR
       return null;
     }
     if (config.getFstType() == FSTType.NATIVE) {
-      if (!context.getFieldSpec().isSingleValueField()) {
-        return null;
-      }
       return new NativeMutableTextIndex(context.getFieldSpec().getName());
     }
     if (context.getConsumerDir() == null) {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
index 6345433d0c..2311943ef7 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
@@ -19,7 +19,7 @@
 package org.apache.pinot.segment.local.realtime.impl.invertedindex;
 
 import java.io.File;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
 import org.apache.commons.io.FileUtils;
 import org.apache.lucene.search.SearcherManager;
@@ -33,38 +33,66 @@ import static org.testng.Assert.assertEquals;
 public class NativeAndLuceneMutableTextIndexTest {
   private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), 
"RealTimeNativeVsLuceneTest");
   private static final String TEXT_COLUMN_NAME = "testColumnName";
+  private static final String MV_TEXT_COLUMN_NAME = "testMVColumnName";
 
   private RealtimeLuceneTextIndex _realtimeLuceneTextIndex;
   private NativeMutableTextIndex _nativeMutableTextIndex;
 
-  private List<String> getTextData() {
-    return Arrays.asList("Prince Andrew kept looking with an amused smile from 
Pierre",
-        "vicomte and from the vicomte to their hostess. In the first moment 
of",
-        "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
-        "horror-struck. But when she saw that Pierre’s sacrilegious words",
-        "had not exasperated the vicomte, and had convinced herself that it 
was",
-        "impossible to stop him, she rallied her forces and joined the vicomte 
in", "a vigorous attack on the orator",
-        "horror-struck. But when she", "she rallied her forces and joined", 
"outburst Anna Pávlovna",
-        "she rallied her forces and", "despite her social experience", "had 
not exasperated the vicomte",
-        " despite her social experience", "impossible to stop him", "despite 
her social experience");
+  private RealtimeLuceneTextIndex _realtimeLuceneMVTextIndex;
+  private NativeMutableTextIndex _nativeMutableMVTextIndex;
+
+  private String[] getTextData() {
+    return new String[]{"Prince Andrew kept looking with an amused smile from 
Pierre",
+      "vicomte and from the vicomte to their hostess. In the first moment of",
+      "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
+      "horror-struck. But when she saw that Pierre’s sacrilegious words",
+      "had not exasperated the vicomte, and had convinced herself that it was",
+      "impossible to stop him, she rallied her forces and joined the vicomte 
in", "a vigorous attack on the orator",
+      "horror-struck. But when she", "she rallied her forces and joined", 
"outburst Anna Pávlovna",
+      "she rallied her forces and", "despite her social experience", "had not 
exasperated the vicomte",
+      " despite her social experience", "impossible to stop him", "despite her 
social experience"};
+  }
+
+  private String[][] getMVTextData() {
+    return new String[][]{{"Prince Andrew kept looking with an amused smile 
from Pierre",
+        "vicomte and from the vicomte to their hostess. In the first moment 
of"}, {
+      "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
+        "horror-struck. But when she saw that Pierre’s sacrilegious words"}, {
+      "had not exasperated the vicomte, and had convinced herself that it 
was"}, {
+      "impossible to stop him, she rallied her forces and joined the vicomte 
in", "a vigorous attack on the orator",
+        "horror-struck. But when she", "she rallied her forces and joined", 
"outburst Anna Pávlovna"}, {
+      "she rallied her forces and", "despite her social experience", "had not 
exasperated the vicomte",
+        " despite her social experience", "impossible to stop him", "despite 
her social experience"}};
   }
 
   @BeforeClass
   public void setUp()
       throws Exception {
-    _realtimeLuceneTextIndex = new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME, 
INDEX_DIR, "fooBar", null,
-        null);
+    _realtimeLuceneTextIndex = new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME, 
INDEX_DIR, "fooBar", null, null);
     _nativeMutableTextIndex = new NativeMutableTextIndex(TEXT_COLUMN_NAME);
-    List<String> documents = getTextData();
 
+    _realtimeLuceneMVTextIndex = new 
RealtimeLuceneTextIndex(MV_TEXT_COLUMN_NAME, INDEX_DIR, "fooBar", null, null);
+    _nativeMutableMVTextIndex = new 
NativeMutableTextIndex(MV_TEXT_COLUMN_NAME);
+
+    String[] documents = getTextData();
     for (String doc : documents) {
       _realtimeLuceneTextIndex.add(doc);
       _nativeMutableTextIndex.add(doc);
     }
 
-    SearcherManager searcherManager = 
_realtimeLuceneTextIndex.getSearcherManager();
+    String[][] mvDocuments = getMVTextData();
+    for (String[] mvDoc : mvDocuments) {
+      _realtimeLuceneMVTextIndex.add(mvDoc);
+      _nativeMutableMVTextIndex.add(mvDoc);
+    }
+
+    List<SearcherManager> searcherManagers = new ArrayList<>();
+    searcherManagers.add(_realtimeLuceneTextIndex.getSearcherManager());
+    searcherManagers.add(_realtimeLuceneMVTextIndex.getSearcherManager());
     try {
-      searcherManager.maybeRefresh();
+      for (SearcherManager searcherManager : searcherManagers) {
+        searcherManager.maybeRefresh();
+      }
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
@@ -100,5 +128,6 @@ public class NativeAndLuceneMutableTextIndexTest {
 
   private void testSelectionResults(String nativeQuery, String luceneQuery) {
     assertEquals(_nativeMutableTextIndex.getDocIds(nativeQuery), 
_realtimeLuceneTextIndex.getDocIds(luceneQuery));
+    assertEquals(_nativeMutableMVTextIndex.getDocIds(nativeQuery), 
_realtimeLuceneMVTextIndex.getDocIds(luceneQuery));
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to