Jackie-Jiang commented on a change in pull request #7435:
URL: https://github.com/apache/pinot/pull/7435#discussion_r710400097



##########
File path: 
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/RangeIndexCreatorTest.java
##########
@@ -35,11 +37,12 @@
 import org.testng.annotations.Test;
 
 import static 
org.apache.pinot.segment.spi.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION;
+import static org.testng.Assert.*;
 
 
 public class RangeIndexCreatorTest {
   private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), 
"RangeIndexCreatorTest");
-  private static final Random RANDOM = new Random();
+  private static final Random RANDOM = new Random(42);

Review comment:
       Don't fix the seed

##########
File path: 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java
##########
@@ -164,57 +220,71 @@ private long getOffset(final int rangeId) {
    * @param value
    * @return
    */
-  public int findRangeId(int value) {
+  private int findRangeId(int value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].intValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.intValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.intValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(long value) {
+  private int findRangeId(long value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].longValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.longValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.longValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(float value) {
+  private int findRangeId(float value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].floatValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.floatValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.floatValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(double value) {
+  private int findRangeId(double value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].doubleValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.doubleValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.doubleValue() ? _rangeStartArray.length - 1 
: _rangeStartArray.length;
   }
 
   @Override
   public void close() {
     // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by 
the caller and might be reused later. The
     // caller is responsible of closing the PinotDataBuffer.
   }
+
+  private ImmutableRoaringBitmap getMatchesInRange(int firstRangeId, int 
lastRangeId) {
+    // produce bitmap of all ranges fully covered by buckets.
+    // 1. if firstRangeId is -1, the query range covers the first bucket
+    // 2. if lastRangeId is _rangeStartArray.length, the query range covers 
the last bucket
+    // 3. the loop isn't entered if the range ids are equal
+    MutableRoaringBitmap matching = firstRangeId < lastRangeId ? new 
MutableRoaringBitmap() : null;
+    for (int rangeId = firstRangeId + 1; rangeId < lastRangeId; rangeId++) {
+      matching.or(getDocIds(rangeId));
+    }
+    return matching;
+  }
+
+  private ImmutableRoaringBitmap getPartialMatchesInRange(int firstRangeId, 
int lastRangeId) {
+    if (isOutOfRange(firstRangeId)) {
+      return isOutOfRange(lastRangeId) ? null : getDocIds(lastRangeId);
+    }
+    if (isOutOfRange(lastRangeId)) {
+      return getDocIds(firstRangeId);
+    }
+    return ImmutableRoaringBitmap.or(getDocIds(firstRangeId), 
getDocIds(lastRangeId));
+  }
+
+  private boolean isOutOfRange(int docId) {

Review comment:
       ```suggestion
     private boolean isOutOfRange(int rangeId) {
   ```

##########
File path: 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java
##########
@@ -65,53 +65,109 @@ public RangeIndexReader(PinotDataBuffer dataBuffer) {
     long rangeArrayStartOffset = offset;
 
     _rangeStartArray = new Number[_numRanges];
-    final long lastOffset = dataBuffer.getLong(offset + (_numRanges + 1) * 
_valueType.size() + _numRanges * Long.BYTES);
+    final long lastOffset = dataBuffer.getLong(offset + (long) (_numRanges + 
1) * _valueType.size()
+        + (long) _numRanges * Long.BYTES);
 
-    _bitmapIndexOffset = offset + (_numRanges + 1) * _valueType.size();
+    _bitmapIndexOffset = offset + (long) (_numRanges + 1) * _valueType.size();
 
     Preconditions.checkState(lastOffset == dataBuffer.size(),
         "The last offset should be equal to buffer size! Current lastOffset: " 
+ lastOffset + ", buffer size: "
             + dataBuffer.size());
     switch (_valueType) {
       case INT:
         for (int i = 0; i < _numRanges; i++) {
-          _rangeStartArray[i] = dataBuffer.getInt(rangeArrayStartOffset + i * 
Integer.BYTES);
+          _rangeStartArray[i] = dataBuffer.getInt(rangeArrayStartOffset + 
(long) i * Integer.BYTES);
         }
-        _lastRangeEnd = dataBuffer.getInt(rangeArrayStartOffset + _numRanges * 
Integer.BYTES);
+        _lastRangeEnd = dataBuffer.getInt(rangeArrayStartOffset + (long) 
_numRanges * Integer.BYTES);
         break;
       case LONG:
         for (int i = 0; i < _numRanges; i++) {
-          _rangeStartArray[i] = dataBuffer.getLong(rangeArrayStartOffset + i * 
Long.BYTES);
+          _rangeStartArray[i] = dataBuffer.getLong(rangeArrayStartOffset + 
(long) i * Long.BYTES);
         }
-        _lastRangeEnd = dataBuffer.getLong(rangeArrayStartOffset + _numRanges 
* Long.BYTES);
+        _lastRangeEnd = dataBuffer.getLong(rangeArrayStartOffset + (long) 
_numRanges * Long.BYTES);
         break;
       case FLOAT:
         for (int i = 0; i < _numRanges; i++) {
-          _rangeStartArray[i] = dataBuffer.getFloat(rangeArrayStartOffset + i 
* Float.BYTES);
+          _rangeStartArray[i] = dataBuffer.getFloat(rangeArrayStartOffset + 
(long) i * Float.BYTES);
         }
-        _lastRangeEnd = dataBuffer.getFloat(rangeArrayStartOffset + _numRanges 
* Float.BYTES);
+        _lastRangeEnd = dataBuffer.getFloat(rangeArrayStartOffset + (long) 
_numRanges * Float.BYTES);
         break;
       case DOUBLE:
         for (int i = 0; i < _numRanges; i++) {
-          _rangeStartArray[i] = dataBuffer.getDouble(rangeArrayStartOffset + i 
* Double.BYTES);
+          _rangeStartArray[i] = dataBuffer.getDouble(rangeArrayStartOffset + 
(long) i * Double.BYTES);
         }
-        _lastRangeEnd = dataBuffer.getDouble(rangeArrayStartOffset + 
_numRanges * Double.BYTES);
+        _lastRangeEnd = dataBuffer.getDouble(rangeArrayStartOffset + (long) 
_numRanges * Double.BYTES);
         break;
       default:
         throw new RuntimeException("Range Index Unsupported for dataType:" + 
_valueType);
     }
   }
 
-  @VisibleForTesting
-  public Number[] getRangeStartArray() {
-    return _rangeStartArray;
+  /**
+   * {@inheritDoc}
+   */
+  @Override

Review comment:
       Also annotate the implementations as nullable

##########
File path: 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java
##########
@@ -164,57 +220,71 @@ private long getOffset(final int rangeId) {
    * @param value
    * @return
    */
-  public int findRangeId(int value) {
+  private int findRangeId(int value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].intValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.intValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.intValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(long value) {
+  private int findRangeId(long value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].longValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.longValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.longValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(float value) {
+  private int findRangeId(float value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].floatValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.floatValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.floatValue() ? _rangeStartArray.length - 1 : 
_rangeStartArray.length;
   }
 
-  public int findRangeId(double value) {
+  private int findRangeId(double value) {
     for (int i = 0; i < _rangeStartArray.length; i++) {
       if (value < _rangeStartArray[i].doubleValue()) {
         return i - 1;
       }
     }
-    if (value <= _lastRangeEnd.doubleValue()) {
-      return _rangeStartArray.length - 1;
-    }
-    return -1;
+    return value <= _lastRangeEnd.doubleValue() ? _rangeStartArray.length - 1 
: _rangeStartArray.length;
   }
 
   @Override
   public void close() {
     // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by 
the caller and might be reused later. The
     // caller is responsible of closing the PinotDataBuffer.
   }
+
+  private ImmutableRoaringBitmap getMatchesInRange(int firstRangeId, int 
lastRangeId) {
+    // produce bitmap of all ranges fully covered by buckets.
+    // 1. if firstRangeId is -1, the query range covers the first bucket
+    // 2. if lastRangeId is _rangeStartArray.length, the query range covers 
the last bucket
+    // 3. the loop isn't entered if the range ids are equal
+    MutableRoaringBitmap matching = firstRangeId < lastRangeId ? new 
MutableRoaringBitmap() : null;

Review comment:
       ```suggestion
       MutableRoaringBitmap matching = firstRangeId < (lastRangeId - 1) ? new 
MutableRoaringBitmap() : null;
   ```

##########
File path: 
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java
##########
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.index.reader;
+
+import java.io.Closeable;
+
+/**
+ * Interface for indexed range queries
+ * @param <T>
+ */
+public interface RangeIndexReader<T> extends Closeable {

Review comment:
       ^^

##########
File path: 
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/RangeIndexCreatorTest.java
##########
@@ -106,23 +109,22 @@ private void testDataType(DataType dataType)
       throws IOException {
     FieldSpec fieldSpec = new DimensionFieldSpec(COLUMN_NAME, dataType, true);
     int numDocs = 1000;
-    Number[] values = new Number[numDocs];
+    Object values = valuesArray(dataType, numDocs);
 
+    int numValuesPerRange;
     try (RangeIndexCreator creator = new RangeIndexCreator(INDEX_DIR, 
fieldSpec, dataType, -1, -1, numDocs, numDocs)) {
       addDataToIndexer(dataType, numDocs, 1, creator, values);
       creator.seal();
+      // account for off by one bug in v1 implementation
+      numValuesPerRange = creator.getNumValuesPerRange() + 1;

Review comment:
       I don't think it is off by one in the implementation. The difference is 
that in the implementation, the same value will always go to the same range id 
(line 286), which is not align with the setup in this test

##########
File path: 
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/RangeIndexCreatorTest.java
##########
@@ -242,141 +242,352 @@ private void addDataToIndexer(DataType dataType, int 
numDocs, int numValuesPerEn
     }
   }
 
-  private void checkValueForDocId(DataType dataType, Number[] values, Number[] 
rangeStartArray, int rangeId, int docId,
+  private void verifyRangesForDataType(DataType dataType, Object values, 
Object ranges, int numValuesPerMVEntry,
+                                       
RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader) {
+    switch (dataType) {
+      case INT: {
+        // single bucket ranges
+        int rangeId = 0;
+        for (int[] range : (int[][]) ranges) {
+          assertNull(rangeIndexReader.getMatchingDocIds(range[0], range[1]),
+              "range index can't guarantee match within a single range");
+          ImmutableRoaringBitmap partialMatches = 
rangeIndexReader.getPartiallyMatchingDocIds(range[0], range[1]);
+          assertNotNull(partialMatches, "partial matches for single range must 
not be null");
+          for (int docId : partialMatches.toArray()) {
+            checkValueForDocId(dataType, values, ranges, rangeId, docId, 
numValuesPerMVEntry);
+          }
+          ++rangeId;
+        }
+        // multi bucket ranges
+        int[] lowerPartialRange = ((int[][]) ranges)[0];
+        int[] coveredRange = ((int[][]) ranges)[1];
+        int[] upperPartialRange = ((int[][]) ranges)[2];
+        ImmutableRoaringBitmap matches = 
rangeIndexReader.getMatchingDocIds(lowerPartialRange[0], upperPartialRange[1]);
+        assertNotNull(matches,  "matches for covered range must not be null");
+        for (int docId : matches.toArray()) {
+          checkValueForDocId(dataType, values, ranges, 1, docId, 
numValuesPerMVEntry);
+        }
+        assertEquals(matches, 
rangeIndexReader.getPartiallyMatchingDocIds(coveredRange[0], coveredRange[1]));
+        // partial matches must be the combination of the two edge buckets
+        ImmutableRoaringBitmap partialMatches = 
rangeIndexReader.getPartiallyMatchingDocIds(
+            lowerPartialRange[0], upperPartialRange[1]);
+        assertNotNull(partialMatches, "partial matches for single range must 
not be null");
+        assertEquals(ImmutableRoaringBitmap.or(
+            rangeIndexReader.getPartiallyMatchingDocIds(lowerPartialRange[0], 
lowerPartialRange[1]),
+            rangeIndexReader.getPartiallyMatchingDocIds(upperPartialRange[0], 
upperPartialRange[1])), partialMatches);
+        // edge cases
+        assertEquals(((int[]) values).length, numValuesPerMVEntry

Review comment:
       We should set a bound for int and long to ensure the random value is not 
MIN or MAX




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to