This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new ebd6c3371f5 Use vector cursors when possible in MSQ (#18305)
ebd6c3371f5 is described below
commit ebd6c3371f587b903d033bb5b61274b59c69956c
Author: Adarsh Sanjeev <[email protected]>
AuthorDate: Thu Jul 31 11:59:52 2025 +0530
Use vector cursors when possible in MSQ (#18305)
* Add shim cursor
* Asserts
* Test
* Refactor dictionary method
* Add multi dim and non-dict string cases
* Implement getRows
* Close stuff
* Refactor
* Test changes
* Fix vectorization
---
.../msq/querykit/scan/ScanQueryFrameProcessor.java | 11 +-
.../org/apache/druid/msq/test/MSQTestBase.java | 36 +-
.../org/apache/druid/segment/ColumnProcessors.java | 12 +
.../segment/VectorColumnProcessorFactory.java | 6 +-
.../segment/shim/ShimColumnSelectorFactory.java | 121 ++++++
.../org/apache/druid/segment/shim/ShimCursor.java | 86 +++++
.../shim/ShimMultiValueDimensionSelector.java | 145 +++++++
.../shim/ShimNumericColumnValueSelector.java | 155 ++++++++
.../shim/ShimObjectColumnValueSelector.java | 80 ++++
.../shim/ShimSingleValueDimensionSelector.java | 147 +++++++
.../segment/shim/ShimVectorObjectDimSelector.java | 159 ++++++++
.../apache/druid/segment/shim/ShimCursorTest.java | 422 +++++++++++++++++++++
12 files changed, 1356 insertions(+), 24 deletions(-)
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
index 73c7917ed62..f000b13208d 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
@@ -80,6 +80,8 @@ import org.apache.druid.segment.SimpleSettableOffset;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.shim.ShimCursor;
+import org.apache.druid.segment.vector.VectorCursor;
import org.apache.druid.utils.CloseableUtils;
import javax.annotation.Nullable;
@@ -287,9 +289,14 @@ public class ScanQueryFrameProcessor extends
BaseLeafFrameProcessor
final Cursor nextCursor;
- // If asCursor() fails, we need to close nextCursorHolder immediately.
+ // If asCursor() or asVectorCursor() fails, we need to close
nextCursorHolder immediately.
try {
- nextCursor = nextCursorHolder.asCursor();
+ if
(query.context().getVectorize().shouldVectorize(nextCursorHolder.canVectorize()))
{
+ final VectorCursor vectorCursor = nextCursorHolder.asVectorCursor();
+ nextCursor = vectorCursor == null ? null : new
ShimCursor(vectorCursor);
+ } else {
+ nextCursor = nextCursorHolder.asCursor();
+ }
}
catch (Throwable t) {
throw CloseableUtils.closeAndWrapInCatch(t, nextCursorHolder);
diff --git
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java
index 279f5c751e4..5cc5a6e7592 100644
---
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java
+++
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java
@@ -51,7 +51,6 @@ import org.apache.druid.frame.testutil.FrameTestUtil;
import org.apache.druid.guice.BuiltInTypesModule;
import org.apache.druid.guice.DruidSecondaryModule;
import org.apache.druid.guice.ExpressionModule;
-import org.apache.druid.guice.GuiceInjectors;
import org.apache.druid.guice.IndexingServiceTuningConfigModule;
import org.apache.druid.guice.JoinableFactoryModule;
import org.apache.druid.guice.JsonConfigProvider;
@@ -364,7 +363,9 @@ public class MSQTestBase extends BaseCalciteQueryTest
return DruidModuleCollection.of(
super.getCoreModule(),
new HllSketchModule(),
- new LocalMsqSqlModule()
+ new LocalMsqSqlModule(),
+ new ExpressionModule(),
+ binder ->
binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT)
);
}
@@ -423,15 +424,8 @@ public class MSQTestBase extends BaseCalciteQueryTest
groupByBuffers = TestGroupByBuffers.createDefault();
SqlTestFramework qf = queryFramework();
- Injector secondInjector = GuiceInjectors.makeStartupInjectorWithModules(
- ImmutableList.of(
- new ExpressionModule(),
- (Module) binder ->
-
binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT)
- )
- );
- ObjectMapper secondMapper = setupObjectMapper(secondInjector);
+ ObjectMapper secondMapper = setupObjectMapper(qf.injector());
indexIO = new IndexIO(secondMapper, ColumnConfig.DEFAULT);
segmentCacheManager = new SegmentCacheManagerFactory(TestIndex.INDEX_IO,
secondMapper).manufacturate(newTempFolder("cacheManager"));
@@ -1377,13 +1371,7 @@ public class MSQTestBase extends BaseCalciteQueryTest
for (List<Object> row :
FrameTestUtil.readRowsFromCursorFactory(cursorFactory).toList()) {
// transforming rows for sketch assertions
List<Object> transformedRow = row.stream()
- .map(r -> {
- if (r instanceof
HyperLogLogCollector) {
- return
((HyperLogLogCollector) r).estimateCardinalityRound();
- } else {
- return r;
- }
- })
+
.map(MSQTestBase.this::segmentToAssertionValueMapper)
.collect(Collectors.toList());
segmentIdVsOutputRowsMap.computeIfAbsent(dataSegment.getId(), r ->
new ArrayList<>()).add(transformedRow);
}
@@ -1534,6 +1522,20 @@ public class MSQTestBase extends BaseCalciteQueryTest
}
}
+ /**
+ * Maps certain fields on the segment to a different equivalent value, which
is easier to assert against.
+ * For example, the HLL collector can't really be directly asserted as part
of the test, so it is converted to its
+ * cardinality.
+ */
+ protected Object segmentToAssertionValueMapper(Object r)
+ {
+ if (r instanceof HyperLogLogCollector) {
+ return ((HyperLogLogCollector) r).estimateCardinalityRound();
+ } else {
+ return r;
+ }
+ }
+
public class SelectTester extends MSQTester<SelectTester>
{
private SelectTester()
diff --git
a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
index bf6c399519f..d0a866a6925 100644
--- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
+++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
@@ -386,4 +386,16 @@ public class ColumnProcessors
{
return capabilities == null ||
capabilities.hasMultipleValues().isMaybeTrue();
}
+
+ /**
+ * Checks the {@link ColumnCapabilities} to see if the column has a
dictionary available
+ * ({@link ColumnCapabilities#isDictionaryEncoded()} is true), and there is
a unique mapping of dictionary
+ * id to value ({@link ColumnCapabilities#areDictionaryValuesUnique()} is
true).
+ */
+ public static boolean useDictionaryEncodedSelector(ColumnCapabilities
capabilities)
+ {
+ Preconditions.checkArgument(capabilities != null, "Capabilities must not
be null");
+ Preconditions.checkArgument(capabilities.is(ValueType.STRING), "Must only
be called on a STRING column");
+ return
capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue();
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java
b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java
index 8b50315e2f4..1a8de2b080b 100644
---
a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java
+++
b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java
@@ -19,9 +19,7 @@
package org.apache.druid.segment;
-import com.google.common.base.Preconditions;
import org.apache.druid.segment.column.ColumnCapabilities;
-import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector;
@@ -114,8 +112,6 @@ public interface VectorColumnProcessorFactory<T>
*/
default boolean useDictionaryEncodedSelector(ColumnCapabilities capabilities)
{
- Preconditions.checkArgument(capabilities != null, "Capabilities must not
be null");
- Preconditions.checkArgument(capabilities.is(ValueType.STRING), "Must only
be called on a STRING column");
- return
capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue();
+ return ColumnProcessors.useDictionaryEncodedSelector(capabilities);
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java
new file mode 100644
index 00000000000..5b0029c3c57
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnProcessors;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Implementation of {@link ColumnSelectorFactory} for {@link ShimCursor}.
+ */
+public class ShimColumnSelectorFactory implements ColumnSelectorFactory
+{
+ private final ShimCursor cursor;
+ private final Map<DimensionSpec, DimensionSelector> dimensionSelectors = new
HashMap<>();
+ private final Map<String, ColumnValueSelector<Object>> columnValueSelectors
= new HashMap<>();
+
+ public ShimColumnSelectorFactory(ShimCursor cursor)
+ {
+ this.cursor = cursor;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ return dimensionSelectors.computeIfAbsent(
+ dimensionSpec,
+ spec -> {
+ if (spec.mustDecorate()) {
+ throw DruidException.defensive("Only non-decorated dimensions can
be vectorized.");
+ }
+ final ColumnCapabilities capabilities =
cursor.vectorColumnSelectorFactory
+ .getColumnCapabilities(dimensionSpec.getDimension());
+ if (capabilities == null) {
+ return DimensionSelector.nilSelector();
+ } else if
(ColumnProcessors.useDictionaryEncodedSelector(capabilities)) {
+ if (capabilities.hasMultipleValues().isMaybeTrue()) {
+ final MultiValueDimensionVectorSelector vectorSelector =
+
cursor.vectorColumnSelectorFactory.makeMultiValueDimensionSelector(spec);
+ return new ShimMultiValueDimensionSelector(cursor,
vectorSelector);
+ } else {
+ final SingleValueDimensionVectorSelector vectorSelector =
+
cursor.vectorColumnSelectorFactory.makeSingleValueDimensionSelector(spec);
+ return new ShimSingleValueDimensionSelector(cursor,
vectorSelector);
+ }
+ } else {
+ // Non-dictionary encoded column, like virtual columns.
+ VectorObjectSelector vectorObjectSelector =
+
cursor.vectorColumnSelectorFactory.makeObjectSelector(spec.getDimension());
+ return new ShimVectorObjectDimSelector(cursor,
vectorObjectSelector, capabilities.hasMultipleValues().isMaybeTrue());
+ }
+ }
+ );
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ return columnValueSelectors.computeIfAbsent(
+ columnName,
+ column -> {
+ final ColumnCapabilities capabilities =
cursor.vectorColumnSelectorFactory.getColumnCapabilities(column);
+ if (capabilities == null) {
+ //noinspection unchecked
+ return NilColumnValueSelector.instance();
+ } else if (capabilities.is(ValueType.STRING)) {
+ return makeDimensionSelector(DefaultDimensionSpec.of(columnName));
+ } else if (capabilities.isNumeric()) {
+ return new ShimNumericColumnValueSelector(
+ cursor,
+
cursor.vectorColumnSelectorFactory.makeValueSelector(columnName),
+ capabilities.getType()
+ );
+ } else {
+ return new ShimObjectColumnValueSelector(
+ cursor,
+
cursor.vectorColumnSelectorFactory.makeObjectSelector(columnName)
+ );
+ }
+ }
+ );
+ }
+
+ @Override
+ @Nullable
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return cursor.vectorColumnSelectorFactory.getColumnCapabilities(column);
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimCursor.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimCursor.java
new file mode 100644
index 00000000000..b6e9f87ef9b
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/shim/ShimCursor.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.Cursor;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorCursor;
+
+/**
+ * Adapter from {@link VectorCursor} to {@link Cursor}. Works by reading a batch and then stepping through it as
+ * {@link Cursor#advance()} is called.
+ *
+ * The row-at-a-time position is the pair (current batch of {@link #vectorCursor}, {@link #currentIndexInVector}).
+ * Selectors created by {@link ShimColumnSelectorFactory} read both fields directly, which is why they are
+ * package-private rather than private.
+ */
+public class ShimCursor implements Cursor
+{
+  private final ShimColumnSelectorFactory columnSelectorFactory;
+
+  /** Underlying batch-oriented cursor that this shim steps through one row at a time. */
+  final VectorCursor vectorCursor;
+  /** Selector factory of {@link #vectorCursor}, captured once at construction. */
+  final VectorColumnSelectorFactory vectorColumnSelectorFactory;
+  /** Offset of the current row inside the current batch of {@link #vectorCursor}. */
+  int currentIndexInVector = 0;
+
+  /**
+   * @param vectorCursor cursor to adapt; its column selector factory is fetched here and reused for all selectors
+   */
+  public ShimCursor(VectorCursor vectorCursor)
+  {
+    this.vectorCursor = vectorCursor;
+    this.vectorColumnSelectorFactory = vectorCursor.getColumnSelectorFactory();
+    this.columnSelectorFactory = new ShimColumnSelectorFactory(this);
+  }
+
+  @Override
+  public ColumnSelectorFactory getColumnSelectorFactory()
+  {
+    return columnSelectorFactory;
+  }
+
+  /**
+   * Moves to the next row. Stays within the current batch until its last row has been consumed, then advances the
+   * underlying vector cursor and restarts at offset zero of the next batch.
+   */
+  @Override
+  public void advance()
+  {
+    currentIndexInVector++;
+
+    if (currentIndexInVector == vectorCursor.getCurrentVectorSize()) {
+      vectorCursor.advance();
+      currentIndexInVector = 0;
+    }
+  }
+
+  @Override
+  public void advanceUninterruptibly()
+  {
+    advance();
+  }
+
+  @Override
+  public boolean isDone()
+  {
+    return vectorCursor.isDone();
+  }
+
+  @Override
+  public boolean isDoneOrInterrupted()
+  {
+    return isDone() || Thread.currentThread().isInterrupted();
+  }
+
+  /**
+   * Rewinds the underlying vector cursor and the in-batch offset together.
+   */
+  @Override
+  public void reset()
+  {
+    vectorCursor.reset();
+    // The offset must be rewound along with the vector cursor. Otherwise selectors would index the first batch
+    // with the stale offset left over from before the reset.
+    currentIndexInVector = 0;
+  }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimMultiValueDimensionSelector.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimMultiValueDimensionSelector.java
new file mode 100644
index 00000000000..cfecafefb26
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimMultiValueDimensionSelector.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.DimensionSelectorUtils;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+/**
+ * Shim from {@link MultiValueDimensionVectorSelector} to {@link DimensionSelector} (and, through it,
+ * {@link ColumnValueSelector}) for a {@link ShimCursor}.
+ */
+public class ShimMultiValueDimensionSelector implements DimensionSelector
+{
+ private final ShimCursor cursor;
+ private final ReadableVectorInspector vectorInspector;
+ private final MultiValueDimensionVectorSelector vectorSelector;
+ private IndexedInts currentRow = null;
+
+ private IndexedInts[] rowVector;
+ private int currentId = ReadableVectorInspector.NULL_ID;
+
+ public ShimMultiValueDimensionSelector(
+ final ShimCursor cursor,
+ final MultiValueDimensionVectorSelector vectorSelector
+ )
+ {
+ this.cursor = cursor;
+ this.vectorInspector =
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+ this.vectorSelector = vectorSelector;
+ }
+
+ @Override
+ public IndexedInts getRow()
+ {
+ populateRowVector();
+ currentRow = rowVector[cursor.currentIndexInVector];
+ return currentRow;
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return defaultGetObject();
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(@Nullable String value)
+ {
+ return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
+ {
+ return DimensionSelectorUtils.makeValueMatcherGeneric(this,
predicateFactory);
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ // Don't bother.
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Object.class;
+ }
+
+ @Override
+ public int getValueCardinality()
+ {
+ return vectorSelector.getValueCardinality();
+ }
+
+ @Override
+ public boolean supportsLookupNameUtf8()
+ {
+ return vectorSelector.supportsLookupNameUtf8();
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ return vectorSelector.lookupName(id);
+ }
+
+ @Nullable
+ @Override
+ public ByteBuffer lookupNameUtf8(int id)
+ {
+ return vectorSelector.lookupNameUtf8(id);
+ }
+
+ @Override
+ public boolean nameLookupPossibleInAdvance()
+ {
+ return vectorSelector.nameLookupPossibleInAdvance();
+ }
+
+ @Nullable
+ @Override
+ public IdLookup idLookup()
+ {
+ return vectorSelector.idLookup();
+ }
+
+ private void populateRowVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != currentId) {
+ rowVector = vectorSelector.getRowVector();
+ currentId = id;
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimNumericColumnValueSelector.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimNumericColumnValueSelector.java
new file mode 100644
index 00000000000..3576ac0ba05
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimNumericColumnValueSelector.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.util.function.Supplier;
+
+/**
+ * Shim from {@link VectorValueSelector} to {@link ColumnValueSelector} for a
{@link ShimCursor}.
+ */
+public class ShimNumericColumnValueSelector implements
ColumnValueSelector<Object>
+{
+ private final ShimCursor cursor;
+ private final ReadableVectorInspector vectorInspector;
+ private final VectorValueSelector vectorSelector;
+ private final Supplier<Object> getObjectSupplier;
+
+ private double[] doubleVector;
+ private float[] floatVector;
+ private long[] longVector;
+ private boolean[] nullVector;
+
+ private int doubleId = ReadableVectorInspector.NULL_ID;
+ private int floatId = ReadableVectorInspector.NULL_ID;
+ private int longId = ReadableVectorInspector.NULL_ID;
+ private int nullId = ReadableVectorInspector.NULL_ID;
+
+ public ShimNumericColumnValueSelector(
+ final ShimCursor cursor,
+ final VectorValueSelector vectorSelector,
+ final ValueType preferredType
+ )
+ {
+ this.cursor = cursor;
+ this.vectorInspector =
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+ this.vectorSelector = vectorSelector;
+
+ if (preferredType == ValueType.DOUBLE) {
+ this.getObjectSupplier = () -> isNull() ? null : getDouble();
+ } else if (preferredType == ValueType.FLOAT) {
+ this.getObjectSupplier = () -> isNull() ? null : getFloat();
+ } else if (preferredType == ValueType.LONG) {
+ this.getObjectSupplier = () -> isNull() ? null : getLong();
+ } else {
+ throw DruidException.defensive("Unsupported preferredType[%s], must be
numeric", preferredType);
+ }
+ }
+
+ @Override
+ public double getDouble()
+ {
+ populateDoubleVector();
+ return doubleVector[cursor.currentIndexInVector];
+ }
+
+ @Override
+ public float getFloat()
+ {
+ populateFloatVector();
+ return floatVector[cursor.currentIndexInVector];
+ }
+
+ @Override
+ public long getLong()
+ {
+ populateLongVector();
+ return longVector[cursor.currentIndexInVector];
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ populateNullVector();
+ return nullVector != null && nullVector[cursor.currentIndexInVector];
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return getObjectSupplier.get();
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Number.class;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ // Don't bother.
+ }
+
+ private void populateDoubleVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != doubleId) {
+ doubleVector = vectorSelector.getDoubleVector();
+ doubleId = id;
+ }
+ }
+
+ private void populateFloatVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != floatId) {
+ floatVector = vectorSelector.getFloatVector();
+ floatId = id;
+ }
+ }
+
+ private void populateLongVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != longId) {
+ longVector = vectorSelector.getLongVector();
+ longId = id;
+ }
+ }
+
+ private void populateNullVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != nullId) {
+ nullVector = vectorSelector.getNullVector();
+ nullId = id;
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimObjectColumnValueSelector.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimObjectColumnValueSelector.java
new file mode 100644
index 00000000000..35121213ae6
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimObjectColumnValueSelector.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.ObjectColumnSelector;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+
+/**
+ * Shim from {@link VectorObjectSelector} to {@link ColumnValueSelector} for a
{@link ShimCursor}.
+ */
+public class ShimObjectColumnValueSelector extends ObjectColumnSelector<Object>
+{
+ private final ShimCursor cursor;
+ private final ReadableVectorInspector vectorInspector;
+ private final VectorObjectSelector vectorSelector;
+
+ private Object[] objectVector;
+ private int objectId = ReadableVectorInspector.NULL_ID;
+
+ public ShimObjectColumnValueSelector(
+ final ShimCursor cursor,
+ final VectorObjectSelector vectorSelector
+ )
+ {
+ this.cursor = cursor;
+ this.vectorInspector =
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+ this.vectorSelector = vectorSelector;
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ populateObjectVector();
+ return objectVector[cursor.currentIndexInVector];
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Object.class;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ // Don't bother.
+ }
+
+ private void populateObjectVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != objectId) {
+ objectVector = vectorSelector.getObjectVector();
+ objectId = id;
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimSingleValueDimensionSelector.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimSingleValueDimensionSelector.java
new file mode 100644
index 00000000000..ea9a411b900
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimSingleValueDimensionSelector.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.DimensionSelectorUtils;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.SingleIndexedInt;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+/**
+ * Shim from {@link SingleValueDimensionVectorSelector} to {@link DimensionSelector} (and, through it,
+ * {@link ColumnValueSelector}) for a {@link ShimCursor}.
+ */
+public class ShimSingleValueDimensionSelector implements DimensionSelector
+{
+ private final ShimCursor cursor;
+ private final ReadableVectorInspector vectorInspector;
+ private final SingleValueDimensionVectorSelector vectorSelector;
+ private final SingleIndexedInt currentRow = new SingleIndexedInt();
+
+ private int[] rowVector;
+ private int currentId = ReadableVectorInspector.NULL_ID;
+
+ public ShimSingleValueDimensionSelector(
+ final ShimCursor cursor,
+ final SingleValueDimensionVectorSelector vectorSelector
+ )
+ {
+ this.cursor = cursor;
+ this.vectorInspector =
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+ this.vectorSelector = vectorSelector;
+ }
+
+ @Override
+ public IndexedInts getRow()
+ {
+ populateRowVector();
+ currentRow.setValue(rowVector[cursor.currentIndexInVector]);
+ return currentRow;
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ populateRowVector();
+ return lookupName(rowVector[cursor.currentIndexInVector]);
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(@Nullable String value)
+ {
+ return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
+ {
+ return DimensionSelectorUtils.makeValueMatcherGeneric(this,
predicateFactory);
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ // Don't bother.
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return String.class;
+ }
+
+ @Override
+ public int getValueCardinality()
+ {
+ return vectorSelector.getValueCardinality();
+ }
+
+ @Override
+ public boolean supportsLookupNameUtf8()
+ {
+ return vectorSelector.supportsLookupNameUtf8();
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ return vectorSelector.lookupName(id);
+ }
+
+ @Nullable
+ @Override
+ public ByteBuffer lookupNameUtf8(int id)
+ {
+ return vectorSelector.lookupNameUtf8(id);
+ }
+
+ @Override
+ public boolean nameLookupPossibleInAdvance()
+ {
+ return vectorSelector.nameLookupPossibleInAdvance();
+ }
+
+ @Nullable
+ @Override
+ public IdLookup idLookup()
+ {
+ return vectorSelector.idLookup();
+ }
+
+ private void populateRowVector()
+ {
+ final int id = vectorInspector.getId();
+ if (id != currentId) {
+ rowVector = vectorSelector.getRowVector();
+ currentId = id;
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/segment/shim/ShimVectorObjectDimSelector.java
b/processing/src/main/java/org/apache/druid/segment/shim/ShimVectorObjectDimSelector.java
new file mode 100644
index 00000000000..402a0baca15
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/segment/shim/ShimVectorObjectDimSelector.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.DimensionDictionarySelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.DimensionSelectorUtils;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.RangeIndexedInts;
+import org.apache.druid.segment.data.ZeroIndexedInts;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.List;
+
+/**
+ * {@link DimensionSelector} that internally uses a {@link VectorObjectSelector}, reading the element at the
+ * {@link ShimCursor}'s current position within the current object vector.
+ *
+ * Does not support any dictionary operations: cardinality is reported as unknown, {@link #idLookup()} returns
+ * null, and for list-valued rows the id passed to {@link #lookupName(int)} is used as an index into that list.
+ */
+public class ShimVectorObjectDimSelector implements DimensionSelector
+{
+  private final ShimCursor cursor;
+  private final ReadableVectorInspector vectorInspector;
+  private final VectorObjectSelector vectorSelector;
+  private final boolean hasMultipleValues;
+
+  // Most recently fetched object vector, and the vector id it was fetched for.
+  private Object[] objectVector;
+  private int objectId = ReadableVectorInspector.NULL_ID;
+
+  /**
+   * @param cursor            shim cursor supplying the vector inspector and the current index within the vector
+   * @param vectorSelector    selector the object vectors are read from
+   * @param hasMultipleValues whether rows may hold a {@link List} of values rather than a single value
+   */
+  public ShimVectorObjectDimSelector(
+      final ShimCursor cursor,
+      final VectorObjectSelector vectorSelector,
+      boolean hasMultipleValues
+  )
+  {
+    this.cursor = cursor;
+    this.vectorInspector = cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+    this.vectorSelector = vectorSelector;
+    this.hasMultipleValues = hasMultipleValues;
+  }
+
+  /**
+   * Returns the raw element of the current object vector at the cursor's position; may be null.
+   */
+  @Nullable
+  @Override
+  public Object getObject()
+  {
+    populateObjectVector();
+    return objectVector[cursor.currentIndexInVector];
+  }
+
+  @Override
+  public Class<?> classOfObject()
+  {
+    return Object.class;
+  }
+
+  @Override
+  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+  {
+    // Don't bother.
+  }
+
+  /**
+   * Re-reads the object vector only when the inspector reports a vector id other than the cached one.
+   */
+  private void populateObjectVector()
+  {
+    final int id = vectorInspector.getId();
+    if (id != objectId) {
+      objectVector = vectorSelector.getObjectVector();
+      objectId = id;
+    }
+  }
+
+  /**
+   * Describes the current row as a set of ids that can be passed to {@link #lookupName(int)}.
+   *
+   * Without multiple values this is always {@link ZeroIndexedInts}. With multiple values, a null row is empty,
+   * a {@link String} row is {@link ZeroIndexedInts}, and a {@link List} row is a {@link RangeIndexedInts} sized
+   * to the list.
+   *
+   * @throws DruidException if a multi-value row holds anything other than null, a String or a List
+   */
+  @Override
+  public IndexedInts getRow()
+  {
+    // Read the current object first, exactly as before, so the cached vector is refreshed on every call.
+    final Object object = getObject();
+    if (!hasMultipleValues) {
+      return ZeroIndexedInts.instance();
+    }
+    if (object == null) {
+      return IndexedInts.empty();
+    }
+    if (object instanceof String) {
+      return ZeroIndexedInts.instance();
+    }
+    if (object instanceof List) {
+      final RangeIndexedInts positions = new RangeIndexedInts();
+      positions.setSize(((List<?>) object).size());
+      return positions;
+    }
+    throw DruidException.defensive("Found unexpected object type[%s]", object.getClass());
+  }
+
+  @Override
+  public ValueMatcher makeValueMatcher(@Nullable String value)
+  {
+    return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
+  }
+
+  @Override
+  public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
+  {
+    return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicateFactory);
+  }
+
+  @Override
+  public boolean nameLookupPossibleInAdvance()
+  {
+    return false;
+  }
+
+  @Override
+  public int getValueCardinality()
+  {
+    return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
+  }
+
+  /**
+   * Returns the string form of the current row's value. When multiple values are enabled and the row is a
+   * {@link List}, {@code id} selects the list element; otherwise {@code id} is ignored.
+   */
+  @Nullable
+  @Override
+  public String lookupName(int id)
+  {
+    final Object object = getObject();
+    final Object value;
+    if (hasMultipleValues && object instanceof List) {
+      value = ((List<?>) object).get(id);
+    } else {
+      value = object;
+    }
+    return value == null ? null : value.toString();
+  }
+
+  @Nullable
+  @Override
+  public IdLookup idLookup()
+  {
+    return null;
+  }
+}
diff --git
a/processing/src/test/java/org/apache/druid/segment/shim/ShimCursorTest.java
b/processing/src/test/java/org/apache/druid/segment/shim/ShimCursorTest.java
new file mode 100644
index 00000000000..e915e196ce8
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/shim/ShimCursorTest.java
@@ -0,0 +1,422 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.math.expr.ExpressionProcessing;
+import org.apache.druid.query.QueryContext;
+import org.apache.druid.query.QueryContexts;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.Cursor;
+import org.apache.druid.segment.CursorBuildSpec;
+import org.apache.druid.segment.CursorHolder;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
+import org.apache.druid.segment.SimpleQueryableIndex;
+import org.apache.druid.segment.TestIndex;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.incremental.IncrementalIndex;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
+import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
+import org.apache.druid.segment.vector.VectorCursor;
+import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Compares the results between using a {@link ShimCursor} and {@link Cursor}.
+ *
+ * Each test ingests a handful of rows, persists them, opens one cursor holder, and then walks the regular
+ * cursor and a {@link ShimCursor} over the vector cursor side by side, comparing every listed column per row.
+ */
+public class ShimCursorTest
+{
+  static {
+    ExpressionProcessing.initializeForTests();
+  }
+
+  /**
+   * Vector sizes every test is run with; looked up by name through {@code @MethodSource("data")}.
+   */
+  @Parameterized.Parameters
+  public static Collection<Object> data()
+  {
+    return List.of(1, 2, 4, 7, 512);
+  }
+
+  private Closer closer;
+
+  @BeforeEach
+  void setUp()
+  {
+    closer = Closer.create();
+  }
+
+  @AfterEach
+  void tearDown() throws IOException
+  {
+    closer.close();
+  }
+
+  /**
+   * Tests long and double columns.
+   */
+  @ParameterizedTest(name = "Number columns with vector size {0}")
+  @MethodSource("data")
+  public void testNumberColumns(int vectorSize)
+  {
+    final IncrementalIndex incrementalIndex = newIndex(schemaBuilder(true).build());
+    final List<String> signature = List.of("A", "B", "C");
+
+    incrementalIndex.add(autoRow(signature, 1, 2.0, 3));
+    incrementalIndex.add(autoRow(signature, 4, 5, 6));
+    incrementalIndex.add(autoRow(signature, null, 423, 13));
+    incrementalIndex.add(autoRow(signature, 51, 0, null));
+    incrementalIndex.add(autoRow(signature, 51, -1.0, 4));
+    incrementalIndex.add(autoRow(signature, 123, 413.132, 2));
+    incrementalIndex.add(autoRow(signature, 0, null, 331));
+    incrementalIndex.add(autoRow(signature, Long.MAX_VALUE, -824.0f, Long.MIN_VALUE));
+    incrementalIndex.add(autoRow(signature, -1, -2.0d, 112));
+
+    assertShimMatchesCursor(
+        incrementalIndex,
+        vectorSizeSpec(vectorSize),
+        List.of("A", "B", "C", "non-existent")
+    );
+  }
+
+  /**
+   * Tests a few dictionary encoded columns, with a few number columns thrown in.
+   */
+  @ParameterizedTest(name = "Dictionary columns with vector size {0}")
+  @MethodSource("data")
+  public void testDictionaryColumns(int vectorSize)
+  {
+    final IncrementalIndex incrementalIndex = newIndex(schemaBuilder(true).build());
+    final List<String> signature = List.of("A", "B", "C", "D", "E", "F");
+
+    incrementalIndex.add(autoRow(signature, 1, 1.0, "Tom", arr("Bat", "Knife"), obj("A", "B"), null));
+    incrementalIndex.add(autoRow(signature, 2, 2.0, "Bob", arr("Builder", "Carpenter"), obj("A", "B"), null));
+    incrementalIndex.add(autoRow(signature, 3, 4.0, "Jack", arr("A", "B"), obj("Jacky", "Bobby"), null));
+    incrementalIndex.add(autoRow(signature, 4, 8.0, "Will", arr("Sing", "Dance"), obj("Sing", "B"), null));
+    incrementalIndex.add(autoRow(signature, 5, 16.0, "Smith", arr("Car", "Trunk"), obj(), null));
+    incrementalIndex.add(autoRow(signature, 6, -1, "Cat", arr(), obj("Bat", "Knife"), null));
+    incrementalIndex.add(autoRow(signature, 7, -2.0, "Drew", arr("Machine", "Rabbit"), obj("bat", "knife"), null));
+
+    assertShimMatchesCursor(incrementalIndex, vectorSizeSpec(vectorSize), signature);
+  }
+
+  /**
+   * Tests a column fed with lists of strings while schema discovery is turned off.
+   */
+  @ParameterizedTest(name = "Multi-value dimension columns with vector size {0}")
+  @MethodSource("data")
+  public void testMultiDimColumns(int vectorSize)
+  {
+    final IncrementalIndex incrementalIndex = newIndex(schemaBuilder(false).build());
+    final List<String> signature = List.of("A", "B");
+
+    incrementalIndex.add(autoRow(signature, 1, arr("Bat", "Knife")));
+    incrementalIndex.add(autoRow(signature, 2, arr("Builder", "Carpenter")));
+    incrementalIndex.add(autoRow(signature, 3, arr("A", "B")));
+    incrementalIndex.add(autoRow(signature, 4, arr("Sing", "Dance")));
+    incrementalIndex.add(autoRow(signature, 5, arr("Car", "Trunk")));
+    incrementalIndex.add(autoRow(signature, 6, arr()));
+    incrementalIndex.add(autoRow(signature, 7, arr("Machine", "Rabbit")));
+
+    assertShimMatchesCursor(incrementalIndex, vectorSizeSpec(vectorSize), signature);
+  }
+
+  /**
+   * Tests a string-typed expression virtual column ({@code v0 = concat("A", "B")}) next to its input columns.
+   */
+  @ParameterizedTest(name = "Non dict-encoded string columns with vector size {0}")
+  @MethodSource("data")
+  public void testNonDictStringColumns(int vectorSize)
+  {
+    final IncrementalIndex incrementalIndex = newIndex(
+        schemaBuilder(false).withVirtualColumns(concatVirtualColumns()).build()
+    );
+    final List<String> signature = List.of("A", "B");
+
+    incrementalIndex.add(autoRow(signature, 1, "Tom"));
+    incrementalIndex.add(autoRow(signature, 2, "Bob"));
+    incrementalIndex.add(autoRow(signature, 3, "Jack"));
+    incrementalIndex.add(autoRow(signature, 4, "Will"));
+    incrementalIndex.add(autoRow(signature, 5, "Smith"));
+    incrementalIndex.add(autoRow(signature, 6, "Cat"));
+    incrementalIndex.add(autoRow(signature, 7, "Drew"));
+
+    final CursorBuildSpec cursorBuildSpec = CursorBuildSpec.builder()
+                                                           .setQueryContext(vectorSizeContext(vectorSize))
+                                                           .setVirtualColumns(concatVirtualColumns())
+                                                           .build();
+
+    assertShimMatchesCursor(incrementalIndex, cursorBuildSpec, List.of("A", "B", "v0"));
+  }
+
+  /**
+   * Starts an index schema with rollup disabled and all dimensions included; callers may add to it before building.
+   */
+  private static IncrementalIndexSchema.Builder schemaBuilder(boolean useSchemaDiscovery)
+  {
+    return IncrementalIndexSchema.builder()
+                                 .withDimensionsSpec(
+                                     DimensionsSpec.builder()
+                                                   .useSchemaDiscovery(useSchemaDiscovery)
+                                                   .setIncludeAllDimensions(true)
+                                                   .build()
+                                 )
+                                 .withRollup(false);
+  }
+
+  /**
+   * Creates an on-heap incremental index (max 100 rows) for the schema and registers it with {@link #closer}.
+   */
+  private IncrementalIndex newIndex(IncrementalIndexSchema schema)
+  {
+    return closer.register(
+        new OnheapIncrementalIndex.Builder()
+            .setMaxRowCount(100)
+            .setIndexSchema(schema)
+            .build()
+    );
+  }
+
+  /**
+   * Query context carrying only the requested vector size.
+   */
+  private static QueryContext vectorSizeContext(int vectorSize)
+  {
+    return QueryContext.of(Map.of(QueryContexts.VECTOR_SIZE_KEY, vectorSize));
+  }
+
+  /**
+   * Cursor build spec whose only setting is the vector size.
+   */
+  private static CursorBuildSpec vectorSizeSpec(int vectorSize)
+  {
+    return CursorBuildSpec.builder()
+                          .setQueryContext(vectorSizeContext(vectorSize))
+                          .build();
+  }
+
+  /**
+   * Fresh virtual columns holding the single string expression {@code v0 = concat("A", "B")}.
+   */
+  private static VirtualColumns concatVirtualColumns()
+  {
+    return VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "v0",
+            "concat(\"A\", \"B\")",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+  }
+
+  /**
+   * Persists the index, opens a cursor holder for the spec, requires that it can vectorize, and compares the
+   * regular cursor against a {@link ShimCursor} built on the holder's vector cursor. Everything closeable is
+   * registered with {@link #closer}.
+   */
+  private void assertShimMatchesCursor(
+      IncrementalIndex incrementalIndex,
+      CursorBuildSpec cursorBuildSpec,
+      List<String> columns
+  )
+  {
+    final SimpleQueryableIndex index =
+        closer.register((SimpleQueryableIndex) TestIndex.persistAndMemoryMap(incrementalIndex));
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    final CursorHolder cursorHolder = closer.register(cursorFactory.makeCursorHolder(cursorBuildSpec));
+    Assertions.assertTrue(cursorHolder.canVectorize());
+
+    final VectorCursor vectorCursor = cursorHolder.asVectorCursor();
+    final Cursor cursor = cursorHolder.asCursor();
+    final ShimCursor shimCursor = new ShimCursor(vectorCursor);
+
+    compareCursors(columns, cursor, shimCursor);
+  }
+
+  private static List<Object> arr(Object... vals)
+  {
+    return Arrays.asList(vals);
+  }
+
+  /**
+   * Builds an insertion-ordered map from alternating key/value arguments.
+   */
+  private static Map<String, Object> obj(Object... vals)
+  {
+    if (vals.length % 2 != 0) {
+      throw new ISE("vals.length[%d] needs to be a multiple of 2", vals.length);
+    }
+
+    Map<String, Object> retVal = new LinkedHashMap<>();
+    for (int i = 0; i < vals.length; i += 2) {
+      retVal.put((String) vals[i], vals[i + 1]);
+    }
+    return retVal;
+  }
+
+  /**
+   * Compares an expected {@link Cursor} to a {@link ShimCursor} and asserts that they perform in a similar way:
+   * both are advanced in lockstep, every column in the signature is compared at each position, and both must
+   * finish together.
+   */
+  private static void compareCursors(List<String> signature, Cursor expected, ShimCursor actual)
+  {
+    final ColumnSelectorFactory expectedFactory = expected.getColumnSelectorFactory();
+    final ColumnSelectorFactory actualFactory = actual.getColumnSelectorFactory();
+    while (!expected.isDone()) {
+      Assertions.assertFalse(actual.isDone());
+      for (String columnName : signature) {
+        compareColumnValueSelector(columnName, expectedFactory, actualFactory);
+        compareDimSelectorIfSupported(columnName, expectedFactory, actualFactory);
+      }
+
+      expected.advance();
+      actual.advance();
+    }
+
+    Assertions.assertTrue(actual.isDone());
+  }
+
+  /**
+   * Compares the column value selectors of both factories at the current position. A column with no expected
+   * capabilities must also have none on the shim side and read as null; numeric columns are compared as double,
+   * long and float; array columns element-wise; everything else by object equality.
+   */
+  private static void compareColumnValueSelector(
+      String columnName,
+      ColumnSelectorFactory expectedFactory,
+      ColumnSelectorFactory actualFactory
+  )
+  {
+    final ColumnCapabilities expectedCapabilities = expectedFactory.getColumnCapabilities(columnName);
+    final ColumnCapabilities actualCapabilities = actualFactory.getColumnCapabilities(columnName);
+
+    final ColumnValueSelector<?> expectedSelector = expectedFactory.makeColumnValueSelector(columnName);
+    final ColumnValueSelector<?> actualSelector = actualFactory.makeColumnValueSelector(columnName);
+
+    if (expectedCapabilities == null) {
+      Assertions.assertNull(actualCapabilities);
+      Assertions.assertTrue(actualSelector.isNull());
+      return;
+    }
+
+    // Fail with an assertion rather than a NullPointerException if the shim reports no capabilities.
+    Assertions.assertNotNull(actualCapabilities);
+
+    if (expectedCapabilities.isNumeric()) {
+      Assertions.assertTrue(actualCapabilities.isNumeric());
+      Assertions.assertEquals(expectedSelector.getDouble(), actualSelector.getDouble());
+      Assertions.assertEquals(expectedSelector.getLong(), actualSelector.getLong());
+      Assertions.assertEquals(expectedSelector.getFloat(), actualSelector.getFloat());
+    } else if (expectedCapabilities.isArray()) {
+      Assertions.assertTrue(actualCapabilities.isArray());
+      Assertions.assertArrayEquals((Object[]) expectedSelector.getObject(), (Object[]) actualSelector.getObject());
+    } else {
+      Assertions.assertEquals(expectedSelector.getObject(), actualSelector.getObject());
+    }
+  }
+
+  /**
+   * For columns whose expected type is STRING, also compares the dimension selectors: the object, the row size
+   * and each id in the row. Other columns are skipped.
+   */
+  private static void compareDimSelectorIfSupported(
+      String columnName,
+      ColumnSelectorFactory expectedFactory,
+      ColumnSelectorFactory actualFactory
+  )
+  {
+    final ColumnCapabilities expectedCapabilities = expectedFactory.getColumnCapabilities(columnName);
+
+    if (expectedCapabilities != null && expectedCapabilities.toColumnType().equals(ColumnType.STRING)) {
+      final DimensionSelector expectedDimSelector =
+          expectedFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName));
+      final DimensionSelector actualDimSelector =
+          actualFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName));
+
+      Assertions.assertEquals(expectedDimSelector.getObject(), actualDimSelector.getObject());
+      IndexedInts expectedInts = expectedDimSelector.getRow();
+      IndexedInts actualInts = actualDimSelector.getRow();
+      int numValues = expectedInts.size();
+      Assertions.assertEquals(numValues, actualInts.size());
+      for (int i = 0; i < numValues; i++) {
+        Assertions.assertEquals(expectedInts.get(i), actualInts.get(i));
+      }
+    }
+  }
+
+  /**
+   * Builds an input row at timestamp 0 by pairing each signature entry with the value at the same position.
+   */
+  private static MapBasedInputRow autoRow(List<String> signature, Object... values)
+  {
+    if (signature.size() != values.length) {
+      throw new RuntimeException("Signature and values do not match");
+    }
+    Map<String, Object> row = new HashMap<>();
+
+    for (int i = 0; i < signature.size(); i++) {
+      row.put(signature.get(i), values[i]);
+    }
+
+    return new MapBasedInputRow(0, signature, row);
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]