gianm commented on code in PR #18305:
URL: https://github.com/apache/druid/pull/18305#discussion_r2239338636


##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java:
##########
@@ -287,9 +289,14 @@ protected ReturnOrAwait<Unit> runWithSegment(final 
SegmentWithDescriptor segment
 
       final Cursor nextCursor;
 
-      // If asCursor() fails, we need to close nextCursorHolder immediately.
+      // If asCursor() or asVectorCursor() fails, we need to close 
nextCursorHolder immediately.
       try {
-        nextCursor = nextCursorHolder.asCursor();
+        if (nextCursorHolder.canVectorize()) {

Review Comment:
   Replace this with 
`query.context().getVectorize().shouldVectorize(nextCursorHolder.canVectorize())`.
 That way we respect the `vectorize` query parameter (setting `vectorize: 
false` will disable vectorization).



##########
processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java:
##########
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnProcessors;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Implementation of {@link ColumnSelectorFactory} for {@link ShimCursor}.
+ */
+public class ShimColumnSelectorFactory implements ColumnSelectorFactory
+{
+  private final ShimCursor cursor;
+  private final Map<DimensionSpec, DimensionSelector> dimensionSelectors = new 
HashMap<>();
+  private final Map<String, ColumnValueSelector<Object>> columnValueSelectors 
= new HashMap<>();
+
+  public ShimColumnSelectorFactory(ShimCursor cursor)
+  {
+    this.cursor = cursor;
+  }
+
+  @Override
+  public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+  {
+    return dimensionSelectors.computeIfAbsent(
+        dimensionSpec,
+        spec -> {
+          if (spec.mustDecorate()) {
+            throw DruidException.defensive("Only non-decorated dimensions can 
be vectorized.");
+          }
+          final ColumnCapabilities capabilities = 
cursor.vectorColumnSelectorFactory

Review Comment:
   Handle null capabilities too: if the column is missing (capabilities are 
`null`), you can return `DimensionSelector.nilSelector()`.



##########
processing/src/main/java/org/apache/druid/segment/shim/ShimVectorObjectDimSelector.java:
##########
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.DimensionDictionarySelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.DimensionSelectorUtils;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.RangeIndexedInts;
+import org.apache.druid.segment.data.ZeroIndexedInts;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+
+/**
+ * {@link DimensionSelector} that internally uses a {@link 
VectorObjectSelector}. Does not support any dictionary
+ * operations.
+ */
+public class ShimVectorObjectDimSelector implements DimensionSelector
+{
+  private final ShimCursor cursor;
+  private final ReadableVectorInspector vectorInspector;
+  private final VectorObjectSelector vectorSelector;
+  private final boolean hasMultipleValues;
+
+  private Object[] objectVector;
+  private int objectId = ReadableVectorInspector.NULL_ID;
+
+  public ShimVectorObjectDimSelector(
+      final ShimCursor cursor,
+      final VectorObjectSelector vectorSelector,
+      boolean hasMultipleValues
+  )
+  {
+    this.cursor = cursor;
+    this.vectorInspector = 
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+    this.vectorSelector = vectorSelector;
+    this.hasMultipleValues = hasMultipleValues;
+  }
+
+  @Nullable
+  @Override
+  public Object getObject()
+  {
+    populateObjectVector();
+    return objectVector[cursor.currentIndexInVector];
+  }
+
+  @Override
+  public Class<?> classOfObject()
+  {
+    return Object.class;
+  }
+
+  @Override
+  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+  {
+    // Don't bother.
+  }
+
+  private void populateObjectVector()
+  {
+    final int id = vectorInspector.getId();
+    if (id != objectId) {
+      objectVector = vectorSelector.getObjectVector();
+      objectId = id;
+    }
+  }
+
+  @Override
+  public IndexedInts getRow()
+  {
+    if (hasMultipleValues) {
+      Object object = getObject();
+      ArrayList arrayList = (ArrayList) object;
+      RangeIndexedInts rangeIndexedInts = new RangeIndexedInts();
+      rangeIndexedInts.setSize(arrayList.size());
+      return rangeIndexedInts;
+    }
+    return ZeroIndexedInts.instance();
+  }
+
+  @Override
+  public ValueMatcher makeValueMatcher(@Nullable String value)
+  {
+    return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
+  }
+
+  @Override
+  public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
+  {
+    return DimensionSelectorUtils.makeValueMatcherGeneric(this, 
predicateFactory);
+  }
+
+  @Override
+  public boolean nameLookupPossibleInAdvance()
+  {
+    return false;
+  }
+
+  @Override
+  public int getValueCardinality()
+  {
+    return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
+  }
+
+  @Nullable
+  @Override
+  public String lookupName(int id)
+  {
+    Object object = getObject();
+    if (hasMultipleValues) {
+      ArrayList arrayList = (ArrayList) object;

Review Comment:
   As in `getRow`, rather than casting directly to `ArrayList`, you should 
handle `null`, `String`, and `List` for the underlying objects.



##########
processing/src/main/java/org/apache/druid/segment/shim/ShimVectorObjectDimSelector.java:
##########
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.DimensionDictionarySelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.DimensionSelectorUtils;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.RangeIndexedInts;
+import org.apache.druid.segment.data.ZeroIndexedInts;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+
+/**
+ * {@link DimensionSelector} that internally uses a {@link 
VectorObjectSelector}. Does not support any dictionary
+ * operations.
+ */
+public class ShimVectorObjectDimSelector implements DimensionSelector
+{
+  private final ShimCursor cursor;
+  private final ReadableVectorInspector vectorInspector;
+  private final VectorObjectSelector vectorSelector;
+  private final boolean hasMultipleValues;
+
+  private Object[] objectVector;
+  private int objectId = ReadableVectorInspector.NULL_ID;
+
+  public ShimVectorObjectDimSelector(
+      final ShimCursor cursor,
+      final VectorObjectSelector vectorSelector,
+      boolean hasMultipleValues
+  )
+  {
+    this.cursor = cursor;
+    this.vectorInspector = 
cursor.vectorColumnSelectorFactory.getReadableVectorInspector();
+    this.vectorSelector = vectorSelector;
+    this.hasMultipleValues = hasMultipleValues;
+  }
+
+  @Nullable
+  @Override
+  public Object getObject()
+  {
+    populateObjectVector();
+    return objectVector[cursor.currentIndexInVector];
+  }
+
+  @Override
+  public Class<?> classOfObject()
+  {
+    return Object.class;
+  }
+
+  @Override
+  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+  {
+    // Don't bother.
+  }
+
+  private void populateObjectVector()
+  {
+    final int id = vectorInspector.getId();
+    if (id != objectId) {
+      objectVector = vectorSelector.getObjectVector();
+      objectId = id;
+    }
+  }
+
+  @Override
+  public IndexedInts getRow()
+  {
+    if (hasMultipleValues) {
+      Object object = getObject();
+      ArrayList arrayList = (ArrayList) object;

Review Comment:
   This is brittle: it assumes that the underlying object is always going to be 
an `ArrayList`. At a minimum, use plain `List`. You should also handle the case 
here where the underlying object is `null` or a `String`; I think this can 
happen with some underlying selectors.



##########
processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java:
##########
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnProcessors;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Implementation of {@link ColumnSelectorFactory} for {@link ShimCursor}.
+ */
+public class ShimColumnSelectorFactory implements ColumnSelectorFactory
+{
+  private final ShimCursor cursor;
+  private final Map<DimensionSpec, DimensionSelector> dimensionSelectors = new 
HashMap<>();
+  private final Map<String, ColumnValueSelector<Object>> columnValueSelectors 
= new HashMap<>();
+
+  public ShimColumnSelectorFactory(ShimCursor cursor)
+  {
+    this.cursor = cursor;
+  }
+
+  @Override
+  public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+  {
+    return dimensionSelectors.computeIfAbsent(
+        dimensionSpec,
+        spec -> {
+          if (spec.mustDecorate()) {
+            throw DruidException.defensive("Only non-decorated dimensions can 
be vectorized.");
+          }
+          final ColumnCapabilities capabilities = 
cursor.vectorColumnSelectorFactory
+              .getColumnCapabilities(dimensionSpec.getDimension());
+          if (ColumnProcessors.useDictionaryEncodedSelector(capabilities)) {
+            if (capabilities.hasMultipleValues().isTrue()) {

Review Comment:
   We need to go down this path if 
`capabilities.hasMultipleValues().isMaybeTrue()` (if there *may* be 
multi-values present, we have to be ready to handle them).



##########
processing/src/main/java/org/apache/druid/segment/shim/ShimColumnSelectorFactory.java:
##########
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.shim;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnProcessors;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Implementation of {@link ColumnSelectorFactory} for {@link ShimCursor}.
+ */
+public class ShimColumnSelectorFactory implements ColumnSelectorFactory
+{
+  private final ShimCursor cursor;
+  private final Map<DimensionSpec, DimensionSelector> dimensionSelectors = new 
HashMap<>();
+  private final Map<String, ColumnValueSelector<Object>> columnValueSelectors 
= new HashMap<>();
+
+  public ShimColumnSelectorFactory(ShimCursor cursor)
+  {
+    this.cursor = cursor;
+  }
+
+  @Override
+  public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+  {
+    return dimensionSelectors.computeIfAbsent(
+        dimensionSpec,
+        spec -> {
+          if (spec.mustDecorate()) {
+            throw DruidException.defensive("Only non-decorated dimensions can 
be vectorized.");
+          }
+          final ColumnCapabilities capabilities = 
cursor.vectorColumnSelectorFactory
+              .getColumnCapabilities(dimensionSpec.getDimension());
+          if (ColumnProcessors.useDictionaryEncodedSelector(capabilities)) {
+            if (capabilities.hasMultipleValues().isTrue()) {
+              final MultiValueDimensionVectorSelector vectorSelector =
+                  
cursor.vectorColumnSelectorFactory.makeMultiValueDimensionSelector(spec);
+              return new ShimMultiValueDimensionSelector(cursor, 
vectorSelector);
+            } else {
+              final SingleValueDimensionVectorSelector vectorSelector =
+                  
cursor.vectorColumnSelectorFactory.makeSingleValueDimensionSelector(spec);
+              return new ShimSingleValueDimensionSelector(cursor, 
vectorSelector);
+            }
+          } else {
+            // Non-dictionary encoded column, like virtual columns.
+            VectorObjectSelector vectorObjectSelector =
+                
cursor.vectorColumnSelectorFactory.makeObjectSelector(spec.getDimension());
+            return new ShimVectorObjectDimSelector(cursor, 
vectorObjectSelector, capabilities.hasMultipleValues().isTrue());

Review Comment:
   Like above, this should look at 
`capabilities.hasMultipleValues().isMaybeTrue()` rather than `isTrue()`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to