clintropolis commented on code in PR #18521:
URL: https://github.com/apache/druid/pull/18521#discussion_r2354531282


##########
processing/src/main/java/org/apache/druid/segment/VirtualColumn.java:
##########
@@ -129,11 +192,40 @@ default boolean canVectorize(ColumnInspector inspector)
   }
 
   /**
-   * Build a {@link SingleValueDimensionVectorSelector} corresponding to this 
virtual column. Also provides the name
-   * that the virtual column was referenced with (through {@link 
DimensionSpec#getDimension()}, which is useful if this
-   * column uses dot notation. The virtual column is expected to apply any 
necessary decoration from the
-   * {@link DimensionSpec}.
+   * Build a selector corresponding to this virtual column.
+   *
+   * The virtual column is expected to apply any necessary {@link 
DimensionSpec#decorate(DimensionSelector)} or
+   * {@link DimensionSpec#getExtractionFn()} from the dimensionSpec.
+   *
+   * @param dimensionSpec  spec the column was referenced with. Also provides 
the name that the
+   *                       virtual column was referenced with, which is useful 
if this column uses dot notation.
+   * @param factory        object for fetching underlying selectors.
+   * @param columnSelector object for fetching underlying columns, if 
available. Generally only available for
+   *                       regular segments.
+   * @param offset         offset to use with underlying columns. Available 
only if columnSelector is available.
    */
+  default SingleValueDimensionVectorSelector 
makeSingleValueVectorDimensionSelector(
+      DimensionSpec dimensionSpec,
+      VectorColumnSelectorFactory factory,
+      ColumnSelector columnSelector,
+      ReadableVectorOffset offset

Review Comment:
   should these be `@Nullable` to be consistent with the non-vectorized 
methods? same comment for other vectorized selector methods



##########
processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java:
##########
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.virtual;
+
+import com.fasterxml.jackson.annotation.JacksonInject;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.google.common.base.Preconditions;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.Parser;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.dimension.ExtractionDimensionSpec;
+import org.apache.druid.query.expression.NestedDataExpressions;
+import org.apache.druid.query.extraction.ExtractionFn;
+import org.apache.druid.query.filter.ColumnIndexSelector;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.VirtualColumn;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.SelectableColumn;
+import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.nested.NestedColumnIndexSupplier;
+import org.apache.druid.segment.nested.NestedColumnSelectorFactory;
+import org.apache.druid.segment.nested.NestedColumnTypeInspector;
+import org.apache.druid.segment.nested.NestedPathField;
+import org.apache.druid.segment.nested.NestedPathPart;
+import org.apache.druid.segment.nested.NestedVectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.NilVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorOffset;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Specialized virtual column for {@link 
NestedDataExpressions.JsonObjectExprMacro}.
+ */
+@JsonTypeName("nested-object")
+public class NestedObjectVirtualColumn extends 
SpecializedExpressionVirtualColumn
+{
+  private final Map<String, TypedExpression> keyExprMap;
+  private final ExprMacroTable macroTable;
+
+  @JsonCreator
+  public NestedObjectVirtualColumn(
+      @JsonProperty("name") String name,
+      @JsonProperty("object") Map<String, TypedExpression> keyExprMap,
+      @JacksonInject ExprMacroTable macroTable
+  )
+  {
+    super(
+        new ExpressionVirtualColumn(
+            name,
+            StringUtils.format(
+                "%s(%s)",
+                NestedDataExpressions.JsonObjectExprMacro.NAME,
+                keyExprMap.entrySet().stream().map(entry -> {
+                  final String key = entry.getKey();
+                  final TypedExpression valueExpr = entry.getValue();
+                  return Parser.constant(key).stringify() + ',' + 
valueExpr.expression;
+                }).collect(Collectors.joining(","))
+            ),
+            ColumnType.NESTED_DATA,
+            macroTable
+        )
+    );
+
+    this.keyExprMap = keyExprMap;
+    this.macroTable = macroTable;
+  }
+
+  @JsonProperty("object")
+  public Map<String, TypedExpression> getKeyExprMap()
+  {
+    return keyExprMap;
+  }
+
+  @Override
+  public SelectableColumn toSelectableColumn(final ColumnIndexSelector 
columnSelector)
+  {
+    if (keyExprMap.values().stream().allMatch(te -> te.type.isPrimitive())) {
+      return new SelectableColumn()
+      {
+        @Override
+        public <T> T as(Class<T> clazz)
+        {
+          if (NestedColumnTypeInspector.class.equals(clazz)
+              || NestedColumnSelectorFactory.class.equals(clazz)
+              || NestedVectorColumnSelectorFactory.class.equals(clazz)
+              || NestedColumnIndexSupplier.class.equals(clazz)) {
+            return (T) new AsNestedColumn(columnSelector);
+          }
+
+          return null;
+        }
+      };
+    } else {
+      return super.toSelectableColumn(columnSelector);
+    }
+  }
+
+  @Override
+  public String toString()
+  {
+    return "NestedObjectVirtualColumn{" +
+           "name='" + getOutputName() + '\'' +
+           ", object=" + keyExprMap +
+           '}';
+  }
+
+  /**
+   * Nested column view of the json_object function. Only usable for 
single-level objects, where all
+   * values are primitives.
+   */
+  class AsNestedColumn implements
+      NestedColumnTypeInspector,
+      NestedVectorColumnSelectorFactory,
+      NestedColumnSelectorFactory,
+      NestedColumnIndexSupplier
+  {
+    private final ColumnIndexSelector columnSelector;
+
+    /**
+     * Map of field name to result of {@link 
#getVirtualColumnForExpression(List, TypedExpression)}.
+     */
+    private final Map<String, VirtualColumn> keyColumnMap = new HashMap<>();
+
+    public AsNestedColumn(final ColumnIndexSelector columnSelector)
+    {
+      this.columnSelector = columnSelector;
+    }
+
+    @Override
+    public List<List<NestedPathPart>> getNestedFields()
+    {
+      final List<List<NestedPathPart>> retVal = new ArrayList<>();
+      for (final String keyString : keyExprMap.keySet()) {
+        retVal.add(Collections.singletonList(new NestedPathField(keyString)));
+      }
+      return retVal;
+    }
+
+    @Override
+    @Nullable
+    public Set<ColumnType> getFieldTypes(List<NestedPathPart> path)
+    {
+      if (path.isEmpty()) {
+        return Set.of(ColumnType.NESTED_DATA);
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      if (typedExpression != null) {
+        return Set.of(typedExpression.type);
+      } else {
+        return Set.of();
+      }
+    }
+
+    @Override
+    @Nullable
+    public ColumnType getFieldLogicalType(List<NestedPathPart> path)
+    {
+      final Set<ColumnType> fieldTypes = getFieldTypes(path);
+      if (fieldTypes == null || fieldTypes.isEmpty()) {
+        return null;
+      } else {
+        return fieldTypes.iterator().next();
+      }
+    }
+
+    @Override
+    public boolean isNumeric(List<NestedPathPart> path)
+    {
+      final ColumnType type = getFieldLogicalType(path);
+      return type != null && type.isNumeric();
+    }
+
+    @Override
+    public DimensionSelector makeDimensionSelector(
+        List<NestedPathPart> path,
+        @Nullable ExtractionFn extractionFn,
+        ColumnSelectorFactory columnSelectorFactory,
+        ReadableOffset readableOffset
+    )
+    {
+      if (path.isEmpty()) {
+        return NestedObjectVirtualColumn.this.makeDimensionSelector(
+            
DefaultDimensionSpec.of(NestedObjectVirtualColumn.this.getOutputName()),
+            columnSelectorFactory,
+            columnSelector,
+            readableOffset
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+      if (vc == null) {
+        return DimensionSelector.constant(null, extractionFn);
+      }
+
+      final String key = path.get(0).getPartIdentifier();
+      final DimensionSpec dimensionSpec;
+
+      if (extractionFn != null) {
+        dimensionSpec = new ExtractionDimensionSpec(key, key, 
typedExpression.type, extractionFn);
+      } else {
+        dimensionSpec = DefaultDimensionSpec.of(key, typedExpression.type);
+      }
+
+      return vc.makeDimensionSelector(dimensionSpec, columnSelectorFactory, 
columnSelector, readableOffset);
+    }
+
+    @Override
+    public ColumnValueSelector<?> makeColumnValueSelector(
+        List<NestedPathPart> path,
+        ColumnSelectorFactory columnSelectorFactory,
+        ReadableOffset readableOffset
+    )
+    {
+      if (path.isEmpty()) {
+        return NestedObjectVirtualColumn.this.makeColumnValueSelector(
+            NestedObjectVirtualColumn.this.getOutputName(),
+            columnSelectorFactory,
+            columnSelector,
+            readableOffset
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+      if (vc == null) {
+        return NilColumnValueSelector.instance();
+      }
+
+      final String key = path.get(0).getPartIdentifier();
+
+      return vc.makeColumnValueSelector(
+          key,
+          columnSelectorFactory,
+          columnSelector,
+          readableOffset
+      );
+    }
+
+    @Override
+    public SingleValueDimensionVectorSelector 
makeSingleValueDimensionVectorSelector(
+        List<NestedPathPart> path,
+        VectorColumnSelectorFactory columnSelectorFactory,
+        ReadableVectorOffset readableOffset
+    )
+    {
+      if (path.isEmpty()) {
+        return 
NestedObjectVirtualColumn.this.makeSingleValueVectorDimensionSelector(
+            
DefaultDimensionSpec.of(NestedObjectVirtualColumn.this.getOutputName()),
+            columnSelectorFactory,
+            columnSelector,
+            readableOffset
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+      if (vc == null) {
+        return NilVectorSelector.create(readableOffset);
+      }
+
+      final String key = path.get(0).getPartIdentifier();
+
+      return vc.makeSingleValueVectorDimensionSelector(
+          DefaultDimensionSpec.of(key),
+          columnSelectorFactory,
+          columnSelector,
+          readableOffset
+      );
+    }
+
+    @Override
+    public VectorObjectSelector makeVectorObjectSelector(
+        List<NestedPathPart> path,
+        VectorColumnSelectorFactory columnSelectorFactory,
+        ReadableVectorOffset readableOffset
+    )
+    {
+      if (path.isEmpty()) {
+        return NestedObjectVirtualColumn.this.makeVectorObjectSelector(
+            NestedObjectVirtualColumn.this.getOutputName(),
+            columnSelectorFactory,
+            columnSelector,
+            readableOffset
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+      if (vc == null) {
+        return NilVectorSelector.create(readableOffset);
+      }
+
+      final String key = path.get(0).getPartIdentifier();
+
+      return vc.makeVectorObjectSelector(
+          key,
+          columnSelectorFactory,
+          columnSelector,
+          readableOffset
+      );
+    }
+
+    @Override
+    public VectorValueSelector makeVectorValueSelector(
+        List<NestedPathPart> path,
+        VectorColumnSelectorFactory columnSelectorFactory,
+        ReadableVectorOffset readableOffset
+    )
+    {
+      if (path.isEmpty()) {
+        return NestedObjectVirtualColumn.this.makeVectorValueSelector(
+            NestedObjectVirtualColumn.this.getOutputName(),
+            columnSelectorFactory,
+            columnSelector,
+            readableOffset
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+      if (vc == null) {
+        return NilVectorSelector.create(readableOffset);
+      }
+
+      final String key = path.get(0).getPartIdentifier();
+
+      return vc.makeVectorValueSelector(
+          key,
+          columnSelectorFactory,
+          columnSelector,
+          readableOffset
+      );
+    }
+
+    @Override
+    @Nullable
+    public ColumnIndexSupplier getColumnIndexSupplier(List<NestedPathPart> 
path)
+    {
+      if (path.isEmpty()) {
+        return NestedObjectVirtualColumn.this.getIndexSupplier(
+            NestedObjectVirtualColumn.this.getOutputName(),
+            columnSelector
+        );
+      }
+
+      final TypedExpression typedExpression = getExpressionForPath(path);
+      final VirtualColumn vc = getVirtualColumnForExpression(path, 
typedExpression);
+
+      if (vc != null) {
+        return vc.getIndexSupplier(vc.getOutputName(), columnSelector);
+      } else {
+        return null;
+      }
+    }
+
+    /**
+     * Returns the {@link TypedExpression} corresponding to a path. Do not 
call this method with an empty path;
+     * this case should be handled without using {@link TypedExpression}.
+     */
+    @Nullable
+    private TypedExpression getExpressionForPath(final List<NestedPathPart> 
path)
+    {
+      if (path.isEmpty()) {
+        throw DruidException.defensive("Empty path should be handled by the 
caller, not here.");
+      } else if (path.size() > 1) {
+        // AsNestedColumn is only created if all values are primitive; cannot 
have anything beyond one level.
+        return null;
+      } else {
+        return (TypedExpression) path.get(0).find(keyExprMap);
+      }
+    }
+
+    /**
+     * Returns the {@link VirtualColumn} corresponding to a path. Do not call 
this method with an empty path;
+     * this case should be handled without using {@link TypedExpression}.
+     *
+     * @param path            the path
+     * @param typedExpression result of {@link #getExpressionForPath(List)}
+     */
+    @Nullable
+    private VirtualColumn getVirtualColumnForExpression(
+        final List<NestedPathPart> path,
+        @Nullable final TypedExpression typedExpression
+    )
+    {
+      if (typedExpression != null) {
+        return keyColumnMap.computeIfAbsent(
+            path.get(0).getPartIdentifier(),
+            fieldName -> new ExpressionVirtualColumn(
+                fieldName,
+                typedExpression.expression,
+                typedExpression.type,
+                macroTable
+            )
+        );
+      } else {
+        return null;
+      }
+    }
+  }
+
+  /**
+   * Represents an expression and a target type. Like a miniature {@link 
ExpressionVirtualColumn} with no output name.
+   */
+  public static class TypedExpression

Review Comment:
   feels like a bit of overlap with 
https://github.com/apache/druid/blob/master/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java#L435,
 though not exactly. I don't think need to worry about in this PR, just 
thinking about if there is some room for simplifying things later



##########
processing/src/main/java/org/apache/druid/segment/VirtualColumns.java:
##########
@@ -297,137 +287,98 @@ public boolean canVectorize(ColumnInspector 
columnInspector)
   /**
    * Create a single value dimension vector (string) selector.
    *
-   * @throws IllegalArgumentException if the virtual column does not exist 
(see {@link #exists(String)}
-   */
-  public SingleValueDimensionVectorSelector 
makeSingleValueDimensionVectorSelector(
-      DimensionSpec dimensionSpec,
-      VectorColumnSelectorFactory factory
-  )
-  {
-    final VirtualColumn virtualColumn = 
getVirtualColumnForSelector(dimensionSpec.getDimension());
-    final SingleValueDimensionVectorSelector selector = 
virtualColumn.makeSingleValueVectorDimensionSelector(
-        dimensionSpec,
-        factory
-    );
-    Preconditions.checkNotNull(selector, "selector");
-    return selector;
-  }
-
-  /**
-   * Try to create an optimized single value dimension (string) vector 
selector, directly from a
-   * {@link ColumnSelector}. If this method returns null, callers should try 
to fallback to
-   * {@link #makeSingleValueDimensionVectorSelector(DimensionSpec, 
VectorColumnSelectorFactory)}  instead.
+   * @param dimensionSpec  spec the column was referenced with
+   * @param factory        object for fetching underlying selectors
+   * @param columnSelector object for fetching underlying columns
+   * @param offset         offset to use with underlying columns
    *
    * @throws IllegalArgumentException if the virtual column does not exist 
(see {@link #exists(String)}
    */
-  @Nullable
   public SingleValueDimensionVectorSelector 
makeSingleValueDimensionVectorSelector(
       DimensionSpec dimensionSpec,
+      VectorColumnSelectorFactory factory,
       ColumnSelector columnSelector,
       ReadableVectorOffset offset

Review Comment:
   same comment about `@Nullable`



##########
processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java:
##########
@@ -274,6 +267,10 @@ public ColumnIndexSupplier getIndexSupplier(
       ColumnIndexSelector columnIndexSelector
   )
   {
+    if (isDirectAccess(columnIndexSelector)) {
+      return 
columnIndexSelector.getIndexSupplier(expression.parsed.get().getBindingIfIdentifier());
+    }
+

Review Comment:
   this isn't really necessary, `IdentifierExpr` implements 
`asColumnIndexSupplier` that does this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to