This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 54bb2fdef76 SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE. (#18553)
54bb2fdef76 is described below

commit 54bb2fdef764aef2af7078be88bdeab7e69da9e3
Author: Gian Merlino <[email protected]>
AuthorDate: Wed Oct 15 20:08:59 2025 -0400

    SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE. (#18553)
    
    * SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE.
    
    Building on #18521, this patch makes virtual column specialization
    recursive. It also now happens immediately on calling
    getOrCreateVirtualColumnForExpression.
    
    Specializations are added for JSON_OBJECT and JSON_MERGE. Now, chains of
    JSON_MERGE, JSON_OBJECT, and JSON_VALUE can preserve lazy evaluation,
    index usage, dictionary usage, etc.
    
    There is a change to VirtualColumnCreator that can affect extensions
    that add SQL operators. To allow the creator to access rewritten
    arguments, a "DruidExpression self" parameter is added.
    The "String expression" parameter is no longer needed, so it is removed.
    
    * Correct javadoc.
    
    * Update expectations.
    
    * NestedFieldVirtualColumn: Process arrays when making object selectors, even if not from VariantColumn.
    
    * Update test.
---
 .../segment/virtual/NestedFieldVirtualColumn.java  |  20 +-
 .../segment/virtual/NestedObjectVirtualColumn.java |  11 +-
 .../virtual/NestedObjectVirtualColumnTest.java     |   8 +-
 .../qaArray/ops_funcs_mv_funcs.08.all.iq           | 207 +++++++++++++++++----
 .../funcs_and_sql_func_json_object.03.msq.iq       |  20 +-
 .../funcs_and_sql_func_json_object.03.std.iq       |  20 +-
 .../sql/calcite/expression/DruidExpression.java    |  35 +++-
 .../MultiValueStringOperatorConversions.java       |  24 +--
 .../builtin/NestedDataOperatorConversions.java     | 146 ++++++++++++---
 .../apache/druid/sql/calcite/rel/DruidQuery.java   |  70 ++++---
 .../sql/calcite/rel/VirtualColumnRegistry.java     |  94 +++++++---
 .../calcite/CalciteMultiValueStringQueryTest.java  |  16 +-
 .../sql/calcite/CalciteNestedDataQueryTest.java    | 195 ++++++++++++++++++-
 13 files changed, 682 insertions(+), 184 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index a2fac947e8c..185a1a3ad9e 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -414,8 +414,14 @@ public class NestedFieldVirtualColumn implements 
VirtualColumn
       return new ArrayElementColumnValueSelector(arraySelector, elementNumber);
     }
 
-    // we are not a nested column and are being asked for a path that will 
never exist, so we are nil selector
-    return NilColumnValueSelector.instance();
+    if (holder.getCapabilities().isArray() || 
ColumnType.NESTED_DATA.equals(holder.getCapabilities().toColumnType())) {
+      // Not a root access and no specialized path available. But the 
underlying column is array or nested typed,
+      // so we may still be able to walk it using exprs. Try that.
+      return 
makeColumnValueSelectorUsingColumnSelectorFactory(selectorFactory);
+    } else {
+      // we are not a nested or array column, and are being asked for a path 
that will never exist, so nil selector
+      return NilColumnValueSelector.instance();
+    }
   }
 
   @Override
@@ -486,11 +492,11 @@ public class NestedFieldVirtualColumn implements 
VirtualColumn
     final NestedVectorColumnSelectorFactory nestedColumnSelectorFactory =
         column.as(NestedVectorColumnSelectorFactory.class);
 
-    if (isNestedColumn(holder)) {
+    if (isNestedColumn(holder) || holder.getCapabilities().isArray()) {
       if (fieldSpec.processFromRaw || nestedTypeInspector == null || 
nestedColumnSelectorFactory == null) {
         // 1) If processFromRaw is true, that means JSON_QUERY.
-        // 2) If no nestedTypeInspector, nestedColumnSelectorFactory then that 
means this is a nested type that is
-        //    not exposed as a nested column.
+        // 2) If no nestedTypeInspector, nestedColumnSelectorFactory then that 
means this is a nested or array
+        //    type that is not exposed as a nested column.
         // Either way, we read and process raw objects.
         return new RawFieldVectorObjectSelector(
             selectorFactory.makeObjectSelector(fieldSpec.columnName),
@@ -499,7 +505,9 @@ public class NestedFieldVirtualColumn implements 
VirtualColumn
         );
       }
       final ColumnType leastRestrictiveType = 
nestedTypeInspector.getFieldLogicalType(fieldSpec.parts);
-      if (leastRestrictiveType != null && leastRestrictiveType.isNumeric() && 
!Types.isNumeric(fieldSpec.expectedType)) {
+      if (leastRestrictiveType != null
+          && leastRestrictiveType.isNumeric()
+          && !Types.isNumeric(fieldSpec.expectedType)) {
         return ExpressionVectorSelectors.castValueSelectorToObject(
             offset,
             columnName,
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
index 1e1e94c8b3d..ee473391226 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
@@ -88,11 +88,12 @@ public class NestedObjectVirtualColumn extends 
SpecializedExpressionVirtualColum
             StringUtils.format(
                 "%s(%s)",
                 NestedDataExpressions.JsonObjectExprMacro.NAME,
-                
keyExprMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> {
-                  final String key = entry.getKey();
-                  final TypedExpression valueExpr = entry.getValue();
-                  return Parser.constant(key).stringify() + ',' + 
valueExpr.expression;
-                }).collect(Collectors.joining(","))
+                Preconditions.checkNotNull(keyExprMap, "object")
+                             .entrySet().stream().map(entry -> {
+                               final String key = entry.getKey();
+                               final TypedExpression valueExpr = 
entry.getValue();
+                               return Parser.constant(key).stringify() + ',' + 
valueExpr.expression;
+                             }).collect(Collectors.joining(","))
             ),
             ColumnType.NESTED_DATA,
             macroTable
diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
 
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
index 91e7f2aac82..5302ffbcd94 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
@@ -29,7 +29,6 @@ import org.apache.druid.segment.column.ColumnType;
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.util.HashMap;
 import java.util.Map;
 
 public class NestedObjectVirtualColumnTest
@@ -56,9 +55,10 @@ public class NestedObjectVirtualColumnTest
   @Test
   public void testGetKeyExprMap()
   {
-    Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap = new 
HashMap<>();
-    keyExprMap.put("key1", new 
NestedObjectVirtualColumn.TypedExpression("expr1", ColumnType.STRING));
-    keyExprMap.put("key2", new 
NestedObjectVirtualColumn.TypedExpression("expr2", ColumnType.DOUBLE));
+    Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap = 
ImmutableMap.of(
+        "key1", new NestedObjectVirtualColumn.TypedExpression("expr1", 
ColumnType.STRING),
+        "key2", new NestedObjectVirtualColumn.TypedExpression("expr2", 
ColumnType.DOUBLE)
+    );
 
     NestedObjectVirtualColumn column = new NestedObjectVirtualColumn(
         "test_obj",
diff --git 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
index 6a74aaa623f..4c495a8787d 100644
--- 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
+++ 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
@@ -119,16 +119,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[0]' RETURNING boolean array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col          |
++--------------+
+| [1, 0, null] |
+| [1, 0, null] |
+| [null, 0, 1] |
+| [null, 0, 1] |
++--------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C3_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][0]' RETURNING boolean array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col          |
++--------------+
+| [1, 0, null] |
+| [1, 0, null] |
+| [null, 0, 1] |
+| [null, 0, 1] |
++--------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C4_D1
@@ -152,16 +170,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[1]' RETURNING bigint array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col          |
++--------------+
+| [1, 2, null] |
+| [1, 2, null] |
+| [null, 2, 1] |
+| [null, 2, 1] |
++--------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C6_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][1]' RETURNING bigint array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col          |
++--------------+
+| [1, 2, null] |
+| [1, 2, null] |
+| [null, 2, 1] |
+| [null, 2, 1] |
++--------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C7_D1
@@ -185,16 +221,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[2]' RETURNING decimal array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++------------------+
+| col              |
++------------------+
+| [0.1, 0.2, null] |
+| [0.1, 0.2, null] |
+| [null, 0.2, 0.1] |
+| [null, 0.2, 0.1] |
++------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C9_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][2]' RETURNING decimal array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++------------------+
+| col              |
++------------------+
+| [0.1, 0.2, null] |
+| [0.1, 0.2, null] |
+| [null, 0.2, 0.1] |
+| [null, 0.2, 0.1] |
++------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C10_D1
@@ -218,16 +272,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[3]' RETURNING varchar array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++----------------+
+| col            |
++----------------+
+| [S1, S2, null] |
+| [S1, S2, null] |
+| [null, S2, S1] |
+| [null, S2, S1] |
++----------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C12_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][3]' RETURNING varchar array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++----------------+
+| col            |
++----------------+
+| [S1, S2, null] |
+| [S1, S2, null] |
+| [null, S2, S1] |
+| [null, S2, S1] |
++----------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C13_D1
@@ -251,16 +323,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[4]' RETURNING varchar array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------+
+| col                |
++--------------------+
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
++--------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C15_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][4]' RETURNING varchar array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------+
+| col                |
++--------------------+
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
++--------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C16_D1
@@ -284,16 +374,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[5]' RETURNING varchar array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| []  |
+| []  |
+| []  |
+| []  |
++-----+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C18_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][5]' RETURNING varchar array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| []  |
+| []  |
+| []  |
+| []  |
++-----+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C19_D1
@@ -317,16 +425,34 @@ FROM test_array;
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[6]' RETURNING varchar array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------------+
+| col                      |
++--------------------------+
+| [null, S1, 0.1, 1, true] |
+| [null, S1, 0.1, 1, true] |
+| [true, 1, 0.1, S1, null] |
+| [true, 1, 0.1, S1, null] |
++--------------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C21_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][6]' RETURNING varchar array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------------+
+| col                      |
++--------------------------+
+| [null, S1, 0.1, 1, true] |
+| [null, S1, 0.1, 1, true] |
+| [true, 1, 0.1, S1, null] |
+| [true, 1, 0.1, S1, null] |
++--------------------------+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C22_D1
@@ -341,14 +467,31 @@ Cannot apply 'MV_TO_ARRAY' to arguments of type 
'MV_TO_ARRAY(
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7]' RETURNING varchar array)) AS col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+|     |
+|     |
+|     |
+|     |
++-----+
+(4 rows)
+
+!ok
 
 #-------------------------------------------------------------------------
 # TESTCASE: test_mv_funcs TEST_ID: A1_B16_C24_D1
 #-------------------------------------------------------------------------
 SELECT mv_to_array(json_value(a_nested, '$[7][7]' RETURNING varchar array)) AS 
col
 FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+|     |
+|     |
+|     |
+|     |
++-----+
+(4 rows)
 
+!ok
diff --git 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
index 4f2bf8b90c8..ac7adb76703 100644
--- 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
+++ 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
@@ -666,10 +666,14 @@ FROM test_json_cols;
           "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
         },
         "virtualColumns" : [ {
-          "type" : "expression",
+          "type" : "nested-object",
           "name" : "v0",
-          "expression" : "json_object('a',\"c1\")",
-          "outputType" : "COMPLEX<json>"
+          "object" : {
+            "a" : {
+              "expression" : "\"c1\"",
+              "type" : "COMPLEX<json>"
+            }
+          }
         } ],
         "resultFormat" : "compactedList",
         "columns" : [ "v0" ],
@@ -761,10 +765,14 @@ FROM test_json_cols;
           "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
         },
         "virtualColumns" : [ {
-          "type" : "expression",
+          "type" : "nested-object",
           "name" : "v0",
-          "expression" : "json_object('a',\"c1\")",
-          "outputType" : "COMPLEX<json>"
+          "object" : {
+            "a" : {
+              "expression" : "\"c1\"",
+              "type" : "COMPLEX<json>"
+            }
+          }
         } ],
         "resultFormat" : "compactedList",
         "columns" : [ "v0" ],
diff --git 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
index 5e8f30930e7..176c72e73fa 100644
--- 
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
+++ 
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
@@ -681,10 +681,14 @@ FROM test_json_cols;
     "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
   },
   "virtualColumns" : [ {
-    "type" : "expression",
+    "type" : "nested-object",
     "name" : "v0",
-    "expression" : "json_object('a',\"c1\")",
-    "outputType" : "COMPLEX<json>"
+    "object" : {
+      "a" : {
+        "expression" : "\"c1\"",
+        "type" : "COMPLEX<json>"
+      }
+    }
   } ],
   "resultFormat" : "compactedList",
   "columns" : [ "v0" ],
@@ -725,10 +729,14 @@ FROM test_json_cols;
     "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
   },
   "virtualColumns" : [ {
-    "type" : "expression",
+    "type" : "nested-object",
     "name" : "v0",
-    "expression" : "json_object('a',\"c1\")",
-    "outputType" : "COMPLEX<json>"
+    "object" : {
+      "a" : {
+        "expression" : "\"c1\"",
+        "type" : "COMPLEX<json>"
+      }
+    }
   } ],
   "resultFormat" : "compactedList",
   "columns" : [ "v0" ],
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
index ec257230e7b..0783c992389 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
@@ -369,7 +369,7 @@ public class DruidExpression
 
   public String getDirectColumn()
   {
-    return Preconditions.checkNotNull(simpleExtraction.getColumn());
+    return Preconditions.checkNotNull(simpleExtraction, 
"simpleExtraction").getColumn();
   }
 
   public boolean isSimpleExtraction()
@@ -401,7 +401,7 @@ public class DruidExpression
       final ExpressionParser parser
   )
   {
-    return virtualColumnCreator.create(name, outputType, expression.get(), 
parser);
+    return virtualColumnCreator.create(name, outputType, parser, this);
   }
 
   public VirtualColumn toExpressionVirtualColumn(
@@ -410,7 +410,7 @@ public class DruidExpression
       final ExpressionParser parser
   )
   {
-    return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType, 
expression.get(), parser);
+    return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType, parser, 
this);
   }
 
   public NodeType getType()
@@ -418,6 +418,16 @@ public class DruidExpression
     return nodeType;
   }
 
+  /**
+   * Returns whether this expression is {@link NodeType#IDENTIFIER} or {@link 
NodeType#SPECIALIZED}. Useful because
+   * these are the expressions that can be expected to become direct column 
references once virtual columns have gone
+   * through a specialization pass.
+   */
+  public boolean isIdentifierOrSpecialized()
+  {
+    return nodeType == NodeType.IDENTIFIER || nodeType == NodeType.SPECIALIZED;
+  }
+
   /**
    * The {@link ColumnType} of this expression as inferred when this 
expression was created. This is likely the result
    * of converting the output of {@link 
org.apache.calcite.rex.RexNode#getType()} using
@@ -609,15 +619,28 @@ public class DruidExpression
   @FunctionalInterface
   public interface VirtualColumnCreator
   {
-    VirtualColumn create(String name, ColumnType outputType, String 
expression, ExpressionParser parser);
+    /**
+     * Create a virtual column for an expression.
+     *
+     * @param name       name of the virtual column
+     * @param outputType type of the virtual column
+     * @param parser     expression parser, if needed
+     * @param self       expression, possibly rewritten to refer to 
specialized virtual columns
+     */
+    VirtualColumn create(
+        String name,
+        ColumnType outputType,
+        ExpressionParser parser,
+        DruidExpression self
+    );
   }
 
   public static class ExpressionVirtualColumnCreator implements 
VirtualColumnCreator
   {
     @Override
-    public VirtualColumn create(String name, ColumnType outputType, String 
expression, ExpressionParser parser)
+    public VirtualColumn create(String name, ColumnType outputType, 
ExpressionParser parser, DruidExpression self)
     {
-      return new ExpressionVirtualColumn(name, expression, 
parser.parse(expression), outputType);
+      return new ExpressionVirtualColumn(name, self.getExpression(), 
parser.parse(self.getExpression()), outputType);
     }
   }
 }
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
index 0ef99ba3c17..8bbf3c9c3b0 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
@@ -389,11 +389,11 @@ public class MultiValueStringOperatorConversions
             Calcites.getColumnTypeForRelDataType(rexNode.getType()),
             builder,
             druidExpressions,
-            (name, outputType, expression, macroTable) -> new 
ListFilteredVirtualColumn(
+            (name, outputType, parser, self) -> new ListFilteredVirtualColumn(
                 name,
-                druidExpressions.get(0)
-                                .getSimpleExtraction()
-                                
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+                self.getArguments().get(0)
+                    .getSimpleExtraction()
+                    
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
                 literals,
                 isAllowList()
             )
@@ -470,11 +470,11 @@ public class MultiValueStringOperatorConversions
             Calcites.getColumnTypeForRelDataType(rexNode.getType()),
             builder,
             druidExpressions,
-            (name, outputType, expression, macroTable) -> new 
RegexFilteredVirtualColumn(
+            (name, outputType, parser, self) -> new RegexFilteredVirtualColumn(
                 name,
-                druidExpressions.get(0)
-                                .getSimpleExtraction()
-                                
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+                self.getArguments().get(0)
+                    .getSimpleExtraction()
+                    
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
                 pattern
             )
         );
@@ -555,11 +555,11 @@ public class MultiValueStringOperatorConversions
             Calcites.getColumnTypeForRelDataType(rexNode.getType()),
             builder,
             druidExpressions,
-            (name, outputType, expression, macroTable) -> new 
PrefixFilteredVirtualColumn(
+            (name, outputType, parser, self) -> new 
PrefixFilteredVirtualColumn(
                 name,
-                druidExpressions.get(0)
-                                .getSimpleExtraction()
-                                
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+                self.getArguments().get(0)
+                    .getSimpleExtraction()
+                    
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
                 prefix
             )
         );
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
index 61fa1f11851..260b5a9932e 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
@@ -46,6 +46,7 @@ import org.apache.druid.error.DruidException;
 import org.apache.druid.error.InvalidSqlInput;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.InputBindings;
 import org.apache.druid.query.expression.NestedDataExpressions;
@@ -54,12 +55,15 @@ import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.nested.NestedPathFinder;
 import org.apache.druid.segment.nested.NestedPathPart;
 import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
+import org.apache.druid.segment.virtual.NestedMergeVirtualColumn;
+import org.apache.druid.segment.virtual.NestedObjectVirtualColumn;
 import org.apache.druid.sql.calcite.expression.DirectOperatorConversion;
 import org.apache.druid.sql.calcite.expression.DruidExpression;
 import org.apache.druid.sql.calcite.expression.Expressions;
 import org.apache.druid.sql.calcite.expression.OperatorConversions;
 import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
 import org.apache.druid.sql.calcite.planner.Calcites;
+import org.apache.druid.sql.calcite.planner.ExpressionParser;
 import org.apache.druid.sql.calcite.planner.PlannerContext;
 import org.apache.druid.sql.calcite.planner.convertlet.DruidConvertletFactory;
 import org.apache.druid.sql.calcite.table.RowSignatures;
@@ -67,7 +71,10 @@ import org.apache.druid.sql.calcite.table.RowSignatures;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.util.Collections;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
 
 public class NestedDataOperatorConversions
 {
@@ -233,8 +240,8 @@ public class NestedDataOperatorConversions
             ImmutableList.of(
                 DruidExpression.ofColumn(ColumnType.NESTED_DATA, 
druidExpressions.get(0).getDirectColumn())
             ),
-            (name, outputType, expression, macroTable) -> new 
NestedFieldVirtualColumn(
-                druidExpressions.get(0).getDirectColumn(),
+            (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+                self.getArguments().get(0).getDirectColumn(),
                 name,
                 outputType,
                 parts,
@@ -414,16 +421,13 @@ public class NestedDataOperatorConversions
           DruidExpression.stringLiteral(druidType.asTypeString())
       );
 
-      if (druidExpressions.get(0).isSimpleExtraction()) {
-
+      if (druidExpressions.get(0).isIdentifierOrSpecialized()) {
         return DruidExpression.ofVirtualColumn(
             druidType,
             builder,
-            ImmutableList.of(
-                DruidExpression.ofColumn(ColumnType.NESTED_DATA, 
druidExpressions.get(0).getDirectColumn())
-            ),
-            (name, outputType, expression, macroTable) -> new 
NestedFieldVirtualColumn(
-                druidExpressions.get(0).getDirectColumn(),
+            List.of(druidExpressions.get(0)),
+            (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+                self.getArguments().get(0).getDirectColumn(),
                 name,
                 outputType,
                 parts,
@@ -556,8 +560,8 @@ public class NestedDataOperatorConversions
             ImmutableList.of(
                 DruidExpression.ofColumn(ColumnType.NESTED_DATA, 
druidExpressions.get(0).getDirectColumn())
             ),
-            (name, outputType, expression, macroTable) -> new 
NestedFieldVirtualColumn(
-                druidExpressions.get(0).getDirectColumn(),
+            (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+                self.getArguments().get(0).getDirectColumn(),
                 name,
                 outputType,
                 parts,
@@ -720,8 +724,8 @@ public class NestedDataOperatorConversions
             ImmutableList.of(
                 DruidExpression.ofColumn(ColumnType.NESTED_DATA, 
druidExpressions.get(0).getDirectColumn())
             ),
-            (name, outputType, expression, macroTable) -> new 
NestedFieldVirtualColumn(
-                druidExpressions.get(0).getDirectColumn(),
+            (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+                self.getArguments().get(0).getDirectColumn(),
                 name,
                 null,
                 parts,
@@ -768,14 +772,6 @@ public class NestedDataOperatorConversions
     @Override
     public DruidExpression toDruidExpression(PlannerContext plannerContext, 
RowSignature rowSignature, RexNode rexNode)
     {
-      final DruidExpression.DruidExpressionCreator expressionFunction = 
druidExpressions ->
-          DruidExpression.ofExpression(
-              ColumnType.NESTED_DATA,
-              null,
-              DruidExpression.functionCall(FUNCTION_NAME),
-              druidExpressions
-          );
-
       final RexCall call = (RexCall) rexNode;
 
       // we ignore the first argument because calcite sets a 'nullBehavior' 
parameter by the time it gets here
@@ -790,7 +786,88 @@ public class NestedDataOperatorConversions
         return null;
       }
 
-      return expressionFunction.create(druidExpressions);
+      if (allKeysLiteral(druidExpressions, 
plannerContext.getExpressionParser())) {
+        return DruidExpression.ofVirtualColumn(
+            ColumnType.NESTED_DATA,
+            
DruidExpression.functionCall(NestedDataExpressions.JsonObjectExprMacro.NAME),
+            druidExpressions,
+            (name, outputType, parser, self) -> new NestedObjectVirtualColumn(
+                name,
+                argsAsObjectMap(self.getArguments(), 
plannerContext.getExpressionParser()),
+                plannerContext.getExprMacroTable()
+            )
+        );
+      } else {
+        return DruidExpression.ofFunctionCall(
+            ColumnType.NESTED_DATA,
+            NestedDataExpressions.JsonObjectExprMacro.NAME,
+            druidExpressions
+        );
+      }
+    }
+
+    /**
+     * Returns whether the object represented by the given arguments has all 
literal keys.
+     */
+    private static boolean allKeysLiteral(
+        final List<DruidExpression> args,
+        final ExpressionParser parser
+    )
+    {
+      for (int i = 0; i < args.size(); i += 2) {
+        final DruidExpression key = args.get(i);
+
+        if (key.getType() != DruidExpression.NodeType.LITERAL) {
+          return false;
+        }
+
+        final String keyLiteral = 
Evals.asString(parser.parse(key.getExpression()).getLiteralValue());
+        if (keyLiteral == null) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    /**
+     * Returns a map for the object represented by the given arguments, if
+     * {@link #allKeysLiteral(List, ExpressionParser)} is true. Otherwise 
throws an exception.
+     */
+    private static Map<String, NestedObjectVirtualColumn.TypedExpression> 
argsAsObjectMap(
+        final List<DruidExpression> args,
+        final ExpressionParser parser
+    )
+    {
+      // Use LinkedHashMap to preserve order.
+      final Map<String, NestedObjectVirtualColumn.TypedExpression> retVal = 
new LinkedHashMap<>();
+      for (int i = 0; i < args.size(); i += 2) {
+        final DruidExpression key = args.get(i);
+        if (key.getType() != DruidExpression.NodeType.LITERAL) {
+          throw DruidException.defensive("Do not call this method unless 
allKeysLiteral returns true");
+        }
+
+        final String keyLiteral = 
Evals.asString(parser.parse(key.getExpression()).getLiteralValue());
+        if (keyLiteral == null) {
+          throw DruidException.defensive("Do not call this method unless 
allKeysLiteral returns true");
+        }
+
+        final NestedObjectVirtualColumn.TypedExpression typedExpression;
+        if (i + 1 < args.size()) {
+          typedExpression = new NestedObjectVirtualColumn.TypedExpression(
+              args.get(i + 1).getExpression(),
+              args.get(i + 1).getDruidType()
+          );
+        } else {
+          typedExpression = new NestedObjectVirtualColumn.TypedExpression(
+              DruidExpression.nullLiteral(),
+              ColumnType.LONG
+          );
+        }
+
+        retVal.put(keyLiteral, typedExpression);
+      }
+      return retVal;
     }
   }
 
@@ -831,11 +908,26 @@ public class NestedDataOperatorConversions
           plannerContext,
           rowSignature,
           rexNode,
-          druidExpressions -> DruidExpression.ofExpression(
-              ColumnType.NESTED_DATA,
-              DruidExpression.functionCall("json_merge"),
-              druidExpressions
-          )
+          druidExpressions -> {
+            if 
(druidExpressions.stream().allMatch(DruidExpression::isIdentifierOrSpecialized))
 {
+              return DruidExpression.ofVirtualColumn(
+                  ColumnType.NESTED_DATA,
+                  DruidExpression.functionCall("json_merge"),
+                  druidExpressions,
+                  (name, outputType, parser, self) -> new 
NestedMergeVirtualColumn(
+                      name,
+                      
self.getArguments().stream().map(DruidExpression::getDirectColumn).collect(Collectors.toList()),
+                      plannerContext.getExprMacroTable()
+                  )
+              );
+            } else {
+              return DruidExpression.ofFunctionCall(
+                  ColumnType.NESTED_DATA,
+                  "json_merge",
+                  druidExpressions
+              );
+            }
+          }
       );
     }
   }
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index 5aaf1d80ef8..67c2271bfe7 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -22,6 +22,7 @@ package org.apache.druid.sql.calcite.rel;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSortedMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
@@ -723,35 +724,15 @@ public class DruidQuery
     }
   }
 
-  VirtualColumns getVirtualColumns(final boolean includeDimensions)
+  /**
+   * Returns the virtual columns required for a query. Does not necessarily 
return all virtual columns from
+   * {@link #sourceRowSignature}, only the ones actually referenced (and any 
that they depend on).
+   *
+   * @param includeDimensions whether to include virtual columns referred to by 
{@link Grouping#getDimensions()}
+   */
+  VirtualColumns computeVirtualColumns(final boolean includeDimensions)
   {
-    // 'sourceRowSignature' could provide a list of all defined virtual 
columns while constructing a query, but we
-    // still want to collect the set of VirtualColumns this way to ensure we 
only add what is still being used after
-    // the various transforms and optimizations
-    Set<VirtualColumn> virtualColumns = new HashSet<>();
-
-
-    // rewrite any "specialized" virtual column expressions as top level 
virtual columns so that their native
-    // implementation can be used instead of being composed as part of some 
expression tree in an expresson virtual
-    // column
-    Set<String> specialized = new HashSet<>();
-    final boolean forceExpressionVirtualColumns =
-        plannerContext.getPlannerConfig().isForceExpressionVirtualColumns();
-    virtualColumnRegistry.visitAllSubExpressions((expression) -> {
-      if (!forceExpressionVirtualColumns && expression.getType() == 
DruidExpression.NodeType.SPECIALIZED) {
-        // add the expression to the top level of the registry as a standalone 
virtual column
-        final String name = 
virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
-            expression,
-            expression.getDruidType()
-        );
-        specialized.add(name);
-        // replace with an identifier expression of the new virtual column name
-        return DruidExpression.ofColumn(expression.getDruidType(), name);
-      } else {
-        // do nothing
-        return expression;
-      }
-    });
+    final Set<VirtualColumn> virtualColumns = new HashSet<>();
 
     // we always want to add any virtual columns used by the query level 
DimFilter
     if (filter != null) {
@@ -802,9 +783,23 @@ public class DruidQuery
       }
     }
 
-    for (String columnName : specialized) {
-      if (virtualColumnRegistry.isVirtualColumnDefined(columnName)) {
-        virtualColumns.add(virtualColumnRegistry.getVirtualColumn(columnName));
+    // Include any specialized virtual columns that we need for the top-level 
virtualColumns.
+    final Set<VirtualColumn> checkVirtualColumns = new 
HashSet<>(virtualColumns);
+    while (!checkVirtualColumns.isEmpty()) {
+      final ImmutableSet<VirtualColumn> checkVirtualColumnsCopy = 
ImmutableSet.copyOf(checkVirtualColumns);
+      checkVirtualColumns.clear();
+
+      // Check all virtual columns that were in checkVirtualColumns to ensure 
we have their dependencies.
+      // If other virtual columns are encountered, add them to 
checkVirtualColumns so we check them recursively.
+      for (final VirtualColumn virtualColumn : checkVirtualColumnsCopy) {
+        for (final String requiredColumnName : 
virtualColumn.requiredColumns()) {
+          if 
(virtualColumnRegistry.isVirtualColumnDefined(requiredColumnName)) {
+            final VirtualColumn requiredVirtualColumn = 
virtualColumnRegistry.getVirtualColumn(requiredColumnName);
+            if (virtualColumns.add(requiredVirtualColumn)) {
+              checkVirtualColumns.add(requiredVirtualColumn);
+            }
+          }
+        }
       }
     }
 
@@ -1110,7 +1105,7 @@ public class DruidQuery
           plannerContext.getJoinableFactoryWrapper()
       );
 
-      if (!getVirtualColumns(true).isEmpty()) {
+      if (!computeVirtualColumns(true).isEmpty()) {
         // timeBoundary query does not support virtual columns.
         return null;
       }
@@ -1226,6 +1221,7 @@ public class DruidQuery
     }
     theContext.putAll(plannerContext.queryContextMap());
 
+    final VirtualColumns virtualColumns = computeVirtualColumns(false);
     final Pair<DataSource, Filtration> dataSourceFiltrationPair = 
getFiltration(
         dataSource,
         filter,
@@ -1248,7 +1244,7 @@ public class DruidQuery
         newDataSource,
         filtration.getQuerySegmentSpec(),
         descending,
-        getVirtualColumns(false),
+        virtualColumns,
         filtration.getDimFilter(),
         queryGranularity,
         grouping.getAggregatorFactories(),
@@ -1331,6 +1327,7 @@ public class DruidQuery
       return null;
     }
 
+    final VirtualColumns virtualColumns = computeVirtualColumns(true);
     final Pair<DataSource, Filtration> dataSourceFiltrationPair = 
getFiltration(
         dataSource,
         filter,
@@ -1347,7 +1344,7 @@ public class DruidQuery
 
     return new TopNQuery(
         newDataSource,
-        getVirtualColumns(true),
+        virtualColumns,
         dimensionSpec,
         topNMetricSpec,
         Ints.checkedCast(sorting.getOffsetLimit().getLimit()),
@@ -1377,6 +1374,7 @@ public class DruidQuery
       return null;
     }
 
+    final VirtualColumns virtualColumns = computeVirtualColumns(true);
     final Pair<DataSource, Filtration> dataSourceFiltrationPair = 
getFiltration(
         dataSource,
         filter,
@@ -1405,7 +1403,7 @@ public class DruidQuery
     GroupByQuery query = new GroupByQuery(
         newDataSource,
         filtration.getQuerySegmentSpec(),
-        getVirtualColumns(true),
+        virtualColumns,
         filtration.getDimFilter(),
         Granularities.ALL,
         grouping.getDimensionSpecs(),
@@ -1655,6 +1653,7 @@ public class DruidQuery
       return null;
     }
 
+    final VirtualColumns virtualColumns = computeVirtualColumns(true);
     final Pair<DataSource, Filtration> dataSourceFiltrationPair = 
getFiltration(
         dataSource,
         filter,
@@ -1716,7 +1715,6 @@ public class DruidQuery
     final Set<String> scanColumns = new 
LinkedHashSet<>(outputRowSignature.getColumnNames());
     orderByColumns.forEach(column -> scanColumns.add(column.getColumnName()));
 
-    final VirtualColumns virtualColumns = getVirtualColumns(true);
     final ImmutableList<String> scanColumnsList = 
ImmutableList.copyOf(scanColumns);
 
     return new ScanQuery(
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
index 3734d2a6d42..8a716757379 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
@@ -21,6 +21,7 @@ package org.apache.druid.sql.calcite.rel;
 
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.druid.java.util.common.NonnullPair;
 import org.apache.druid.segment.VirtualColumn;
 import org.apache.druid.segment.VirtualColumns;
 import org.apache.druid.segment.column.ColumnCapabilities;
@@ -35,6 +36,7 @@ import javax.annotation.Nullable;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
@@ -56,8 +58,8 @@ public class VirtualColumnRegistry
   private final Map<ExpressionAndTypeHint, String> virtualColumnsByExpression;
   private final Map<String, ExpressionAndTypeHint> virtualColumnsByName;
   private final String virtualColumnPrefix;
+  private final boolean forceExpressionVirtualColumns;
   private int virtualColumnCounter;
-  private boolean forceExpressionVirtualColumns;
 
   private VirtualColumnRegistry(
       RowSignature baseRowSignature,
@@ -116,18 +118,13 @@ public class VirtualColumnRegistry
     final ExpressionAndTypeHint candidate = wrap(expression, typeHint);
     if (!virtualColumnsByExpression.containsKey(candidate)) {
       final String virtualColumnName = virtualColumnPrefix + 
virtualColumnCounter++;
-
-      virtualColumnsByExpression.put(
-          candidate,
-          virtualColumnName
-      );
-      virtualColumnsByName.put(
-          virtualColumnName,
-          candidate
-      );
+      virtualColumnsByExpression.put(candidate, virtualColumnName);
+      virtualColumnsByName.put(virtualColumnName, candidate);
+      specialize(virtualColumnName, candidate);
+      return virtualColumnName;
+    } else {
+      return virtualColumnsByExpression.get(candidate);
     }
-
-    return virtualColumnsByExpression.get(candidate);
   }
 
   /**
@@ -214,21 +211,6 @@ public class VirtualColumnRegistry
                      .collect(Collectors.toList());
   }
 
-  public void visitAllSubExpressions(DruidExpression.DruidExpressionShuttle 
shuttle)
-  {
-    final Queue<Map.Entry<String, ExpressionAndTypeHint>> toVisit = new 
ArrayDeque<>(virtualColumnsByName.entrySet());
-    while (!toVisit.isEmpty()) {
-      final Map.Entry<String, ExpressionAndTypeHint> entry = toVisit.poll();
-      final String key = entry.getKey();
-      final ExpressionAndTypeHint wrapped = entry.getValue();
-      final List<DruidExpression> newArgs = 
shuttle.visitAll(wrapped.getExpression().getArguments());
-      final ExpressionAndTypeHint newWrapped = 
wrap(wrapped.getExpression().withArguments(newArgs), wrapped.getTypeHint());
-      virtualColumnsByName.put(key, newWrapped);
-      virtualColumnsByExpression.remove(wrapped);
-      virtualColumnsByExpression.put(newWrapped, key);
-    }
-  }
-
   public Collection<? extends VirtualColumn> getAllVirtualColumns(List<String> 
requiredColumns)
   {
     return requiredColumns.stream()
@@ -327,4 +309,62 @@ public class VirtualColumnRegistry
     columns.sort(Comparator.comparing(VirtualColumn::getOutputName));
     return VirtualColumns.create(columns);
   }
+
+  /**
+   * Called to specialize subexpressions of a new virtual column immediately 
after adding it. Specialization is
+   * recursive: this function may create chains of virtual columns that call 
into each other.
+   */
+  private void specialize(final String name, final ExpressionAndTypeHint 
expressionAndTypeHint)
+  {
+    if (forceExpressionVirtualColumns) {
+      return;
+    }
+
+    final Queue<NonnullPair<String, ExpressionAndTypeHint>> toVisit =
+        new ArrayDeque<>(Collections.singletonList(new NonnullPair<>(name, 
expressionAndTypeHint)));
+    final SpecializationShuttle shuttle = new SpecializationShuttle();
+
+    while (!toVisit.isEmpty()) {
+      final NonnullPair<String, ExpressionAndTypeHint> entry = toVisit.poll();
+      final String virtualColumnName = entry.lhs;
+      final ExpressionAndTypeHint expression = entry.rhs;
+      final List<DruidExpression> newArgs = 
shuttle.visitAll(expression.getExpression().getArguments());
+      ExpressionAndTypeHint newExpression = wrap(
+          shuttle.visit(expression.getExpression().withArguments(newArgs)),
+          expression.getTypeHint()
+      );
+
+      // If the expression becomes a direct access of another virtual column 
after rewriting, then map this
+      // virtual column name to the expression for the referenced virtual 
column.
+      if (newExpression.getExpression().isDirectColumnAccess()
+          && 
virtualColumnsByName.containsKey(newExpression.getExpression().getDirectColumn()))
 {
+        newExpression = 
virtualColumnsByName.get(newExpression.getExpression().getDirectColumn());
+      }
+
+      // Map both the old and new expression to the same virtual column name.
+      virtualColumnsByName.put(virtualColumnName, newExpression);
+      virtualColumnsByExpression.put(expression, virtualColumnName);
+      virtualColumnsByExpression.putIfAbsent(newExpression, virtualColumnName);
+    }
+  }
+
+  /**
+   * Shuttle used by {@link #specialize(String, ExpressionAndTypeHint)}.
+   */
+  private class SpecializationShuttle implements 
DruidExpression.DruidExpressionShuttle
+  {
+    @Override
+    public DruidExpression visit(DruidExpression expression)
+    {
+      if (expression.getType() == DruidExpression.NodeType.SPECIALIZED) {
+        // add the expression to the top level of the registry as a standalone 
virtual column
+        final String name = getOrCreateVirtualColumnForExpression(expression, 
expression.getDruidType());
+        // replace with an identifier expression of the new virtual column name
+        return DruidExpression.ofColumn(expression.getDruidType(), name);
+      } else {
+        // do nothing
+        return expression;
+      }
+    }
+  }
 }
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
index ae3a6a0eec3..5b8ac66487d 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
@@ -1606,25 +1606,25 @@ public class CalciteMultiValueStringQueryTest extends 
BaseCalciteQueryTest
                         .setVirtualColumns(
                             expressionVirtualColumn(
                                 "v0",
-                                "array_length(\"v2\")",
-                                ColumnType.LONG
-                            ),
-                            expressionVirtualColumn(
-                                "v1",
-                                "array_length(\"dim3\")",
+                                "array_length(\"v1\")",
                                 ColumnType.LONG
                             ),
                             new ListFilteredVirtualColumn(
-                                "v2",
+                                "v1",
                                 DefaultDimensionSpec.of("dim3"),
                                 ImmutableSet.of("b"),
                                 true
+                            ),
+                            expressionVirtualColumn(
+                                "v2",
+                                "array_length(\"dim3\")",
+                                ColumnType.LONG
                             )
                         )
                         .setDimensions(
                             dimensions(
                                 new DefaultDimensionSpec("v0", "d0", 
ColumnType.LONG),
-                                new DefaultDimensionSpec("v1", "d1", 
ColumnType.LONG)
+                                new DefaultDimensionSpec("v2", "d1", 
ColumnType.LONG)
                             )
                         )
                         .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 1325dc93373..8d04c66ec11 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -69,6 +69,8 @@ import org.apache.druid.segment.nested.NestedPathField;
 import org.apache.druid.segment.nested.ObjectStorageEncoding;
 import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
 import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
+import org.apache.druid.segment.virtual.NestedMergeVirtualColumn;
+import org.apache.druid.segment.virtual.NestedObjectVirtualColumn;
 import 
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
 import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
 import 
org.apache.druid.sql.calcite.CalciteNestedDataQueryTest.NestedComponentSupplier;
@@ -88,6 +90,7 @@ import org.mockito.Mockito;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 
 import static org.hamcrest.MatcherAssert.assertThat;
@@ -4911,10 +4914,12 @@ public abstract class CalciteNestedDataQueryTest 
extends BaseCalciteQueryTest
                   .dataSource(DATA_SOURCE)
                   .intervals(querySegmentSpec(Filtration.eternity()))
                   .virtualColumns(
-                      new ExpressionVirtualColumn(
+                      new NestedObjectVirtualColumn(
                           "v0",
-                          "json_object('n',\"v1\",'x',\"v2\")",
-                          ColumnType.NESTED_DATA,
+                          ImmutableMap.of(
+                              "n", new 
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.NESTED_DATA),
+                              "x", new 
NestedObjectVirtualColumn.TypedExpression("\"v2\"", ColumnType.STRING)
+                          ),
                           queryFramework().macroTable()
                       ),
                       new NestedFieldVirtualColumn(
@@ -4949,7 +4954,7 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   }
 
   @Test
-  public void testJsonMerging()
+  public void testJsonMerging_lhsStringLiteral()
   {
     testQuery(
         "SELECT "
@@ -4962,7 +4967,7 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                   .virtualColumns(
                       new ExpressionVirtualColumn(
                           "v0",
-                          
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
+                          
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',\"v2\")",
                           ColumnType.NESTED_DATA,
                           queryFramework().macroTable()
                       ),
@@ -4974,6 +4979,17 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                           false,
                           null,
                           false
+                      ),
+                      new NestedObjectVirtualColumn(
+                          "v2",
+                          ImmutableMap.of(
+                              "x",
+                              new NestedObjectVirtualColumn.TypedExpression(
+                                  "\"v1\"",
+                                  ColumnType.STRING
+                              )
+                          ),
+                          queryFramework().macroTable()
                       )
                   )
                   .columns("v0")
@@ -4996,6 +5012,159 @@ public abstract class CalciteNestedDataQueryTest 
extends BaseCalciteQueryTest
     );
   }
 
+  @Test
+  public void testJsonMerging_lhsJsonObject()
+  {
+    testQuery(
+        "SELECT\n"
+        + "JSON_MERGE(\n"
+        + "  JSON_OBJECT('a': 'x'),\n"
+        + "  JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x'))"
+        + ")\n"
+        + "FROM druid.nested",
+        ImmutableList.of(
+            Druids.newScanQueryBuilder()
+                  .dataSource(DATA_SOURCE)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(
+                      new NestedMergeVirtualColumn("v0", List.of("v1", "v3"), 
queryFramework().macroTable()),
+                      new NestedObjectVirtualColumn(
+                          "v1",
+                          ImmutableMap.of("a", new 
NestedObjectVirtualColumn.TypedExpression("'x'", ColumnType.STRING)),
+                          queryFramework().macroTable()
+                      ),
+                      new NestedFieldVirtualColumn(
+                          "nest",
+                          "v2",
+                          ColumnType.STRING,
+                          ImmutableList.of(new NestedPathField("x")),
+                          false,
+                          null,
+                          false
+                      ),
+                      new NestedObjectVirtualColumn(
+                          "v3",
+                          ImmutableMap.of(
+                              "x",
+                              new 
NestedObjectVirtualColumn.TypedExpression("\"v2\"", ColumnType.STRING)
+                          ),
+                          queryFramework().macroTable()
+                      )
+                  )
+                  .columns("v0")
+                  .columnTypes(ColumnType.ofComplex("json"))
+                  
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"}
+        ),
+        RowSignature.builder()
+                    .add("EXPR$0", ColumnType.NESTED_DATA)
+                    .build()
+    );
+  }
+
+  @Test
+  public void testJsonValue_onJsonObject_onJsonValue()
+  {
+    testQuery(
+        "SELECT\n"
+        + "JSON_VALUE(\n"
+        + "  JSON_OBJECT('a': JSON_VALUE(nest, '$.x')),\n"
+        + "  '$.a'\n"
+        + ")\n"
+        + "FROM druid.nested",
+        ImmutableList.of(
+            Druids.newScanQueryBuilder()
+                  .dataSource(DATA_SOURCE)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(
+                      new NestedFieldVirtualColumn(
+                          "v2",
+                          "v0",
+                          ColumnType.STRING,
+                          ImmutableList.of(new NestedPathField("a")),
+                          false,
+                          null,
+                          false
+                      ),
+                      new NestedFieldVirtualColumn(
+                          "nest",
+                          "v1",
+                          ColumnType.STRING,
+                          ImmutableList.of(new NestedPathField("x")),
+                          false,
+                          null,
+                          false
+                      ),
+                      new NestedObjectVirtualColumn(
+                          "v2",
+                          ImmutableMap.of(
+                              "a",
+                              new 
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.STRING)
+                          ),
+                          queryFramework().macroTable()
+                      )
+                  )
+                  .columns("v0")
+                  .columnTypes(ColumnType.STRING)
+                  
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"100"},
+            new Object[]{null},
+            new Object[]{"200"},
+            new Object[]{null},
+            new Object[]{null},
+            new Object[]{"100"},
+            new Object[]{null}
+        ),
+        RowSignature.builder()
+                    .add("EXPR$0", ColumnType.STRING)
+                    .build()
+    );
+  }
+
+  @Test
+  public void testJsonMerging_withAggregateFilterAndQueryFilter()
+  {
+    testQuery(
+        "SELECT\n"
+        + "COUNT(*) FILTER (WHERE TIME_IN_INTERVAL(__time, '1000/3000'))\n"
+        + "FROM nested\n"
+        + "WHERE JSON_VALUE(JSON_MERGE(JSON_OBJECT(), \"nest\"), '$.x') = 100",
+        ImmutableList.of(
+            Druids.newTimeseriesQueryBuilder()
+                  .dataSource(DATA_SOURCE)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(
+                      new NestedFieldVirtualColumn("v2", "$.x", "v0", 
ColumnType.LONG),
+                      new NestedObjectVirtualColumn("v1", Map.of(), 
queryFramework().macroTable()),
+                      new NestedMergeVirtualColumn("v2", List.of("v1", 
"nest"), queryFramework().macroTable())
+                  )
+                  .filters(equality("v0", 100L, ColumnType.LONG))
+                  .aggregators(
+                      new FilteredAggregatorFactory(
+                          new CountAggregatorFactory("a0"),
+                          range("__time", ColumnType.LONG, timestamp("1000"), 
timestamp("3000"), false, true)
+                      )
+                  )
+                  .context(QUERY_CONTEXT_DEFAULT)
+                  .build()
+        ),
+        ImmutableList.of(new Object[]{2L}),
+        RowSignature.builder().add("EXPR$0", ColumnType.LONG).build()
+    );
+  }
+
   @Test
   public void testCompositionTyping()
   {
@@ -5008,11 +5177,11 @@ public abstract class CalciteNestedDataQueryTest 
extends BaseCalciteQueryTest
                   .dataSource(DATA_SOURCE)
                   .intervals(querySegmentSpec(Filtration.eternity()))
                   .virtualColumns(
-                      new ExpressionVirtualColumn(
+                      new NestedFieldVirtualColumn(
+                          "v2",
+                          "$.x",
                           "v0",
-                          "json_value(json_object('x',\"v1\"),'$.x', 'LONG')",
-                          ColumnType.LONG,
-                          queryFramework().macroTable()
+                          ColumnType.LONG
                       ),
                       new NestedFieldVirtualColumn(
                           "nest",
@@ -5022,6 +5191,14 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                           false,
                           "$.x",
                           false
+                      ),
+                      new NestedObjectVirtualColumn(
+                          "v2",
+                          ImmutableMap.of(
+                              "x",
+                              new 
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.LONG)
+                          ),
+                          queryFramework().macroTable()
                       )
                   )
                   .columns("v0")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to