This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 54bb2fdef76 SQL: Use specialized virtual columns for JSON_OBJECT,
JSON_MERGE. (#18553)
54bb2fdef76 is described below
commit 54bb2fdef764aef2af7078be88bdeab7e69da9e3
Author: Gian Merlino <[email protected]>
AuthorDate: Wed Oct 15 20:08:59 2025 -0400
SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE. (#18553)
* SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE.
Building on #18521, this patch makes virtual column specialization
recursive. Specialization also now happens immediately when
getOrCreateVirtualColumnForExpression is called.
Specializations are added for JSON_OBJECT and JSON_MERGE. Now, chains of
JSON_MERGE, JSON_OBJECT, and JSON_VALUE can preserve lazy evaluation,
index usage, dictionary usage, etc.
This patch changes the VirtualColumnCreator interface, which can affect
extensions that add SQL operators. To allow the creator to access rewritten
arguments, a "DruidExpression self" parameter is added.
The "String expression" parameter is no longer needed, so it is removed.
* Correct javadoc.
* Update expectations.
* NestedFieldVirtualColumn: Process arrays when making object selectors,
even if not from VariantColumn.
* Update test.
---
.../segment/virtual/NestedFieldVirtualColumn.java | 20 +-
.../segment/virtual/NestedObjectVirtualColumn.java | 11 +-
.../virtual/NestedObjectVirtualColumnTest.java | 8 +-
.../qaArray/ops_funcs_mv_funcs.08.all.iq | 207 +++++++++++++++++----
.../funcs_and_sql_func_json_object.03.msq.iq | 20 +-
.../funcs_and_sql_func_json_object.03.std.iq | 20 +-
.../sql/calcite/expression/DruidExpression.java | 35 +++-
.../MultiValueStringOperatorConversions.java | 24 +--
.../builtin/NestedDataOperatorConversions.java | 146 ++++++++++++---
.../apache/druid/sql/calcite/rel/DruidQuery.java | 70 ++++---
.../sql/calcite/rel/VirtualColumnRegistry.java | 94 +++++++---
.../calcite/CalciteMultiValueStringQueryTest.java | 16 +-
.../sql/calcite/CalciteNestedDataQueryTest.java | 195 ++++++++++++++++++-
13 files changed, 682 insertions(+), 184 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index a2fac947e8c..185a1a3ad9e 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -414,8 +414,14 @@ public class NestedFieldVirtualColumn implements
VirtualColumn
return new ArrayElementColumnValueSelector(arraySelector, elementNumber);
}
- // we are not a nested column and are being asked for a path that will
never exist, so we are nil selector
- return NilColumnValueSelector.instance();
+ if (holder.getCapabilities().isArray() ||
ColumnType.NESTED_DATA.equals(holder.getCapabilities().toColumnType())) {
+ // Not a root access and no specialized path available. But the
underlying column is array or nested typed,
+ // so we may still be able to walk it using exprs. Try that.
+ return
makeColumnValueSelectorUsingColumnSelectorFactory(selectorFactory);
+ } else {
+ // we are not a nested or array column, and are being asked for a path
that will never exist, so nil selector
+ return NilColumnValueSelector.instance();
+ }
}
@Override
@@ -486,11 +492,11 @@ public class NestedFieldVirtualColumn implements
VirtualColumn
final NestedVectorColumnSelectorFactory nestedColumnSelectorFactory =
column.as(NestedVectorColumnSelectorFactory.class);
- if (isNestedColumn(holder)) {
+ if (isNestedColumn(holder) || holder.getCapabilities().isArray()) {
if (fieldSpec.processFromRaw || nestedTypeInspector == null ||
nestedColumnSelectorFactory == null) {
// 1) If processFromRaw is true, that means JSON_QUERY.
- // 2) If no nestedTypeInspector, nestedColumnSelectorFactory then that
means this is a nested type that is
- // not exposed as a nested column.
+ // 2) If no nestedTypeInspector, nestedColumnSelectorFactory then that
means this is a nested or array
+ // type that is not exposed as a nested column.
// Either way, we read and process raw objects.
return new RawFieldVectorObjectSelector(
selectorFactory.makeObjectSelector(fieldSpec.columnName),
@@ -499,7 +505,9 @@ public class NestedFieldVirtualColumn implements
VirtualColumn
);
}
final ColumnType leastRestrictiveType =
nestedTypeInspector.getFieldLogicalType(fieldSpec.parts);
- if (leastRestrictiveType != null && leastRestrictiveType.isNumeric() &&
!Types.isNumeric(fieldSpec.expectedType)) {
+ if (leastRestrictiveType != null
+ && leastRestrictiveType.isNumeric()
+ && !Types.isNumeric(fieldSpec.expectedType)) {
return ExpressionVectorSelectors.castValueSelectorToObject(
offset,
columnName,
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
index 1e1e94c8b3d..ee473391226 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
@@ -88,11 +88,12 @@ public class NestedObjectVirtualColumn extends
SpecializedExpressionVirtualColum
StringUtils.format(
"%s(%s)",
NestedDataExpressions.JsonObjectExprMacro.NAME,
-
keyExprMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> {
- final String key = entry.getKey();
- final TypedExpression valueExpr = entry.getValue();
- return Parser.constant(key).stringify() + ',' +
valueExpr.expression;
- }).collect(Collectors.joining(","))
+ Preconditions.checkNotNull(keyExprMap, "object")
+ .entrySet().stream().map(entry -> {
+ final String key = entry.getKey();
+ final TypedExpression valueExpr =
entry.getValue();
+ return Parser.constant(key).stringify() + ',' +
valueExpr.expression;
+ }).collect(Collectors.joining(","))
),
ColumnType.NESTED_DATA,
macroTable
diff --git
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
index 91e7f2aac82..5302ffbcd94 100644
---
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java
@@ -29,7 +29,6 @@ import org.apache.druid.segment.column.ColumnType;
import org.junit.Assert;
import org.junit.Test;
-import java.util.HashMap;
import java.util.Map;
public class NestedObjectVirtualColumnTest
@@ -56,9 +55,10 @@ public class NestedObjectVirtualColumnTest
@Test
public void testGetKeyExprMap()
{
- Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap = new
HashMap<>();
- keyExprMap.put("key1", new
NestedObjectVirtualColumn.TypedExpression("expr1", ColumnType.STRING));
- keyExprMap.put("key2", new
NestedObjectVirtualColumn.TypedExpression("expr2", ColumnType.DOUBLE));
+ Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap =
ImmutableMap.of(
+ "key1", new NestedObjectVirtualColumn.TypedExpression("expr1",
ColumnType.STRING),
+ "key2", new NestedObjectVirtualColumn.TypedExpression("expr2",
ColumnType.DOUBLE)
+ );
NestedObjectVirtualColumn column = new NestedObjectVirtualColumn(
"test_obj",
diff --git
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
index 6a74aaa623f..4c495a8787d 100644
---
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
+++
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaArray/ops_funcs_mv_funcs.08.all.iq
@@ -119,16 +119,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[0]' RETURNING boolean array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col |
++--------------+
+| [1, 0, null] |
+| [1, 0, null] |
+| [null, 0, 1] |
+| [null, 0, 1] |
++--------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C3_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][0]' RETURNING boolean array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col |
++--------------+
+| [1, 0, null] |
+| [1, 0, null] |
+| [null, 0, 1] |
+| [null, 0, 1] |
++--------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C4_D1
@@ -152,16 +170,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[1]' RETURNING bigint array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col |
++--------------+
+| [1, 2, null] |
+| [1, 2, null] |
+| [null, 2, 1] |
+| [null, 2, 1] |
++--------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C6_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][1]' RETURNING bigint array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------+
+| col |
++--------------+
+| [1, 2, null] |
+| [1, 2, null] |
+| [null, 2, 1] |
+| [null, 2, 1] |
++--------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C7_D1
@@ -185,16 +221,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[2]' RETURNING decimal array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++------------------+
+| col |
++------------------+
+| [0.1, 0.2, null] |
+| [0.1, 0.2, null] |
+| [null, 0.2, 0.1] |
+| [null, 0.2, 0.1] |
++------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C9_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][2]' RETURNING decimal array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++------------------+
+| col |
++------------------+
+| [0.1, 0.2, null] |
+| [0.1, 0.2, null] |
+| [null, 0.2, 0.1] |
+| [null, 0.2, 0.1] |
++------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C10_D1
@@ -218,16 +272,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[3]' RETURNING varchar array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++----------------+
+| col |
++----------------+
+| [S1, S2, null] |
+| [S1, S2, null] |
+| [null, S2, S1] |
+| [null, S2, S1] |
++----------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C12_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][3]' RETURNING varchar array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++----------------+
+| col |
++----------------+
+| [S1, S2, null] |
+| [S1, S2, null] |
+| [null, S2, S1] |
+| [null, S2, S1] |
++----------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C13_D1
@@ -251,16 +323,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[4]' RETURNING varchar array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------+
+| col |
++--------------------+
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
++--------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C15_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][4]' RETURNING varchar array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------+
+| col |
++--------------------+
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
+| [null, null, null] |
++--------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C16_D1
@@ -284,16 +374,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[5]' RETURNING varchar array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| [] |
+| [] |
+| [] |
+| [] |
++-----+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C18_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][5]' RETURNING varchar array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| [] |
+| [] |
+| [] |
+| [] |
++-----+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C19_D1
@@ -317,16 +425,34 @@ FROM test_array;
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[6]' RETURNING varchar array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------------+
+| col |
++--------------------------+
+| [null, S1, 0.1, 1, true] |
+| [null, S1, 0.1, 1, true] |
+| [true, 1, 0.1, S1, null] |
+| [true, 1, 0.1, S1, null] |
++--------------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C21_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][6]' RETURNING varchar array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++--------------------------+
+| col |
++--------------------------+
+| [null, S1, 0.1, 1, true] |
+| [null, S1, 0.1, 1, true] |
+| [true, 1, 0.1, S1, null] |
+| [true, 1, 0.1, S1, null] |
++--------------------------+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C22_D1
@@ -341,14 +467,31 @@ Cannot apply 'MV_TO_ARRAY' to arguments of type
'MV_TO_ARRAY(
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7]' RETURNING varchar array)) AS col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| |
+| |
+| |
+| |
++-----+
+(4 rows)
+
+!ok
#-------------------------------------------------------------------------
# TESTCASE: test_mv_funcs TEST_ID: A1_B16_C24_D1
#-------------------------------------------------------------------------
SELECT mv_to_array(json_value(a_nested, '$[7][7]' RETURNING varchar array)) AS
col
FROM test_array;
-should be an identifier expression. Use array() instead
-!error
++-----+
+| col |
++-----+
+| |
+| |
+| |
+| |
++-----+
+(4 rows)
+!ok
diff --git
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
index 4f2bf8b90c8..ac7adb76703 100644
---
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
+++
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq
@@ -666,10 +666,14 @@ FROM test_json_cols;
"intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
- "type" : "expression",
+ "type" : "nested-object",
"name" : "v0",
- "expression" : "json_object('a',\"c1\")",
- "outputType" : "COMPLEX<json>"
+ "object" : {
+ "a" : {
+ "expression" : "\"c1\"",
+ "type" : "COMPLEX<json>"
+ }
+ }
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
@@ -761,10 +765,14 @@ FROM test_json_cols;
"intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
- "type" : "expression",
+ "type" : "nested-object",
"name" : "v0",
- "expression" : "json_object('a',\"c1\")",
- "outputType" : "COMPLEX<json>"
+ "object" : {
+ "a" : {
+ "expression" : "\"c1\"",
+ "type" : "COMPLEX<json>"
+ }
+ }
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
diff --git
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
index 5e8f30930e7..176c72e73fa 100644
---
a/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
+++
b/quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq
@@ -681,10 +681,14 @@ FROM test_json_cols;
"intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
- "type" : "expression",
+ "type" : "nested-object",
"name" : "v0",
- "expression" : "json_object('a',\"c1\")",
- "outputType" : "COMPLEX<json>"
+ "object" : {
+ "a" : {
+ "expression" : "\"c1\"",
+ "type" : "COMPLEX<json>"
+ }
+ }
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
@@ -725,10 +729,14 @@ FROM test_json_cols;
"intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
- "type" : "expression",
+ "type" : "nested-object",
"name" : "v0",
- "expression" : "json_object('a',\"c1\")",
- "outputType" : "COMPLEX<json>"
+ "object" : {
+ "a" : {
+ "expression" : "\"c1\"",
+ "type" : "COMPLEX<json>"
+ }
+ }
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
index ec257230e7b..0783c992389 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
@@ -369,7 +369,7 @@ public class DruidExpression
public String getDirectColumn()
{
- return Preconditions.checkNotNull(simpleExtraction.getColumn());
+ return Preconditions.checkNotNull(simpleExtraction,
"simpleExtraction").getColumn();
}
public boolean isSimpleExtraction()
@@ -401,7 +401,7 @@ public class DruidExpression
final ExpressionParser parser
)
{
- return virtualColumnCreator.create(name, outputType, expression.get(),
parser);
+ return virtualColumnCreator.create(name, outputType, parser, this);
}
public VirtualColumn toExpressionVirtualColumn(
@@ -410,7 +410,7 @@ public class DruidExpression
final ExpressionParser parser
)
{
- return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType,
expression.get(), parser);
+ return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType, parser,
this);
}
public NodeType getType()
@@ -418,6 +418,16 @@ public class DruidExpression
return nodeType;
}
+ /**
+ * Returns whether this expression is {@link NodeType#IDENTIFIER} or {@link
NodeType#SPECIALIZED}. Useful because
+ * these are the expressions that can be expected to become direct column
references once virtual columns have gone
+ * through a specialization pass.
+ */
+ public boolean isIdentifierOrSpecialized()
+ {
+ return nodeType == NodeType.IDENTIFIER || nodeType == NodeType.SPECIALIZED;
+ }
+
/**
* The {@link ColumnType} of this expression as inferred when this
expression was created. This is likely the result
* of converting the output of {@link
org.apache.calcite.rex.RexNode#getType()} using
@@ -609,15 +619,28 @@ public class DruidExpression
@FunctionalInterface
public interface VirtualColumnCreator
{
- VirtualColumn create(String name, ColumnType outputType, String
expression, ExpressionParser parser);
+ /**
+ * Create a virtual column for an expression.
+ *
+ * @param name name of the virtual column
+ * @param outputType type of the virtual column
+ * @param parser expression parser, if needed
+ * @param self expression, possibly rewritten to refer to
specialized virtual columns
+ */
+ VirtualColumn create(
+ String name,
+ ColumnType outputType,
+ ExpressionParser parser,
+ DruidExpression self
+ );
}
public static class ExpressionVirtualColumnCreator implements
VirtualColumnCreator
{
@Override
- public VirtualColumn create(String name, ColumnType outputType, String
expression, ExpressionParser parser)
+ public VirtualColumn create(String name, ColumnType outputType,
ExpressionParser parser, DruidExpression self)
{
- return new ExpressionVirtualColumn(name, expression,
parser.parse(expression), outputType);
+ return new ExpressionVirtualColumn(name, self.getExpression(),
parser.parse(self.getExpression()), outputType);
}
}
}
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
index 0ef99ba3c17..8bbf3c9c3b0 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
@@ -389,11 +389,11 @@ public class MultiValueStringOperatorConversions
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
builder,
druidExpressions,
- (name, outputType, expression, macroTable) -> new
ListFilteredVirtualColumn(
+ (name, outputType, parser, self) -> new ListFilteredVirtualColumn(
name,
- druidExpressions.get(0)
- .getSimpleExtraction()
-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+ self.getArguments().get(0)
+ .getSimpleExtraction()
+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
literals,
isAllowList()
)
@@ -470,11 +470,11 @@ public class MultiValueStringOperatorConversions
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
builder,
druidExpressions,
- (name, outputType, expression, macroTable) -> new
RegexFilteredVirtualColumn(
+ (name, outputType, parser, self) -> new RegexFilteredVirtualColumn(
name,
- druidExpressions.get(0)
- .getSimpleExtraction()
-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+ self.getArguments().get(0)
+ .getSimpleExtraction()
+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
pattern
)
);
@@ -555,11 +555,11 @@ public class MultiValueStringOperatorConversions
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
builder,
druidExpressions,
- (name, outputType, expression, macroTable) -> new
PrefixFilteredVirtualColumn(
+ (name, outputType, parser, self) -> new
PrefixFilteredVirtualColumn(
name,
- druidExpressions.get(0)
- .getSimpleExtraction()
-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
+ self.getArguments().get(0)
+ .getSimpleExtraction()
+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
prefix
)
);
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
index 61fa1f11851..260b5a9932e 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
@@ -46,6 +46,7 @@ import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidSqlInput;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.expression.NestedDataExpressions;
@@ -54,12 +55,15 @@ import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
+import org.apache.druid.segment.virtual.NestedMergeVirtualColumn;
+import org.apache.druid.segment.virtual.NestedObjectVirtualColumn;
import org.apache.druid.sql.calcite.expression.DirectOperatorConversion;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.Calcites;
+import org.apache.druid.sql.calcite.planner.ExpressionParser;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.planner.convertlet.DruidConvertletFactory;
import org.apache.druid.sql.calcite.table.RowSignatures;
@@ -67,7 +71,10 @@ import org.apache.druid.sql.calcite.table.RowSignatures;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Collections;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
public class NestedDataOperatorConversions
{
@@ -233,8 +240,8 @@ public class NestedDataOperatorConversions
ImmutableList.of(
DruidExpression.ofColumn(ColumnType.NESTED_DATA,
druidExpressions.get(0).getDirectColumn())
),
- (name, outputType, expression, macroTable) -> new
NestedFieldVirtualColumn(
- druidExpressions.get(0).getDirectColumn(),
+ (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+ self.getArguments().get(0).getDirectColumn(),
name,
outputType,
parts,
@@ -414,16 +421,13 @@ public class NestedDataOperatorConversions
DruidExpression.stringLiteral(druidType.asTypeString())
);
- if (druidExpressions.get(0).isSimpleExtraction()) {
-
+ if (druidExpressions.get(0).isIdentifierOrSpecialized()) {
return DruidExpression.ofVirtualColumn(
druidType,
builder,
- ImmutableList.of(
- DruidExpression.ofColumn(ColumnType.NESTED_DATA,
druidExpressions.get(0).getDirectColumn())
- ),
- (name, outputType, expression, macroTable) -> new
NestedFieldVirtualColumn(
- druidExpressions.get(0).getDirectColumn(),
+ List.of(druidExpressions.get(0)),
+ (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+ self.getArguments().get(0).getDirectColumn(),
name,
outputType,
parts,
@@ -556,8 +560,8 @@ public class NestedDataOperatorConversions
ImmutableList.of(
DruidExpression.ofColumn(ColumnType.NESTED_DATA,
druidExpressions.get(0).getDirectColumn())
),
- (name, outputType, expression, macroTable) -> new
NestedFieldVirtualColumn(
- druidExpressions.get(0).getDirectColumn(),
+ (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+ self.getArguments().get(0).getDirectColumn(),
name,
outputType,
parts,
@@ -720,8 +724,8 @@ public class NestedDataOperatorConversions
ImmutableList.of(
DruidExpression.ofColumn(ColumnType.NESTED_DATA,
druidExpressions.get(0).getDirectColumn())
),
- (name, outputType, expression, macroTable) -> new
NestedFieldVirtualColumn(
- druidExpressions.get(0).getDirectColumn(),
+ (name, outputType, parser, self) -> new NestedFieldVirtualColumn(
+ self.getArguments().get(0).getDirectColumn(),
name,
null,
parts,
@@ -768,14 +772,6 @@ public class NestedDataOperatorConversions
@Override
public DruidExpression toDruidExpression(PlannerContext plannerContext,
RowSignature rowSignature, RexNode rexNode)
{
- final DruidExpression.DruidExpressionCreator expressionFunction =
druidExpressions ->
- DruidExpression.ofExpression(
- ColumnType.NESTED_DATA,
- null,
- DruidExpression.functionCall(FUNCTION_NAME),
- druidExpressions
- );
-
final RexCall call = (RexCall) rexNode;
// we ignore the first argument because calcite sets a 'nullBehavior'
parameter by the time it gets here
@@ -790,7 +786,88 @@ public class NestedDataOperatorConversions
return null;
}
- return expressionFunction.create(druidExpressions);
+ if (allKeysLiteral(druidExpressions,
plannerContext.getExpressionParser())) {
+ return DruidExpression.ofVirtualColumn(
+ ColumnType.NESTED_DATA,
+
DruidExpression.functionCall(NestedDataExpressions.JsonObjectExprMacro.NAME),
+ druidExpressions,
+ (name, outputType, parser, self) -> new NestedObjectVirtualColumn(
+ name,
+ argsAsObjectMap(self.getArguments(),
plannerContext.getExpressionParser()),
+ plannerContext.getExprMacroTable()
+ )
+ );
+ } else {
+ return DruidExpression.ofFunctionCall(
+ ColumnType.NESTED_DATA,
+ NestedDataExpressions.JsonObjectExprMacro.NAME,
+ druidExpressions
+ );
+ }
+ }
+
+ /**
+ * Returns whether the object represented by the given arguments has all
literal keys.
+ */
+ private static boolean allKeysLiteral(
+ final List<DruidExpression> args,
+ final ExpressionParser parser
+ )
+ {
+ for (int i = 0; i < args.size(); i += 2) {
+ final DruidExpression key = args.get(i);
+
+ if (key.getType() != DruidExpression.NodeType.LITERAL) {
+ return false;
+ }
+
+ final String keyLiteral =
Evals.asString(parser.parse(key.getExpression()).getLiteralValue());
+ if (keyLiteral == null) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Returns a map for the object represented by the given arguments, if
+ * {@link #allKeysLiteral(List, ExpressionParser)} is true. Otherwise
throws an exception.
+ */
+ private static Map<String, NestedObjectVirtualColumn.TypedExpression>
argsAsObjectMap(
+ final List<DruidExpression> args,
+ final ExpressionParser parser
+ )
+ {
+ // Use LinkedHashMap to preserve order.
+ final Map<String, NestedObjectVirtualColumn.TypedExpression> retVal =
new LinkedHashMap<>();
+ for (int i = 0; i < args.size(); i += 2) {
+ final DruidExpression key = args.get(i);
+ if (key.getType() != DruidExpression.NodeType.LITERAL) {
+ throw DruidException.defensive("Do not call this method unless
allKeysLiteral returns true");
+ }
+
+ final String keyLiteral =
Evals.asString(parser.parse(key.getExpression()).getLiteralValue());
+ if (keyLiteral == null) {
+ throw DruidException.defensive("Do not call this method unless
allKeysLiteral returns true");
+ }
+
+ final NestedObjectVirtualColumn.TypedExpression typedExpression;
+ if (i + 1 < args.size()) {
+ typedExpression = new NestedObjectVirtualColumn.TypedExpression(
+ args.get(i + 1).getExpression(),
+ args.get(i + 1).getDruidType()
+ );
+ } else {
+ typedExpression = new NestedObjectVirtualColumn.TypedExpression(
+ DruidExpression.nullLiteral(),
+ ColumnType.LONG
+ );
+ }
+
+ retVal.put(keyLiteral, typedExpression);
+ }
+ return retVal;
}
}
@@ -831,11 +908,26 @@ public class NestedDataOperatorConversions
plannerContext,
rowSignature,
rexNode,
- druidExpressions -> DruidExpression.ofExpression(
- ColumnType.NESTED_DATA,
- DruidExpression.functionCall("json_merge"),
- druidExpressions
- )
+ druidExpressions -> {
+ if
(druidExpressions.stream().allMatch(DruidExpression::isIdentifierOrSpecialized))
{
+ return DruidExpression.ofVirtualColumn(
+ ColumnType.NESTED_DATA,
+ DruidExpression.functionCall("json_merge"),
+ druidExpressions,
+ (name, outputType, parser, self) -> new
NestedMergeVirtualColumn(
+ name,
+
self.getArguments().stream().map(DruidExpression::getDirectColumn).collect(Collectors.toList()),
+ plannerContext.getExprMacroTable()
+ )
+ );
+ } else {
+ return DruidExpression.ofFunctionCall(
+ ColumnType.NESTED_DATA,
+ "json_merge",
+ druidExpressions
+ );
+ }
+ }
);
}
}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index 5aaf1d80ef8..67c2271bfe7 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -22,6 +22,7 @@ package org.apache.druid.sql.calcite.rel;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
@@ -723,35 +724,15 @@ public class DruidQuery
}
}
- VirtualColumns getVirtualColumns(final boolean includeDimensions)
+ /**
+ * Returns the virtual columns required for a query. Does not necessarily return all virtual columns from
+ * {@link #sourceRowSignature}, only the ones actually referenced (and any that they depend on).
+ *
+ * @param includeDimensions whether to include virtual columns referred to by {@link Grouping#getDimensions()}
+ */
+ VirtualColumns computeVirtualColumns(final boolean includeDimensions)
{
- // 'sourceRowSignature' could provide a list of all defined virtual
columns while constructing a query, but we
- // still want to collect the set of VirtualColumns this way to ensure we
only add what is still being used after
- // the various transforms and optimizations
- Set<VirtualColumn> virtualColumns = new HashSet<>();
-
-
- // rewrite any "specialized" virtual column expressions as top level
virtual columns so that their native
- // implementation can be used instead of being composed as part of some
expression tree in an expresson virtual
- // column
- Set<String> specialized = new HashSet<>();
- final boolean forceExpressionVirtualColumns =
- plannerContext.getPlannerConfig().isForceExpressionVirtualColumns();
- virtualColumnRegistry.visitAllSubExpressions((expression) -> {
- if (!forceExpressionVirtualColumns && expression.getType() ==
DruidExpression.NodeType.SPECIALIZED) {
- // add the expression to the top level of the registry as a standalone
virtual column
- final String name =
virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
- expression,
- expression.getDruidType()
- );
- specialized.add(name);
- // replace with an identifier expression of the new virtual column name
- return DruidExpression.ofColumn(expression.getDruidType(), name);
- } else {
- // do nothing
- return expression;
- }
- });
+ final Set<VirtualColumn> virtualColumns = new HashSet<>();
// we always want to add any virtual columns used by the query level
DimFilter
if (filter != null) {
@@ -802,9 +783,23 @@ public class DruidQuery
}
}
- for (String columnName : specialized) {
- if (virtualColumnRegistry.isVirtualColumnDefined(columnName)) {
- virtualColumns.add(virtualColumnRegistry.getVirtualColumn(columnName));
+ // Include any specialized virtual columns that we need for the top-level virtualColumns.
+ final Set<VirtualColumn> checkVirtualColumns = new
HashSet<>(virtualColumns);
+ while (!checkVirtualColumns.isEmpty()) {
+ final ImmutableSet<VirtualColumn> checkVirtualColumnsCopy =
ImmutableSet.copyOf(checkVirtualColumns);
+ checkVirtualColumns.clear();
+
+ // Check all virtual columns that were in checkVirtualColumns to ensure we have their dependencies.
+ // If other virtual columns are encountered, add them to checkVirtualColumns so we check them recursively.
+ for (final VirtualColumn virtualColumn : checkVirtualColumnsCopy) {
+ for (final String requiredColumnName :
virtualColumn.requiredColumns()) {
+ if
(virtualColumnRegistry.isVirtualColumnDefined(requiredColumnName)) {
+ final VirtualColumn requiredVirtualColumn =
virtualColumnRegistry.getVirtualColumn(requiredColumnName);
+ if (virtualColumns.add(requiredVirtualColumn)) {
+ checkVirtualColumns.add(requiredVirtualColumn);
+ }
+ }
+ }
}
}
@@ -1110,7 +1105,7 @@ public class DruidQuery
plannerContext.getJoinableFactoryWrapper()
);
- if (!getVirtualColumns(true).isEmpty()) {
+ if (!computeVirtualColumns(true).isEmpty()) {
// timeBoundary query does not support virtual columns.
return null;
}
@@ -1226,6 +1221,7 @@ public class DruidQuery
}
theContext.putAll(plannerContext.queryContextMap());
+ final VirtualColumns virtualColumns = computeVirtualColumns(false);
final Pair<DataSource, Filtration> dataSourceFiltrationPair =
getFiltration(
dataSource,
filter,
@@ -1248,7 +1244,7 @@ public class DruidQuery
newDataSource,
filtration.getQuerySegmentSpec(),
descending,
- getVirtualColumns(false),
+ virtualColumns,
filtration.getDimFilter(),
queryGranularity,
grouping.getAggregatorFactories(),
@@ -1331,6 +1327,7 @@ public class DruidQuery
return null;
}
+ final VirtualColumns virtualColumns = computeVirtualColumns(true);
final Pair<DataSource, Filtration> dataSourceFiltrationPair =
getFiltration(
dataSource,
filter,
@@ -1347,7 +1344,7 @@ public class DruidQuery
return new TopNQuery(
newDataSource,
- getVirtualColumns(true),
+ virtualColumns,
dimensionSpec,
topNMetricSpec,
Ints.checkedCast(sorting.getOffsetLimit().getLimit()),
@@ -1377,6 +1374,7 @@ public class DruidQuery
return null;
}
+ final VirtualColumns virtualColumns = computeVirtualColumns(true);
final Pair<DataSource, Filtration> dataSourceFiltrationPair =
getFiltration(
dataSource,
filter,
@@ -1405,7 +1403,7 @@ public class DruidQuery
GroupByQuery query = new GroupByQuery(
newDataSource,
filtration.getQuerySegmentSpec(),
- getVirtualColumns(true),
+ virtualColumns,
filtration.getDimFilter(),
Granularities.ALL,
grouping.getDimensionSpecs(),
@@ -1655,6 +1653,7 @@ public class DruidQuery
return null;
}
+ final VirtualColumns virtualColumns = computeVirtualColumns(true);
final Pair<DataSource, Filtration> dataSourceFiltrationPair =
getFiltration(
dataSource,
filter,
@@ -1716,7 +1715,6 @@ public class DruidQuery
final Set<String> scanColumns = new
LinkedHashSet<>(outputRowSignature.getColumnNames());
orderByColumns.forEach(column -> scanColumns.add(column.getColumnName()));
- final VirtualColumns virtualColumns = getVirtualColumns(true);
final ImmutableList<String> scanColumnsList =
ImmutableList.copyOf(scanColumns);
return new ScanQuery(
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
index 3734d2a6d42..8a716757379 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/VirtualColumnRegistry.java
@@ -21,6 +21,7 @@ package org.apache.druid.sql.calcite.rel;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.druid.java.util.common.NonnullPair;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
@@ -35,6 +36,7 @@ import javax.annotation.Nullable;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@@ -56,8 +58,8 @@ public class VirtualColumnRegistry
private final Map<ExpressionAndTypeHint, String> virtualColumnsByExpression;
private final Map<String, ExpressionAndTypeHint> virtualColumnsByName;
private final String virtualColumnPrefix;
+ private final boolean forceExpressionVirtualColumns;
private int virtualColumnCounter;
- private boolean forceExpressionVirtualColumns;
private VirtualColumnRegistry(
RowSignature baseRowSignature,
@@ -116,18 +118,13 @@ public class VirtualColumnRegistry
final ExpressionAndTypeHint candidate = wrap(expression, typeHint);
if (!virtualColumnsByExpression.containsKey(candidate)) {
final String virtualColumnName = virtualColumnPrefix +
virtualColumnCounter++;
-
- virtualColumnsByExpression.put(
- candidate,
- virtualColumnName
- );
- virtualColumnsByName.put(
- virtualColumnName,
- candidate
- );
+ virtualColumnsByExpression.put(candidate, virtualColumnName);
+ virtualColumnsByName.put(virtualColumnName, candidate);
+ specialize(virtualColumnName, candidate);
+ return virtualColumnName;
+ } else {
+ return virtualColumnsByExpression.get(candidate);
}
-
- return virtualColumnsByExpression.get(candidate);
}
/**
@@ -214,21 +211,6 @@ public class VirtualColumnRegistry
.collect(Collectors.toList());
}
- public void visitAllSubExpressions(DruidExpression.DruidExpressionShuttle
shuttle)
- {
- final Queue<Map.Entry<String, ExpressionAndTypeHint>> toVisit = new
ArrayDeque<>(virtualColumnsByName.entrySet());
- while (!toVisit.isEmpty()) {
- final Map.Entry<String, ExpressionAndTypeHint> entry = toVisit.poll();
- final String key = entry.getKey();
- final ExpressionAndTypeHint wrapped = entry.getValue();
- final List<DruidExpression> newArgs =
shuttle.visitAll(wrapped.getExpression().getArguments());
- final ExpressionAndTypeHint newWrapped =
wrap(wrapped.getExpression().withArguments(newArgs), wrapped.getTypeHint());
- virtualColumnsByName.put(key, newWrapped);
- virtualColumnsByExpression.remove(wrapped);
- virtualColumnsByExpression.put(newWrapped, key);
- }
- }
-
public Collection<? extends VirtualColumn> getAllVirtualColumns(List<String>
requiredColumns)
{
return requiredColumns.stream()
@@ -327,4 +309,62 @@ public class VirtualColumnRegistry
columns.sort(Comparator.comparing(VirtualColumn::getOutputName));
return VirtualColumns.create(columns);
}
+
+ /**
+ * Called to specialize subexpressions of a new virtual column immediately after adding it. Specialization is
+ * recursive: this function may create chains of virtual columns that call into each other.
+ */
+ private void specialize(final String name, final ExpressionAndTypeHint
expressionAndTypeHint)
+ {
+ if (forceExpressionVirtualColumns) {
+ return;
+ }
+
+ final Queue<NonnullPair<String, ExpressionAndTypeHint>> toVisit =
+ new ArrayDeque<>(Collections.singletonList(new NonnullPair<>(name,
expressionAndTypeHint)));
+ final SpecializationShuttle shuttle = new SpecializationShuttle();
+
+ while (!toVisit.isEmpty()) {
+ final NonnullPair<String, ExpressionAndTypeHint> entry = toVisit.poll();
+ final String virtualColumnName = entry.lhs;
+ final ExpressionAndTypeHint expression = entry.rhs;
+ final List<DruidExpression> newArgs =
shuttle.visitAll(expression.getExpression().getArguments());
+ ExpressionAndTypeHint newExpression = wrap(
+ shuttle.visit(expression.getExpression().withArguments(newArgs)),
+ expression.getTypeHint()
+ );
+
+ // If the expression becomes a direct access of another virtual column after rewriting, then map this
+ // virtual column name to the expression for the referenced virtual column.
+ if (newExpression.getExpression().isDirectColumnAccess()
+ &&
virtualColumnsByName.containsKey(newExpression.getExpression().getDirectColumn()))
{
+ newExpression =
virtualColumnsByName.get(newExpression.getExpression().getDirectColumn());
+ }
+
+ // Map both the old and new expression to the same virtual column name.
+ virtualColumnsByName.put(virtualColumnName, newExpression);
+ virtualColumnsByExpression.put(expression, virtualColumnName);
+ virtualColumnsByExpression.putIfAbsent(newExpression, virtualColumnName);
+ }
+ }
+
+ /**
+ * Shuttle used by {@link #specialize(String, ExpressionAndTypeHint)}.
+ */
+ private class SpecializationShuttle implements
DruidExpression.DruidExpressionShuttle
+ {
+ @Override
+ public DruidExpression visit(DruidExpression expression)
+ {
+ if (expression.getType() == DruidExpression.NodeType.SPECIALIZED) {
+ // add the expression to the top level of the registry as a standalone virtual column
+ final String name = getOrCreateVirtualColumnForExpression(expression,
expression.getDruidType());
+ // replace with an identifier expression of the new virtual column name
+ return DruidExpression.ofColumn(expression.getDruidType(), name);
+ } else {
+ // do nothing
+ return expression;
+ }
+ }
+ }
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
index ae3a6a0eec3..5b8ac66487d 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
@@ -1606,25 +1606,25 @@ public class CalciteMultiValueStringQueryTest extends
BaseCalciteQueryTest
.setVirtualColumns(
expressionVirtualColumn(
"v0",
- "array_length(\"v2\")",
- ColumnType.LONG
- ),
- expressionVirtualColumn(
- "v1",
- "array_length(\"dim3\")",
+ "array_length(\"v1\")",
ColumnType.LONG
),
new ListFilteredVirtualColumn(
- "v2",
+ "v1",
DefaultDimensionSpec.of("dim3"),
ImmutableSet.of("b"),
true
+ ),
+ expressionVirtualColumn(
+ "v2",
+ "array_length(\"dim3\")",
+ ColumnType.LONG
)
)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0",
ColumnType.LONG),
- new DefaultDimensionSpec("v1", "d1",
ColumnType.LONG)
+ new DefaultDimensionSpec("v2", "d1",
ColumnType.LONG)
)
)
.setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 1325dc93373..8d04c66ec11 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -69,6 +69,8 @@ import org.apache.druid.segment.nested.NestedPathField;
import org.apache.druid.segment.nested.ObjectStorageEncoding;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
+import org.apache.druid.segment.virtual.NestedMergeVirtualColumn;
+import org.apache.druid.segment.virtual.NestedObjectVirtualColumn;
import
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import
org.apache.druid.sql.calcite.CalciteNestedDataQueryTest.NestedComponentSupplier;
@@ -88,6 +90,7 @@ import org.mockito.Mockito;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat;
@@ -4911,10 +4914,12 @@ public abstract class CalciteNestedDataQueryTest
extends BaseCalciteQueryTest
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
- new ExpressionVirtualColumn(
+ new NestedObjectVirtualColumn(
"v0",
- "json_object('n',\"v1\",'x',\"v2\")",
- ColumnType.NESTED_DATA,
+ ImmutableMap.of(
+ "n", new
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.NESTED_DATA),
+ "x", new
NestedObjectVirtualColumn.TypedExpression("\"v2\"", ColumnType.STRING)
+ ),
queryFramework().macroTable()
),
new NestedFieldVirtualColumn(
@@ -4949,7 +4954,7 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
}
@Test
- public void testJsonMerging()
+ public void testJsonMerging_lhsStringLiteral()
{
testQuery(
"SELECT "
@@ -4962,7 +4967,7 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.virtualColumns(
new ExpressionVirtualColumn(
"v0",
-
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
+
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',\"v2\")",
ColumnType.NESTED_DATA,
queryFramework().macroTable()
),
@@ -4974,6 +4979,17 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
false,
null,
false
+ ),
+ new NestedObjectVirtualColumn(
+ "v2",
+ ImmutableMap.of(
+ "x",
+ new NestedObjectVirtualColumn.TypedExpression(
+ "\"v1\"",
+ ColumnType.STRING
+ )
+ ),
+ queryFramework().macroTable()
)
)
.columns("v0")
@@ -4996,6 +5012,159 @@ public abstract class CalciteNestedDataQueryTest
extends BaseCalciteQueryTest
);
}
+ @Test
+ public void testJsonMerging_lhsJsonObject()
+ {
+ testQuery(
+ "SELECT\n"
+ + "JSON_MERGE(\n"
+ + " JSON_OBJECT('a': 'x'),\n"
+ + " JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x'))"
+ + ")\n"
+ + "FROM druid.nested",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(DATA_SOURCE)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(
+ new NestedMergeVirtualColumn("v0", List.of("v1", "v3"),
queryFramework().macroTable()),
+ new NestedObjectVirtualColumn(
+ "v1",
+ ImmutableMap.of("a", new
NestedObjectVirtualColumn.TypedExpression("'x'", ColumnType.STRING)),
+ queryFramework().macroTable()
+ ),
+ new NestedFieldVirtualColumn(
+ "nest",
+ "v2",
+ ColumnType.STRING,
+ ImmutableList.of(new NestedPathField("x")),
+ false,
+ null,
+ false
+ ),
+ new NestedObjectVirtualColumn(
+ "v3",
+ ImmutableMap.of(
+ "x",
+ new
NestedObjectVirtualColumn.TypedExpression("\"v2\"", ColumnType.STRING)
+ ),
+ queryFramework().macroTable()
+ )
+ )
+ .columns("v0")
+ .columnTypes(ColumnType.ofComplex("json"))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"}
+ ),
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.NESTED_DATA)
+ .build()
+ );
+ }
+
+ @Test
+ public void testJsonValue_onJsonObject_onJsonValue()
+ {
+ testQuery(
+ "SELECT\n"
+ + "JSON_VALUE(\n"
+ + " JSON_OBJECT('a': JSON_VALUE(nest, '$.x')),\n"
+ + " '$.a'\n"
+ + ")\n"
+ + "FROM druid.nested",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(DATA_SOURCE)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(
+ new NestedFieldVirtualColumn(
+ "v2",
+ "v0",
+ ColumnType.STRING,
+ ImmutableList.of(new NestedPathField("a")),
+ false,
+ null,
+ false
+ ),
+ new NestedFieldVirtualColumn(
+ "nest",
+ "v1",
+ ColumnType.STRING,
+ ImmutableList.of(new NestedPathField("x")),
+ false,
+ null,
+ false
+ ),
+ new NestedObjectVirtualColumn(
+ "v2",
+ ImmutableMap.of(
+ "a",
+ new
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.STRING)
+ ),
+ queryFramework().macroTable()
+ )
+ )
+ .columns("v0")
+ .columnTypes(ColumnType.STRING)
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"100"},
+ new Object[]{null},
+ new Object[]{"200"},
+ new Object[]{null},
+ new Object[]{null},
+ new Object[]{"100"},
+ new Object[]{null}
+ ),
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .build()
+ );
+ }
+
+ @Test
+ public void testJsonMerging_withAggregateFilterAndQueryFilter()
+ {
+ testQuery(
+ "SELECT\n"
+ + "COUNT(*) FILTER (WHERE TIME_IN_INTERVAL(__time, '1000/3000'))\n"
+ + "FROM nested\n"
+ + "WHERE JSON_VALUE(JSON_MERGE(JSON_OBJECT(), \"nest\"), '$.x') = 100",
+ ImmutableList.of(
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(DATA_SOURCE)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(
+ new NestedFieldVirtualColumn("v2", "$.x", "v0",
ColumnType.LONG),
+ new NestedObjectVirtualColumn("v1", Map.of(),
queryFramework().macroTable()),
+ new NestedMergeVirtualColumn("v2", List.of("v1",
"nest"), queryFramework().macroTable())
+ )
+ .filters(equality("v0", 100L, ColumnType.LONG))
+ .aggregators(
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a0"),
+ range("__time", ColumnType.LONG, timestamp("1000"),
timestamp("3000"), false, true)
+ )
+ )
+ .context(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ ImmutableList.of(new Object[]{2L}),
+ RowSignature.builder().add("EXPR$0", ColumnType.LONG).build()
+ );
+ }
+
@Test
public void testCompositionTyping()
{
@@ -5008,11 +5177,11 @@ public abstract class CalciteNestedDataQueryTest
extends BaseCalciteQueryTest
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
- new ExpressionVirtualColumn(
+ new NestedFieldVirtualColumn(
+ "v2",
+ "$.x",
"v0",
- "json_value(json_object('x',\"v1\"),'$.x', 'LONG')",
- ColumnType.LONG,
- queryFramework().macroTable()
+ ColumnType.LONG
),
new NestedFieldVirtualColumn(
"nest",
@@ -5022,6 +5191,14 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
false,
"$.x",
false
+ ),
+ new NestedObjectVirtualColumn(
+ "v2",
+ ImmutableMap.of(
+ "x",
+ new
NestedObjectVirtualColumn.TypedExpression("\"v1\"", ColumnType.LONG)
+ ),
+ queryFramework().macroTable()
)
)
.columns("v0")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]