[
https://issues.apache.org/jira/browse/CALCITE-7614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated CALCITE-7614:
------------------------------------
Labels: pull-request-available (was: )
> UNNEST of an unqualified struct-rooted array path fails validation: "Column
> 's.s' not found"
> --------------------------------------------------------------------------------------------
>
> Key: CALCITE-7614
> URL: https://issues.apache.org/jira/browse/CALCITE-7614
> Project: Calcite
> Issue Type: Bug
> Components: core
> Affects Versions: 1.40.0
> Reporter: tnakama
> Priority: Major
> Labels: pull-request-available
>
> A table column `s` is a STRUCT (StructKind.PEEK_FIELDS) containing an array
> field `arr ARRAY<VARCHAR>`. UNNEST of that array via an *unqualified* path
> fails validation, while the table-qualified path validates. The struct-column
> name is duplicated during qualification (note the doubled 's.s').
> Schema (Frameworks, default conformance): t(s STRUCT<arr ARRAY<VARCHAR>>),
> where the struct type of column s uses StructKind.PEEK_FIELDS
> (1) SELECT s.arr FROM t -> OK
> (2) SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS x -> OK
> (3) SELECT * FROM t CROSS JOIN UNNEST(s.arr) AS x -> FAILS:
> org.apache.calcite.sql.validate.SqlValidatorException:
> Column 's.s' not found in table 't'
> Analysis:
> UnnestNamespace.getTable() and UnnestNamespace.getColumnUnnestedFrom() call
> scope.fullyQualify(operand) on the UNNEST operand. For an unqualified
> identifier whose leading component is a PEEK_FIELDS struct column,
> DelegatingScope.fullyQualify() duplicates the struct-column segment, so
> resolution throws columnNotFoundInTable (DelegatingScope, the
> suffix-shortening loop). The table-qualified form disambiguates and avoids it.
> Reproduction Code
> ```
> import java.util.List;
> import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
> import org.apache.calcite.rel.type.RelDataType;
> import org.apache.calcite.rel.type.RelDataTypeFactory;
> import org.apache.calcite.rel.type.RelDataTypeSystem;
> import org.apache.calcite.schema.SchemaPlus;
> import org.apache.calcite.schema.impl.AbstractTable;
> import org.apache.calcite.avatica.util.Casing;
> import org.apache.calcite.sql.SqlNode;
> import org.apache.calcite.sql.parser.SqlParser;
> import org.apache.calcite.sql.type.SqlTypeName;
> import org.apache.calcite.tools.FrameworkConfig;
> import org.apache.calcite.tools.Frameworks;
> import org.apache.calcite.tools.Planner;
> import org.apache.calcite.rel.type.StructKind;
> import org.junit.jupiter.api.Test;
> /**
> * PROBE (throwaway): pure-Calcite reproduction, with NO Mila SqlConverter,
> * operator table, or custom {@code MilaSqlConformance}. Uses the
> * {@link Frameworks} default config (default conformance).
> *
> * <p>Schema: {@code nested_struct_tbl(user_id VARCHAR, purchase
> * STRUCT<order_id VARCHAR, items ARRAY<STRUCT<name VARCHAR, price BIGINT>>>)}
> * with {@link StructKind#PEEK_FIELDS} so that compound dotted access into the
> * struct is allowed by identifier resolution.
> *
> * <p>Goal: determine whether {@code UNNEST(purchase.items)} (unqualified)
> * fails in vanilla Calcite, which would confirm a Calcite-core bug rather
> * than a Mila type/conformance interaction.
> */
> class VanillaCalciteUnnestStructReproTest {
> private static FrameworkConfig config() {
> RelDataTypeFactory tf = new
> JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
> RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
> RelDataType bigint = tf.createSqlType(SqlTypeName.BIGINT);
> // STRUCT<name VARCHAR, price BIGINT> with PEEK_FIELDS
> RelDataType itemStruct =
> tf.createStructType(
> StructKind.PEEK_FIELDS, List.of(varchar, bigint), List.of("name",
> "price"));
> RelDataType itemsArray = tf.createArrayType(itemStruct, -1);
> // STRUCT<order_id VARCHAR, items ARRAY<...>> with PEEK_FIELDS
> RelDataType purchaseStruct =
> tf.createStructType(
> StructKind.PEEK_FIELDS, List.of(varchar, itemsArray),
> List.of("order_id", "items"));
> RelDataType rowType =
> tf.createStructType(
> StructKind.FULLY_QUALIFIED,
> List.of(varchar, purchaseStruct),
> List.of("user_id", "purchase"));
> AbstractTable table =
> new AbstractTable() {
> @Override
> public RelDataType getRowType(RelDataTypeFactory factory) {
> return rowType;
> }
> };
> SchemaPlus root = Frameworks.createRootSchema(true);
> root.add("nested_struct_tbl", table);
> SqlParser.Config parserConfig =
>
> SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
> return
> Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
> }
> private void run(String label, String sql) {
> Planner planner = Frameworks.getPlanner(config());
> try {
> SqlNode parsed = planner.parse(sql);
> planner.validate(parsed);
> System.out.println("VANILLA[" + label + "]: OK (validated)");
> } catch (Throwable t) {
> System.out.println("VANILLA[" + label + "]: FAILED");
> for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause())
> {
> System.out.println(" cause: " + c.getClass().getName() + " : " +
> c.getMessage());
> }
> } finally {
> planner.close();
> }
> }
> /** Minimal schema: {@code s STRUCT<arr ARRAY<VARCHAR>>}; tightest possible repro. */
> private static FrameworkConfig minimalConfig() {
> RelDataTypeFactory tf = new
> JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
> RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
> RelDataType arr = tf.createArrayType(varchar, -1);
> RelDataType s =
> tf.createStructType(StructKind.PEEK_FIELDS, List.of(arr),
> List.of("arr"));
> RelDataType rowType =
> tf.createStructType(StructKind.FULLY_QUALIFIED, List.of(s),
> List.of("s"));
> AbstractTable table =
> new AbstractTable() {
> @Override
> public RelDataType getRowType(RelDataTypeFactory factory) {
> return rowType;
> }
> };
> SchemaPlus root = Frameworks.createRootSchema(true);
> root.add("t", table);
> SqlParser.Config parserConfig =
>
> SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
> return
> Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
> }
> private void runMinimal(String label, String sql) {
> Planner planner = Frameworks.getPlanner(minimalConfig());
> try {
> planner.validate(planner.parse(sql));
> System.out.println("MINIMAL[" + label + "]: OK (validated)");
> } catch (Throwable t) {
> System.out.println("MINIMAL[" + label + "]: FAILED");
> for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause())
> {
> System.out.println(" cause: " + c.getClass().getName() + " : " +
> c.getMessage());
> }
> } finally {
> planner.close();
> }
> }
> @Test
> void minimalMatrix() {
> runMinimal("aliased", "SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS
> x");
> runMinimal("unqualified", "SELECT * FROM t CROSS JOIN UNNEST(s.arr) AS
> x");
> }
> @Test
> void vanillaMatrix() {
> // Controls — expected OK in vanilla Calcite.
> run("normal-aliased", "SELECT t.purchase.order_id FROM nested_struct_tbl
> AS t");
> run("normal-unqualified", "SELECT purchase.order_id FROM
> nested_struct_tbl");
> run(
> "unnest-aliased",
> "SELECT item.name FROM nested_struct_tbl AS t "
> + "CROSS JOIN UNNEST(t.purchase.items) AS item");
> // The suspect case.
> run(
> "unnest-unqualified",
> "SELECT item.name FROM nested_struct_tbl "
> + "CROSS JOIN UNNEST(purchase.items) AS item");
> }
> }
> ```
--
This message was sent by Atlassian Jira
(v8.20.10#820010)