This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch fix-null-offset-array in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6d70a7f98195d711f7fa5fe88881e7bb8e097fd5 Author: englefly <[email protected]> AuthorDate: Tue Jun 9 11:57:42 2026 +0800 [fix](nereids) strip NULL access path when OFFSET path exists for the same field in NestedColumnPruning stripNullSuffixPaths() already contains the logic to remove [col.NULL] when [col.OFFSET] covers the same prefix (lines 834-839), but it was only called in the string-like and general branches, not in the array/map early-continue branch. This caused queries like `SELECT cardinality(arr), arr IS NULL` to emit redundant [arr.NULL] alongside [arr.OFFSET] in the access paths. Fix: move stripNullSuffixPaths() call to line 278 (before all type-specific early-continue branches) so it applies uniformly to all complex data types. Add regression tests covering array root, map root, struct string subfield, struct array subfield, and struct map subfield. --- .../nereids/rules/rewrite/NestedColumnPruning.java | 6 +- .../string_length_column_pruning.groovy | 79 +++++++++++++++++++++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 50416a29408..096b38cc0bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -275,7 +275,7 @@ public class NestedColumnPruning implements CustomRewriter { Slot slot = kv.getKey(); DataTypeAccessTree accessTree = kv.getValue(); DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); - + stripNullSuffixPaths(slot, allAccessPaths); if (slot.getDataType().isStringLikeType()) { if (accessTree.hasStringOffsetOnlyAccess()) { if (skipDataSkippingOnlyAccessPath) { @@ -284,7 +284,6 @@ public class NestedColumnPruning implements CustomRewriter { // Offset-only access (e.g. length(str_col)): type stays varchar, // but we must still send the access path to BE so it skips the char data. stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths, allAccessPaths); - stripNullSuffixPaths(slot, allAccessPaths); List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths); result.put(slot.getExprId().asInt(), new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); @@ -359,9 +358,6 @@ public class NestedColumnPruning implements CustomRewriter { // of gating this logic on the root slot type. stripCoveredOffsetSuffixPaths(slot, allAccessPaths, allAccessPaths); - // Strip NULL-suffix paths when a non-NULL path also exists for the same slot. - // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is needed, NULL path is redundant. - stripNullSuffixPaths(slot, allAccessPaths); List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths); if (shouldSkipAccessInfo(slot, prunedDataType, allPaths, predicateAccessPaths)) { continue; diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy index 48c0cb37a49..8ee676d0315 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy @@ -12,7 +12,7 @@ // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the -// specific language governing permissions and limitations +// specific language governing permissions and limitation // under the License. // Regression tests for the string-length OFFSET-only optimization. @@ -662,4 +662,81 @@ suite("string_length_column_pruning") { contains "OFFSET" } order_qt_length_varchar "select length(v) from slcp_varchar_tbl" + + // ─── OFFSET covers NULL across all complex data types ────────────────────── + // + // When both OFFSET and NULL access paths exist for the same field/subfield, + // the NULL path is redundant because the OFFSET data already provides + // nullness information for variable-length columns. stripNullSuffixPaths() + // removes [col.NULL] when [col.OFFSET] exists for the same prefix. + // + // This applies uniformly to all complex data types: array, map, struct + // subfields of string, array, and map. + + // Array root: cardinality(arr_col) -> [arr_col.OFFSET] + // arr_col IS NULL -> [arr_col.NULL] + // OFFSET covers NULL → [arr_col.NULL] must not appear. + explain { + sql "select cardinality(arr_col), arr_col is null from slcp_str_tbl" + contains "nested columns" + contains "arr_col.OFFSET" + notContains "arr_col.NULL" + } + sql "select cardinality(arr_col), arr_col is null from slcp_str_tbl" + + // Map root: cardinality(map_col) -> [map_col.OFFSET] + // map_col IS NULL -> [map_col.NULL] + // OFFSET covers NULL → [map_col.NULL] must not appear. + explain { + sql "select cardinality(map_col), map_col is null from slcp_str_tbl" + contains "nested columns" + contains "map_col.OFFSET" + notContains "map_col.NULL" + } + sql "select cardinality(map_col), map_col is null from slcp_str_tbl" + + // Struct string subfield: length(element_at(struct_col, 'f3')) -> [struct_col.f3.OFFSET] + // element_at(struct_col, 'f3') IS NULL -> [struct_col.f3.NULL] + // OFFSET covers NULL → [struct_col.f3.NULL] must not appear. + explain { + sql """select length(element_at(struct_col, 'f3')), + element_at(struct_col, 'f3') is null + from slcp_str_tbl""" + contains "nested columns" + contains "OFFSET" + notContains "struct_col.f3.NULL" + } + sql """select length(element_at(struct_col, 'f3')), + element_at(struct_col, 'f3') is null + from slcp_str_tbl""" + + // Struct array subfield: cardinality(element_at(s, 'arr')) -> [s.arr.OFFSET] + // element_at(s, 'arr') IS NULL -> [s.arr.NULL] + // OFFSET covers NULL → [s.arr.NULL] must not appear. + explain { + sql """select cardinality(element_at(s, 'arr')), + element_at(s, 'arr') is null + from slcp_struct_root_tbl""" + contains "nested columns" + contains "OFFSET" + notContains "s.arr.NULL" + } + sql """select cardinality(element_at(s, 'arr')), + element_at(s, 'arr') is null + from slcp_struct_root_tbl""" + + // Struct map subfield: cardinality(element_at(s, 'm')) -> [s.m.OFFSET] + // element_at(s, 'm') IS NULL -> [s.m.NULL] + // OFFSET covers NULL → [s.m.NULL] must not appear. + explain { + sql """select cardinality(element_at(s, 'm')), + element_at(s, 'm') is null + from slcp_struct_root_tbl""" + contains "nested columns" + contains "OFFSET" + notContains "s.m.NULL" + } + sql """select cardinality(element_at(s, 'm')), + element_at(s, 'm') is null + from slcp_struct_root_tbl""" } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
