github-actions[bot] commented on code in PR #64535:
URL: https://github.com/apache/doris/pull/64535#discussion_r3422445853
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java:
##########
@@ -488,27 +581,153 @@ private static void stripCoveredOffsetSuffixPaths(
return;
}
- List<List<String>> nonOffsetPaths = new ArrayList<>();
+ // Row 1: deeper data paths cover OFFSET (with map key
supplementation).
+ List<List<String>> dataPaths = new ArrayList<>();
for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
List<String> path = p.second;
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- nonOffsetPaths.add(path);
+ if (!path.isEmpty() && !isMetaPath(path)) {
+ dataPaths.add(path);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> path = p.second;
+ if (!path.isEmpty() && !isMetaPath(path)) {
+ dataPaths.add(path);
+ }
+ }
+ stripCoveredOffsetByPaths(slot, targetAccessPaths, dataPaths);
+
+ // Rows 2+3: deeper OFFSET/NULL paths cover shallower OFFSET.
+ // Merge meta paths from covering + target so both inbound and
intra-target
+ // coverage are checked.
+ List<List<String>> deeperMetaPaths = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
+ if (!p.second.isEmpty() && isMetaPath(p.second)) {
+ deeperMetaPaths.add(p.second);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ if (!p.second.isEmpty() && isMetaPath(p.second)) {
+ deeperMetaPaths.add(p.second);
+ }
+ }
+ stripCoveredMetaByPrefix(slot.getDataType(), slotId,
AccessPathInfo.ACCESS_OFFSET,
+ deeperMetaPaths, targetAccessPaths);
+ }
+
+ /**
+ * Level 2 — deeper paths cover shallower NULL paths:
+ * <ul>
+ * <li>Deeper {@code Data}: any deeper non-meta path whose prefix
strictly
+ * contains the NULL path's prefix strips it.</li>
+ * <li>Deeper {@code OFFSET} / {@code NULL}: delegates to
+ * {@link #stripCoveredMetaByPrefix}.</li>
+ * </ul>
+ */
+ private static void stripShallowerNullPaths(
+ Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> allAccessPaths) {
+ int slotId = slot.getExprId().asInt();
+ Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths =
allAccessPaths.get(slotId);
+ if (slotPaths.isEmpty()) {
+ return;
+ }
+
+ // Row 1: deeper data paths cover shallower NULL paths.
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+ List<String> path = p.second;
+ if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ for (Pair<ColumnAccessPathType, List<String>> q : slotPaths) {
+ List<String> other = q.second;
+ if (other == path || other.isEmpty() || isMetaPath(other)) {
+ continue;
+ }
+ // [a] strips [a, NULL]; [a, b, c] strips [a, b, NULL].
+ if (other.equals(prefix) || hasStrictPrefix(other, prefix)) {
+ toRemove.add(p);
+ break;
+ }
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> r : toRemove) {
+ allAccessPaths.remove(slotId, r);
+ }
+
+ // Rows 2+3: deeper OFFSET/NULL paths cover shallower NULL paths.
+ List<List<String>> metaPaths = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+ if (!p.second.isEmpty() && isMetaPath(p.second)) {
+ metaPaths.add(p.second);
}
}
+ stripCoveredMetaByPrefix(slot.getDataType(), slotId,
AccessPathInfo.ACCESS_NULL,
+ metaPaths, allAccessPaths);
+ }
+
+ /**
+ * Level 2 — for each target path ending with {@code targetSuffix}, remove
it
+ * when a strictly deeper meta path (ending with OFFSET or NULL) has the
target
+ * prefix as a strict prefix.
+ *
+ * <p>Both target and covering paths have their meta suffix stripped before
+ * comparison, so only genuinely deeper paths match. Same-depth cross-type
+ * (e.g. {@code [a, OFFSET]} vs {@code [a, NULL]}) is handled by
+ * {@link #stripNullBySameDepthOffset} instead.
+ */
+ private static void stripCoveredMetaByPrefix(
+ DataType slotType, int slotId, String targetSuffix,
+ List<List<String>> coveringMetaPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
+ targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty() || coveringMetaPaths.isEmpty()) {
+ return;
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
List<String> path = p.second;
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- nonOffsetPaths.add(path);
+ if (path.isEmpty() || !targetSuffix.equals(path.get(path.size() -
1))) {
+ continue;
}
+ List<String> targetPrefix = path.subList(0, path.size() - 1);
+ for (List<String> other : coveringMetaPaths) {
+ if (other == path || other.isEmpty()) {
+ continue;
+ }
+ List<String> otherPrefix = other.subList(0, other.size() - 1);
+ // Use type-aware comparison so that * ≡ VALUES
+ // (and * ≡ KEYS) at map positions are recognized.
+ OffsetPathRewrite rewrite = compareOffsetPrefixCoverage(
+ slotType, targetPrefix, otherPrefix);
Review Comment:
This is the reverse direction of the existing thread about `[a, OFFSET]` plus a
deeper `NULL` path: here a shallower meta path can delete a deeper, required meta path.
Reduced plan:
```text
Project(a IS NULL, cardinality(element_at(a, 1)))
OlapScan(a: ARRAY<ARRAY<INT>>)
```
The collector emits `[a, NULL]` for `a IS NULL` and `[a, *, OFFSET]` for
`cardinality(element_at(a, 1))`. In `stripShallowerOffsetPaths`, the target
prefix for `[a, *, OFFSET]` is `[a, *]`, while `coveringMetaPaths` also
contains the shallower `[a, NULL]`. After both meta suffixes are stripped, this
call becomes `compareOffsetPrefixCoverage(slotType, [a, *], [a])`, which returns
a "remove" result after matching only the slot-name prefix, because the helper
was written for data/full coverage and allows shorter covering paths.
That removes `[a, *, OFFSET]` before `stripShallowerNullPaths` runs; with no
deeper path left, `[a, NULL]` remains. BE then strips the slot name, sees
`[NULL]` in `ArrayFileColumnIterator::_check_and_set_meta_read_mode`, enters
`NULL_MAP_ONLY`, and skips the item iterator, so `cardinality(element_at(a,
1))` is evaluated from default-filled inner arrays instead of the real inner
offsets.
Please make this meta-vs-meta check require the covering meta prefix to be
strictly deeper than the target prefix (using type-aware depth so the earlier
`*` vs `VALUES` fix still works), and add a unit test for the reverse
combination above (shallower `[a, NULL]` together with deeper `[a, *, OFFSET]`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]