From 18c6f7026816b71ad911d10848ff282d7f6194e4 Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 15 Jun 2026 18:15:36 +0800 Subject: [PATCH 1/4] fix-26175 and refacotry --- .../AccessPathExpressionCollector.java | 6 +- .../nereids/rules/rewrite/AccessPathInfo.java | 2 +- .../rewrite/AccessPathPlanCollector.java | 2 +- .../rules/rewrite/NestedColumnPruning.java | 137 +++++++++++------- .../rules/rewrite/PruneNestedColumnTest.java | 27 ++++ 5 files changed, 120 insertions(+), 54 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java index fac90248986897..68869fc0615eb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java @@ -205,7 +205,7 @@ public Void visitLength(Length length, CollectorContext context) { && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); return arg.accept(this, offsetContext); } // fall through to default (recurse into children with fresh contexts) @@ -219,7 +219,7 @@ public Void visitMapSize(MapSize mapSize, CollectorContext context) { if (argType.isMapType() && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); return arg.accept(this, offsetContext); } return visit(mapSize, context); @@ -234,7 +234,7 @@ public Void visitCardinality(Cardinality cardinality, CollectorContext context) if ((argType.isArrayType() || argType.isMapType()) && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); // cardinality(map_keys(m)) == cardinality(m) == cardinality(map_values(m)): // all three count map entries, so emit the same [map_col, OFFSET] path. Expression effectiveArg = (arg instanceof MapKeys || arg instanceof MapValues) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java index 0b361c2276030c..8656aaacef7dc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java @@ -29,7 +29,7 @@ public class AccessPathInfo { public static final String ACCESS_MAP_VALUES = "VALUES"; // Suffix appended to a string-column path to indicate that only the offset array // (not the char data) is needed — agreed with BE as the special path component name. - public static final String ACCESS_STRING_OFFSET = "OFFSET"; + public static final String ACCESS_OFFSET = "OFFSET"; // Suffix appended to a column path to indicate that only the null flag // (not the actual data) is needed — used when the column is only accessed via IS NULL / IS NOT NULL. public static final String ACCESS_NULL = "NULL"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java index 6d592f584d26cb..b502b6baf917a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java @@ -417,6 +417,6 @@ private static boolean isDataSkippingOnlyAccessPath(List path) { } String lastComponent = path.get(path.size() - 1); return AccessPathInfo.ACCESS_NULL.equals(lastComponent) - || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent); + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 50416a2940816b..3d7edb0c3c513c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -204,7 +204,7 @@ private static boolean expressionContainsNullCheck(Expression expr) { private static Map pruneDataType( Map> slotToAccessPaths, - boolean skipDataSkippingOnlyAccessPath) { + boolean ignoreMetaPath) { Map result = new LinkedHashMap<>(); Map slotIdToAllAccessTree = new LinkedHashMap<>(); Map slotIdToPredicateAccessTree = new LinkedHashMap<>(); @@ -236,8 +236,7 @@ private static Map pruneDataType( } continue; } - if (skipDataSkippingOnlyAccessPath - && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { + if (ignoreMetaPath && containsMetaPath(collectAccessPathResults)) { // An MV rewrite child context optimizes a temporary plan fragment rather // than the final plan. A nested metadata-only path such as // [s, city, NULL] or [s, city, OFFSET] would otherwise prune the scan slot @@ -278,7 +277,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { if (slot.getDataType().isStringLikeType()) { if (accessTree.hasStringOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (ignoreMetaPath) { continue; } // Offset-only access (e.g. length(str_col)): type stays varchar, @@ -289,7 +288,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { result.put(slot.getExprId().asInt(), new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); } else if (accessTree.hasNullCheckOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (ignoreMetaPath) { continue; } // Null-check-only access (e.g. str_col IS NULL): type stays varchar, @@ -304,7 +303,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { if ((slot.getDataType().isArrayType() || slot.getDataType().isMapType()) && accessTree.hasStringOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (ignoreMetaPath) { continue; } // Offset-only access (e.g. length(arr_col) / length(map_col)): type stays unchanged, @@ -318,7 +317,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { // Null-check-only access (e.g. col IS NULL / col IS NOT NULL): type stays unchanged, // but we must send the [col, NULL] access path to BE so it only reads the null flag. if (accessTree.hasNullCheckOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (ignoreMetaPath) { continue; } List allPaths = buildColumnAccessPaths(slot, allAccessPaths); @@ -328,7 +327,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { } if (slot.getDataType().isMapType() && accessTree.hasMapValueOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (ignoreMetaPath) { continue; } // length(map_col['key']): keys read in full (element lookup) + values offset-only. @@ -340,7 +339,7 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { ColumnAccessPath valsOffsetColumnPath = ColumnAccessPath.data( new ArrayList<>(ImmutableList.of(colName, AccessPathInfo.ACCESS_MAP_VALUES, - AccessPathInfo.ACCESS_STRING_OFFSET))); + AccessPathInfo.ACCESS_OFFSET))); result.put(slot.getExprId().asInt(), new AccessPathInfo( slot.getDataType(), @@ -409,23 +408,23 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { return result; } - private static boolean containsDataSkippingOnlyAccessPath( + private static boolean containsMetaPath( List collectAccessPathResults) { for (CollectAccessPathResult collectAccessPathResult : collectAccessPathResults) { - if (isDataSkippingOnlyAccessPath(collectAccessPathResult.getPath())) { + if (isMetaPath(collectAccessPathResult.getPath())) { return true; } } return false; } - private static boolean isDataSkippingOnlyAccessPath(List path) { + private static boolean isMetaPath(List path) { if (path.isEmpty()) { return false; } String lastComponent = path.get(path.size() - 1); return AccessPathInfo.ACCESS_NULL.equals(lastComponent) - || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent); + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); } /** @@ -440,11 +439,20 @@ private static boolean isDataSkippingOnlyAccessPath(List path) { private static OffsetPathRewrite analyzeOffsetPathRewrite( DataType slotType, List path, List> nonOffsetPaths) { if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { + || !AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1))) { return OffsetPathRewrite.keep(); } List prefix = path.subList(0, path.size() - 1); - return analyzePrefixCoverage(slotType, prefix, nonOffsetPaths); + // Filter out the path itself to prevent self-coverage removal. + // A deeper OFFSET path (e.g. [aa, *, OFFSET]) covering a shallower one + // (e.g. [aa, OFFSET]) is still handled — they are different paths. + List> filteredNonOffsetPaths = new ArrayList<>(); + for (List p : nonOffsetPaths) { + if (!p.equals(path)) { + filteredNonOffsetPaths.add(p); + } + } + return analyzePrefixCoverage(slotType, prefix, filteredNonOffsetPaths); } private static OffsetPathRewrite analyzePrefixCoverage( @@ -491,15 +499,30 @@ private static void stripCoveredOffsetSuffixPaths( List> nonOffsetPaths = new ArrayList<>(); for (Pair> p : coveringAccessPaths.get(slotId)) { List path = p.second; - if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { + if (path.isEmpty()) { + continue; + } + boolean isOffset = AccessPathInfo.ACCESS_OFFSET.equals( + path.get(path.size() - 1)); + if (!isOffset) { + nonOffsetPaths.add(path); + } else if (path.size() > 2) { + // Deeper OFFSET paths that traverse into array/map items (e.g. + // [aa, *, OFFSET]) must read through the container structure, + // therefore they cover the container's own [aa, OFFSET] path. nonOffsetPaths.add(path); } } for (Pair> p : targetPaths) { List path = p.second; - if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { + if (path.isEmpty()) { + continue; + } + boolean isOffset = AccessPathInfo.ACCESS_OFFSET.equals( + path.get(path.size() - 1)); + if (!isOffset) { + nonOffsetPaths.add(path); + } else if (path.size() > 2) { nonOffsetPaths.add(path); } } @@ -605,12 +628,12 @@ private static void stripExactCoveredDataSkippingSuffixPaths( List> fullAccessPaths = new ArrayList<>(); for (Pair> p : coveringAccessPaths.get(slotId)) { - if (!isDataSkippingOnlyAccessPath(p.second)) { + if (!isMetaPath(p.second)) { fullAccessPaths.add(p.second); } } for (Pair> p : targetPaths) { - if (!isDataSkippingOnlyAccessPath(p.second)) { + if (!isMetaPath(p.second)) { fullAccessPaths.add(p.second); } } @@ -618,7 +641,7 @@ private static void stripExactCoveredDataSkippingSuffixPaths( List>> pathsToRemove = new ArrayList<>(); for (Pair> p : targetPaths) { List path = p.second; - if (!isDataSkippingOnlyAccessPath(path)) { + if (!isMetaPath(path)) { continue; } List prefix = path.subList(0, path.size() - 1); @@ -819,8 +842,21 @@ private static void stripNullSuffixPaths( boolean covered = false; for (Pair> q : slotPaths) { List other = q.second; - if (other.isEmpty() - || AccessPathInfo.ACCESS_NULL.equals(other.get(other.size() - 1))) { + if (other.isEmpty()) { + continue; + } + if (other == path) { + continue; + } + if (AccessPathInfo.ACCESS_NULL.equals(other.get(other.size() - 1))) { + // A deeper NULL path (e.g. [a, *, NULL]) covers a shallower one + // (e.g. [a, NULL]) because reading the deeper path requires + // traversing the container, which materializes the container's + // null information. + if (hasStrictPrefix(other, prefix)) { + covered = true; + break; + } continue; } if (other.equals(prefix)) { @@ -832,7 +868,7 @@ private static void stripNullSuffixPaths( break; } if (other.size() == prefix.size() + 1 - && AccessPathInfo.ACCESS_STRING_OFFSET.equals(other.get(other.size() - 1)) + && AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1)) && other.subList(0, prefix.size()).equals(prefix)) { covered = true; break; @@ -941,15 +977,18 @@ public static class DataTypeAccessTree { // if access 's.a.b' the node 's' and 'a' has accessPartialChild, and node 'b' has accessAll private boolean accessPartialChild; private boolean accessAll; - // True when this string-typed node is accessed ONLY via the offset array - // (e.g. length(str_col) or length(element_at(c_struct,'f3'))). - // When this flag is set and accessAll is NOT set, pruneDataType() returns BigIntType - // to signal that the BE only needs to read the offset array, not the chars data. - private boolean isStringOffsetOnly; - // True when this column node is accessed ONLY via IS NULL / IS NOT NULL. - // When this flag is set and accessAll is NOT set, the BE only needs to read the null flag, - // not the actual column data. - private boolean isNullCheckOnly; + // Cached marker set by setAccessByPath() when a path component is OFFSET. + // Avoids scanning the multimap: hasStringOffsetOnlyAccess() reads this flag in + // O(1) instead of checking every path for an OFFSET suffix. Used for array, map, + // and string-like types (they share offset-based storage in BE). + // When set without accessAll, pruneDataType() keeps the node's type so that BE + // reads only the offset structure, skipping element / key-value / chars data. + private boolean hasOffsetPath; + // Cached marker set by setAccessByPath() when a path component is NULL. + // Same purpose as hasOffsetPath — O(1) flag read instead of multimap scan + // in hasNullCheckOnlyAccess(). When set without accessAll, BE reads only the + // null bitmap, skipping actual column data. + private boolean hasNullPath; // for the future, only access the meta of the column, // e.g. `is not null` can only access the column's offset, not need to read the data private ColumnAccessPathType pathType; @@ -1012,7 +1051,7 @@ public boolean hasMapValueOffsetOnlyAccess() { return false; } // Values must be accessed offset-only (no deeper element reads). - if (!valsChild.isStringOffsetOnly || valsChild.accessAll) { + if (!valsChild.hasOffsetPath || valsChild.accessAll) { return false; } if (valsChild.type.isStringLikeType()) { @@ -1034,7 +1073,7 @@ public boolean hasMapValueOffsetOnlyAccess() { public boolean hasStringOffsetOnlyAccess() { if (isRoot) { DataTypeAccessTree child = children.values().iterator().next(); - if (!child.isStringOffsetOnly || child.accessAll) { + if (!child.hasOffsetPath || child.accessAll) { return false; } if (child.type.isStringLikeType()) { @@ -1054,7 +1093,7 @@ public boolean hasStringOffsetOnlyAccess() { } return false; } - return type.isStringLikeType() && isStringOffsetOnly && !accessAll; + return type.isStringLikeType() && hasOffsetPath && !accessAll; } /** True when the column is accessed ONLY via IS NULL / IS NOT NULL, @@ -1062,10 +1101,10 @@ public boolean hasStringOffsetOnlyAccess() { public boolean hasNullCheckOnlyAccess() { if (isRoot) { DataTypeAccessTree child = children.values().iterator().next(); - return child.isNullCheckOnly && !child.accessAll - && !child.isStringOffsetOnly && !child.accessPartialChild; + return child.hasNullPath && !child.accessAll + && !child.hasOffsetPath && !child.accessPartialChild; } - return isNullCheckOnly && !accessAll && !isStringOffsetOnly && !accessPartialChild; + return hasNullPath && !accessAll && !hasOffsetPath && !accessPartialChild; } /** pruneCastType */ @@ -1163,7 +1202,7 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath // Mark null-check-only and return without setting accessAll or accessPartialChild, // so that parent nodes can distinguish "null-only leaf" from "has real sub-access". if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_NULL)) { - isNullCheckOnly = true; + hasNullPath = true; return; } @@ -1180,9 +1219,9 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath } return; } else if (this.type.isArrayType()) { - if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { + if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) { // length(array_col) — only the offset array is needed, not element data. - isStringOffsetOnly = true; + hasOffsetPath = true; return; } DataTypeAccessTree child = children.get(AccessPathInfo.ACCESS_ALL); @@ -1193,9 +1232,9 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath return; } else if (this.type.isMapType()) { String fieldName = path.get(accessIndex); - if (fieldName.equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { + if (fieldName.equals(AccessPathInfo.ACCESS_OFFSET)) { // length(map_col) — only the offset array is needed, not key/value data. - isStringOffsetOnly = true; + hasOffsetPath = true; return; } if (fieldName.equals(AccessPathInfo.ACCESS_ALL)) { @@ -1225,8 +1264,8 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath } else if (type.isStringLikeType()) { // String leaf accessed via the offset array (e.g. path ends in "offset"). // Mark offset-only so pruneDataType() can return BigIntType instead of full data. - if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { - isStringOffsetOnly = true; + if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) { + hasOffsetPath = true; return; // do NOT set accessAll — offset-only is distinguishable from full access } // Any other sub-path on a string column means full data is needed. @@ -1269,10 +1308,10 @@ public Optional pruneDataType() { return children.values().iterator().next().pruneDataType(); } else if (accessAll) { return Optional.of(type); - } else if (isStringOffsetOnly && !accessPartialChild) { + } else if (hasOffsetPath && !accessPartialChild) { // Only the offset array is accessed (e.g. length(str_col)). return Optional.of(type); - } else if (isNullCheckOnly && !accessPartialChild) { + } else if (hasNullPath && !accessPartialChild) { // Only the null flag is accessed (e.g. col IS NULL / element_at(s,'f') IS NULL). // Return the node's type so that parent nodes include this child in their pruned type, // while the access path (ending in NULL) tells BE to skip actual data reading. diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index a50d4cdddc2b04..1190c160710ed9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -199,6 +199,33 @@ public void testCardinalityArrayElementKeepsOffsetPath() throws Exception { ImmutableList.of(path("a", "*"))); } + @Test + public void testDeeperOffsetPathCoversShallowerOffsetPath() throws Exception { + // cardinality(a) on ARRAY> generates [a, OFFSET] + // cardinality(element_at(a, 1)) generates [a, *, OFFSET] + // [a, *, OFFSET] traverses into the outer array's items, so it must read + // through the outer array structure. The shallower [a, OFFSET] is therefore + // redundant and should be stripped. + assertAllAccessPathsContain( + "select cardinality(a), cardinality(element_at(a, 1)) from nested_array_tbl", + ImmutableList.of(path("a", "*", "OFFSET")), + ImmutableList.of(path("a", "OFFSET"))); + } + + @Test + public void testDeeperNullPathCoversShallowerNullPath() throws Exception { + // a IS NULL on ARRAY> generates [a, NULL] + // element_at(a, 1) IS NULL generates [a, *, NULL] + // [a, *, NULL] traverses into the outer array's items, which requires + // reading the outer array's null bitmap. Therefore [a, NULL] is redundant + // and should be stripped. + assertAllAccessPathsContain( + "select a is null, element_at(a, 1) is null from nested_array_tbl", + ImmutableList.of(path("a", "*", "NULL")), + ImmutableList.of(path("a", "NULL"))); + } + + @Test public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl", From 319573970f51e2199f58626aa03ce660d5cc9386 Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 15 Jun 2026 18:46:13 +0800 Subject: [PATCH 2/4] [refactor](fe) Consolidate nested meta path cleanup ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: Consolidate duplicated nested column metadata-only access handling for string, array, and map columns. The refactor routes NULL/OFFSET-only access through one branch and centralizes covered metadata suffix cleanup in a helper that documents each strip case and its implementation path. OFFSET cleanup is split by coverage type so data-path coverage and deeper-OFFSET coverage have separate maintenance boundaries. It also adds a regression case for OFFSET plus NULL on the same prefix and removes an extra blank line in the related test file so FE checkstyle passes. ### Release note None ### Check List (For Author) - Test: Unit Test - ./run-fe-ut.sh --run org.apache.doris.nereids.rules.rewrite.PruneNestedColumnTest - cd fe && mvn checkstyle:check -pl fe-core - Behavior changed: No - Does this need documentation: No refactory accessTree.hasStringOffsetOnlyAccess() and other type merge --- .../rules/rewrite/NestedColumnPruning.java | 206 ++++++++++-------- .../rules/rewrite/PruneNestedColumnTest.java | 9 +- 2 files changed, 123 insertions(+), 92 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 3d7edb0c3c513c..c7715adc25b7c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -275,54 +275,19 @@ private static Map pruneDataType( DataTypeAccessTree accessTree = kv.getValue(); DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); - if (slot.getDataType().isStringLikeType()) { - if (accessTree.hasStringOffsetOnlyAccess()) { - if (ignoreMetaPath) { - continue; - } - // Offset-only access (e.g. length(str_col)): type stays varchar, - // but we must still send the access path to BE so it skips the char data. - stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths, allAccessPaths); - stripNullSuffixPaths(slot, allAccessPaths); - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); - } else if (accessTree.hasNullCheckOnlyAccess()) { - if (ignoreMetaPath) { - continue; - } - // Null-check-only access (e.g. str_col IS NULL): type stays varchar, - // but we send [col, NULL] access path so BE only reads the null flag. - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); - } - // direct access (accessAll=true) or other: skip — no type change, no access paths needed. - continue; - } - - if ((slot.getDataType().isArrayType() || slot.getDataType().isMapType()) - && accessTree.hasStringOffsetOnlyAccess()) { + if (accessTree.hasOffsetOnlyAccess() || accessTree.hasNullCheckOnlyAccess()) { if (ignoreMetaPath) { continue; } - // Offset-only access (e.g. length(arr_col) / length(map_col)): type stays unchanged, - // but we must send the OFFSET access path to BE so it skips element/key-value data. + stripCoveredMetaSuffixPaths(slot, allAccessPaths, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); result.put(slot.getExprId().asInt(), new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); continue; } - // Null-check-only access (e.g. col IS NULL / col IS NOT NULL): type stays unchanged, - // but we must send the [col, NULL] access path to BE so it only reads the null flag. - if (accessTree.hasNullCheckOnlyAccess()) { - if (ignoreMetaPath) { - continue; - } - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); + if (slot.getDataType().isStringLikeType()) { + // direct access (accessAll=true) or other: skip — no type change, no access paths needed. continue; } @@ -348,19 +313,7 @@ private static Map pruneDataType( continue; } - // If a field is read in full, its metadata-only NULL/OFFSET access is redundant - // for any data type: e.g. [s] covers both [s.NULL] and [s.OFFSET]. - stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths, allAccessPaths); - - // Strip OFFSET-suffix paths when a non-OFFSET path covers the same nested field or - // container. The overlapping array/map container may live under the root slot itself - // or under a nested struct field, so compare against the actual nested prefix instead - // of gating this logic on the root slot type. - stripCoveredOffsetSuffixPaths(slot, allAccessPaths, allAccessPaths); - - // Strip NULL-suffix paths when a non-NULL path also exists for the same slot. - // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is needed, NULL path is redundant. - stripNullSuffixPaths(slot, allAccessPaths); + stripCoveredMetaSuffixPaths(slot, allAccessPaths, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); if (shouldSkipAccessInfo(slot, prunedDataType, allPaths, predicateAccessPaths)) { continue; @@ -379,10 +332,7 @@ private static Map pruneDataType( // third: build predicate access path for (Entry kv : slotIdToPredicateAccessTree.entrySet()) { Slot slot = kv.getKey(); - stripExactCoveredDataSkippingSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripCoveredOffsetSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripCoveredArrayNullSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripNullSuffixPaths(slot, predicateAccessPaths); + stripCoveredMetaSuffixPaths(slot, predicateAccessPaths, allAccessPaths); List predicatePaths = buildColumnAccessPaths(slot, predicateAccessPaths); AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt()); @@ -428,8 +378,51 @@ private static boolean isMetaPath(List path) { } /** - * Decide whether an OFFSET-suffix path can be removed because another non-OFFSET path - * already covers the same container. + * Strip redundant metadata-only NULL/OFFSET paths while keeping enough real paths for BE readers. + * + *

The strip cases are: + *

    + *
  • Full/data access covers metadata access, e.g. {@code [a]} removes + * {@code [a, NULL]} and {@code [a, OFFSET]}. This is handled by + * {@link #stripExactCoveredDataSkippingSuffixPaths}.
  • + *
  • A deeper data access covers an upper metadata path, e.g. {@code [a, b, c]} + * removes {@code [a, b, NULL]}, and {@code [a, *, field]} removes + * {@code [a, OFFSET]}. Exact NULL/data coverage is handled by + * {@link #stripNullSuffixPaths}; OFFSET/container coverage is handled by + * {@link #stripCoveredOffsetByDataPaths}.
  • + *
  • A deeper OFFSET access covers an upper OFFSET path, e.g. + * {@code [a, *, OFFSET]} removes {@code [a, OFFSET]}. This is handled by + * {@link #stripCoveredOffsetByDeeperOffsetPaths}.
  • + *
  • OFFSET access covers NULL access with the same prefix, e.g. + * {@code [a, OFFSET]} removes {@code [a, NULL]}. This is handled by + * {@link #stripNullSuffixPaths}.
  • + *
  • A deeper NULL access covers an upper NULL path, e.g. + * {@code [a, *, NULL]} removes {@code [a, NULL]}. This is handled by + * {@link #stripNullSuffixPaths}.
  • + *
  • Map value-side coverage may need a supplemental key path, e.g. + * {@code [m, *, OFFSET]} plus {@code [m, VALUES]} becomes + * {@code [m, KEYS]} plus {@code [m, VALUES]}. This is handled by + * {@link #stripCoveredOffsetByDataPaths} via {@link #compareOffsetPrefixCoverage} + * and {@link #buildMapKeysOnlyPath}.
  • + *
  • Array NULL-only paths covered by value/data access may also need supplemental + * map keys, e.g. {@code [m, *, NULL]} plus {@code [m, VALUES, *, field]} + * becomes {@code [m, KEYS]} plus {@code [m, VALUES, *, field]}. This is + * handled by {@link #stripCoveredArrayNullSuffixPaths}.
  • + *
+ */ + private static void stripCoveredMetaSuffixPaths( + Slot slot, Multimap>> targetAccessPaths, + Multimap>> coveringAccessPaths) { + stripExactCoveredDataSkippingSuffixPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredOffsetByDataPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredOffsetByDeeperOffsetPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredArrayNullSuffixPaths(slot, targetAccessPaths, coveringAccessPaths); + stripNullSuffixPaths(slot, targetAccessPaths); + } + + /** + * Decide whether an OFFSET-suffix path can be removed because another path already covers + * the same container. * *

For map element_at paths, {@code *} means "read keys fully, then follow the rest of * the path on the value side". So a VALUES path can cover the value-side OFFSET access, @@ -437,7 +430,7 @@ private static boolean isMetaPath(List path) { * and add a KEYS-only path instead. */ private static OffsetPathRewrite analyzeOffsetPathRewrite( - DataType slotType, List path, List> nonOffsetPaths) { + DataType slotType, List path, List> coveringPaths) { if (path.isEmpty() || !AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1))) { return OffsetPathRewrite.keep(); @@ -446,20 +439,20 @@ private static OffsetPathRewrite analyzeOffsetPathRewrite( // Filter out the path itself to prevent self-coverage removal. // A deeper OFFSET path (e.g. [aa, *, OFFSET]) covering a shallower one // (e.g. [aa, OFFSET]) is still handled — they are different paths. - List> filteredNonOffsetPaths = new ArrayList<>(); - for (List p : nonOffsetPaths) { + List> filteredCoveringPaths = new ArrayList<>(); + for (List p : coveringPaths) { if (!p.equals(path)) { - filteredNonOffsetPaths.add(p); + filteredCoveringPaths.add(p); } } - return analyzePrefixCoverage(slotType, prefix, filteredNonOffsetPaths); + return analyzePrefixCoverage(slotType, prefix, filteredCoveringPaths); } private static OffsetPathRewrite analyzePrefixCoverage( - DataType slotType, List prefix, List> nonOffsetPaths) { + DataType slotType, List prefix, List> coveringPaths) { List> supplementalPaths = new ArrayList<>(); - for (List nonOffset : nonOffsetPaths) { - OffsetPathRewrite candidate = compareOffsetPrefixCoverage(slotType, prefix, nonOffset); + for (List coveringPath : coveringPaths) { + OffsetPathRewrite candidate = compareOffsetPrefixCoverage(slotType, prefix, coveringPath); if (!candidate.shouldRemoveOffsetPath()) { continue; } @@ -475,7 +468,7 @@ private static OffsetPathRewrite analyzePrefixCoverage( } /** - * Remove OFFSET-only paths from {@code targetAccessPaths} when data paths in + * Remove OFFSET-only paths from {@code targetAccessPaths} when non-meta data paths in * {@code coveringAccessPaths} already read the same array/map/string container or a child * under it. * @@ -487,7 +480,7 @@ private static OffsetPathRewrite analyzePrefixCoverage( * {@code map['k']} still needs full keys for lookup, while values cover the offset. * */ - private static void stripCoveredOffsetSuffixPaths( + private static void stripCoveredOffsetByDataPaths( Slot slot, Multimap>> targetAccessPaths, Multimap>> coveringAccessPaths) { int slotId = slot.getExprId().asInt(); @@ -496,42 +489,73 @@ private static void stripCoveredOffsetSuffixPaths( return; } - List> nonOffsetPaths = new ArrayList<>(); + List> dataPaths = new ArrayList<>(); for (Pair> p : coveringAccessPaths.get(slotId)) { List path = p.second; - if (path.isEmpty()) { - continue; - } - boolean isOffset = AccessPathInfo.ACCESS_OFFSET.equals( - path.get(path.size() - 1)); - if (!isOffset) { - nonOffsetPaths.add(path); - } else if (path.size() > 2) { - // Deeper OFFSET paths that traverse into array/map items (e.g. - // [aa, *, OFFSET]) must read through the container structure, - // therefore they cover the container's own [aa, OFFSET] path. - nonOffsetPaths.add(path); + if (!path.isEmpty() && !isMetaPath(path)) { + dataPaths.add(path); } } for (Pair> p : targetPaths) { List path = p.second; - if (path.isEmpty()) { - continue; + if (!path.isEmpty() && !isMetaPath(path)) { + dataPaths.add(path); } - boolean isOffset = AccessPathInfo.ACCESS_OFFSET.equals( - path.get(path.size() - 1)); - if (!isOffset) { - nonOffsetPaths.add(path); - } else if (path.size() > 2) { - nonOffsetPaths.add(path); + } + + stripCoveredOffsetByPaths(slot, targetAccessPaths, dataPaths); + } + + /** + * Remove upper OFFSET-only paths when a deeper OFFSET path traverses the same container. + * + *

Example: {@code [a, *, OFFSET]} removes {@code [a, OFFSET]} because reading the inner + * array offset requires traversing the outer array structure. + */ + private static void stripCoveredOffsetByDeeperOffsetPaths( + Slot slot, Multimap>> targetAccessPaths, + Multimap>> coveringAccessPaths) { + int slotId = slot.getExprId().asInt(); + Collection>> targetPaths = targetAccessPaths.get(slotId); + if (targetPaths.isEmpty()) { + return; + } + + List> deeperOffsetPaths = new ArrayList<>(); + for (Pair> p : coveringAccessPaths.get(slotId)) { + List path = p.second; + if (isDeeperOffsetPath(path)) { + deeperOffsetPaths.add(path); + } + } + for (Pair> p : targetPaths) { + List path = p.second; + if (isDeeperOffsetPath(path)) { + deeperOffsetPaths.add(path); } } + stripCoveredOffsetByPaths(slot, targetAccessPaths, deeperOffsetPaths); + } + + private static boolean isDeeperOffsetPath(List path) { + return path.size() > 2 && AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1)); + } + + private static void stripCoveredOffsetByPaths( + Slot slot, Multimap>> targetAccessPaths, + List> coveringPaths) { + int slotId = slot.getExprId().asInt(); + Collection>> targetPaths = targetAccessPaths.get(slotId); + if (targetPaths.isEmpty() || coveringPaths.isEmpty()) { + return; + } + List>> pathsToRemove = new ArrayList<>(); List>> pathsToAdd = new ArrayList<>(); for (Pair> p : new ArrayList<>(targetPaths)) { OffsetPathRewrite rewrite = analyzeOffsetPathRewrite( - slot.getDataType(), p.second, nonOffsetPaths); + slot.getDataType(), p.second, coveringPaths); if (!rewrite.shouldRemoveOffsetPath()) { continue; } @@ -1070,7 +1094,7 @@ public boolean hasMapValueOffsetOnlyAccess() { /** True when the column is accessed ONLY via the offset array (e.g. length(str_col), * length(arr_col), length(map_col)), meaning the type must not change but an access * path still needs to be sent to BE so it can skip the char/element data. */ - public boolean hasStringOffsetOnlyAccess() { + public boolean hasOffsetOnlyAccess() { if (isRoot) { DataTypeAccessTree child = children.values().iterator().next(); if (!child.hasOffsetPath || child.accessAll) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index 1190c160710ed9..3a0ae14073979f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -212,6 +212,14 @@ public void testDeeperOffsetPathCoversShallowerOffsetPath() throws Exception { ImmutableList.of(path("a", "OFFSET"))); } + @Test + public void testOffsetPathCoversNullPathWithSamePrefix() throws Exception { + assertAllAccessPathsContain( + "select cardinality(a), a is null from nested_array_tbl", + ImmutableList.of(path("a", "OFFSET")), + ImmutableList.of(path("a", "NULL"))); + } + @Test public void testDeeperNullPathCoversShallowerNullPath() throws Exception { // a IS NULL on ARRAY> generates [a, NULL] @@ -225,7 +233,6 @@ public void testDeeperNullPathCoversShallowerNullPath() throws Exception { ImmutableList.of(path("a", "NULL"))); } - @Test public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl", From f7f8355c622cc48b7664848ae5219790c5dbe0c8 Mon Sep 17 00:00:00 2001 From: englefly Date: Tue, 16 Jun 2026 07:39:40 +0800 Subject: [PATCH 3/4] [fix](fe) Normalize nested map value offset paths ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: length(map_col['key']) was normalized from [map_col, *, OFFSET] to [map_col, KEYS] and [map_col, VALUES, OFFSET], but the logic only handled root MAP columns. For nested maps such as length(element_at(s, 'm')['key']), FE kept [s, m, *, OFFSET], which BE cannot interpret as split key/value access. This change recursively detects map element lookups whose value side is offset-only and normalizes both root and nested map paths before the common metadata strip logic runs. ### Release note None ### Check List (For Author) - Test: Unit Test - cd fe && mvn checkstyle:check -pl fe-core - ./run-fe-ut.sh --run org.apache.doris.nereids.rules.rewrite.PruneNestedColumnTest#testNestedMapElementLengthKeepsValueOffsetPath - ./run-fe-ut.sh --run org.apache.doris.nereids.rules.rewrite.PruneNestedColumnTest - Behavior changed: No - Does this need documentation: No --- .../rules/rewrite/NestedColumnPruning.java | 107 ++++++++++++------ .../rules/rewrite/PruneNestedColumnTest.java | 8 ++ 2 files changed, 83 insertions(+), 32 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index c7715adc25b7c4..7301e8ac73ce6e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -274,6 +274,7 @@ private static Map pruneDataType( Slot slot = kv.getKey(); DataTypeAccessTree accessTree = kv.getValue(); DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); + normalizeMapValueOffsetOnlyAccessPaths(slot, accessTree, allAccessPaths); if (accessTree.hasOffsetOnlyAccess() || accessTree.hasNullCheckOnlyAccess()) { if (ignoreMetaPath) { @@ -291,28 +292,6 @@ private static Map pruneDataType( continue; } - if (slot.getDataType().isMapType() && accessTree.hasMapValueOffsetOnlyAccess()) { - if (ignoreMetaPath) { - continue; - } - // length(map_col['key']): keys read in full (element lookup) + values offset-only. - // Emit [col, KEYS] and [col, VALUES, OFFSET] directly instead of the collected - // [col, *, OFFSET] path which the BE cannot interpret for split key/value access. - String colName = slot.getName().toLowerCase(); - ColumnAccessPath keysColumnPath = ColumnAccessPath.data( - new ArrayList<>(ImmutableList.of(colName, AccessPathInfo.ACCESS_MAP_KEYS))); - - ColumnAccessPath valsOffsetColumnPath = ColumnAccessPath.data( - new ArrayList<>(ImmutableList.of(colName, AccessPathInfo.ACCESS_MAP_VALUES, - AccessPathInfo.ACCESS_OFFSET))); - - result.put(slot.getExprId().asInt(), new AccessPathInfo( - slot.getDataType(), - ImmutableList.of(keysColumnPath, valsOffsetColumnPath), - new ArrayList<>())); - continue; - } - stripCoveredMetaSuffixPaths(slot, allAccessPaths, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); if (shouldSkipAccessInfo(slot, prunedDataType, allPaths, predicateAccessPaths)) { @@ -332,6 +311,7 @@ private static Map pruneDataType( // third: build predicate access path for (Entry kv : slotIdToPredicateAccessTree.entrySet()) { Slot slot = kv.getKey(); + normalizeMapValueOffsetOnlyAccessPaths(slot, kv.getValue(), predicateAccessPaths); stripCoveredMetaSuffixPaths(slot, predicateAccessPaths, allAccessPaths); List predicatePaths = buildColumnAccessPaths(slot, predicateAccessPaths); @@ -377,11 +357,52 @@ private static boolean isMetaPath(List path) { || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); } + private static void normalizeMapValueOffsetOnlyAccessPaths( + Slot slot, DataTypeAccessTree accessTree, + Multimap>> accessPaths) { + int slotId = slot.getExprId().asInt(); + List> mapPrefixes = accessTree.collectMapValueOffsetOnlyAccessPaths(); + if (mapPrefixes.isEmpty()) { + return; + } + + Collection>> slotPaths = accessPaths.get(slotId); + List>> pathsToRemove = new ArrayList<>(); + List>> pathsToAdd = new ArrayList<>(); + for (List mapPrefix : mapPrefixes) { + List starOffsetPath = new ArrayList<>(mapPrefix); + starOffsetPath.add(AccessPathInfo.ACCESS_ALL); + starOffsetPath.add(AccessPathInfo.ACCESS_OFFSET); + + for (Pair> p : slotPaths) { + if (!p.second.equals(starOffsetPath)) { + continue; + } + pathsToRemove.add(p); + + List keysPath = new ArrayList<>(mapPrefix); + keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS); + pathsToAdd.add(Pair.of(p.first, keysPath)); + + List valuesOffsetPath = new ArrayList<>(mapPrefix); + valuesOffsetPath.add(AccessPathInfo.ACCESS_MAP_VALUES); + valuesOffsetPath.add(AccessPathInfo.ACCESS_OFFSET); + pathsToAdd.add(Pair.of(p.first, valuesOffsetPath)); + } + } + slotPaths.removeAll(pathsToRemove); + slotPaths.addAll(pathsToAdd); + } + /** * Strip redundant metadata-only NULL/OFFSET paths while keeping enough real paths for BE readers. * *

The strip cases are: *

    + *
  • Map element lookup with value offset-only access is normalized first, e.g. + * {@code [m, *, OFFSET]} becomes {@code [m, KEYS]} plus + * {@code [m, VALUES, OFFSET]}. This is handled by + * {@link #normalizeMapValueOffsetOnlyAccessPaths}.
  • *
  • Full/data access covers metadata access, e.g. {@code [a]} removes * {@code [a, NULL]} and {@code [a, OFFSET]}. This is handled by * {@link #stripExactCoveredDataSkippingSuffixPaths}.
  • @@ -1055,21 +1076,43 @@ public Map getChildren() { } /** - * True when a MAP column is accessed as {@code length(map_col['key'])}: the keys must - * be read in full (for the element lookup) while the values only need the offset array - * (since only their length, not their content, is used). - * Expected access paths: [col, KEYS] and [col, VALUES, OFFSET]. + * Collect MAP nodes accessed as {@code length(map_col['key'])}: the keys must be read + * in full for the element lookup, while the values only need the offset array since only + * their length is used. Expected access paths for each returned prefix: + * [prefix, KEYS] and [prefix, VALUES, OFFSET]. */ - public boolean hasMapValueOffsetOnlyAccess() { + public List> collectMapValueOffsetOnlyAccessPaths() { + List> mapPrefixes = new ArrayList<>(); if (!isRoot) { - return false; + collectMapValueOffsetOnlyAccessPaths(new ArrayList<>(), mapPrefixes); + return mapPrefixes; } - DataTypeAccessTree child = children.values().iterator().next(); - if (!child.type.isMapType() || child.accessAll) { + for (Entry child : children.entrySet()) { + List path = new ArrayList<>(); + path.add(child.getKey()); + child.getValue().collectMapValueOffsetOnlyAccessPaths(path, mapPrefixes); + } + return mapPrefixes; + } + + private void collectMapValueOffsetOnlyAccessPaths( + List currentPath, List> mapPrefixes) { + if (isMapValueOffsetOnlyAccess()) { + mapPrefixes.add(new ArrayList<>(currentPath)); + } + for (Entry child : children.entrySet()) { + List childPath = new ArrayList<>(currentPath); + childPath.add(child.getKey()); + child.getValue().collectMapValueOffsetOnlyAccessPaths(childPath, mapPrefixes); + } + } + + private boolean isMapValueOffsetOnlyAccess() { + if (!type.isMapType() || accessAll) { return false; } - DataTypeAccessTree keysChild = child.children.get(AccessPathInfo.ACCESS_MAP_KEYS); - DataTypeAccessTree valsChild = child.children.get(AccessPathInfo.ACCESS_MAP_VALUES); + DataTypeAccessTree keysChild = children.get(AccessPathInfo.ACCESS_MAP_KEYS); + DataTypeAccessTree valsChild = children.get(AccessPathInfo.ACCESS_MAP_VALUES); // Keys must be fully accessed (element-at lookup). if (!keysChild.accessAll) { return false; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index 3a0ae14073979f..9280081c1af9f6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -241,6 +241,14 @@ public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { ImmutableList.of()); } + @Test + public void testNestedMapElementLengthKeepsValueOffsetPath() throws Exception { + assertColumn("select length(element_at(element_at(s, 'm'), 'a')) from nested_container_tbl", + "struct>", + ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "OFFSET")), + ImmutableList.of()); + } + @Test public void testFullFieldAccessStripsExactDataSkippingPath() throws Exception { assertColumn("select element_at(s, 'city') from tbl " From df569935e53a729fa99ea1ded241b3666a0fb7ff Mon Sep 17 00:00:00 2001 From: englefly Date: Tue, 16 Jun 2026 12:44:27 +0800 Subject: [PATCH 4/4] refactory --- .../rules/rewrite/NestedColumnPruning.java | 129 ++++++++++-------- .../rules/rewrite/PruneNestedColumnTest.java | 12 +- 2 files changed, 85 insertions(+), 56 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 7301e8ac73ce6e..d99339cd90dc1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -274,9 +274,9 @@ private static Map pruneDataType( Slot slot = kv.getKey(); DataTypeAccessTree accessTree = kv.getValue(); DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); - normalizeMapValueOffsetOnlyAccessPaths(slot, accessTree, allAccessPaths); + normalizeMapValueMetaOnlyAccessPaths(slot, accessTree, allAccessPaths); - if (accessTree.hasOffsetOnlyAccess() || accessTree.hasNullCheckOnlyAccess()) { + if (accessTree.hasOffsetOnlyAccess() || accessTree.hasNullOnlyAccess()) { if (ignoreMetaPath) { continue; } @@ -311,7 +311,7 @@ private static Map pruneDataType( // third: build predicate access path for (Entry kv : slotIdToPredicateAccessTree.entrySet()) { Slot slot = kv.getKey(); - normalizeMapValueOffsetOnlyAccessPaths(slot, kv.getValue(), predicateAccessPaths); + normalizeMapValueMetaOnlyAccessPaths(slot, kv.getValue(), predicateAccessPaths); stripCoveredMetaSuffixPaths(slot, predicateAccessPaths, allAccessPaths); List predicatePaths = buildColumnAccessPaths(slot, predicateAccessPaths); @@ -357,11 +357,28 @@ private static boolean isMetaPath(List path) { || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); } - private static void normalizeMapValueOffsetOnlyAccessPaths( + private static void normalizeMapValueMetaOnlyAccessPaths( Slot slot, DataTypeAccessTree accessTree, Multimap>> accessPaths) { int slotId = slot.getExprId().asInt(); - List> mapPrefixes = accessTree.collectMapValueOffsetOnlyAccessPaths(); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_OFFSET), + accessPaths, AccessPathInfo.ACCESS_OFFSET); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_NULL), + accessPaths, AccessPathInfo.ACCESS_NULL); + } + + /** + * Normalize map value meta-only (OFFSET or NULL) star access paths for a single meta type. + *

    For each map prefix where {@code [prefix, *, META]} exists, replaces it with + * {@code [prefix, KEYS]} plus {@code [prefix, VALUES, META]}, so that keys are read fully + * for element lookup while values only read the metadata. + */ + private static void normalizeMapValueMetaPathHelper( + int slotId, List> mapPrefixes, + Multimap>> accessPaths, + String metaSuffix) { if (mapPrefixes.isEmpty()) { return; } @@ -370,12 +387,12 @@ private static void normalizeMapValueOffsetOnlyAccessPaths( List>> pathsToRemove = new ArrayList<>(); List>> pathsToAdd = new ArrayList<>(); for (List mapPrefix : mapPrefixes) { - List starOffsetPath = new ArrayList<>(mapPrefix); - starOffsetPath.add(AccessPathInfo.ACCESS_ALL); - starOffsetPath.add(AccessPathInfo.ACCESS_OFFSET); + List starMetaPath = new ArrayList<>(mapPrefix); + starMetaPath.add(AccessPathInfo.ACCESS_ALL); + starMetaPath.add(metaSuffix); for (Pair> p : slotPaths) { - if (!p.second.equals(starOffsetPath)) { + if (!p.second.equals(starMetaPath)) { continue; } pathsToRemove.add(p); @@ -384,10 +401,10 @@ private static void normalizeMapValueOffsetOnlyAccessPaths( keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS); pathsToAdd.add(Pair.of(p.first, keysPath)); - List valuesOffsetPath = new ArrayList<>(mapPrefix); - valuesOffsetPath.add(AccessPathInfo.ACCESS_MAP_VALUES); - valuesOffsetPath.add(AccessPathInfo.ACCESS_OFFSET); - pathsToAdd.add(Pair.of(p.first, valuesOffsetPath)); + List valuesMetaPath = new ArrayList<>(mapPrefix); + valuesMetaPath.add(AccessPathInfo.ACCESS_MAP_VALUES); + valuesMetaPath.add(metaSuffix); + pathsToAdd.add(Pair.of(p.first, valuesMetaPath)); } } slotPaths.removeAll(pathsToRemove); @@ -399,10 +416,11 @@ private static void normalizeMapValueOffsetOnlyAccessPaths( * *

    The strip cases are: *

      - *
    • Map element lookup with value offset-only access is normalized first, e.g. - * {@code [m, *, OFFSET]} becomes {@code [m, KEYS]} plus - * {@code [m, VALUES, OFFSET]}. This is handled by - * {@link #normalizeMapValueOffsetOnlyAccessPaths}.
    • + *
    • Map element lookup with value meta-only (OFFSET or NULL) access is normalized first, + * e.g. {@code [m, *, OFFSET]} becomes {@code [m, KEYS]} plus + * {@code [m, VALUES, OFFSET]}, and {@code [m, *, NULL]} becomes + * {@code [m, KEYS]} plus {@code [m, VALUES, NULL]}. This is handled by + * {@link #normalizeMapValueMetaOnlyAccessPaths}.
    • *
    • Full/data access covers metadata access, e.g. {@code [a]} removes * {@code [a, NULL]} and {@code [a, OFFSET]}. This is handled by * {@link #stripExactCoveredDataSkippingSuffixPaths}.
    • @@ -1076,38 +1094,40 @@ public Map getChildren() { } /** - * Collect MAP nodes accessed as {@code length(map_col['key'])}: the keys must be read - * in full for the element lookup, while the values only need the offset array since only - * their length is used. Expected access paths for each returned prefix: - * [prefix, KEYS] and [prefix, VALUES, OFFSET]. + * Collect MAP nodes where keys are fully accessed for element lookup while values + * only need metadata (OFFSET or NULL). Expected access paths for each returned prefix: + * [prefix, KEYS] and [prefix, VALUES, metaSuffix]. + * + * @param metaSuffix {@link AccessPathInfo#ACCESS_OFFSET} or + * {@link AccessPathInfo#ACCESS_NULL} */ - public List> collectMapValueOffsetOnlyAccessPaths() { + public List> collectMapValueMetaOnlyAccessPaths(String metaSuffix) { List> mapPrefixes = new ArrayList<>(); if (!isRoot) { - collectMapValueOffsetOnlyAccessPaths(new ArrayList<>(), mapPrefixes); + collectMapValueMetaOnlyAccessPaths(new ArrayList<>(), mapPrefixes, metaSuffix); return mapPrefixes; } for (Entry child : children.entrySet()) { List path = new ArrayList<>(); path.add(child.getKey()); - child.getValue().collectMapValueOffsetOnlyAccessPaths(path, mapPrefixes); + child.getValue().collectMapValueMetaOnlyAccessPaths(path, mapPrefixes, metaSuffix); } return mapPrefixes; } - private void collectMapValueOffsetOnlyAccessPaths( - List currentPath, List> mapPrefixes) { - if (isMapValueOffsetOnlyAccess()) { + private void collectMapValueMetaOnlyAccessPaths( + List currentPath, List> mapPrefixes, String metaSuffix) { + if (isMapValueMetaOnlyAccess(metaSuffix)) { mapPrefixes.add(new ArrayList<>(currentPath)); } for (Entry child : children.entrySet()) { List childPath = new ArrayList<>(currentPath); childPath.add(child.getKey()); - child.getValue().collectMapValueOffsetOnlyAccessPaths(childPath, mapPrefixes); + child.getValue().collectMapValueMetaOnlyAccessPaths(childPath, mapPrefixes, metaSuffix); } } - private boolean isMapValueOffsetOnlyAccess() { + private boolean isMapValueMetaOnlyAccess(String metaSuffix) { if (!type.isMapType() || accessAll) { return false; } @@ -1117,16 +1137,17 @@ private boolean isMapValueOffsetOnlyAccess() { if (!keysChild.accessAll) { return false; } - // Values must be accessed offset-only (no deeper element reads). - if (!valsChild.hasOffsetPath || valsChild.accessAll) { + boolean hasMeta = AccessPathInfo.ACCESS_OFFSET.equals(metaSuffix) + ? valsChild.hasOffsetPath + : valsChild.hasNullPath; + if (!hasMeta || valsChild.accessAll) { return false; } if (valsChild.type.isStringLikeType()) { - // String value: accessAll check above is sufficient. return true; } if (valsChild.type.isArrayType()) { - // Array value (e.g. MAP>): verify no element was read directly + // Array value: verify no element was read directly // (e.g. map_col['k'][0] would set allChild.accessAll=true). DataTypeAccessTree allChild = valsChild.children.get(AccessPathInfo.ACCESS_ALL); return !allChild.accessAll && !allChild.accessPartialChild; @@ -1138,34 +1159,34 @@ private boolean isMapValueOffsetOnlyAccess() { * length(arr_col), length(map_col)), meaning the type must not change but an access * path still needs to be sent to BE so it can skip the char/element data. */ public boolean hasOffsetOnlyAccess() { + DataTypeAccessTree selfOrRootChild = this; if (isRoot) { - DataTypeAccessTree child = children.values().iterator().next(); - if (!child.hasOffsetPath || child.accessAll) { - return false; - } - if (child.type.isStringLikeType()) { - return true; - } - if (child.type.isArrayType()) { - // True only if no element was accessed (element_at / explode etc.) - DataTypeAccessTree allChild = child.children.get(AccessPathInfo.ACCESS_ALL); - return !allChild.accessAll && !allChild.accessPartialChild; - } - if (child.type.isMapType()) { - // True only if neither keys nor values were accessed directly - DataTypeAccessTree keysChild = child.children.get(AccessPathInfo.ACCESS_MAP_KEYS); - DataTypeAccessTree valsChild = child.children.get(AccessPathInfo.ACCESS_MAP_VALUES); - return !keysChild.accessAll && !keysChild.accessPartialChild - && !valsChild.accessAll && !valsChild.accessPartialChild; - } + selfOrRootChild = children.values().iterator().next(); + } + if (!selfOrRootChild.hasOffsetPath || selfOrRootChild.accessAll) { return false; } - return type.isStringLikeType() && hasOffsetPath && !accessAll; + if (selfOrRootChild.type.isStringLikeType()) { + return true; + } + if (selfOrRootChild.type.isArrayType()) { + // True only if no element was accessed (element_at / explode etc.) + DataTypeAccessTree allChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_ALL); + return !allChild.accessAll && !allChild.accessPartialChild; + } + if (selfOrRootChild.type.isMapType()) { + // True only if neither keys nor values were accessed directly + DataTypeAccessTree keysChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_MAP_KEYS); + DataTypeAccessTree valsChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_MAP_VALUES); + return !keysChild.accessAll && !keysChild.accessPartialChild + && !valsChild.accessAll && !valsChild.accessPartialChild; + } + return false; } /** True when the column is accessed ONLY via IS NULL / IS NOT NULL, * meaning the BE only needs to read the null flag, not the actual data. */ - public boolean hasNullCheckOnlyAccess() { + public boolean hasNullOnlyAccess() { if (isRoot) { DataTypeAccessTree child = children.values().iterator().next(); return child.hasNullPath && !child.accessAll diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index 9280081c1af9f6..bbad4085677be2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -249,6 +249,14 @@ public void testNestedMapElementLengthKeepsValueOffsetPath() throws Exception { ImmutableList.of()); } + @Test + public void testNestedMapElementIsNullKeepsValueIsNullPath() throws Exception { + assertColumn("select (element_at(element_at(s, 'm'), 'a')) is null from nested_container_tbl", + "struct>", + ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "NULL")), + ImmutableList.of()); + } + @Test public void testFullFieldAccessStripsExactDataSkippingPath() throws Exception { assertColumn("select element_at(s, 'city') from tbl " @@ -652,8 +660,8 @@ public void testFilter() throws Throwable { ); assertColumn("select 100 from tbl where element_at(s, 'data')[1][1] is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "*", "NULL")), - ImmutableList.of(path("s", "data", "*", "*", "NULL")) + ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "NULL")), + ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "NULL")) ); assertColumn("select 100 from tbl where element_at(element_at(s, 'data')[1][1], 'a') is not null", "struct>>>",