diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java index fac90248986897..68869fc0615eb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java @@ -205,7 +205,7 @@ public Void visitLength(Length length, CollectorContext context) { && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); return arg.accept(this, offsetContext); } // fall through to default (recurse into children with fresh contexts) @@ -219,7 +219,7 @@ public Void visitMapSize(MapSize mapSize, CollectorContext context) { if (argType.isMapType() && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); return arg.accept(this, offsetContext); } return visit(mapSize, context); @@ -234,7 +234,7 @@ public Void visitCardinality(Cardinality cardinality, CollectorContext context) if ((argType.isArrayType() || argType.isMapType()) && context.accessPathBuilder.isEmpty()) { CollectorContext offsetContext = new CollectorContext(context.statementContext, context.bottomFilter); - offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET); + offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET); // cardinality(map_keys(m)) == cardinality(m) == cardinality(map_values(m)): // all three count map entries, so emit the same [map_col, OFFSET] path. Expression effectiveArg = (arg instanceof MapKeys || arg instanceof MapValues) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java index 0b361c2276030c..8656aaacef7dc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java @@ -29,7 +29,7 @@ public class AccessPathInfo { public static final String ACCESS_MAP_VALUES = "VALUES"; // Suffix appended to a string-column path to indicate that only the offset array // (not the char data) is needed — agreed with BE as the special path component name. - public static final String ACCESS_STRING_OFFSET = "OFFSET"; + public static final String ACCESS_OFFSET = "OFFSET"; // Suffix appended to a column path to indicate that only the null flag // (not the actual data) is needed — used when the column is only accessed via IS NULL / IS NOT NULL. public static final String ACCESS_NULL = "NULL"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java index 6d592f584d26cb..b502b6baf917a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java @@ -417,6 +417,6 @@ private static boolean isDataSkippingOnlyAccessPath(List path) { } String lastComponent = path.get(path.size() - 1); return AccessPathInfo.ACCESS_NULL.equals(lastComponent) - || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent); + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 50416a2940816b..d99339cd90dc1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -204,7 +204,7 @@ private static boolean expressionContainsNullCheck(Expression expr) { private static Map pruneDataType( Map> slotToAccessPaths, - boolean skipDataSkippingOnlyAccessPath) { + boolean ignoreMetaPath) { Map result = new LinkedHashMap<>(); Map slotIdToAllAccessTree = new LinkedHashMap<>(); Map slotIdToPredicateAccessTree = new LinkedHashMap<>(); @@ -236,8 +236,7 @@ private static Map pruneDataType( } continue; } - if (skipDataSkippingOnlyAccessPath - && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { + if (ignoreMetaPath && containsMetaPath(collectAccessPathResults)) { // An MV rewrite child context optimizes a temporary plan fragment rather // than the final plan. A nested metadata-only path such as // [s, city, NULL] or [s, city, OFFSET] would otherwise prune the scan slot @@ -275,93 +274,25 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { Slot slot = kv.getKey(); DataTypeAccessTree accessTree = kv.getValue(); DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); + normalizeMapValueMetaOnlyAccessPaths(slot, accessTree, allAccessPaths); - if (slot.getDataType().isStringLikeType()) { - if (accessTree.hasStringOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { - continue; - } - // Offset-only access (e.g. length(str_col)): type stays varchar, - // but we must still send the access path to BE so it skips the char data. - stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths, allAccessPaths); - stripNullSuffixPaths(slot, allAccessPaths); - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); - } else if (accessTree.hasNullCheckOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { - continue; - } - // Null-check-only access (e.g. str_col IS NULL): type stays varchar, - // but we send [col, NULL] access path so BE only reads the null flag. - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); - } - // direct access (accessAll=true) or other: skip — no type change, no access paths needed. - continue; - } - - if ((slot.getDataType().isArrayType() || slot.getDataType().isMapType()) - && accessTree.hasStringOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { - continue; - } - // Offset-only access (e.g. length(arr_col) / length(map_col)): type stays unchanged, - // but we must send the OFFSET access path to BE so it skips element/key-value data. - List allPaths = buildColumnAccessPaths(slot, allAccessPaths); - result.put(slot.getExprId().asInt(), - new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); - continue; - } - - // Null-check-only access (e.g. col IS NULL / col IS NOT NULL): type stays unchanged, - // but we must send the [col, NULL] access path to BE so it only reads the null flag. - if (accessTree.hasNullCheckOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { + if (accessTree.hasOffsetOnlyAccess() || accessTree.hasNullOnlyAccess()) { + if (ignoreMetaPath) { continue; } + stripCoveredMetaSuffixPaths(slot, allAccessPaths, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); result.put(slot.getExprId().asInt(), new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); continue; } - if (slot.getDataType().isMapType() && accessTree.hasMapValueOffsetOnlyAccess()) { - if (skipDataSkippingOnlyAccessPath) { - continue; - } - // length(map_col['key']): keys read in full (element lookup) + values offset-only. - // Emit [col, KEYS] and [col, VALUES, OFFSET] directly instead of the collected - // [col, *, OFFSET] path which the BE cannot interpret for split key/value access. - String colName = slot.getName().toLowerCase(); - ColumnAccessPath keysColumnPath = ColumnAccessPath.data( - new ArrayList<>(ImmutableList.of(colName, AccessPathInfo.ACCESS_MAP_KEYS))); - - ColumnAccessPath valsOffsetColumnPath = ColumnAccessPath.data( - new ArrayList<>(ImmutableList.of(colName, AccessPathInfo.ACCESS_MAP_VALUES, - AccessPathInfo.ACCESS_STRING_OFFSET))); - - result.put(slot.getExprId().asInt(), new AccessPathInfo( - slot.getDataType(), - ImmutableList.of(keysColumnPath, valsOffsetColumnPath), - new ArrayList<>())); + if (slot.getDataType().isStringLikeType()) { + // direct access (accessAll=true) or other: skip — no type change, no access paths needed. continue; } - // If a field is read in full, its metadata-only NULL/OFFSET access is redundant - // for any data type: e.g. [s] covers both [s.NULL] and [s.OFFSET]. - stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths, allAccessPaths); - - // Strip OFFSET-suffix paths when a non-OFFSET path covers the same nested field or - // container. The overlapping array/map container may live under the root slot itself - // or under a nested struct field, so compare against the actual nested prefix instead - // of gating this logic on the root slot type. - stripCoveredOffsetSuffixPaths(slot, allAccessPaths, allAccessPaths); - - // Strip NULL-suffix paths when a non-NULL path also exists for the same slot. - // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is needed, NULL path is redundant. - stripNullSuffixPaths(slot, allAccessPaths); + stripCoveredMetaSuffixPaths(slot, allAccessPaths, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); if (shouldSkipAccessInfo(slot, prunedDataType, allPaths, predicateAccessPaths)) { continue; @@ -380,10 +311,8 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { // third: build predicate access path for (Entry kv : slotIdToPredicateAccessTree.entrySet()) { Slot slot = kv.getKey(); - stripExactCoveredDataSkippingSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripCoveredOffsetSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripCoveredArrayNullSuffixPaths(slot, predicateAccessPaths, allAccessPaths); - stripNullSuffixPaths(slot, predicateAccessPaths); + normalizeMapValueMetaOnlyAccessPaths(slot, kv.getValue(), predicateAccessPaths); + stripCoveredMetaSuffixPaths(slot, predicateAccessPaths, allAccessPaths); List predicatePaths = buildColumnAccessPaths(slot, predicateAccessPaths); AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt()); @@ -409,28 +338,130 @@ && containsDataSkippingOnlyAccessPath(collectAccessPathResults)) { return result; } - private static boolean containsDataSkippingOnlyAccessPath( + private static boolean containsMetaPath( List collectAccessPathResults) { for (CollectAccessPathResult collectAccessPathResult : collectAccessPathResults) { - if (isDataSkippingOnlyAccessPath(collectAccessPathResult.getPath())) { + if (isMetaPath(collectAccessPathResult.getPath())) { return true; } } return false; } - private static boolean isDataSkippingOnlyAccessPath(List path) { + private static boolean isMetaPath(List path) { if (path.isEmpty()) { return false; } String lastComponent = path.get(path.size() - 1); return AccessPathInfo.ACCESS_NULL.equals(lastComponent) - || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent); + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); + } + + private static void normalizeMapValueMetaOnlyAccessPaths( + Slot slot, DataTypeAccessTree accessTree, + Multimap>> accessPaths) { + int slotId = slot.getExprId().asInt(); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_OFFSET), + accessPaths, AccessPathInfo.ACCESS_OFFSET); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_NULL), + accessPaths, AccessPathInfo.ACCESS_NULL); } /** - * Decide whether an OFFSET-suffix path can be removed because another non-OFFSET path - * already covers the same container. + * Normalize map value meta-only (OFFSET or NULL) star access paths for a single meta type. + *

For each map prefix where {@code [prefix, *, META]} exists, replaces it with + * {@code [prefix, KEYS]} plus {@code [prefix, VALUES, META]}, so that keys are read fully + * for element lookup while values only read the metadata. + */ + private static void normalizeMapValueMetaPathHelper( + int slotId, List> mapPrefixes, + Multimap>> accessPaths, + String metaSuffix) { + if (mapPrefixes.isEmpty()) { + return; + } + + Collection>> slotPaths = accessPaths.get(slotId); + List>> pathsToRemove = new ArrayList<>(); + List>> pathsToAdd = new ArrayList<>(); + for (List mapPrefix : mapPrefixes) { + List starMetaPath = new ArrayList<>(mapPrefix); + starMetaPath.add(AccessPathInfo.ACCESS_ALL); + starMetaPath.add(metaSuffix); + + for (Pair> p : slotPaths) { + if (!p.second.equals(starMetaPath)) { + continue; + } + pathsToRemove.add(p); + + List keysPath = new ArrayList<>(mapPrefix); + keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS); + pathsToAdd.add(Pair.of(p.first, keysPath)); + + List valuesMetaPath = new ArrayList<>(mapPrefix); + valuesMetaPath.add(AccessPathInfo.ACCESS_MAP_VALUES); + valuesMetaPath.add(metaSuffix); + pathsToAdd.add(Pair.of(p.first, valuesMetaPath)); + } + } + slotPaths.removeAll(pathsToRemove); + slotPaths.addAll(pathsToAdd); + } + + /** + * Strip redundant metadata-only NULL/OFFSET paths while keeping enough real paths for BE readers. + * + *

The strip cases are: + *

    + *
  • Map element lookup with value meta-only (OFFSET or NULL) access is normalized first, + * e.g. {@code [m, *, OFFSET]} becomes {@code [m, KEYS]} plus + * {@code [m, VALUES, OFFSET]}, and {@code [m, *, NULL]} becomes + * {@code [m, KEYS]} plus {@code [m, VALUES, NULL]}. This is handled by + * {@link #normalizeMapValueMetaOnlyAccessPaths}.
  • + *
  • Full/data access covers metadata access, e.g. {@code [a]} removes + * {@code [a, NULL]} and {@code [a, OFFSET]}. This is handled by + * {@link #stripExactCoveredDataSkippingSuffixPaths}.
  • + *
  • A deeper data access covers an upper metadata path, e.g. {@code [a, b, c]} + * removes {@code [a, b, NULL]}, and {@code [a, *, field]} removes + * {@code [a, OFFSET]}. Exact NULL/data coverage is handled by + * {@link #stripNullSuffixPaths}; OFFSET/container coverage is handled by + * {@link #stripCoveredOffsetByDataPaths}.
  • + *
  • A deeper OFFSET access covers an upper OFFSET path, e.g. + * {@code [a, *, OFFSET]} removes {@code [a, OFFSET]}. This is handled by + * {@link #stripCoveredOffsetByDeeperOffsetPaths}.
  • + *
  • OFFSET access covers NULL access with the same prefix, e.g. + * {@code [a, OFFSET]} removes {@code [a, NULL]}. This is handled by + * {@link #stripNullSuffixPaths}.
  • + *
  • A deeper NULL access covers an upper NULL path, e.g. + * {@code [a, *, NULL]} removes {@code [a, NULL]}. This is handled by + * {@link #stripNullSuffixPaths}.
  • + *
  • Map value-side coverage may need a supplemental key path, e.g. + * {@code [m, *, OFFSET]} plus {@code [m, VALUES]} becomes + * {@code [m, KEYS]} plus {@code [m, VALUES]}. This is handled by + * {@link #stripCoveredOffsetByDataPaths} via {@link #compareOffsetPrefixCoverage} + * and {@link #buildMapKeysOnlyPath}.
  • + *
  • Array NULL-only paths covered by value/data access may also need supplemental + * map keys, e.g. {@code [m, *, NULL]} plus {@code [m, VALUES, *, field]} + * becomes {@code [m, KEYS]} plus {@code [m, VALUES, *, field]}. This is + * handled by {@link #stripCoveredArrayNullSuffixPaths}.
  • + *
+ */ + private static void stripCoveredMetaSuffixPaths( + Slot slot, Multimap>> targetAccessPaths, + Multimap>> coveringAccessPaths) { + stripExactCoveredDataSkippingSuffixPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredOffsetByDataPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredOffsetByDeeperOffsetPaths(slot, targetAccessPaths, coveringAccessPaths); + stripCoveredArrayNullSuffixPaths(slot, targetAccessPaths, coveringAccessPaths); + stripNullSuffixPaths(slot, targetAccessPaths); + } + + /** + * Decide whether an OFFSET-suffix path can be removed because another path already covers + * the same container. * *

For map element_at paths, {@code *} means "read keys fully, then follow the rest of * the path on the value side". So a VALUES path can cover the value-side OFFSET access, @@ -438,20 +469,29 @@ private static boolean isDataSkippingOnlyAccessPath(List path) { * and add a KEYS-only path instead. */ private static OffsetPathRewrite analyzeOffsetPathRewrite( - DataType slotType, List path, List> nonOffsetPaths) { + DataType slotType, List path, List> coveringPaths) { if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { + || !AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1))) { return OffsetPathRewrite.keep(); } List prefix = path.subList(0, path.size() - 1); - return analyzePrefixCoverage(slotType, prefix, nonOffsetPaths); + // Filter out the path itself to prevent self-coverage removal. + // A deeper OFFSET path (e.g. [aa, *, OFFSET]) covering a shallower one + // (e.g. [aa, OFFSET]) is still handled — they are different paths. + List> filteredCoveringPaths = new ArrayList<>(); + for (List p : coveringPaths) { + if (!p.equals(path)) { + filteredCoveringPaths.add(p); + } + } + return analyzePrefixCoverage(slotType, prefix, filteredCoveringPaths); } private static OffsetPathRewrite analyzePrefixCoverage( - DataType slotType, List prefix, List> nonOffsetPaths) { + DataType slotType, List prefix, List> coveringPaths) { List> supplementalPaths = new ArrayList<>(); - for (List nonOffset : nonOffsetPaths) { - OffsetPathRewrite candidate = compareOffsetPrefixCoverage(slotType, prefix, nonOffset); + for (List coveringPath : coveringPaths) { + OffsetPathRewrite candidate = compareOffsetPrefixCoverage(slotType, prefix, coveringPath); if (!candidate.shouldRemoveOffsetPath()) { continue; } @@ -467,7 +507,7 @@ private static OffsetPathRewrite analyzePrefixCoverage( } /** - * Remove OFFSET-only paths from {@code targetAccessPaths} when data paths in + * Remove OFFSET-only paths from {@code targetAccessPaths} when non-meta data paths in * {@code coveringAccessPaths} already read the same array/map/string container or a child * under it. * @@ -479,7 +519,7 @@ private static OffsetPathRewrite analyzePrefixCoverage( * {@code map['k']} still needs full keys for lookup, while values cover the offset. * */ - private static void stripCoveredOffsetSuffixPaths( + private static void stripCoveredOffsetByDataPaths( Slot slot, Multimap>> targetAccessPaths, Multimap>> coveringAccessPaths) { int slotId = slot.getExprId().asInt(); @@ -488,27 +528,73 @@ private static void stripCoveredOffsetSuffixPaths( return; } - List> nonOffsetPaths = new ArrayList<>(); + List> dataPaths = new ArrayList<>(); for (Pair> p : coveringAccessPaths.get(slotId)) { List path = p.second; - if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { - nonOffsetPaths.add(path); + if (!path.isEmpty() && !isMetaPath(path)) { + dataPaths.add(path); } } for (Pair> p : targetPaths) { List path = p.second; - if (path.isEmpty() - || !AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) { - nonOffsetPaths.add(path); + if (!path.isEmpty() && !isMetaPath(path)) { + dataPaths.add(path); } } + stripCoveredOffsetByPaths(slot, targetAccessPaths, dataPaths); + } + + /** + * Remove upper OFFSET-only paths when a deeper OFFSET path traverses the same container. + * + *

Example: {@code [a, *, OFFSET]} removes {@code [a, OFFSET]} because reading the inner + * array offset requires traversing the outer array structure. + */ + private static void stripCoveredOffsetByDeeperOffsetPaths( + Slot slot, Multimap>> targetAccessPaths, + Multimap>> coveringAccessPaths) { + int slotId = slot.getExprId().asInt(); + Collection>> targetPaths = targetAccessPaths.get(slotId); + if (targetPaths.isEmpty()) { + return; + } + + List> deeperOffsetPaths = new ArrayList<>(); + for (Pair> p : coveringAccessPaths.get(slotId)) { + List path = p.second; + if (isDeeperOffsetPath(path)) { + deeperOffsetPaths.add(path); + } + } + for (Pair> p : targetPaths) { + List path = p.second; + if (isDeeperOffsetPath(path)) { + deeperOffsetPaths.add(path); + } + } + + stripCoveredOffsetByPaths(slot, targetAccessPaths, deeperOffsetPaths); + } + + private static boolean isDeeperOffsetPath(List path) { + return path.size() > 2 && AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1)); + } + + private static void stripCoveredOffsetByPaths( + Slot slot, Multimap>> targetAccessPaths, + List> coveringPaths) { + int slotId = slot.getExprId().asInt(); + Collection>> targetPaths = targetAccessPaths.get(slotId); + if (targetPaths.isEmpty() || coveringPaths.isEmpty()) { + return; + } + List>> pathsToRemove = new ArrayList<>(); List>> pathsToAdd = new ArrayList<>(); for (Pair> p : new ArrayList<>(targetPaths)) { OffsetPathRewrite rewrite = analyzeOffsetPathRewrite( - slot.getDataType(), p.second, nonOffsetPaths); + slot.getDataType(), p.second, coveringPaths); if (!rewrite.shouldRemoveOffsetPath()) { continue; } @@ -605,12 +691,12 @@ private static void stripExactCoveredDataSkippingSuffixPaths( List> fullAccessPaths = new ArrayList<>(); for (Pair> p : coveringAccessPaths.get(slotId)) { - if (!isDataSkippingOnlyAccessPath(p.second)) { + if (!isMetaPath(p.second)) { fullAccessPaths.add(p.second); } } for (Pair> p : targetPaths) { - if (!isDataSkippingOnlyAccessPath(p.second)) { + if (!isMetaPath(p.second)) { fullAccessPaths.add(p.second); } } @@ -618,7 +704,7 @@ private static void stripExactCoveredDataSkippingSuffixPaths( List>> pathsToRemove = new ArrayList<>(); for (Pair> p : targetPaths) { List path = p.second; - if (!isDataSkippingOnlyAccessPath(path)) { + if (!isMetaPath(path)) { continue; } List prefix = path.subList(0, path.size() - 1); @@ -819,8 +905,21 @@ private static void stripNullSuffixPaths( boolean covered = false; for (Pair> q : slotPaths) { List other = q.second; - if (other.isEmpty() - || AccessPathInfo.ACCESS_NULL.equals(other.get(other.size() - 1))) { + if (other.isEmpty()) { + continue; + } + if (other == path) { + continue; + } + if (AccessPathInfo.ACCESS_NULL.equals(other.get(other.size() - 1))) { + // A deeper NULL path (e.g. [a, *, NULL]) covers a shallower one + // (e.g. [a, NULL]) because reading the deeper path requires + // traversing the container, which materializes the container's + // null information. + if (hasStrictPrefix(other, prefix)) { + covered = true; + break; + } continue; } if (other.equals(prefix)) { @@ -832,7 +931,7 @@ private static void stripNullSuffixPaths( break; } if (other.size() == prefix.size() + 1 - && AccessPathInfo.ACCESS_STRING_OFFSET.equals(other.get(other.size() - 1)) + && AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1)) && other.subList(0, prefix.size()).equals(prefix)) { covered = true; break; @@ -941,15 +1040,18 @@ public static class DataTypeAccessTree { // if access 's.a.b' the node 's' and 'a' has accessPartialChild, and node 'b' has accessAll private boolean accessPartialChild; private boolean accessAll; - // True when this string-typed node is accessed ONLY via the offset array - // (e.g. length(str_col) or length(element_at(c_struct,'f3'))). - // When this flag is set and accessAll is NOT set, pruneDataType() returns BigIntType - // to signal that the BE only needs to read the offset array, not the chars data. - private boolean isStringOffsetOnly; - // True when this column node is accessed ONLY via IS NULL / IS NOT NULL. - // When this flag is set and accessAll is NOT set, the BE only needs to read the null flag, - // not the actual column data. - private boolean isNullCheckOnly; + // Cached marker set by setAccessByPath() when a path component is OFFSET. + // Avoids scanning the multimap: hasStringOffsetOnlyAccess() reads this flag in + // O(1) instead of checking every path for an OFFSET suffix. Used for array, map, + // and string-like types (they share offset-based storage in BE). + // When set without accessAll, pruneDataType() keeps the node's type so that BE + // reads only the offset structure, skipping element / key-value / chars data. + private boolean hasOffsetPath; + // Cached marker set by setAccessByPath() when a path component is NULL. + // Same purpose as hasOffsetPath — O(1) flag read instead of multimap scan + // in hasNullCheckOnlyAccess(). When set without accessAll, BE reads only the + // null bitmap, skipping actual column data. + private boolean hasNullPath; // for the future, only access the meta of the column, // e.g. `is not null` can only access the column's offset, not need to read the data private ColumnAccessPathType pathType; @@ -992,35 +1094,60 @@ public Map getChildren() { } /** - * True when a MAP column is accessed as {@code length(map_col['key'])}: the keys must - * be read in full (for the element lookup) while the values only need the offset array - * (since only their length, not their content, is used). - * Expected access paths: [col, KEYS] and [col, VALUES, OFFSET]. + * Collect MAP nodes where keys are fully accessed for element lookup while values + * only need metadata (OFFSET or NULL). Expected access paths for each returned prefix: + * [prefix, KEYS] and [prefix, VALUES, metaSuffix]. + * + * @param metaSuffix {@link AccessPathInfo#ACCESS_OFFSET} or + * {@link AccessPathInfo#ACCESS_NULL} */ - public boolean hasMapValueOffsetOnlyAccess() { + public List> collectMapValueMetaOnlyAccessPaths(String metaSuffix) { + List> mapPrefixes = new ArrayList<>(); if (!isRoot) { - return false; + collectMapValueMetaOnlyAccessPaths(new ArrayList<>(), mapPrefixes, metaSuffix); + return mapPrefixes; + } + for (Entry child : children.entrySet()) { + List path = new ArrayList<>(); + path.add(child.getKey()); + child.getValue().collectMapValueMetaOnlyAccessPaths(path, mapPrefixes, metaSuffix); } - DataTypeAccessTree child = children.values().iterator().next(); - if (!child.type.isMapType() || child.accessAll) { + return mapPrefixes; + } + + private void collectMapValueMetaOnlyAccessPaths( + List currentPath, List> mapPrefixes, String metaSuffix) { + if (isMapValueMetaOnlyAccess(metaSuffix)) { + mapPrefixes.add(new ArrayList<>(currentPath)); + } + for (Entry child : children.entrySet()) { + List childPath = new ArrayList<>(currentPath); + childPath.add(child.getKey()); + child.getValue().collectMapValueMetaOnlyAccessPaths(childPath, mapPrefixes, metaSuffix); + } + } + + private boolean isMapValueMetaOnlyAccess(String metaSuffix) { + if (!type.isMapType() || accessAll) { return false; } - DataTypeAccessTree keysChild = child.children.get(AccessPathInfo.ACCESS_MAP_KEYS); - DataTypeAccessTree valsChild = child.children.get(AccessPathInfo.ACCESS_MAP_VALUES); + DataTypeAccessTree keysChild = children.get(AccessPathInfo.ACCESS_MAP_KEYS); + DataTypeAccessTree valsChild = children.get(AccessPathInfo.ACCESS_MAP_VALUES); // Keys must be fully accessed (element-at lookup). if (!keysChild.accessAll) { return false; } - // Values must be accessed offset-only (no deeper element reads). - if (!valsChild.isStringOffsetOnly || valsChild.accessAll) { + boolean hasMeta = AccessPathInfo.ACCESS_OFFSET.equals(metaSuffix) + ? valsChild.hasOffsetPath + : valsChild.hasNullPath; + if (!hasMeta || valsChild.accessAll) { return false; } if (valsChild.type.isStringLikeType()) { - // String value: accessAll check above is sufficient. return true; } if (valsChild.type.isArrayType()) { - // Array value (e.g. MAP>): verify no element was read directly + // Array value: verify no element was read directly // (e.g. map_col['k'][0] would set allChild.accessAll=true). DataTypeAccessTree allChild = valsChild.children.get(AccessPathInfo.ACCESS_ALL); return !allChild.accessAll && !allChild.accessPartialChild; @@ -1031,41 +1158,41 @@ public boolean hasMapValueOffsetOnlyAccess() { /** True when the column is accessed ONLY via the offset array (e.g. length(str_col), * length(arr_col), length(map_col)), meaning the type must not change but an access * path still needs to be sent to BE so it can skip the char/element data. */ - public boolean hasStringOffsetOnlyAccess() { + public boolean hasOffsetOnlyAccess() { + DataTypeAccessTree selfOrRootChild = this; if (isRoot) { - DataTypeAccessTree child = children.values().iterator().next(); - if (!child.isStringOffsetOnly || child.accessAll) { - return false; - } - if (child.type.isStringLikeType()) { - return true; - } - if (child.type.isArrayType()) { - // True only if no element was accessed (element_at / explode etc.) - DataTypeAccessTree allChild = child.children.get(AccessPathInfo.ACCESS_ALL); - return !allChild.accessAll && !allChild.accessPartialChild; - } - if (child.type.isMapType()) { - // True only if neither keys nor values were accessed directly - DataTypeAccessTree keysChild = child.children.get(AccessPathInfo.ACCESS_MAP_KEYS); - DataTypeAccessTree valsChild = child.children.get(AccessPathInfo.ACCESS_MAP_VALUES); - return !keysChild.accessAll && !keysChild.accessPartialChild - && !valsChild.accessAll && !valsChild.accessPartialChild; - } + selfOrRootChild = children.values().iterator().next(); + } + if (!selfOrRootChild.hasOffsetPath || selfOrRootChild.accessAll) { return false; } - return type.isStringLikeType() && isStringOffsetOnly && !accessAll; + if (selfOrRootChild.type.isStringLikeType()) { + return true; + } + if (selfOrRootChild.type.isArrayType()) { + // True only if no element was accessed (element_at / explode etc.) + DataTypeAccessTree allChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_ALL); + return !allChild.accessAll && !allChild.accessPartialChild; + } + if (selfOrRootChild.type.isMapType()) { + // True only if neither keys nor values were accessed directly + DataTypeAccessTree keysChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_MAP_KEYS); + DataTypeAccessTree valsChild = selfOrRootChild.children.get(AccessPathInfo.ACCESS_MAP_VALUES); + return !keysChild.accessAll && !keysChild.accessPartialChild + && !valsChild.accessAll && !valsChild.accessPartialChild; + } + return false; } /** True when the column is accessed ONLY via IS NULL / IS NOT NULL, * meaning the BE only needs to read the null flag, not the actual data. */ - public boolean hasNullCheckOnlyAccess() { + public boolean hasNullOnlyAccess() { if (isRoot) { DataTypeAccessTree child = children.values().iterator().next(); - return child.isNullCheckOnly && !child.accessAll - && !child.isStringOffsetOnly && !child.accessPartialChild; + return child.hasNullPath && !child.accessAll + && !child.hasOffsetPath && !child.accessPartialChild; } - return isNullCheckOnly && !accessAll && !isStringOffsetOnly && !accessPartialChild; + return hasNullPath && !accessAll && !hasOffsetPath && !accessPartialChild; } /** pruneCastType */ @@ -1163,7 +1290,7 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath // Mark null-check-only and return without setting accessAll or accessPartialChild, // so that parent nodes can distinguish "null-only leaf" from "has real sub-access". if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_NULL)) { - isNullCheckOnly = true; + hasNullPath = true; return; } @@ -1180,9 +1307,9 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath } return; } else if (this.type.isArrayType()) { - if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { + if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) { // length(array_col) — only the offset array is needed, not element data. - isStringOffsetOnly = true; + hasOffsetPath = true; return; } DataTypeAccessTree child = children.get(AccessPathInfo.ACCESS_ALL); @@ -1193,9 +1320,9 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath return; } else if (this.type.isMapType()) { String fieldName = path.get(accessIndex); - if (fieldName.equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { + if (fieldName.equals(AccessPathInfo.ACCESS_OFFSET)) { // length(map_col) — only the offset array is needed, not key/value data. - isStringOffsetOnly = true; + hasOffsetPath = true; return; } if (fieldName.equals(AccessPathInfo.ACCESS_ALL)) { @@ -1225,8 +1352,8 @@ public void setAccessByPath(List path, int accessIndex, ColumnAccessPath } else if (type.isStringLikeType()) { // String leaf accessed via the offset array (e.g. path ends in "offset"). // Mark offset-only so pruneDataType() can return BigIntType instead of full data. - if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) { - isStringOffsetOnly = true; + if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) { + hasOffsetPath = true; return; // do NOT set accessAll — offset-only is distinguishable from full access } // Any other sub-path on a string column means full data is needed. @@ -1269,10 +1396,10 @@ public Optional pruneDataType() { return children.values().iterator().next().pruneDataType(); } else if (accessAll) { return Optional.of(type); - } else if (isStringOffsetOnly && !accessPartialChild) { + } else if (hasOffsetPath && !accessPartialChild) { // Only the offset array is accessed (e.g. length(str_col)). return Optional.of(type); - } else if (isNullCheckOnly && !accessPartialChild) { + } else if (hasNullPath && !accessPartialChild) { // Only the null flag is accessed (e.g. col IS NULL / element_at(s,'f') IS NULL). // Return the node's type so that parent nodes include this child in their pruned type, // while the access path (ending in NULL) tells BE to skip actual data reading. diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index a50d4cdddc2b04..bbad4085677be2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -199,6 +199,40 @@ public void testCardinalityArrayElementKeepsOffsetPath() throws Exception { ImmutableList.of(path("a", "*"))); } + @Test + public void testDeeperOffsetPathCoversShallowerOffsetPath() throws Exception { + // cardinality(a) on ARRAY> generates [a, OFFSET] + // cardinality(element_at(a, 1)) generates [a, *, OFFSET] + // [a, *, OFFSET] traverses into the outer array's items, so it must read + // through the outer array structure. The shallower [a, OFFSET] is therefore + // redundant and should be stripped. + assertAllAccessPathsContain( + "select cardinality(a), cardinality(element_at(a, 1)) from nested_array_tbl", + ImmutableList.of(path("a", "*", "OFFSET")), + ImmutableList.of(path("a", "OFFSET"))); + } + + @Test + public void testOffsetPathCoversNullPathWithSamePrefix() throws Exception { + assertAllAccessPathsContain( + "select cardinality(a), a is null from nested_array_tbl", + ImmutableList.of(path("a", "OFFSET")), + ImmutableList.of(path("a", "NULL"))); + } + + @Test + public void testDeeperNullPathCoversShallowerNullPath() throws Exception { + // a IS NULL on ARRAY> generates [a, NULL] + // element_at(a, 1) IS NULL generates [a, *, NULL] + // [a, *, NULL] traverses into the outer array's items, which requires + // reading the outer array's null bitmap. Therefore [a, NULL] is redundant + // and should be stripped. + assertAllAccessPathsContain( + "select a is null, element_at(a, 1) is null from nested_array_tbl", + ImmutableList.of(path("a", "*", "NULL")), + ImmutableList.of(path("a", "NULL"))); + } + @Test public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl", @@ -207,6 +241,22 @@ public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { ImmutableList.of()); } + @Test + public void testNestedMapElementLengthKeepsValueOffsetPath() throws Exception { + assertColumn("select length(element_at(element_at(s, 'm'), 'a')) from nested_container_tbl", + "struct>", + ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "OFFSET")), + ImmutableList.of()); + } + + @Test + public void testNestedMapElementIsNullKeepsValueIsNullPath() throws Exception { + assertColumn("select (element_at(element_at(s, 'm'), 'a')) is null from nested_container_tbl", + "struct>", + ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "NULL")), + ImmutableList.of()); + } + @Test public void testFullFieldAccessStripsExactDataSkippingPath() throws Exception { assertColumn("select element_at(s, 'city') from tbl " @@ -610,8 +660,8 @@ public void testFilter() throws Throwable { ); assertColumn("select 100 from tbl where element_at(s, 'data')[1][1] is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "*", "NULL")), - ImmutableList.of(path("s", "data", "*", "*", "NULL")) + ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "NULL")), + ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "NULL")) ); assertColumn("select 100 from tbl where element_at(element_at(s, 'data')[1][1], 'a') is not null", "struct>>>",