From d641c72102a347a20ff26bf4edf299993c191e45 Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Mon, 8 Jun 2026 19:47:36 +0800 Subject: [PATCH 1/2] [fix](be) Preserve nested paths for lazy rowid fetch ### What problem does this PR solve? Issue Number: None Problem Summary: TopN lazy materialization can fetch pruned complex columns by row id after nested-column pruning. The materialization tuple kept the pruned slot type but did not preserve the relation slot access paths, so BE could build full storage iterators and read full child layouts into pruned result columns. Row-store lazy fetch also cannot apply nested access paths, so FE now rejects row-store fetch for complex or nested-pruned lazy slots. This patch carries relation slot access paths into lazy materialization slots, passes slot access paths to storage rowid fetch, keeps rowid fetch on the normal query reader path, preserves FE's row-store fetch decision in the BE request and row-store decode path, and keeps struct iterators readable when only some child fields are pruned. ### Release note None ### Check List (For Author) - Test: - Build: ~/.codex/skills/doris-local-regression/scripts/doris-local-regression.sh --network 10.26.20.3/24 build - Unit Test: ./run-fe-ut.sh --run org.apache.doris.nereids.processor.post.materialize.MaterializeProbeVisitorTest - Unit Test: ./run-fe-ut.sh --run org.apache.doris.nereids.glue.translator.PhysicalPlanTranslatorTest#testCanUseRowStoreForLazySlots - Regression test: ~/.codex/skills/doris-local-regression/scripts/doris-local-regression.sh --network 10.26.20.3/24 run -d nereids_rules_p0/column_pruning -s topn_lazy_nested_column_pruning - Format: build-support/clang-format.sh be/src/exec/rowid_fetcher.cpp - Check: git diff --check - Behavior changed: No - Does this need documentation: No --- be/src/exec/rowid_fetcher.cpp | 24 ++++ be/src/storage/segment/column_reader.cpp | 1 - .../materialize/MaterializeProbeVisitor.java | 20 +++- .../nereids/util/RowStoreFetchChecker.java | 13 ++- .../PhysicalPlanTranslatorTest.java | 14 +++ .../MaterializeProbeVisitorTest.java | 71 +++++++++++- .../topn_lazy_nested_column_pruning.out | 4 + .../topn_lazy_nested_column_pruning.groovy | 103 ++++++++++++++++++ 8 files changed, 236 insertions(+), 14 deletions(-) diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index a3790862cd21e0..c2317a103e5ee3 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -318,6 +318,28 @@ struct IteratorItem { StorageReadOptions storage_read_options; }; +static void set_slot_access_paths(const SlotDescriptor& slot, const TabletSchema& schema, + StorageReadOptions& storage_read_options) { + int32_t unique_id = slot.col_unique_id(); + const int field_index = + unique_id >= 0 ? schema.field_index(unique_id) : schema.field_index(slot.col_name()); + if (field_index >= 0) { + const auto& column = schema.column(field_index); + unique_id = column.unique_id() >= 0 ? column.unique_id() : column.parent_unique_id(); + } + if (unique_id < 0) { + return; + } + + if (!slot.all_access_paths().empty()) { + storage_read_options.all_access_paths[unique_id] = slot.all_access_paths(); + } + + if (!slot.predicate_access_paths().empty()) { + storage_read_options.predicate_access_paths[unique_id] = slot.predicate_access_paths(); + } +} + struct SegItem { BaseTabletSPtr tablet; BetaRowsetSharedPtr rowset; @@ -474,6 +496,7 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request, iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; } segment = iterator_item.segment; + set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options); RETURN_IF_ERROR(segment->seek_and_read_by_rowid( full_read_schema, &slots[x], row_ids, column, iterator_item.storage_read_options, iterator_item.iterator)); @@ -1111,6 +1134,7 @@ Status RowIdStorageReader::read_doris_format_row( iterator_item.storage_read_options.stats = &stats; iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; } + set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options); RETURN_IF_ERROR(segment->seek_and_read_by_rowid( full_read_schema, &slots[x], row_ids, column, iterator_item.storage_read_options, iterator_item.iterator)); diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index de526a432b89d1..40fa4276435a18 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -1603,7 +1603,6 @@ Status StructFileColumnIterator::set_access_paths( } if (!need_to_read) { - set_reading_flag(ReadingFlag::SKIP_READING); sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING); DLOG(INFO) << "Struct column iterator set sub-column " << name << " to SKIP_READING"; continue; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java index b27510cad99531..0a49789660096d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java @@ -93,7 +93,9 @@ public Optional visitPhysicalFilter(PhysicalFilter visitPhysicalOlapScan(PhysicalOlapScan scan, if (context.requiredMaterializedSlots.contains(context.slot)) { return Optional.empty(); } - return Optional.of(new MaterializeSource(scan, context.slot)); + return Optional.of( + new MaterializeSource(scan, findRelationOutputSlot(scan, context.slot).orElse(context.slot))); } @Override @@ -173,7 +176,8 @@ public Optional visitPhysicalCatalogRelation( && !context.requiredMaterializedSlots.contains(context.slot)) { // lazy materialize slot must be backed by a base column. if (context.slot.getOriginalColumn().isPresent()) { - return Optional.of(new MaterializeSource(relation, context.slot)); + return Optional.of(new MaterializeSource( + relation, findRelationOutputSlot(relation, context.slot).orElse(context.slot))); } else { context.requiredMaterializedSlots.addAll(relation.getOutputSet()); LOG.info("lazy materialize {} failed, because its column is empty", context.slot); @@ -190,7 +194,8 @@ public Optional visitPhysicalTVFRelation( && !context.requiredMaterializedSlots.contains(context.slot)) { // lazy materialize slot must be backed by a base column. if (context.slot.getOriginalColumn().isPresent()) { - return Optional.of(new MaterializeSource(tvfRelation, context.slot)); + return Optional.of(new MaterializeSource( + tvfRelation, findRelationOutputSlot(tvfRelation, context.slot).orElse(context.slot))); } else { LOG.info("lazy materialize {} failed, because its column is empty", context.slot); } @@ -250,4 +255,11 @@ public Optional visitPhysicalProject( } } + private Optional findRelationOutputSlot(Relation relation, SlotReference contextSlot) { + return relation.getOutput().stream() + .filter(slot -> slot instanceof SlotReference && slot.equals(contextSlot)) + .map(slot -> (SlotReference) slot) + .findFirst(); + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java index 21f12e8c6758ff..71bdeea2bca00e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java @@ -41,15 +41,22 @@ public static boolean canUseRowStoreForLazySlots(List lazySlots) { return false; } SlotReference slotReference = (SlotReference) lazySlot; - // BE row-store fetch maps values only by col_unique_id and does not carry sub-column paths. - if (slotReference.hasSubColPath()) { + // BE row-store fetch maps values only by col_unique_id and does not apply sub-column + // paths or nested-column pruning access paths. + if (slotReference.hasSubColPath() || hasNestedAccessPaths(slotReference)) { return false; } Optional originalColumn = slotReference.getOriginalColumn(); - if (!originalColumn.isPresent() || !originalColumnUniqueIds.add(originalColumn.get().getUniqueId())) { + if (!originalColumn.isPresent() || originalColumn.get().getType().isComplexType() + || !originalColumnUniqueIds.add(originalColumn.get().getUniqueId())) { return false; } } return true; } + + private static boolean hasNestedAccessPaths(SlotReference slotReference) { + return slotReference.getAllAccessPaths().map(paths -> !paths.isEmpty()).orElse(false) + || slotReference.getPredicateAccessPaths().map(paths -> !paths.isEmpty()).orElse(false); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java index c31f96792f28e7..06a7236cbd37ee 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.glue.translator; +import org.apache.doris.analysis.ColumnAccessPath; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.GroupingInfo; import org.apache.doris.analysis.SlotRef; @@ -275,6 +276,9 @@ public void testCanUseRowStoreForLazySlots() { distinctB.setUniqueId(2); Column sharedVariant = new Column("kv", org.apache.doris.catalog.Type.VARIANT); sharedVariant.setUniqueId(3); + Column arrayColumn = new Column("arr", + new org.apache.doris.catalog.ArrayType(org.apache.doris.catalog.Type.INT)); + arrayColumn.setUniqueId(4); SlotReference distinctSlotA = new SlotReference(StatementScopeIdGenerator.newExprId(), "a", IntegerType.INSTANCE, true, ImmutableList.of(), null, distinctA, null, distinctA); @@ -289,6 +293,12 @@ public void testCanUseRowStoreForLazySlots() { SlotReference variantSubColumnSlot = new SlotReference(StatementScopeIdGenerator.newExprId(), "kv", org.apache.doris.nereids.types.VariantType.INSTANCE, true, ImmutableList.of(), null, sharedVariant, null, sharedVariant, ImmutableList.of("ssl")); + SlotReference nestedPrunedSlot = distinctSlotA.withAccessPaths( + ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), + ImmutableList.of()); + SlotReference arraySlot = new SlotReference(StatementScopeIdGenerator.newExprId(), "arr", + org.apache.doris.nereids.types.ArrayType.of(IntegerType.INSTANCE), true, ImmutableList.of(), + null, arrayColumn, null, arrayColumn); Assertions.assertTrue(PhysicalPlanTranslator.canUseRowStoreForLazySlots( ImmutableList.of(distinctSlotA, distinctSlotB))); @@ -296,5 +306,9 @@ public void testCanUseRowStoreForLazySlots() { ImmutableList.of(singleVariantSubColumnSlot))); Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( ImmutableList.of(variantRootSlot, variantSubColumnSlot))); + Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( + ImmutableList.of(nestedPrunedSlot))); + Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( + ImmutableList.of(arraySlot))); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java index 80f96c5419cbce..100c70cfb4e098 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.processor.post.materialize; +import org.apache.doris.analysis.ColumnAccessPath; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.nereids.trees.expressions.Add; @@ -25,9 +26,11 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -44,12 +47,7 @@ public class MaterializeProbeVisitorTest { @Test public void testOlapScanRejectsRequiredMaterializedSlots() { SlotReference baseSlot = new SlotReference("a", IntegerType.INSTANCE); - OlapTable table = Mockito.mock(OlapTable.class); - Mockito.when(table.getBaseIndexId()).thenReturn(1L); - Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS); - PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class); - Mockito.when(scan.getSelectedIndexId()).thenReturn(1L); - Mockito.when(scan.getTable()).thenReturn(table); + PhysicalOlapScan scan = mockBaseOlapScan(baseSlot); Set requiredMaterializedSlots = new HashSet<>(); requiredMaterializedSlots.add(baseSlot); @@ -60,6 +58,56 @@ public void testOlapScanRejectsRequiredMaterializedSlots() { Assertions.assertFalse(source.isPresent()); } + @Test + public void testOlapScanUsesRelationSlotWithAccessPaths() { + SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE); + SlotReference relationSlot = contextSlot.withAccessPaths( + ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of()); + contextSlot = (SlotReference) contextSlot.withNullable(false); + PhysicalOlapScan scan = mockBaseOlapScan(relationSlot); + + MaterializeProbeVisitor.ProbeContext context = new MaterializeProbeVisitor.ProbeContext(contextSlot); + Optional source = new MaterializeProbeVisitor().visitPhysicalOlapScan(scan, context); + + Assertions.assertTrue(source.isPresent()); + Assertions.assertSame(relationSlot, source.get().baseSlot); + Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths()); + } + + @Test + @SuppressWarnings("unchecked") + public void testFilterUsingIndexUsesRelationSlotWithAccessPaths() { + ConnectContext oldContext = ConnectContext.get(); + ConnectContext context = new ConnectContext(); + context.getSessionVariable().topNLazyMaterializationUsingIndex = true; + context.setThreadLocalInfo(); + try { + SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE); + SlotReference relationSlot = contextSlot.withAccessPaths( + ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of()); + contextSlot = (SlotReference) contextSlot.withNullable(false); + PhysicalOlapScan scan = mockBaseOlapScan(relationSlot); + + PhysicalFilter filter = Mockito.mock(PhysicalFilter.class); + Mockito.when(filter.child()).thenReturn(scan); + Mockito.when(filter.getInputSlots()).thenReturn(ImmutableSet.of(contextSlot)); + + MaterializeProbeVisitor.ProbeContext probeContext = new MaterializeProbeVisitor.ProbeContext(contextSlot); + Optional source = + new MaterializeProbeVisitor().visitPhysicalFilter(filter, probeContext); + + Assertions.assertTrue(source.isPresent()); + Assertions.assertSame(relationSlot, source.get().baseSlot); + Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths()); + } finally { + if (oldContext == null) { + ConnectContext.remove(); + } else { + oldContext.setThreadLocalInfo(); + } + } + } + @Test @SuppressWarnings("unchecked") public void testComplexProjectInputSlotsAreRequiredMaterialized() { @@ -107,4 +155,15 @@ public void testPushedDownProjectSlotInputsAreRequiredMaterialized() { Assertions.assertFalse(source.isPresent()); Assertions.assertEquals(ImmutableSet.of(baseSlot, pushedDownSlot), requiredMaterializedSlots); } + + private PhysicalOlapScan mockBaseOlapScan(SlotReference outputSlot) { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getBaseIndexId()).thenReturn(1L); + Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS); + PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class); + Mockito.when(scan.getSelectedIndexId()).thenReturn(1L); + Mockito.when(scan.getTable()).thenReturn(table); + Mockito.when(scan.getOutput()).thenReturn(ImmutableList.of(outputSlot)); + return scan; + } } diff --git a/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out b/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out index ef64dd77ea22d7..10b718e1ac03e7 100644 --- a/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out +++ b/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out @@ -37,3 +37,7 @@ -- !project_under_topn_consumed_slot -- 1 hello {"city":null, "zip":10001} [1, 2, 3] {"a":1, "b":2} 1 \N +-- !sparse_struct_map_array_result -- +3 9 9 3 false +1 7 7 3 false + diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy index e41921273de786..61068f6648acd4 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy @@ -362,4 +362,107 @@ suite("topn_lazy_nested_column_pruning") { limit 3 """ sql """ set enable_topn_expr_pullup = true; """ + + // ============================================= + // Test 19: TopN lazy rowid fetch should honor nested access paths + // Sparse multi-path STRUCT/MAP/ARRAY pruning uses a pruned slot type. + // Rowid fetch must pass the slot access paths to storage iterators so + // the iterator child layout matches the pruned result column layout. + // ============================================= + sql """ set enable_decimal256 = true; """ + sql """ set enable_prune_nested_column = true; """ + sql """ DROP TABLE IF EXISTS tlncp_sparse_nested_tbl """ + sql """ + CREATE TABLE tlncp_sparse_nested_tbl ( + pk INT, + deep STRUCT< + nested_str: VARCHAR(64), + inner_s: STRUCT, + deep_map: MAP> + > NULL, + typed STRUCT< + string_leaf: STRING, + decimal_leaf: DECIMAL(76,56), + typed_arr: ARRAY>, + typed_map: MAP> + > NULL + ) ENGINE = OLAP + UNIQUE KEY(pk) + DISTRIBUTED BY HASH(pk) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "store_row_column" = "true" + ) + """ + + sql """ + INSERT INTO tlncp_sparse_nested_tbl VALUES + (1, + named_struct( + 'nested_str', 'unused-one', + 'inner_s', named_struct('deep_str', 'DeepOne', 'flag', true, 'deep_char', 'dc1'), + 'deep_map', map('b', named_struct('leaf', 'leaf-one', 'n', 11, 'char_leaf', 'cb1'))), + named_struct( + 'string_leaf', 'root-one', + 'decimal_leaf', cast('10.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-one', 'decimal_leaf', + cast('1.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-one', 'decimal_leaf', + cast('2.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))), + (2, + named_struct( + 'nested_str', 'unused-two', + 'inner_s', named_struct('deep_str', 'DeepTwo', 'flag', false, 'deep_char', 'dc2'), + 'deep_map', map('b', named_struct('leaf', 'leaf-two', 'n', 22, 'char_leaf', 'cb2'))), + named_struct( + 'string_leaf', 'root-two', + 'decimal_leaf', cast('20.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-two', 'decimal_leaf', + cast('3.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-two', 'decimal_leaf', + cast('4.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))), + (3, + named_struct( + 'nested_str', 'unused-three', + 'inner_s', named_struct('deep_str', 'DeepThree', 'flag', true, 'deep_char', 'dc3'), + 'deep_map', map('b', named_struct('leaf', 'leaf-three', 'n', 33, 'char_leaf', 'cb3'))), + named_struct( + 'string_leaf', 'root-three', + 'decimal_leaf', cast('30.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-three', 'decimal_leaf', + cast('5.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-three', 'decimal_leaf', + cast('6.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))) + """ + + explain { + sql """ + SELECT + pk, + CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len, + LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len, + LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len, + ((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null + FROM tlncp_sparse_nested_tbl + WHERE pk <= 3 + ORDER BY ABS(pk % 3), pk + LIMIT 2 + """ + contains("VMaterializeNode") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__tlncp_sparse_nested_tbl]") + } + + qt_sparse_struct_map_array_result """ + SELECT + pk, + CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len, + LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len, + LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len, + ((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null + FROM tlncp_sparse_nested_tbl + WHERE pk <= 3 + ORDER BY ABS(pk % 3), pk + LIMIT 2 + """ } From 7d7d152feae3fb22df5cdcadab89e5010aa0865a Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Wed, 10 Jun 2026 18:40:23 +0800 Subject: [PATCH 2/2] [fix](fe) Preserve lazy materialize access paths ### What problem does this PR solve? Issue Number: None Related PR: #64242 Problem Summary: Lazy TopN materialization needs nested access paths from the relation output slot to reach the BE rowid fetch SlotDescriptor, and row-store lazy fetch must be disabled when a lazily fetched source column has nested access paths. Preserve the relation output slot as MaterializeSource.baseSlot for lazy materialization output, and make PhysicalPlanTranslator.shouldUseRowStore check the lazy slots against the relation output before enabling row-store fetch. ### Release note None ### Check List (For Author) - Test: Unit Test / Regression test / Build - ./run-fe-ut.sh --run org.apache.doris.nereids.glue.translator.PhysicalPlanTranslatorTest,org.apache.doris.nereids.processor.post.materialize.MaterializeProbeVisitorTest - ~/.codex/skills/doris-local-regression/scripts/doris-local-regression.sh --network 10.26.20.3/24 run -d nereids_rules_p0/column_pruning -s topn_lazy_nested_column_pruning - ./build.sh --be --fe - git diff --check - Behavior changed: No - Does this need documentation: No --- .../translator/PhysicalPlanTranslator.java | 27 ++++++++++++++++++- .../nereids/util/RowStoreFetchChecker.java | 13 +++------ .../PhysicalPlanTranslatorTest.java | 14 ---------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 9e9215554229fd..5af16b93920140 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -2807,7 +2807,32 @@ private boolean shouldUseRowStore(Relation rel, List lazySlots) { useRowStore = olapTable.storeRowColumn() && CollectionUtils.isEmpty(olapTable.getTableProperty().getCopiedRowStoreColumns()); } - return useRowStore && canUseRowStoreForLazySlots(lazySlots); + return useRowStore && canUseRowStoreForLazySlots(lazySlots) + && !hasNestedAccessPaths(rel, lazySlots); + } + + private boolean hasNestedAccessPaths(Relation rel, List lazySlots) { + Set lazyColumnUniqueIds = new HashSet<>(); + for (Slot lazySlot : lazySlots) { + SlotReference slotReference = (SlotReference) lazySlot; + lazyColumnUniqueIds.add(slotReference.getOriginalColumn().get().getUniqueId()); + } + for (Slot outputSlot : rel.getOutput()) { + if (outputSlot instanceof SlotReference) { + SlotReference slotReference = (SlotReference) outputSlot; + if (slotReference.getOriginalColumn().isPresent() + && lazyColumnUniqueIds.contains(slotReference.getOriginalColumn().get().getUniqueId()) + && hasNestedAccessPaths(slotReference)) { + return true; + } + } + } + return false; + } + + private boolean hasNestedAccessPaths(SlotReference slotReference) { + return slotReference.getAllAccessPaths().map(paths -> !paths.isEmpty()).orElse(false) + || slotReference.getPredicateAccessPaths().map(paths -> !paths.isEmpty()).orElse(false); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java index 71bdeea2bca00e..21f12e8c6758ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/RowStoreFetchChecker.java @@ -41,22 +41,15 @@ public static boolean canUseRowStoreForLazySlots(List lazySlots) { return false; } SlotReference slotReference = (SlotReference) lazySlot; - // BE row-store fetch maps values only by col_unique_id and does not apply sub-column - // paths or nested-column pruning access paths. - if (slotReference.hasSubColPath() || hasNestedAccessPaths(slotReference)) { + // BE row-store fetch maps values only by col_unique_id and does not carry sub-column paths. + if (slotReference.hasSubColPath()) { return false; } Optional originalColumn = slotReference.getOriginalColumn(); - if (!originalColumn.isPresent() || originalColumn.get().getType().isComplexType() - || !originalColumnUniqueIds.add(originalColumn.get().getUniqueId())) { + if (!originalColumn.isPresent() || !originalColumnUniqueIds.add(originalColumn.get().getUniqueId())) { return false; } } return true; } - - private static boolean hasNestedAccessPaths(SlotReference slotReference) { - return slotReference.getAllAccessPaths().map(paths -> !paths.isEmpty()).orElse(false) - || slotReference.getPredicateAccessPaths().map(paths -> !paths.isEmpty()).orElse(false); - } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java index 06a7236cbd37ee..c31f96792f28e7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java @@ -17,7 +17,6 @@ package org.apache.doris.nereids.glue.translator; -import org.apache.doris.analysis.ColumnAccessPath; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.GroupingInfo; import org.apache.doris.analysis.SlotRef; @@ -276,9 +275,6 @@ public void testCanUseRowStoreForLazySlots() { distinctB.setUniqueId(2); Column sharedVariant = new Column("kv", org.apache.doris.catalog.Type.VARIANT); sharedVariant.setUniqueId(3); - Column arrayColumn = new Column("arr", - new org.apache.doris.catalog.ArrayType(org.apache.doris.catalog.Type.INT)); - arrayColumn.setUniqueId(4); SlotReference distinctSlotA = new SlotReference(StatementScopeIdGenerator.newExprId(), "a", IntegerType.INSTANCE, true, ImmutableList.of(), null, distinctA, null, distinctA); @@ -293,12 +289,6 @@ public void testCanUseRowStoreForLazySlots() { SlotReference variantSubColumnSlot = new SlotReference(StatementScopeIdGenerator.newExprId(), "kv", org.apache.doris.nereids.types.VariantType.INSTANCE, true, ImmutableList.of(), null, sharedVariant, null, sharedVariant, ImmutableList.of("ssl")); - SlotReference nestedPrunedSlot = distinctSlotA.withAccessPaths( - ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), - ImmutableList.of()); - SlotReference arraySlot = new SlotReference(StatementScopeIdGenerator.newExprId(), "arr", - org.apache.doris.nereids.types.ArrayType.of(IntegerType.INSTANCE), true, ImmutableList.of(), - null, arrayColumn, null, arrayColumn); Assertions.assertTrue(PhysicalPlanTranslator.canUseRowStoreForLazySlots( ImmutableList.of(distinctSlotA, distinctSlotB))); @@ -306,9 +296,5 @@ public void testCanUseRowStoreForLazySlots() { ImmutableList.of(singleVariantSubColumnSlot))); Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( ImmutableList.of(variantRootSlot, variantSubColumnSlot))); - Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( - ImmutableList.of(nestedPrunedSlot))); - Assertions.assertFalse(PhysicalPlanTranslator.canUseRowStoreForLazySlots( - ImmutableList.of(arraySlot))); } }