diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index a3790862cd21e0..c2317a103e5ee3 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -318,6 +318,28 @@ struct IteratorItem { StorageReadOptions storage_read_options; }; +static void set_slot_access_paths(const SlotDescriptor& slot, const TabletSchema& schema, + StorageReadOptions& storage_read_options) { + int32_t unique_id = slot.col_unique_id(); + const int field_index = + unique_id >= 0 ? schema.field_index(unique_id) : schema.field_index(slot.col_name()); + if (field_index >= 0) { + const auto& column = schema.column(field_index); + unique_id = column.unique_id() >= 0 ? column.unique_id() : column.parent_unique_id(); + } + if (unique_id < 0) { + return; + } + + if (!slot.all_access_paths().empty()) { + storage_read_options.all_access_paths[unique_id] = slot.all_access_paths(); + } + + if (!slot.predicate_access_paths().empty()) { + storage_read_options.predicate_access_paths[unique_id] = slot.predicate_access_paths(); + } +} + struct SegItem { BaseTabletSPtr tablet; BetaRowsetSharedPtr rowset; @@ -474,6 +496,7 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request, iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; } segment = iterator_item.segment; + set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options); RETURN_IF_ERROR(segment->seek_and_read_by_rowid( full_read_schema, &slots[x], row_ids, column, iterator_item.storage_read_options, iterator_item.iterator)); @@ -1111,6 +1134,7 @@ Status RowIdStorageReader::read_doris_format_row( iterator_item.storage_read_options.stats = &stats; iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; } + set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options); RETURN_IF_ERROR(segment->seek_and_read_by_rowid( full_read_schema, &slots[x], row_ids, column, iterator_item.storage_read_options, iterator_item.iterator)); diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index de526a432b89d1..40fa4276435a18 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -1603,7 +1603,6 @@ Status StructFileColumnIterator::set_access_paths( } if (!need_to_read) { - set_reading_flag(ReadingFlag::SKIP_READING); sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING); DLOG(INFO) << "Struct column iterator set sub-column " << name << " to SKIP_READING"; continue; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 9e9215554229fd..5af16b93920140 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -2807,7 +2807,32 @@ private boolean shouldUseRowStore(Relation rel, List lazySlots) { useRowStore = olapTable.storeRowColumn() && CollectionUtils.isEmpty(olapTable.getTableProperty().getCopiedRowStoreColumns()); } - return useRowStore && canUseRowStoreForLazySlots(lazySlots); + return useRowStore && canUseRowStoreForLazySlots(lazySlots) + && !hasNestedAccessPaths(rel, lazySlots); + } + + private boolean hasNestedAccessPaths(Relation rel, List lazySlots) { + Set lazyColumnUniqueIds = new HashSet<>(); + for (Slot lazySlot : lazySlots) { + SlotReference slotReference = (SlotReference) lazySlot; + lazyColumnUniqueIds.add(slotReference.getOriginalColumn().get().getUniqueId()); + } + for (Slot outputSlot : rel.getOutput()) { + if (outputSlot instanceof SlotReference) { + SlotReference slotReference = (SlotReference) outputSlot; + if (slotReference.getOriginalColumn().isPresent() + && lazyColumnUniqueIds.contains(slotReference.getOriginalColumn().get().getUniqueId()) + && hasNestedAccessPaths(slotReference)) { + return true; + } + } + } + return false; + } + + private boolean hasNestedAccessPaths(SlotReference slotReference) { + return slotReference.getAllAccessPaths().map(paths -> !paths.isEmpty()).orElse(false) + || slotReference.getPredicateAccessPaths().map(paths -> !paths.isEmpty()).orElse(false); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java index b27510cad99531..0a49789660096d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java @@ -93,7 +93,9 @@ public Optional visitPhysicalFilter(PhysicalFilter visitPhysicalOlapScan(PhysicalOlapScan scan, if (context.requiredMaterializedSlots.contains(context.slot)) { return Optional.empty(); } - return Optional.of(new MaterializeSource(scan, context.slot)); + return Optional.of( + new MaterializeSource(scan, findRelationOutputSlot(scan, context.slot).orElse(context.slot))); } @Override @@ -173,7 +176,8 @@ public Optional visitPhysicalCatalogRelation( && !context.requiredMaterializedSlots.contains(context.slot)) { // lazy materialize slot must be backed by a base column. if (context.slot.getOriginalColumn().isPresent()) { - return Optional.of(new MaterializeSource(relation, context.slot)); + return Optional.of(new MaterializeSource( + relation, findRelationOutputSlot(relation, context.slot).orElse(context.slot))); } else { context.requiredMaterializedSlots.addAll(relation.getOutputSet()); LOG.info("lazy materialize {} failed, because its column is empty", context.slot); @@ -190,7 +194,8 @@ public Optional visitPhysicalTVFRelation( && !context.requiredMaterializedSlots.contains(context.slot)) { // lazy materialize slot must be backed by a base column. if (context.slot.getOriginalColumn().isPresent()) { - return Optional.of(new MaterializeSource(tvfRelation, context.slot)); + return Optional.of(new MaterializeSource( + tvfRelation, findRelationOutputSlot(tvfRelation, context.slot).orElse(context.slot))); } else { LOG.info("lazy materialize {} failed, because its column is empty", context.slot); } @@ -250,4 +255,11 @@ public Optional visitPhysicalProject( } } + private Optional findRelationOutputSlot(Relation relation, SlotReference contextSlot) { + return relation.getOutput().stream() + .filter(slot -> slot instanceof SlotReference && slot.equals(contextSlot)) + .map(slot -> (SlotReference) slot) + .findFirst(); + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java index 80f96c5419cbce..100c70cfb4e098 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitorTest.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.processor.post.materialize; +import org.apache.doris.analysis.ColumnAccessPath; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.nereids.trees.expressions.Add; @@ -25,9 +26,11 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -44,12 +47,7 @@ public class MaterializeProbeVisitorTest { @Test public void testOlapScanRejectsRequiredMaterializedSlots() { SlotReference baseSlot = new SlotReference("a", IntegerType.INSTANCE); - OlapTable table = Mockito.mock(OlapTable.class); - Mockito.when(table.getBaseIndexId()).thenReturn(1L); - Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS); - PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class); - Mockito.when(scan.getSelectedIndexId()).thenReturn(1L); - Mockito.when(scan.getTable()).thenReturn(table); + PhysicalOlapScan scan = mockBaseOlapScan(baseSlot); Set requiredMaterializedSlots = new HashSet<>(); requiredMaterializedSlots.add(baseSlot); @@ -60,6 +58,56 @@ public void testOlapScanRejectsRequiredMaterializedSlots() { Assertions.assertFalse(source.isPresent()); } + @Test + public void testOlapScanUsesRelationSlotWithAccessPaths() { + SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE); + SlotReference relationSlot = contextSlot.withAccessPaths( + ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of()); + contextSlot = (SlotReference) contextSlot.withNullable(false); + PhysicalOlapScan scan = mockBaseOlapScan(relationSlot); + + MaterializeProbeVisitor.ProbeContext context = new MaterializeProbeVisitor.ProbeContext(contextSlot); + Optional source = new MaterializeProbeVisitor().visitPhysicalOlapScan(scan, context); + + Assertions.assertTrue(source.isPresent()); + Assertions.assertSame(relationSlot, source.get().baseSlot); + Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths()); + } + + @Test + @SuppressWarnings("unchecked") + public void testFilterUsingIndexUsesRelationSlotWithAccessPaths() { + ConnectContext oldContext = ConnectContext.get(); + ConnectContext context = new ConnectContext(); + context.getSessionVariable().topNLazyMaterializationUsingIndex = true; + context.setThreadLocalInfo(); + try { + SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE); + SlotReference relationSlot = contextSlot.withAccessPaths( + ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of()); + contextSlot = (SlotReference) contextSlot.withNullable(false); + PhysicalOlapScan scan = mockBaseOlapScan(relationSlot); + + PhysicalFilter filter = Mockito.mock(PhysicalFilter.class); + Mockito.when(filter.child()).thenReturn(scan); + Mockito.when(filter.getInputSlots()).thenReturn(ImmutableSet.of(contextSlot)); + + MaterializeProbeVisitor.ProbeContext probeContext = new MaterializeProbeVisitor.ProbeContext(contextSlot); + Optional source = + new MaterializeProbeVisitor().visitPhysicalFilter(filter, probeContext); + + Assertions.assertTrue(source.isPresent()); + Assertions.assertSame(relationSlot, source.get().baseSlot); + Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths()); + } finally { + if (oldContext == null) { + ConnectContext.remove(); + } else { + oldContext.setThreadLocalInfo(); + } + } + } + @Test @SuppressWarnings("unchecked") public void testComplexProjectInputSlotsAreRequiredMaterialized() { @@ -107,4 +155,15 @@ public void testPushedDownProjectSlotInputsAreRequiredMaterialized() { Assertions.assertFalse(source.isPresent()); Assertions.assertEquals(ImmutableSet.of(baseSlot, pushedDownSlot), requiredMaterializedSlots); } + + private PhysicalOlapScan mockBaseOlapScan(SlotReference outputSlot) { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getBaseIndexId()).thenReturn(1L); + Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS); + PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class); + Mockito.when(scan.getSelectedIndexId()).thenReturn(1L); + Mockito.when(scan.getTable()).thenReturn(table); + Mockito.when(scan.getOutput()).thenReturn(ImmutableList.of(outputSlot)); + return scan; + } } diff --git a/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out b/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out index ef64dd77ea22d7..10b718e1ac03e7 100644 --- a/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out +++ b/regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out @@ -37,3 +37,7 @@ -- !project_under_topn_consumed_slot -- 1 hello {"city":null, "zip":10001} [1, 2, 3] {"a":1, "b":2} 1 \N +-- !sparse_struct_map_array_result -- +3 9 9 3 false +1 7 7 3 false + diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy index e41921273de786..61068f6648acd4 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy @@ -362,4 +362,107 @@ suite("topn_lazy_nested_column_pruning") { limit 3 """ sql """ set enable_topn_expr_pullup = true; """ + + // ============================================= + // Test 19: TopN lazy rowid fetch should honor nested access paths + // Sparse multi-path STRUCT/MAP/ARRAY pruning uses a pruned slot type. + // Rowid fetch must pass the slot access paths to storage iterators so + // the iterator child layout matches the pruned result column layout. + // ============================================= + sql """ set enable_decimal256 = true; """ + sql """ set enable_prune_nested_column = true; """ + sql """ DROP TABLE IF EXISTS tlncp_sparse_nested_tbl """ + sql """ + CREATE TABLE tlncp_sparse_nested_tbl ( + pk INT, + deep STRUCT< + nested_str: VARCHAR(64), + inner_s: STRUCT, + deep_map: MAP> + > NULL, + typed STRUCT< + string_leaf: STRING, + decimal_leaf: DECIMAL(76,56), + typed_arr: ARRAY>, + typed_map: MAP> + > NULL + ) ENGINE = OLAP + UNIQUE KEY(pk) + DISTRIBUTED BY HASH(pk) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "store_row_column" = "true" + ) + """ + + sql """ + INSERT INTO tlncp_sparse_nested_tbl VALUES + (1, + named_struct( + 'nested_str', 'unused-one', + 'inner_s', named_struct('deep_str', 'DeepOne', 'flag', true, 'deep_char', 'dc1'), + 'deep_map', map('b', named_struct('leaf', 'leaf-one', 'n', 11, 'char_leaf', 'cb1'))), + named_struct( + 'string_leaf', 'root-one', + 'decimal_leaf', cast('10.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-one', 'decimal_leaf', + cast('1.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-one', 'decimal_leaf', + cast('2.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))), + (2, + named_struct( + 'nested_str', 'unused-two', + 'inner_s', named_struct('deep_str', 'DeepTwo', 'flag', false, 'deep_char', 'dc2'), + 'deep_map', map('b', named_struct('leaf', 'leaf-two', 'n', 22, 'char_leaf', 'cb2'))), + named_struct( + 'string_leaf', 'root-two', + 'decimal_leaf', cast('20.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-two', 'decimal_leaf', + cast('3.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-two', 'decimal_leaf', + cast('4.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))), + (3, + named_struct( + 'nested_str', 'unused-three', + 'inner_s', named_struct('deep_str', 'DeepThree', 'flag', true, 'deep_char', 'dc3'), + 'deep_map', map('b', named_struct('leaf', 'leaf-three', 'n', 33, 'char_leaf', 'cb3'))), + named_struct( + 'string_leaf', 'root-three', + 'decimal_leaf', cast('30.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)), + 'typed_arr', array(named_struct('string_leaf', 'arr-three', 'decimal_leaf', + cast('5.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))), + 'typed_map', map('b', named_struct('string_leaf', 'map-three', 'decimal_leaf', + cast('6.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))) + """ + + explain { + sql """ + SELECT + pk, + CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len, + LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len, + LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len, + ((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null + FROM tlncp_sparse_nested_tbl + WHERE pk <= 3 + ORDER BY ABS(pk % 3), pk + LIMIT 2 + """ + contains("VMaterializeNode") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__tlncp_sparse_nested_tbl]") + } + + qt_sparse_struct_map_array_result """ + SELECT + pk, + CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len, + LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len, + LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len, + ((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null + FROM tlncp_sparse_nested_tbl + WHERE pk <= 3 + ORDER BY ABS(pk % 3), pk + LIMIT 2 + """ }