Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions be/src/exec/rowid_fetcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,28 @@ struct IteratorItem {
StorageReadOptions storage_read_options;
};

static void set_slot_access_paths(const SlotDescriptor& slot, const TabletSchema& schema,
StorageReadOptions& storage_read_options) {
int32_t unique_id = slot.col_unique_id();
const int field_index =
unique_id >= 0 ? schema.field_index(unique_id) : schema.field_index(slot.col_name());
if (field_index >= 0) {
const auto& column = schema.column(field_index);
unique_id = column.unique_id() >= 0 ? column.unique_id() : column.parent_unique_id();
}
if (unique_id < 0) {
return;
}

if (!slot.all_access_paths().empty()) {
storage_read_options.all_access_paths[unique_id] = slot.all_access_paths();
}

if (!slot.predicate_access_paths().empty()) {
storage_read_options.predicate_access_paths[unique_id] = slot.predicate_access_paths();
}
}

struct SegItem {
BaseTabletSPtr tablet;
BetaRowsetSharedPtr rowset;
Expand Down Expand Up @@ -474,6 +496,7 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request,
iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY;
}
segment = iterator_item.segment;
set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options);
RETURN_IF_ERROR(segment->seek_and_read_by_rowid(
full_read_schema, &slots[x], row_ids, column,
iterator_item.storage_read_options, iterator_item.iterator));
Expand Down Expand Up @@ -1111,6 +1134,7 @@ Status RowIdStorageReader::read_doris_format_row(
iterator_item.storage_read_options.stats = &stats;
iterator_item.storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY;
}
set_slot_access_paths(slots[x], full_read_schema, iterator_item.storage_read_options);
RETURN_IF_ERROR(segment->seek_and_read_by_rowid(
full_read_schema, &slots[x], row_ids, column,
iterator_item.storage_read_options, iterator_item.iterator));
Expand Down
1 change: 0 additions & 1 deletion be/src/storage/segment/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,6 @@ Status StructFileColumnIterator::set_access_paths(
}

if (!need_to_read) {
set_reading_flag(ReadingFlag::SKIP_READING);
sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
DLOG(INFO) << "Struct column iterator set sub-column " << name << " to SKIP_READING";
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2807,7 +2807,32 @@ private boolean shouldUseRowStore(Relation rel, List<Slot> lazySlots) {
useRowStore = olapTable.storeRowColumn()
&& CollectionUtils.isEmpty(olapTable.getTableProperty().getCopiedRowStoreColumns());
}
return useRowStore && canUseRowStoreForLazySlots(lazySlots);
return useRowStore && canUseRowStoreForLazySlots(lazySlots)
&& !hasNestedAccessPaths(rel, lazySlots);
}

private boolean hasNestedAccessPaths(Relation rel, List<Slot> lazySlots) {
Set<Integer> lazyColumnUniqueIds = new HashSet<>();
for (Slot lazySlot : lazySlots) {
SlotReference slotReference = (SlotReference) lazySlot;
lazyColumnUniqueIds.add(slotReference.getOriginalColumn().get().getUniqueId());
}
for (Slot outputSlot : rel.getOutput()) {
if (outputSlot instanceof SlotReference) {
SlotReference slotReference = (SlotReference) outputSlot;
if (slotReference.getOriginalColumn().isPresent()
&& lazyColumnUniqueIds.contains(slotReference.getOriginalColumn().get().getUniqueId())
&& hasNestedAccessPaths(slotReference)) {
return true;
}
}
}
return false;
}

private boolean hasNestedAccessPaths(SlotReference slotReference) {
return slotReference.getAllAccessPaths().map(paths -> !paths.isEmpty()).orElse(false)
|| slotReference.getPredicateAccessPaths().map(paths -> !paths.isEmpty()).orElse(false);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ public Optional<MaterializeSource> visitPhysicalFilter(PhysicalFilter<? extends
return Optional.empty();
}
if (filter.getInputSlots().contains(context.slot)) {
return Optional.of(new MaterializeSource((Relation) filter.child(), context.slot));
Relation relation = (Relation) filter.child();
return Optional.of(new MaterializeSource(
relation, findRelationOutputSlot(relation, context.slot).orElse(context.slot)));
} else {
return filter.child().accept(this, context);
}
Expand Down Expand Up @@ -161,7 +163,8 @@ public Optional<MaterializeSource> visitPhysicalOlapScan(PhysicalOlapScan scan,
if (context.requiredMaterializedSlots.contains(context.slot)) {
return Optional.empty();
}
return Optional.of(new MaterializeSource(scan, context.slot));
return Optional.of(
new MaterializeSource(scan, findRelationOutputSlot(scan, context.slot).orElse(context.slot)));
}

@Override
Expand All @@ -173,7 +176,8 @@ public Optional<MaterializeSource> visitPhysicalCatalogRelation(
&& !context.requiredMaterializedSlots.contains(context.slot)) {
// lazy materialize slot must be backed by a base column.
if (context.slot.getOriginalColumn().isPresent()) {
return Optional.of(new MaterializeSource(relation, context.slot));
return Optional.of(new MaterializeSource(
relation, findRelationOutputSlot(relation, context.slot).orElse(context.slot)));
} else {
context.requiredMaterializedSlots.addAll(relation.getOutputSet());
LOG.info("lazy materialize {} failed, because its column is empty", context.slot);
Expand All @@ -190,7 +194,8 @@ public Optional<MaterializeSource> visitPhysicalTVFRelation(
&& !context.requiredMaterializedSlots.contains(context.slot)) {
// lazy materialize slot must be backed by a base column.
if (context.slot.getOriginalColumn().isPresent()) {
return Optional.of(new MaterializeSource(tvfRelation, context.slot));
return Optional.of(new MaterializeSource(
tvfRelation, findRelationOutputSlot(tvfRelation, context.slot).orElse(context.slot)));
} else {
LOG.info("lazy materialize {} failed, because its column is empty", context.slot);
}
Expand Down Expand Up @@ -250,4 +255,11 @@ public Optional<MaterializeSource> visitPhysicalProject(
}
}

private Optional<SlotReference> findRelationOutputSlot(Relation relation, SlotReference contextSlot) {
return relation.getOutput().stream()
.filter(slot -> slot instanceof SlotReference && slot.equals(contextSlot))
.map(slot -> (SlotReference) slot)
.findFirst();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.nereids.processor.post.materialize;

import org.apache.doris.analysis.ColumnAccessPath;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.nereids.trees.expressions.Add;
Expand All @@ -25,9 +26,11 @@
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.qe.ConnectContext;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
Expand All @@ -44,12 +47,7 @@ public class MaterializeProbeVisitorTest {
@Test
public void testOlapScanRejectsRequiredMaterializedSlots() {
SlotReference baseSlot = new SlotReference("a", IntegerType.INSTANCE);
OlapTable table = Mockito.mock(OlapTable.class);
Mockito.when(table.getBaseIndexId()).thenReturn(1L);
Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS);
PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class);
Mockito.when(scan.getSelectedIndexId()).thenReturn(1L);
Mockito.when(scan.getTable()).thenReturn(table);
PhysicalOlapScan scan = mockBaseOlapScan(baseSlot);

Set<Slot> requiredMaterializedSlots = new HashSet<>();
requiredMaterializedSlots.add(baseSlot);
Expand All @@ -60,6 +58,56 @@ public void testOlapScanRejectsRequiredMaterializedSlots() {
Assertions.assertFalse(source.isPresent());
}

@Test
public void testOlapScanUsesRelationSlotWithAccessPaths() {
SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE);
SlotReference relationSlot = contextSlot.withAccessPaths(
ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of());
contextSlot = (SlotReference) contextSlot.withNullable(false);
PhysicalOlapScan scan = mockBaseOlapScan(relationSlot);

MaterializeProbeVisitor.ProbeContext context = new MaterializeProbeVisitor.ProbeContext(contextSlot);
Optional<MaterializeSource> source = new MaterializeProbeVisitor().visitPhysicalOlapScan(scan, context);

Assertions.assertTrue(source.isPresent());
Assertions.assertSame(relationSlot, source.get().baseSlot);
Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths());
}

@Test
@SuppressWarnings("unchecked")
public void testFilterUsingIndexUsesRelationSlotWithAccessPaths() {
ConnectContext oldContext = ConnectContext.get();
ConnectContext context = new ConnectContext();
context.getSessionVariable().topNLazyMaterializationUsingIndex = true;
context.setThreadLocalInfo();
try {
SlotReference contextSlot = new SlotReference("a", IntegerType.INSTANCE);
SlotReference relationSlot = contextSlot.withAccessPaths(
ImmutableList.of(ColumnAccessPath.data(ImmutableList.of("nested"))), ImmutableList.of());
contextSlot = (SlotReference) contextSlot.withNullable(false);
PhysicalOlapScan scan = mockBaseOlapScan(relationSlot);

PhysicalFilter<PhysicalOlapScan> filter = Mockito.mock(PhysicalFilter.class);
Mockito.when(filter.child()).thenReturn(scan);
Mockito.when(filter.getInputSlots()).thenReturn(ImmutableSet.of(contextSlot));

MaterializeProbeVisitor.ProbeContext probeContext = new MaterializeProbeVisitor.ProbeContext(contextSlot);
Optional<MaterializeSource> source =
new MaterializeProbeVisitor().visitPhysicalFilter(filter, probeContext);

Assertions.assertTrue(source.isPresent());
Assertions.assertSame(relationSlot, source.get().baseSlot);
Assertions.assertEquals(relationSlot.getAllAccessPaths(), source.get().baseSlot.getAllAccessPaths());
} finally {
if (oldContext == null) {
ConnectContext.remove();
} else {
oldContext.setThreadLocalInfo();
}
}
}

@Test
@SuppressWarnings("unchecked")
public void testComplexProjectInputSlotsAreRequiredMaterialized() {
Expand Down Expand Up @@ -107,4 +155,15 @@ public void testPushedDownProjectSlotInputsAreRequiredMaterialized() {
Assertions.assertFalse(source.isPresent());
Assertions.assertEquals(ImmutableSet.of(baseSlot, pushedDownSlot), requiredMaterializedSlots);
}

private PhysicalOlapScan mockBaseOlapScan(SlotReference outputSlot) {
OlapTable table = Mockito.mock(OlapTable.class);
Mockito.when(table.getBaseIndexId()).thenReturn(1L);
Mockito.when(table.getKeysType()).thenReturn(KeysType.DUP_KEYS);
PhysicalOlapScan scan = Mockito.mock(PhysicalOlapScan.class);
Mockito.when(scan.getSelectedIndexId()).thenReturn(1L);
Mockito.when(scan.getTable()).thenReturn(table);
Mockito.when(scan.getOutput()).thenReturn(ImmutableList.of(outputSlot));
return scan;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,7 @@
-- !project_under_topn_consumed_slot --
1 hello {"city":null, "zip":10001} [1, 2, 3] {"a":1, "b":2} 1 \N

-- !sparse_struct_map_array_result --
3 9 9 3 false
1 7 7 3 false

Original file line number Diff line number Diff line change
Expand Up @@ -362,4 +362,107 @@ suite("topn_lazy_nested_column_pruning") {
limit 3
"""
sql """ set enable_topn_expr_pullup = true; """

// =============================================
// Test 19: TopN lazy rowid fetch should honor nested access paths
// Sparse multi-path STRUCT/MAP/ARRAY pruning uses a pruned slot type.
// Rowid fetch must pass the slot access paths to storage iterators so
// the iterator child layout matches the pruned result column layout.
// =============================================
sql """ set enable_decimal256 = true; """
sql """ set enable_prune_nested_column = true; """
sql """ DROP TABLE IF EXISTS tlncp_sparse_nested_tbl """
sql """
CREATE TABLE tlncp_sparse_nested_tbl (
pk INT,
deep STRUCT<
nested_str: VARCHAR(64),
inner_s: STRUCT<deep_str: VARCHAR(64), flag: BOOLEAN, deep_char: CHAR(8)>,
deep_map: MAP<VARCHAR(32), STRUCT<leaf: VARCHAR(64), n: INT, char_leaf: CHAR(8)>>
> NULL,
typed STRUCT<
string_leaf: STRING,
decimal_leaf: DECIMAL(76,56),
typed_arr: ARRAY<STRUCT<string_leaf: STRING, decimal_leaf: DECIMAL(76,56)>>,
typed_map: MAP<VARCHAR(32), STRUCT<string_leaf: STRING, decimal_leaf: DECIMAL(76,56)>>
> NULL
) ENGINE = OLAP
UNIQUE KEY(pk)
DISTRIBUTED BY HASH(pk) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"enable_unique_key_merge_on_write" = "true",
"store_row_column" = "true"
)
"""

sql """
INSERT INTO tlncp_sparse_nested_tbl VALUES
(1,
named_struct(
'nested_str', 'unused-one',
'inner_s', named_struct('deep_str', 'DeepOne', 'flag', true, 'deep_char', 'dc1'),
'deep_map', map('b', named_struct('leaf', 'leaf-one', 'n', 11, 'char_leaf', 'cb1'))),
named_struct(
'string_leaf', 'root-one',
'decimal_leaf', cast('10.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)),
'typed_arr', array(named_struct('string_leaf', 'arr-one', 'decimal_leaf',
cast('1.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))),
'typed_map', map('b', named_struct('string_leaf', 'map-one', 'decimal_leaf',
cast('2.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))),
(2,
named_struct(
'nested_str', 'unused-two',
'inner_s', named_struct('deep_str', 'DeepTwo', 'flag', false, 'deep_char', 'dc2'),
'deep_map', map('b', named_struct('leaf', 'leaf-two', 'n', 22, 'char_leaf', 'cb2'))),
named_struct(
'string_leaf', 'root-two',
'decimal_leaf', cast('20.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)),
'typed_arr', array(named_struct('string_leaf', 'arr-two', 'decimal_leaf',
cast('3.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))),
'typed_map', map('b', named_struct('string_leaf', 'map-two', 'decimal_leaf',
cast('4.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))))),
(3,
named_struct(
'nested_str', 'unused-three',
'inner_s', named_struct('deep_str', 'DeepThree', 'flag', true, 'deep_char', 'dc3'),
'deep_map', map('b', named_struct('leaf', 'leaf-three', 'n', 33, 'char_leaf', 'cb3'))),
named_struct(
'string_leaf', 'root-three',
'decimal_leaf', cast('30.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)),
'typed_arr', array(named_struct('string_leaf', 'arr-three', 'decimal_leaf',
cast('5.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56)))),
'typed_map', map('b', named_struct('string_leaf', 'map-three', 'decimal_leaf',
cast('6.00000000000000000000000000000000000000000000000000000000' as DECIMAL(76,56))))))
"""

explain {
sql """
SELECT
pk,
CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len,
LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len,
LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len,
((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null
FROM tlncp_sparse_nested_tbl
WHERE pk <= 3
ORDER BY ABS(pk % 3), pk
LIMIT 2
"""
contains("VMaterializeNode")
contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__tlncp_sparse_nested_tbl]")
}

qt_sparse_struct_map_array_result """
SELECT
pk,
CHAR_LENGTH(element_at(element_at(element_at(typed, 'typed_map'), 'b'), 'string_leaf')) AS char_len,
LENGTH(LOWER(element_at(element_at(deep, 'inner_s'), 'deep_str'))) AS lower_len,
LENGTH(element_at(element_at(element_at(deep, 'deep_map'), 'b'), 'char_leaf')) AS char_storage_len,
((element_at(element_at(element_at(typed, 'typed_arr'), 1), 'decimal_leaf') + 1) IS NULL) AS expr_is_null
FROM tlncp_sparse_nested_tbl
WHERE pk <= 3
ORDER BY ABS(pk % 3), pk
LIMIT 2
"""
}
Loading