Skip to content

Commit cd0aab0

Browse files
authored
Two-stage ANALYZE with adaptive count-min sketch params (#30206)
1 parent 2924eb2 commit cd0aab0

25 files changed: +778 -475 lines changed

ydb/core/protos/statistics.proto

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,3 +198,8 @@ message TEvAggregateStatisticsResponse {
198198
}
199199
repeated TFailedTablet FailedTablets = 3;
200200
}
201+
202+
message TSimpleColumnStatistics {
203+
optional uint64 Count = 1;
204+
optional uint64 CountDistinct = 2;
205+
};

ydb/core/statistics/aggregator/aggregator_impl.cpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -608,25 +608,23 @@ void TStatisticsAggregator::SaveStatisticsToTable() {
608608

609609
PendingSaveStatistics = false;
610610

611-
std::vector<ui32> columnTags;
612-
std::vector<TString> data;
613-
auto count = CountMinSketches.size();
614-
if (count == 0) {
615-
Send(SelfId(), new TEvStatistics::TEvSaveStatisticsQueryResponse(
616-
Ydb::StatusIds::SUCCESS, {}, TraversalPathId));
617-
return;
618-
}
619-
columnTags.reserve(count);
620-
data.reserve(count);
611+
std::vector<TStatisticsItem> items = std::exchange(StatisticsToSave, {});
621612

622613
for (auto& [tag, sketch] : CountMinSketches) {
623-
columnTags.push_back(tag);
614+
if (!ColumnNames.contains(tag)) {
615+
continue;
616+
}
624617
TString strSketch(sketch->AsStringBuf());
625-
data.push_back(strSketch);
618+
items.emplace_back(tag, EStatType::COUNT_MIN_SKETCH, std::move(strSketch));
619+
}
620+
621+
if (items.empty()) {
622+
Send(SelfId(), new TEvStatistics::TEvSaveStatisticsQueryResponse(
623+
Ydb::StatusIds::SUCCESS, {}, TraversalPathId));
624+
return;
626625
}
627626

628-
Register(CreateSaveStatisticsQuery(SelfId(), Database,
629-
TraversalPathId, EStatType::COUNT_MIN_SKETCH, std::move(columnTags), std::move(data)));
627+
Register(CreateSaveStatisticsQuery(SelfId(), Database, TraversalPathId, std::move(items)));
630628
}
631629

632630
void TStatisticsAggregator::DeleteStatisticsFromTable() {
@@ -677,6 +675,8 @@ void TStatisticsAggregator::ScheduleNextAnalyze(NIceDb::TNiceDb& db, const TActo
677675
UpdateForceTraversalTableStatus(
678676
TForceTraversalTable::EStatus::AnalyzeStarted, operation.OperationId, operationTable, db);
679677

678+
// operation.Types field is not used, TAnalyzeActor will determine suitable
679+
// statistic types itself.
680680
ctx.RegisterWithSameMailbox(new TAnalyzeActor(
681681
SelfId(), operation.OperationId, operation.DatabaseName, operationTable.PathId,
682682
operationTable.ColumnTags));

ydb/core/statistics/aggregator/aggregator_impl.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,7 @@ class TStatisticsAggregator : public TActor<TStatisticsAggregator>, public NTabl
284284

285285
bool IsStatisticsTableCreated = false;
286286
bool PendingSaveStatistics = false;
287+
std::vector<TStatisticsItem> StatisticsToSave;
287288
bool PendingDeleteStatistics = false;
288289

289290
std::vector<NScheme::TTypeInfo> KeyColumnTypes;

0 commit comments

Comments (0)