From 983fde1541196a98dee562409bae3aadd93c9dda Mon Sep 17 00:00:00 2001 From: jinhelin Date: Tue, 18 Mar 2025 14:48:26 +0800 Subject: [PATCH] ci --- dbms/src/Debug/TiFlashTestEnv.cpp | 6 +- dbms/src/Interpreters/Settings.h | 2 +- .../DeltaMerge/BitmapFilter/BitmapFilter.cpp | 5 +- .../DeltaMerge/BitmapFilter/BitmapFilter.h | 10 + dbms/src/Storages/DeltaMerge/DMContext.cpp | 5 + dbms/src/Storages/DeltaMerge/DMContext.h | 1 + .../Storages/DeltaMerge/Index/MinMaxIndex.cpp | 4 + .../Storages/DeltaMerge/Index/MinMaxIndex.h | 2 + dbms/src/Storages/DeltaMerge/Segment.cpp | 217 +++++++++++++++++- dbms/src/Storages/DeltaMerge/Segment.h | 10 + .../VersionChain/DeleteMarkFilter.cpp | 22 +- .../VersionChain/MVCCBitmapFilter.cpp | 4 + .../DeltaMerge/VersionChain/VersionChain.h | 3 + 13 files changed, 280 insertions(+), 11 deletions(-) diff --git a/dbms/src/Debug/TiFlashTestEnv.cpp b/dbms/src/Debug/TiFlashTestEnv.cpp index 0e23682a8bb..342e646f9c5 100644 --- a/dbms/src/Debug/TiFlashTestEnv.cpp +++ b/dbms/src/Debug/TiFlashTestEnv.cpp @@ -100,7 +100,11 @@ void TiFlashTestEnv::initializeGlobalContext( PageStorageRunMode ps_run_mode, uint64_t bg_thread_count) { - addGlobalContext(DB::Settings(), testdata_path, ps_run_mode, bg_thread_count); + // In unit-tests, compare the MVCC bitmap between version chain and delta index, if version chain is enabled. + DB::Settings settings; + if (settings.enable_version_chain == static_cast(DM::VersionChainMode::Enabled)) + settings.set("enable_version_chain", std::to_string(static_cast(DM::VersionChainMode::EnabledForTest))); + addGlobalContext(settings, testdata_path, ps_run_mode, bg_thread_count); } void TiFlashTestEnv::addGlobalContext( diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index ac32f7822fa..ee7cf9dc15a 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -184,7 +184,7 @@ struct Settings M(SettingUInt64, dt_merged_file_max_size, 16 * 1024 * 1024, "Small files are merged into one or more files not larger than dt_merged_file_max_size") \ M(SettingDouble, dt_page_gc_threshold, 0.5, "Max valid rate of deciding to do a GC in PageStorage") \ M(SettingDouble, dt_page_gc_threshold_raft_data, 0.05, "Max valid rate of deciding to do a GC for BlobFile storing PageData in PageStorage") \ - M(SettingInt64, enable_version_chain, 0, "Enable version chain or not: 0 - disable, 1 - enabled. " \ + M(SettingInt64, enable_version_chain, 1, "Enable version chain or not: 0 - disable, 1 - enabled, 2 - enabled_for_test. " \ "More details are in the comments of `enum class VersionChainMode`." \ "Modifying this configuration requires a restart to reset the in-memory state.") \ /* DeltaTree engine testing settings */\ diff --git a/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp b/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp index baef3d37b9c..bd71d541a0c 100644 --- a/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp @@ -19,7 +19,10 @@ namespace DB::DM { BitmapFilter::BitmapFilter(UInt32 size_, bool default_value) - : filter(size_, static_cast(default_value)) + : version_filter_by_delta(size_, static_cast(0)) + , version_filter_by_read_ts(size_, static_cast(0)) + , version_filter_by_stable(size_, static_cast(0)) + , filter(size_, static_cast(default_value)) , all_match(default_value) {} diff --git a/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h b/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h index ee71fd8860d..f504615a91a 100644 --- a/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h +++ b/dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h @@ -64,6 +64,16 @@ class BitmapFilter friend class BitmapFilterView; + // Debug helpers + void saveRowKeyFilterForDebug() { rowkey_filter.assign(filter); } + void saveVersionFilterForDebug() { version_filter.assign(filter); } + IColumn::Filter rowkey_filter; + IColumn::Filter version_filter; + + IColumn::Filter version_filter_by_delta; + IColumn::Filter version_filter_by_read_ts; + IColumn::Filter version_filter_by_stable; + private: IColumn::Filter filter; bool all_match; diff --git a/dbms/src/Storages/DeltaMerge/DMContext.cpp b/dbms/src/Storages/DeltaMerge/DMContext.cpp index 78c15e42c92..ba17e9cf844 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.cpp +++ b/dbms/src/Storages/DeltaMerge/DMContext.cpp @@ -76,4 +76,9 @@ bool DMContext::isVersionChainEnabled() const { return global_context.getSettingsRef().enable_version_chain != static_cast(VersionChainMode::Disabled); } + +bool DMContext::isVersionChainForTestEnabled() const +{ + return global_context.getSettingsRef().enable_version_chain == static_cast(VersionChainMode::EnabledForTest); +} } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/DMContext.h b/dbms/src/Storages/DeltaMerge/DMContext.h index f0031bf2f4c..d97c8450710 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.h +++ b/dbms/src/Storages/DeltaMerge/DMContext.h @@ -168,6 +168,7 @@ struct DMContext : private boost::noncopyable DM::DMConfigurationOpt createChecksumConfig() const { return DMChecksumConfig::fromDBContext(global_context); } bool isVersionChainEnabled() const; + bool isVersionChainForTestEnabled() const; private: DMContext( diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 473decaaec3..fc50cb4fec4 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -228,6 +228,10 @@ std::pair MinMaxIndex::getUInt64MinMax(size_t pack_index) const { return {minmaxes->get64(pack_index * 2), minmaxes->get64(pack_index * 2 + 1)}; } +std::pair MinMaxIndex::getUInt64MinMax2(size_t pack_index) const +{ + return {minmaxes->getUInt(pack_index * 2), minmaxes->getUInt(pack_index * 2 + 1)}; +} template RSResults MinMaxIndex::checkNullableInImpl( diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 21f5dcdccbb..639d6672e7e 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -66,6 +66,8 @@ class MinMaxIndex std::pair getStringMinMax(size_t pack_index) const; std::pair getUInt64MinMax(size_t pack_index) const; + std::pair getUInt64MinMax2(size_t pack_index) const; + template RSResults checkCmp(size_t start_pack, size_t pack_count, const Field & value, const DataTypePtr & type); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 8ba58192771..e1a6fed6612 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3082,13 +3082,35 @@ BitmapFilterPtr Segment::buildMVCCBitmapFilter( if (enable_version_chain) { consumeBuildMVCCReadBytesRU(dm_context, segment_snap, pack_filter_results, start_ts); - return ::DB::DM::buildMVCCBitmapFilter( + auto bitmap_filter = ::DB::DM::buildMVCCBitmapFilter( dm_context, *segment_snap, read_ranges, pack_filter_results, start_ts, *version_chain); + if (dm_context.isVersionChainForTestEnabled()) + { + if (is_common_handle) + checkMVCCBitmap( + dm_context, + segment_snap, + read_ranges, + pack_filter_results, + start_ts, + expected_block_size, + *bitmap_filter); + else + checkMVCCBitmap( + dm_context, + segment_snap, + read_ranges, + pack_filter_results, + start_ts, + expected_block_size, + *bitmap_filter); + } + return bitmap_filter; } } @@ -3307,6 +3329,7 @@ BitmapFilterPtr Segment::buildMVCCBitmapFilterStableOnly( "buildMVCCBitmapFilterStableOnly not have use packs, total_rows={}, cost={:.3f}ms", segment_snap->stable->getDMFilesRows(), elapse_ms); + bitmap_filter->runOptimize(); return bitmap_filter; } @@ -3748,6 +3771,198 @@ BitmapFilterPtr Segment::buildBitmapFilter( return mvcc_bitmap_filter; } +// `checkMVCCBitmap` is only used for testing. +// It will check both bitmaps that generating by version chain and delta index. +// If the bitmaps are not equal, it will throw an exception. +template +void Segment::checkMVCCBitmap( + const DMContext & dm_context, + const SegmentSnapshotPtr & segment_snap, + const RowKeyRanges & read_ranges, + const DMFilePackFilterResults & pack_filter_results, + UInt64 start_ts, + size_t expected_block_size, + const BitmapFilter & bitmap_filter) +{ + auto new_bitmap_filter = buildMVCCBitmapFilter( + dm_context, + segment_snap, + read_ranges, + pack_filter_results, + start_ts, + expected_block_size, + /*enable_version_chain*/ false); + if (*new_bitmap_filter == bitmap_filter) + { + LOG_DEBUG( + segment_snap->log, + "{} success, snapshot={}, dt_bitmap_filter={}/{}, bitmap_filter={}/{}", + __FUNCTION__, + segment_snap->detailInfo(), + new_bitmap_filter->count(), + new_bitmap_filter->size(), + bitmap_filter.count(), + bitmap_filter.size()); + return; + } + + auto print_minmax_ver = [&]() { + const auto & global_context = dm_context.global_context; + const auto & dmfile = segment_snap->stable->getDMFiles().front(); + auto [type, minmax_index] = DMFilePackFilter::loadIndex( + *dmfile, + global_context.getFileProvider(), + global_context.getMinMaxIndexCache(), + /*set_cache_if_miss*/ true, + MutSup::version_col_id, + global_context.getReadLimiter(), + dm_context.scan_context); + for (size_t i = 0; i < dmfile->getPacks(); ++i) + LOG_ERROR( + segment_snap->log, + "pack={} minmax={} minmax2={}", + i, + minmax_index->getUInt64MinMax(i), + minmax_index->getUInt64MinMax2(i)); + }; + + static std::mutex check_mtx; + static bool check_failed = false; + // To Avoid concurrent check that logs are mixed together. + // Since this function is only used for test, we don't need to consider performance. + std::lock_guard lock(check_mtx); + if (check_failed && std::is_same_v) + { + // Aoid too many logs. + LOG_ERROR(segment_snap->log, "{} failed, skip", __FUNCTION__); + return; + } + print_minmax_ver(); + check_failed = true; + + if (new_bitmap_filter->size() != bitmap_filter.size()) + { + LOG_ERROR( + segment_snap->log, + "{} failed, snapshot={}, dt_bitmap_filter_size={}, bitmap_filter_size={}", + __FUNCTION__, + segment_snap->detailInfo(), + new_bitmap_filter->size(), + bitmap_filter.size()); + throw Exception("Bitmap size not equal", ErrorCodes::LOGICAL_ERROR); + } + + if (new_bitmap_filter->isAllMatch() != bitmap_filter.isAllMatch()) + { + LOG_ERROR( + segment_snap->log, + "{} failed, snapshot={}, dt_bitmap_filter_all_match={}, bitmap_filter_all_match={}", + __FUNCTION__, + segment_snap->detailInfo(), + new_bitmap_filter->isAllMatch(), + bitmap_filter.isAllMatch()); + throw Exception("Bitmap all_match not euqal", ErrorCodes::LOGICAL_ERROR); + } + + LOG_ERROR( + segment_snap->log, + "{} failed, snapshot={}, dt_bitmap_filter: count/size={}/{}, bitmap_filter: count/size={}/{} " + "read_ranges={}, segment_range={}", + __FUNCTION__, + segment_snap->detailInfo(), + new_bitmap_filter->count(), + new_bitmap_filter->size(), + bitmap_filter.count(), + bitmap_filter.size(), + read_ranges, + rowkey_range); + + const auto cds = ColumnDefines{ + getExtraHandleColumnDefine(dm_context.is_common_handle), + getVersionColumnDefine(), + getTagColumnDefine(), + }; + auto stream = getConcatSkippableBlockInputStream( + segment_snap, + dm_context, + cds, + /*read_ranges*/ {}, + /*pack_filter_results*/ {}, + start_ts, + expected_block_size, + ReadTag::Query); + Blocks blocks; + while (true) + { + auto block = stream->read(); + if (!block) + break; + blocks.emplace_back(std::move(block)); + } + auto block = vstackBlocks(std::move(blocks)); + RUNTIME_CHECK_MSG( + block.rows() == bitmap_filter.size(), + "Block rows not equal: block_rows={}, bitmap_size={}", + block.rows(), + bitmap_filter.size()); + auto handle_col = block.getByName(MutSup::extra_handle_column_name).column; + auto version_col = block.getByName(MutSup::version_column_name).column; + auto delmark_col = block.getByName(MutSup::delmark_column_name).column; + const auto handles = ColumnView(*handle_col); + const auto versions = ColumnView(*version_col); + const auto delmarks = ColumnView(*delmark_col); + for (UInt32 i = 0; i < bitmap_filter.size(); ++i) + { + RowKeyValue rowkey; + if constexpr (std::is_same_v) + rowkey = RowKeyValue(dm_context.is_common_handle, std::make_shared(String(handles[i])), 0); + else + rowkey = RowKeyValue(dm_context.is_common_handle, nullptr, handles[i]); + + if (new_bitmap_filter->get(i) != bitmap_filter.get(i)) + { + LOG_ERROR( + segment_snap->log, + "{} {} dt={} vc={} handle={} version={} delmark={} version_filter={} " + "rowkey_filter={} ver_by_delta={} ver_by_stable={} ver_by_rs={}", + __FUNCTION__, + i, + new_bitmap_filter->get(i), + bitmap_filter.get(i), + rowkey.toDebugString(), + versions[i], + delmarks[i], + bitmap_filter.version_filter[i], + bitmap_filter.rowkey_filter[i], + bitmap_filter.version_filter_by_delta[i], + bitmap_filter.version_filter_by_stable[i], + bitmap_filter.version_filter_by_read_ts[i]); + continue; + } + + if constexpr (std::is_same_v) + { + LOG_INFO( + segment_snap->log, + "{} {} dt={} vc={} handle={} version={} delmark={} version_filter={} " + "rowkey_filter={} ver_by_delta={} ver_by_stable={} ver_by_rs={}", + __FUNCTION__, + i, + new_bitmap_filter->get(i), + bitmap_filter.get(i), + rowkey.toDebugString(), + versions[i], + delmarks[i], + bitmap_filter.version_filter[i], + bitmap_filter.rowkey_filter[i], + bitmap_filter.version_filter_by_delta[i], + bitmap_filter.version_filter_by_stable[i], + bitmap_filter.version_filter_by_read_ts[i]); + } + } + throw Exception("Bitmap not equal", ErrorCodes::LOGICAL_ERROR); +} + template BlockInputStreamPtr Segment::getBitmapFilterInputStream( const DMContext & dm_context, diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index d42e8f1b78c..8eb2c4080dc 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -839,6 +839,16 @@ class Segment const ColumnDefines & read_columns, const StableValueSpacePtr & stable); + template + void checkMVCCBitmap( + const DMContext & dm_context, + const SegmentSnapshotPtr & segment_snap, + const RowKeyRanges & read_ranges, + const DMFilePackFilterResults & pack_filter_results, + UInt64 start_ts, + size_t expected_block_size, + const BitmapFilter & bitmap_filter); + #ifndef DBMS_PUBLIC_GTEST private: #else diff --git a/dbms/src/Storages/DeltaMerge/VersionChain/DeleteMarkFilter.cpp b/dbms/src/Storages/DeltaMerge/VersionChain/DeleteMarkFilter.cpp index b5f025be9fc..57141be0635 100644 --- a/dbms/src/Storages/DeltaMerge/VersionChain/DeleteMarkFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/VersionChain/DeleteMarkFilter.cpp @@ -64,7 +64,8 @@ UInt32 buildDeleteMarkFilterDMFile( const UInt32 start_pack_id, const RSResults & rs_results, const ssize_t start_row_id, - BitmapFilter & filter) + BitmapFilter & filter, + const LoggerPtr & log) { auto need_read_packs = std::make_shared(); std::unordered_map start_row_id_of_need_read_packs; // pack_id -> start_row_id @@ -94,6 +95,8 @@ UInt32 buildDeleteMarkFilterDMFile( } } + LOG_DEBUG(log, "need_read_packs: {}", *need_read_packs); + if (need_read_packs->empty()) return 0; @@ -126,7 +129,8 @@ UInt32 buildDeleteMarkFilterColumnFileBig( const DMContext & dm_context, const ColumnFileBig & cf_big, const ssize_t start_row_id, - BitmapFilter & filter) + BitmapFilter & filter, + const LoggerPtr & log) { auto [valid_handle_res, valid_start_pack_id] = getClippedRSResultsByRange(dm_context, cf_big.getFile(), cf_big.getRange()); @@ -138,14 +142,16 @@ UInt32 buildDeleteMarkFilterColumnFileBig( valid_start_pack_id, valid_handle_res, start_row_id, - filter); + filter, + log); } UInt32 buildDeleteMarkFilterStable( const DMContext & dm_context, const StableValueSpace::Snapshot & stable, const DMFilePackFilterResultPtr & stable_filter_res, - BitmapFilter & filter) + BitmapFilter & filter, + const LoggerPtr & log) { const auto & dmfiles = stable.getDMFiles(); RUNTIME_CHECK(dmfiles.size() == 1, dmfiles.size()); @@ -155,7 +161,8 @@ UInt32 buildDeleteMarkFilterStable( /*start_pack_id*/ 0, stable_filter_res->getPackRes(), /*start_row_id*/ 0, - filter); + filter, + log); } UInt32 buildDeleteMarkFilter( @@ -171,7 +178,7 @@ UInt32 buildDeleteMarkFilter( const auto cfs = snapshot.delta->getColumnFiles(); const auto & data_provider = snapshot.delta->getDataProvider(); - auto filtered_out_rows = buildDeleteMarkFilterStable(dm_context, stable, stable_filter_res, filter); + auto filtered_out_rows = buildDeleteMarkFilterStable(dm_context, stable, stable_filter_res, filter, snapshot.log); auto read_rows = stable_rows; for (const auto & cf : cfs) { @@ -191,7 +198,8 @@ UInt32 buildDeleteMarkFilter( if (const auto * cf_big = cf->tryToBigFile(); cf_big) { - filtered_out_rows += buildDeleteMarkFilterColumnFileBig(dm_context, *cf_big, start_row_id, filter); + filtered_out_rows + += buildDeleteMarkFilterColumnFileBig(dm_context, *cf_big, start_row_id, filter, snapshot.log); continue; } RUNTIME_CHECK_MSG(false, "{}: unknow ColumnFile type", cf->toString()); diff --git a/dbms/src/Storages/DeltaMerge/VersionChain/MVCCBitmapFilter.cpp b/dbms/src/Storages/DeltaMerge/VersionChain/MVCCBitmapFilter.cpp index b2f51966d5e..7cf08c2a5d9 100644 --- a/dbms/src/Storages/DeltaMerge/VersionChain/MVCCBitmapFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/VersionChain/MVCCBitmapFilter.cpp @@ -51,10 +51,14 @@ BitmapFilterPtr buildMVCCBitmapFilter( stable_filter_res, *bitmap_filter); const auto build_version_filter_ms = sw.elapsedMillisecondsFromLastTime(); + if (unlikely(dm_context.isVersionChainForTestEnabled())) + bitmap_filter->saveVersionFilterForDebug(); const auto rowkey_filtered_out_rows = buildRowKeyFilter(dm_context, snapshot, read_ranges, stable_filter_res, *bitmap_filter); const auto build_rowkey_filter_ms = sw.elapsedMillisecondsFromLastTime(); + if (unlikely(dm_context.isVersionChainForTestEnabled())) + bitmap_filter->saveRowKeyFilterForDebug(); const auto delete_filtered_out_rows = buildDeleteMarkFilter(dm_context, snapshot, stable_filter_res, *bitmap_filter); diff --git a/dbms/src/Storages/DeltaMerge/VersionChain/VersionChain.h b/dbms/src/Storages/DeltaMerge/VersionChain/VersionChain.h index 56362d89a9e..c6d9daedb2a 100644 --- a/dbms/src/Storages/DeltaMerge/VersionChain/VersionChain.h +++ b/dbms/src/Storages/DeltaMerge/VersionChain/VersionChain.h @@ -176,6 +176,9 @@ enum class VersionChainMode : Int64 Disabled = 0, // Generating MVCC bitmap by using version chain. Enabled = 1, + // Generating MVCC bitmap by using version chain and delta index, + // then perform comparative verification. Only for test. + EnabledForTest = 2, }; } // namespace DB::DM