|
22 | 22 |
|
23 | 23 | namespace DB |
24 | 24 | { |
| 25 | +namespace |
| 26 | +{ |
| 27 | +constexpr std::array remote_cache_file_type_labels = {"merged", "coldata", "other"}; |
| 28 | +constexpr std::array remote_cache_wait_result_labels = {"hit", "timeout", "failed"}; |
| 29 | +constexpr std::array remote_cache_reject_reason_labels = {"too_many_download"}; |
| 30 | +constexpr std::array remote_cache_download_stage_labels = {"queue_wait", "download"}; |
| 31 | +constexpr auto remote_cache_wait_on_downloading_buckets = ExpBuckets{0.0001, 2, 20}; |
| 32 | +constexpr auto remote_cache_bg_download_stage_buckets = ExpBuckets{0.0001, 2, 20}; |
| 33 | + |
| 34 | +static_assert( |
| 35 | + remote_cache_file_type_labels.size() == static_cast<size_t>(TiFlashMetrics::RemoteCacheFileTypeMetric::Count)); |
| 36 | +static_assert( |
| 37 | + remote_cache_wait_result_labels.size() == static_cast<size_t>(TiFlashMetrics::RemoteCacheWaitResultMetric::Count)); |
| 38 | +static_assert( |
| 39 | + remote_cache_reject_reason_labels.size() |
| 40 | + == static_cast<size_t>(TiFlashMetrics::RemoteCacheRejectReasonMetric::Count)); |
| 41 | +static_assert( |
| 42 | + remote_cache_download_stage_labels.size() |
| 43 | + == static_cast<size_t>(TiFlashMetrics::RemoteCacheDownloadStageMetric::Count)); |
| 44 | +} // namespace |
| 45 | + |
25 | 46 | TiFlashMetrics & TiFlashMetrics::instance() |
26 | 47 | { |
27 | 48 | static TiFlashMetrics inst; // Instantiated on first use. |
@@ -78,6 +99,82 @@ TiFlashMetrics::TiFlashMetrics() |
78 | 99 | .Name("tiflash_storage_s3_store_summary_bytes") |
79 | 100 | .Help("S3 storage summary bytes by store and file type") |
80 | 101 | .Register(*registry); |
| 102 | + |
| 103 | + registered_remote_cache_wait_on_downloading_result_family |
| 104 | + = &prometheus::BuildCounter() |
| 105 | + .Name("tiflash_storage_remote_cache_wait_on_downloading_result") |
| 106 | + .Help("Bounded wait result of remote cache downloading") |
| 107 | + .Register(*registry); |
| 108 | + registered_remote_cache_wait_on_downloading_bytes_family |
| 109 | + = &prometheus::BuildCounter() |
| 110 | + .Name("tiflash_storage_remote_cache_wait_on_downloading_bytes") |
| 111 | + .Help("Bytes covered by remote cache bounded wait") |
| 112 | + .Register(*registry); |
| 113 | + // Timeline for one cache miss with possible follower requests: |
| 114 | + // |
| 115 | + // req A: miss -> create Empty -> enqueue bg task ---- queue_wait ---- download ---- Complete/Failed |
| 116 | + // req B: sees Empty -> -------- wait_on_downloading_seconds --------> hit/timeout/failed |
| 117 | + // req C: sees Empty -> --- wait_on_downloading_seconds ---> hit/timeout/failed |
| 118 | + // |
| 119 | + // `tiflash_storage_remote_cache_bg_download_stage_seconds` |
| 120 | + // - downloader-task view |
| 121 | + // - measures how long the background download itself spent in `queue_wait` and `download` |
| 122 | + registered_remote_cache_bg_download_stage_seconds_family |
| 123 | + = &prometheus::BuildHistogram() |
| 124 | + .Name("tiflash_storage_remote_cache_bg_download_stage_seconds") |
| 125 | + .Help("Remote cache background download stage duration") |
| 126 | + .Register(*registry); |
| 127 | + // `tiflash_storage_remote_cache_wait_on_downloading_seconds` |
| 128 | + // - follower-request view |
| 129 | + // - measures how long a request waited on an existing `Empty` segment before ending as hit/timeout/failed |
| 130 | + registered_remote_cache_wait_on_downloading_seconds_family |
| 131 | + = &prometheus::BuildHistogram() |
| 132 | + .Name("tiflash_storage_remote_cache_wait_on_downloading_seconds") |
| 133 | + .Help("Bounded wait duration of remote cache downloading") |
| 134 | + .Register(*registry); |
| 135 | + registered_remote_cache_reject_family = &prometheus::BuildCounter() |
| 136 | + .Name("tiflash_storage_remote_cache_reject") |
| 137 | + .Help("Remote cache admission rejection by reason and file type") |
| 138 | + .Register(*registry); |
| 139 | + |
| 140 | + for (size_t file_type_idx = 0; file_type_idx < remote_cache_file_type_labels.size(); ++file_type_idx) |
| 141 | + { |
| 142 | + for (size_t result_idx = 0; result_idx < remote_cache_wait_result_labels.size(); ++result_idx) |
| 143 | + { |
| 144 | + auto labels = prometheus::Labels{ |
| 145 | + {"result", std::string(remote_cache_wait_result_labels[result_idx])}, |
| 146 | + {"file_type", std::string(remote_cache_file_type_labels[file_type_idx])}, |
| 147 | + }; |
| 148 | + remote_cache_wait_on_downloading_result_metrics[file_type_idx][result_idx] |
| 149 | + = &registered_remote_cache_wait_on_downloading_result_family->Add(labels); |
| 150 | + remote_cache_wait_on_downloading_bytes_metrics[file_type_idx][result_idx] |
| 151 | + = &registered_remote_cache_wait_on_downloading_bytes_family->Add(labels); |
| 152 | + prometheus::Histogram::BucketBoundaries wait_buckets = ExpBuckets{ |
| 153 | + remote_cache_wait_on_downloading_buckets.start, |
| 154 | + remote_cache_wait_on_downloading_buckets.base, |
| 155 | + remote_cache_wait_on_downloading_buckets.size}; |
| 156 | + remote_cache_wait_on_downloading_seconds_metrics[file_type_idx][result_idx] |
| 157 | + = &registered_remote_cache_wait_on_downloading_seconds_family->Add(labels, wait_buckets); |
| 158 | + } |
| 159 | + for (size_t reason_idx = 0; reason_idx < remote_cache_reject_reason_labels.size(); ++reason_idx) |
| 160 | + { |
| 161 | + remote_cache_reject_metrics[file_type_idx][reason_idx] = &registered_remote_cache_reject_family->Add( |
| 162 | + {{"reason", std::string(remote_cache_reject_reason_labels[reason_idx])}, |
| 163 | + {"file_type", std::string(remote_cache_file_type_labels[file_type_idx])}}); |
| 164 | + } |
| 165 | + for (size_t stage_idx = 0; stage_idx < remote_cache_download_stage_labels.size(); ++stage_idx) |
| 166 | + { |
| 167 | + prometheus::Histogram::BucketBoundaries buckets = ExpBuckets{ |
| 168 | + remote_cache_bg_download_stage_buckets.start, |
| 169 | + remote_cache_bg_download_stage_buckets.base, |
| 170 | + remote_cache_bg_download_stage_buckets.size}; |
| 171 | + remote_cache_bg_download_stage_seconds_metrics[file_type_idx][stage_idx] |
| 172 | + = &registered_remote_cache_bg_download_stage_seconds_family->Add( |
| 173 | + {{"stage", std::string(remote_cache_download_stage_labels[stage_idx])}, |
| 174 | + {"file_type", std::string(remote_cache_file_type_labels[file_type_idx])}}, |
| 175 | + buckets); |
| 176 | + } |
| 177 | + } |
81 | 178 | } |
82 | 179 |
|
83 | 180 | void TiFlashMetrics::addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru) |
@@ -287,4 +384,41 @@ void TiFlashMetrics::setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_by |
287 | 384 | it->second.data_file_bytes->Set(data_file_bytes); |
288 | 385 | it->second.dt_file_bytes->Set(dt_file_bytes); |
289 | 386 | } |
| 387 | + |
| 388 | +prometheus::Counter & TiFlashMetrics::getRemoteCacheWaitOnDownloadingResultCounter( |
| 389 | + TiFlashMetrics::RemoteCacheFileTypeMetric file_type, |
| 390 | + TiFlashMetrics::RemoteCacheWaitResultMetric result) |
| 391 | +{ |
| 392 | + return *remote_cache_wait_on_downloading_result_metrics[static_cast<size_t>(file_type)] |
| 393 | + [static_cast<size_t>(result)]; |
| 394 | +} |
| 395 | + |
| 396 | +prometheus::Counter & TiFlashMetrics::getRemoteCacheWaitOnDownloadingBytesCounter( |
| 397 | + TiFlashMetrics::RemoteCacheFileTypeMetric file_type, |
| 398 | + TiFlashMetrics::RemoteCacheWaitResultMetric result) |
| 399 | +{ |
| 400 | + return *remote_cache_wait_on_downloading_bytes_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(result)]; |
| 401 | +} |
| 402 | + |
| 403 | +prometheus::Histogram & TiFlashMetrics::getRemoteCacheWaitOnDownloadingSecondsHistogram( |
| 404 | + TiFlashMetrics::RemoteCacheFileTypeMetric file_type, |
| 405 | + TiFlashMetrics::RemoteCacheWaitResultMetric result) |
| 406 | +{ |
| 407 | + return *remote_cache_wait_on_downloading_seconds_metrics[static_cast<size_t>(file_type)] |
| 408 | + [static_cast<size_t>(result)]; |
| 409 | +} |
| 410 | + |
| 411 | +prometheus::Histogram & TiFlashMetrics::getRemoteCacheBgDownloadStageSecondsHistogram( |
| 412 | + TiFlashMetrics::RemoteCacheFileTypeMetric file_type, |
| 413 | + TiFlashMetrics::RemoteCacheDownloadStageMetric stage) |
| 414 | +{ |
| 415 | + return *remote_cache_bg_download_stage_seconds_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(stage)]; |
| 416 | +} |
| 417 | + |
| 418 | +prometheus::Counter & TiFlashMetrics::getRemoteCacheRejectCounter( |
| 419 | + TiFlashMetrics::RemoteCacheFileTypeMetric file_type, |
| 420 | + TiFlashMetrics::RemoteCacheRejectReasonMetric reason) |
| 421 | +{ |
| 422 | + return *remote_cache_reject_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(reason)]; |
| 423 | +} |
290 | 424 | } // namespace DB |
0 commit comments