From a102d532274d05fe876d96dd7f5566f4e4a1fd12 Mon Sep 17 00:00:00 2001 From: maladetska Date: Sat, 7 Feb 2026 15:57:16 +0300 Subject: [PATCH 01/17] init commit --- cmake/external_libs.cmake | 1 + include/ydb-cpp-sdk/client/metrics/metrics.h | 81 ++++++++++ .../ydb-cpp-sdk/open_telemetry/extension.h | 34 ++++ include/ydb-cpp-sdk/open_telemetry/otel.h | 33 ++++ src/CMakeLists.txt | 3 +- src/client/CMakeLists.txt | 1 + .../grpc_connections/grpc_connections.h | 12 ++ src/client/metrics/CMakeLists.txt | 14 ++ src/client/metrics/metrics.cpp | 32 ++++ src/client/query/CMakeLists.txt | 1 + src/client/query/client.cpp | 55 ++++++- src/client/query/impl/CMakeLists.txt | 1 + src/client/query/impl/query_spans.cpp | 56 +++++++ src/client/query/impl/query_spans.h | 23 +++ src/open_telemetry/CMakeLists.txt | 17 ++ src/open_telemetry/otel.cpp | 153 ++++++++++++++++++ 16 files changed, 509 insertions(+), 8 deletions(-) create mode 100644 include/ydb-cpp-sdk/client/metrics/metrics.h create mode 100644 include/ydb-cpp-sdk/open_telemetry/extension.h create mode 100644 include/ydb-cpp-sdk/open_telemetry/otel.h create mode 100644 src/client/metrics/CMakeLists.txt create mode 100644 src/client/metrics/metrics.cpp create mode 100644 src/client/query/impl/query_spans.cpp create mode 100644 src/client/query/impl/query_spans.h create mode 100644 src/open_telemetry/CMakeLists.txt create mode 100644 src/open_telemetry/otel.cpp diff --git a/cmake/external_libs.cmake b/cmake/external_libs.cmake index dc46fdb1d5e..be645f332ae 100644 --- a/cmake/external_libs.cmake +++ b/cmake/external_libs.cmake @@ -13,6 +13,7 @@ find_package(base64 REQUIRED) find_package(Brotli 1.1.0 REQUIRED) find_package(jwt-cpp REQUIRED) find_package(double-conversion REQUIRED) +find_package(opentelemetry-cpp REQUIRED) # RapidJSON if (YDB_SDK_USE_RAPID_JSON) diff --git a/include/ydb-cpp-sdk/client/metrics/metrics.h b/include/ydb-cpp-sdk/client/metrics/metrics.h new file mode 100644 index 00000000000..d3dc5eb2c19 --- /dev/null 
+++ b/include/ydb-cpp-sdk/client/metrics/metrics.h @@ -0,0 +1,81 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace NYdb::inline V3::NMetrics { + +using TLabels = std::map; + +class ICounter { +public: + virtual ~ICounter() = default; + virtual void Inc() = 0; +}; + +class IGauge { +public: + virtual ~IGauge() = default; + virtual void Add(double delta) = 0; + virtual void Set(double value) = 0; +}; + +class IHistogram { +public: + virtual ~IHistogram() = default; + virtual void Record(double value) = 0; +}; + +class IMetricRegistry { +public: + virtual ~IMetricRegistry() = default; + + virtual std::shared_ptr Counter(const std::string& name, const TLabels& labels = {}) = 0; + virtual std::shared_ptr Gauge(const std::string& name, const TLabels& labels = {}) = 0; + virtual std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) = 0; +}; + +enum class ESpanKind { + INTERNAL, + SERVER, + CLIENT, + PRODUCER, + CONSUMER +}; + +class ISpan { +public: + virtual ~ISpan() = default; + virtual void End() = 0; + virtual void SetAttribute(const std::string& key, const std::string& value) = 0; + virtual void SetAttribute(const std::string& key, int64_t value) = 0; +}; + +class ITracer { +public: + virtual ~ITracer() = default; + virtual std::shared_ptr StartSpan(const std::string& name, ESpanKind kind = ESpanKind::INTERNAL) = 0; +}; + +class ITraceProvider { +public: + virtual ~ITraceProvider() = default; + virtual std::shared_ptr GetTracer(const std::string& name) = 0; +}; + +class IMetricsApi : public IExtensionApi { +public: + static IMetricsApi* Create(TDriver driver); +public: + virtual ~IMetricsApi() = default; + virtual void SetMetricRegistry(std::shared_ptr registry) = 0; + virtual void SetTraceProvider(std::shared_ptr provider) = 0; + virtual std::shared_ptr GetMetricRegistry() const = 0; + virtual std::shared_ptr GetTraceProvider() const = 0; +}; + +} // namespace NYdb::NMetrics diff 
--git a/include/ydb-cpp-sdk/open_telemetry/extension.h b/include/ydb-cpp-sdk/open_telemetry/extension.h new file mode 100644 index 00000000000..b5683d6d41b --- /dev/null +++ b/include/ydb-cpp-sdk/open_telemetry/extension.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include + +namespace NYdb::inline V3::NMetrics { + +class TOtelExtension : public IExtension { +public: + using IApi = IMetricsApi; + + struct TParams { + opentelemetry::nostd::shared_ptr MeterProvider; + opentelemetry::nostd::shared_ptr TracerProvider; + }; + + TOtelExtension(const TParams& params, IApi* api) { + if (params.MeterProvider) { + api->SetMetricRegistry(std::make_shared(params.MeterProvider)); + } + if (params.TracerProvider) { + api->SetTraceProvider(std::make_shared(params.TracerProvider)); + } + } +}; + +inline void AddOpenTelemetry(TDriver& driver + , opentelemetry::nostd::shared_ptr meterProvider + , opentelemetry::nostd::shared_ptr tracerProvider +) { + driver.AddExtension({meterProvider, tracerProvider}); +} + +} // namespace NYdb::NMetrics diff --git a/include/ydb-cpp-sdk/open_telemetry/otel.h b/include/ydb-cpp-sdk/open_telemetry/otel.h new file mode 100644 index 00000000000..b1d23c92fa6 --- /dev/null +++ b/include/ydb-cpp-sdk/open_telemetry/otel.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +#include +#include + +namespace NYdb::inline V3::NMetrics { + +class TOtelMetricRegistry : public IMetricRegistry { +public: + TOtelMetricRegistry(opentelemetry::nostd::shared_ptr meterProvider); + + std::shared_ptr Counter(const std::string& name, const TLabels& labels = {}) override; + std::shared_ptr Gauge(const std::string& name, const TLabels& labels = {}) override; + std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) override; + +private: + opentelemetry::nostd::shared_ptr MeterProvider_; + opentelemetry::nostd::shared_ptr Meter_; +}; + +class TOtelTraceProvider : public ITraceProvider { +public: + 
TOtelTraceProvider(opentelemetry::nostd::shared_ptr tracerProvider); + + std::shared_ptr GetTracer(const std::string& name) override; + +private: + opentelemetry::nostd::shared_ptr TracerProvider_; +}; + +} // namespace NYdb::NMetrics diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3dff7094058..c3fa4e733cd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(api) add_subdirectory(client) -add_subdirectory(library) \ No newline at end of file +add_subdirectory(library) +add_subdirectory(open_telemetry) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index e7f448e8675..65167cddbd0 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(iam) add_subdirectory(iam_private) add_subdirectory(impl) add_subdirectory(import) +add_subdirectory(metrics) add_subdirectory(monitoring) add_subdirectory(operation) add_subdirectory(params) diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.h b/src/client/impl/internal/grpc_connections/grpc_connections.h index 756d2f0d957..15cddb4e6ec 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.h +++ b/src/client/impl/internal/grpc_connections/grpc_connections.h @@ -581,6 +581,18 @@ class TGRpcConnectionsImpl ::NMonitoring::TMetricRegistry* GetMetricRegistry() override; void RegisterExtension(IExtension* extension); void RegisterExtensionApi(IExtensionApi* api); + + template + T* GetExtensionApi() { + std::lock_guard lock(ExtensionsLock_); + for (const auto& api : ExtensionApis_) { + if (auto ptr = dynamic_cast(api.get())) { + return ptr; + } + } + return nullptr; + } + void SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb); const TLog& GetLog() const override; diff --git a/src/client/metrics/CMakeLists.txt b/src/client/metrics/CMakeLists.txt new file mode 100644 index 00000000000..94902b0f415 --- /dev/null +++ b/src/client/metrics/CMakeLists.txt @@ -0,0 +1,14 @@ 
+_ydb_sdk_add_library(client-metrics) + +target_sources(client-metrics PRIVATE + metrics.cpp +) + +target_include_directories(client-metrics PUBLIC + $ + $ +) + +target_link_libraries(client-metrics PUBLIC + client-extension_common +) diff --git a/src/client/metrics/metrics.cpp b/src/client/metrics/metrics.cpp new file mode 100644 index 00000000000..836d01f2071 --- /dev/null +++ b/src/client/metrics/metrics.cpp @@ -0,0 +1,32 @@ +#include + +namespace NYdb::inline V3::NMetrics { + +class TMetricsApiImpl : public IMetricsApi { +public: + void SetMetricRegistry(std::shared_ptr registry) override { + Registry_ = std::move(registry); + } + + void SetTraceProvider(std::shared_ptr provider) override { + TraceProvider_ = std::move(provider); + } + + std::shared_ptr GetMetricRegistry() const override { + return Registry_; + } + + std::shared_ptr GetTraceProvider() const override { + return TraceProvider_; + } + +private: + std::shared_ptr Registry_; + std::shared_ptr TraceProvider_; +}; + +IMetricsApi* IMetricsApi::Create(TDriver driver) { + return new TMetricsApiImpl(); +} + +} // namespace NYdb::NMetrics diff --git a/src/client/query/CMakeLists.txt b/src/client/query/CMakeLists.txt index 6677d402d4d..f1395ff107b 100644 --- a/src/client/query/CMakeLists.txt +++ b/src/client/query/CMakeLists.txt @@ -7,6 +7,7 @@ target_link_libraries(client-ydb_query PUBLIC impl-internal-make_request impl-session impl-internal-retry + client-metrics client-ydb_common_client client-ydb_driver client-ydb_query-impl diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index ccf90f1175c..ca1453e0faf 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include @@ -67,6 +69,12 @@ class TQueryClient::TImpl: public TClientImplCommon, public { SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query")); 
SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Query")); + + if (auto metricsApi = Connections_->GetExtensionApi()) { + if (auto traceProvider = metricsApi->GetTraceProvider()) { + Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-query"); + } + } } ~TImpl() { @@ -94,8 +102,21 @@ class TQueryClient::TImpl: public TClientImplCommon, public { CollectQuerySize(query); CollectParamsSize(params ? ¶ms->GetProtoMap() : nullptr); + + auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint); + return TExecQueryImpl::ExecuteQuery( - Connections_, DbDriverState_, query, txControl, params, settings, session); + Connections_, DbDriverState_, query, txControl, params, settings, session) + .Apply([span](TAsyncExecuteQueryResult future) { + try { + auto result = future.GetValue(); + span->End(result.GetStatus()); + return result; + } catch (...) { + span->End(EStatus::CLIENT_INTERNAL_ERROR); + throw; + } + }); } NThreading::TFuture ExecuteScript(const std::string& script, const std::optional& params, const TExecuteScriptSettings& settings) { @@ -162,7 +183,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto responseCb = [promise, session] + auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint); + + auto responseCb = [promise, session, span] (Ydb::Query::RollbackTransactionResponse* response, TPlainStatus status) mutable { try { if (response) { @@ -171,11 +194,15 @@ class TQueryClient::TImpl: public TClientImplCommon, public TStatus rollbackTxStatus(TPlainStatus{static_cast(response->status()), std::move(opIssues), status.Endpoint, std::move(status.Metadata)}); + span->End(rollbackTxStatus.GetStatus()); + promise.SetValue(std::move(rollbackTxStatus)); } else { + span->End(status.Status); promise.SetValue(TStatus(std::move(status))); } } catch (...) 
{ + span->End(EStatus::CLIENT_INTERNAL_ERROR); promise.SetException(std::current_exception()); } }; @@ -203,7 +230,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto responseCb = [promise, session] + auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint); + + auto responseCb = [promise, session, span] (Ydb::Query::CommitTransactionResponse* response, TPlainStatus status) mutable { try { if (response) { @@ -212,12 +241,16 @@ class TQueryClient::TImpl: public TClientImplCommon, public TStatus commitTxStatus(TPlainStatus{static_cast(response->status()), std::move(opIssues), status.Endpoint, std::move(status.Metadata)}); + span->End(commitTxStatus.GetStatus()); + TCommitTransactionResult commitTxResult(std::move(commitTxStatus)); promise.SetValue(std::move(commitTxResult)); } else { + span->End(status.Status); promise.SetValue(TCommitTransactionResult(TStatus(std::move(status)))); } } catch (...) { + span->End(EStatus::CLIENT_INTERNAL_ERROR); promise.SetException(std::current_exception()); } }; @@ -425,10 +458,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public TAsyncCreateSessionResult GetSession(const TCreateSessionSettings& settings) { class TQueryClientGetSessionCtx : public NSessionPool::IGetSessionCtx { public: - TQueryClientGetSessionCtx(std::shared_ptr client, const TCreateSessionSettings& settings) + TQueryClientGetSessionCtx(std::shared_ptr client, const TCreateSessionSettings& settings, std::shared_ptr span) : Promise(NThreading::NewPromise()) , Client(client) , RpcSettings(TRpcRequestSettings::Make(settings)) + , Span(span) {} TAsyncCreateSessionResult GetFuture() { @@ -437,6 +471,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public void ReplyError(TStatus status) override { TSession session; + if (Span) Span->End(status.GetStatus()); ScheduleReply(TCreateSessionResult(std::move(status), std::move(session))); } @@ -449,14 +484,17 @@ class 
TQueryClient::TImpl: public TClientImplCommon, public ) ); + if (Span) Span->End(EStatus::SUCCESS); ScheduleReply(std::move(val)); } void ReplyNewSession() override { Client->CreateAttachedSession(RpcSettings).Subscribe( - [promise{std::move(Promise)}](TAsyncCreateSessionResult future) mutable + [promise{std::move(Promise)}, span = Span](TAsyncCreateSessionResult future) mutable { - promise.SetValue(future.ExtractValue()); + auto val = future.ExtractValue(); + if (span) span->End(val.GetStatus()); + promise.SetValue(std::move(val)); }); } @@ -481,9 +519,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public NThreading::TPromise Promise; std::shared_ptr Client; const TRpcRequestSettings RpcSettings; + std::shared_ptr Span; }; - auto ctx = std::make_unique(shared_from_this(), settings); + auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint); + auto ctx = std::make_unique(shared_from_this(), settings, span); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); @@ -552,6 +592,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public } private: + std::shared_ptr Tracer_; NSdkStats::TStatCollector::TClientRetryOperationStatCollector RetryOperationStatCollector_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> QuerySizeHistogram_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> ParamsSizeHistogram_; diff --git a/src/client/query/impl/CMakeLists.txt b/src/client/query/impl/CMakeLists.txt index 76b112b2254..70f93b6d68d 100644 --- a/src/client/query/impl/CMakeLists.txt +++ b/src/client/query/impl/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(client-ydb_query-impl PUBLIC target_sources(client-ydb_query-impl PRIVATE exec_query.cpp client_session.cpp + query_spans.cpp ) _ydb_sdk_install_targets(TARGETS client-ydb_query-impl) diff --git a/src/client/query/impl/query_spans.cpp b/src/client/query/impl/query_spans.cpp new file mode 100644 index 00000000000..c612f442663 --- /dev/null 
+++ b/src/client/query/impl/query_spans.cpp @@ -0,0 +1,56 @@ +#include "query_spans.h" + +#include + +namespace NYdb::inline V3::NQuery { + +namespace { + +void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { + auto pos = endpoint.find(':'); + if (pos != std::string::npos) { + host = endpoint.substr(0, pos); + try { + port = std::stoi(endpoint.substr(pos + 1)); + } catch (...) { + port = 2135; + } + } else { + host = endpoint; + port = 2135; + } +} + +} // namespace + +TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint) { + if (!tracer) return; + + std::string host; + int port; + ParseEndpoint(endpoint, host, port); + + Span_ = tracer->StartSpan("ydb." + operationName, NMetrics::ESpanKind::CLIENT); + Span_->SetAttribute("db.system.name", "ydb"); + Span_->SetAttribute("server.address", host); + Span_->SetAttribute("server.port", static_cast(port)); +} + +TQuerySpan::~TQuerySpan() { + if (Span_) { + Span_->End(); + } +} + +void TQuerySpan::End(EStatus status) { + if (Span_) { + Span_->SetAttribute("db.response.status_code", static_cast(status)); + if (status != EStatus::SUCCESS) { + Span_->SetAttribute("error.type", ToString(status)); + } + Span_->End(); + Span_.reset(); + } +} + +} // namespace NYdb::NQuery diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h new file mode 100644 index 00000000000..d34e263d4db --- /dev/null +++ b/src/client/query/impl/query_spans.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace NYdb::inline V3::NQuery { + +class TQuerySpan { +public: + TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint); + ~TQuerySpan(); + + void End(EStatus status); + +private: + std::shared_ptr Span_; +}; + +} // namespace NYdb::NQuery diff --git a/src/open_telemetry/CMakeLists.txt b/src/open_telemetry/CMakeLists.txt new file mode 100644 index 
00000000000..4b0d7df8102 --- /dev/null +++ b/src/open_telemetry/CMakeLists.txt @@ -0,0 +1,17 @@ +_ydb_sdk_add_library(open_telemetry) + +target_sources(open_telemetry PRIVATE + otel.cpp +) + +target_include_directories(open_telemetry PUBLIC + $ + $ +) + +target_link_libraries(open_telemetry PUBLIC + client-metrics + opentelemetry-cpp::api + opentelemetry-cpp::metrics + opentelemetry-cpp::trace +) diff --git a/src/open_telemetry/otel.cpp b/src/open_telemetry/otel.cpp new file mode 100644 index 00000000000..b12a08c1b7b --- /dev/null +++ b/src/open_telemetry/otel.cpp @@ -0,0 +1,153 @@ +#include + +#include +#include +#include +#include +#include + +namespace NYdb::inline V3::NMetrics { + +namespace { + +using namespace opentelemetry; + +common::KeyValueIterableView MakeAttributes(const TLabels& labels) { + return common::KeyValueIterableView(labels); +} + +class TOtelCounter : public ICounter { +public: + TOtelCounter(nostd::shared_ptr> counter, const TLabels& labels) + : Counter_(std::move(counter)) + , Labels_(labels) + {} + + void Inc() override { + Counter_->Add(1, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + } + +private: + nostd::shared_ptr> Counter_; + TLabels Labels_; +}; + +class TOtelUpDownCounterGauge : public IGauge { +public: + TOtelUpDownCounterGauge(nostd::shared_ptr> counter, const TLabels& labels) + : Counter_(std::move(counter)) + , Labels_(labels) + {} + + void Add(double delta) override { + Counter_->Add(delta, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + Value_ += delta; + } + + void Set(double value) override { + Counter_->Add(value - Value_, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + Value_ = value; + } + +private: + nostd::shared_ptr> Counter_; + TLabels Labels_; + double Value_ = 0; +}; + +class TOtelHistogram : public IHistogram { +public: + TOtelHistogram(nostd::shared_ptr> histogram, const TLabels& labels) + : Histogram_(std::move(histogram)) + , Labels_(labels) + {} + + 
void Record(double value) override { + Histogram_->Record(value, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + } + +private: + nostd::shared_ptr> Histogram_; + TLabels Labels_; +}; + +trace::SpanKind MapSpanKind(ESpanKind kind) { + switch (kind) { + case ESpanKind::INTERNAL: return trace::SpanKind::kInternal; + case ESpanKind::SERVER: return trace::SpanKind::kServer; + case ESpanKind::CLIENT: return trace::SpanKind::kClient; + case ESpanKind::PRODUCER: return trace::SpanKind::kProducer; + case ESpanKind::CONSUMER: return trace::SpanKind::kConsumer; + } + return trace::SpanKind::kInternal; +} + +class TOtelSpan : public ISpan { +public: + TOtelSpan(nostd::shared_ptr span) + : Span_(std::move(span)) + {} + + void End() override { + Span_->End(); + } + + void SetAttribute(const std::string& key, const std::string& value) override { + Span_->SetAttribute(key, value); + } + + void SetAttribute(const std::string& key, int64_t value) override { + Span_->SetAttribute(key, value); + } + +private: + nostd::shared_ptr Span_; +}; + +class TOtelTracer : public ITracer { +public: + TOtelTracer(nostd::shared_ptr tracer) + : Tracer_(std::move(tracer)) + {} + + std::shared_ptr StartSpan(const std::string& name, ESpanKind kind) override { + trace::StartSpanOptions options; + options.kind = MapSpanKind(kind); + return std::make_shared(Tracer_->StartSpan(name, options)); + } + +private: + nostd::shared_ptr Tracer_; +}; + +} // namespace + +TOtelMetricRegistry::TOtelMetricRegistry(nostd::shared_ptr meterProvider) + : MeterProvider_(std::move(meterProvider)) + , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", "1.0.0")) +{} + +std::shared_ptr TOtelMetricRegistry::Counter(const std::string& name, const TLabels& labels) { + auto counter = Meter_->CreateUInt64Counter(name); + return std::make_shared(std::move(counter), labels); +} + +std::shared_ptr TOtelMetricRegistry::Gauge(const std::string& name, const TLabels& labels) { + auto counter = 
Meter_->CreateDoubleUpDownCounter(name); + return std::make_shared(std::move(counter), labels); +} + +std::shared_ptr TOtelMetricRegistry::Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels) { + auto histogram = Meter_->CreateDoubleHistogram(name); + return std::make_shared(std::move(histogram), labels); +} + +TOtelTraceProvider::TOtelTraceProvider(nostd::shared_ptr tracerProvider) + : TracerProvider_(std::move(tracerProvider)) +{} + +std::shared_ptr TOtelTraceProvider::GetTracer(const std::string& name) { + return std::make_shared(TracerProvider_->GetTracer(name)); +} + +} // namespace NYdb::NMetrics From 90118388d09338f83871caf64fff96d5430507ca Mon Sep 17 00:00:00 2001 From: maladetska Date: Sat, 21 Feb 2026 15:57:37 +0300 Subject: [PATCH 02/17] . --- CMakeLists.txt | 4 ++++ cmake/external_libs.cmake | 5 ++++- {src/open_telemetry => open_telemetry}/CMakeLists.txt | 7 +++++-- .../include}/ydb-cpp-sdk/open_telemetry/extension.h | 0 .../include}/ydb-cpp-sdk/open_telemetry/otel.h | 0 {src/open_telemetry => open_telemetry/src}/otel.cpp | 0 src/CMakeLists.txt | 1 - 7 files changed, 13 insertions(+), 4 deletions(-) rename {src/open_telemetry => open_telemetry}/CMakeLists.txt (59%) rename {include => open_telemetry/include}/ydb-cpp-sdk/open_telemetry/extension.h (100%) rename {include => open_telemetry/include}/ydb-cpp-sdk/open_telemetry/otel.h (100%) rename {src/open_telemetry => open_telemetry/src}/otel.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86eaef64720..991c12d8a93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,10 @@ add_subdirectory(include/ydb-cpp-sdk/client) add_subdirectory(src) add_subdirectory(util) +if (YDB_SDK_HAS_OPEN_TELEMETRY) + add_subdirectory(open_telemetry EXCLUDE_FROM_ALL) +endif() + #_ydb_sdk_validate_public_headers() if (YDB_SDK_EXAMPLES) diff --git a/cmake/external_libs.cmake b/cmake/external_libs.cmake index be645f332ae..76666562f06 100644 --- a/cmake/external_libs.cmake +++ 
b/cmake/external_libs.cmake @@ -13,7 +13,10 @@ find_package(base64 REQUIRED) find_package(Brotli 1.1.0 REQUIRED) find_package(jwt-cpp REQUIRED) find_package(double-conversion REQUIRED) -find_package(opentelemetry-cpp REQUIRED) +find_package(opentelemetry-cpp QUIET) +if (opentelemetry-cpp_FOUND) + set(YDB_SDK_HAS_OPEN_TELEMETRY ON) +endif() # RapidJSON if (YDB_SDK_USE_RAPID_JSON) diff --git a/src/open_telemetry/CMakeLists.txt b/open_telemetry/CMakeLists.txt similarity index 59% rename from src/open_telemetry/CMakeLists.txt rename to open_telemetry/CMakeLists.txt index 4b0d7df8102..aa0cee855e1 100644 --- a/src/open_telemetry/CMakeLists.txt +++ b/open_telemetry/CMakeLists.txt @@ -1,11 +1,11 @@ _ydb_sdk_add_library(open_telemetry) target_sources(open_telemetry PRIVATE - otel.cpp + src/otel.cpp ) target_include_directories(open_telemetry PUBLIC - $ + $ $ ) @@ -15,3 +15,6 @@ target_link_libraries(open_telemetry PUBLIC opentelemetry-cpp::metrics opentelemetry-cpp::trace ) + +_ydb_sdk_make_client_component(OpenTelemetry open_telemetry) +_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/include/ydb-cpp-sdk/open_telemetry/extension.h b/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h similarity index 100% rename from include/ydb-cpp-sdk/open_telemetry/extension.h rename to open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h diff --git a/include/ydb-cpp-sdk/open_telemetry/otel.h b/open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h similarity index 100% rename from include/ydb-cpp-sdk/open_telemetry/otel.h rename to open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h diff --git a/src/open_telemetry/otel.cpp b/open_telemetry/src/otel.cpp similarity index 100% rename from src/open_telemetry/otel.cpp rename to open_telemetry/src/otel.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c3fa4e733cd..b251a041380 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,3 @@ 
add_subdirectory(api) add_subdirectory(client) add_subdirectory(library) -add_subdirectory(open_telemetry) From bbc803b1aa7e9261e0b32177bec9309886761cb6 Mon Sep 17 00:00:00 2001 From: maladetska Date: Tue, 24 Feb 2026 14:43:45 +0300 Subject: [PATCH 03/17] make metrics as client_component --- src/client/metrics/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/client/metrics/CMakeLists.txt b/src/client/metrics/CMakeLists.txt index 94902b0f415..ce8526b5e7e 100644 --- a/src/client/metrics/CMakeLists.txt +++ b/src/client/metrics/CMakeLists.txt @@ -12,3 +12,5 @@ target_include_directories(client-metrics PUBLIC target_link_libraries(client-metrics PUBLIC client-extension_common ) + +_ydb_sdk_make_client_component(Metrics client-metrics) From feb3e8dc461d1b498cd03a719a66d08775d8bfbb Mon Sep 17 00:00:00 2001 From: maladetska Date: Sun, 1 Mar 2026 02:20:51 +0300 Subject: [PATCH 04/17] fixes --- CMakeLists.txt | 7 +- cmake/external_libs.cmake | 9 +- include/ydb-cpp-sdk/client/driver/driver.h | 13 +++ include/ydb-cpp-sdk/client/metrics/metrics.h | 16 --- open_telemetry/CMakeLists.txt | 20 ---- .../ydb-cpp-sdk/open_telemetry/extension.h | 34 ------ plugins/CMakeLists.txt | 3 + plugins/open_telemetry/CMakeLists.txt | 36 ++++++ .../ydb-cpp-sdk/open_telemetry/extension.h | 16 +++ .../ydb-cpp-sdk/open_telemetry/metrics.h | 23 ++-- .../ydb-cpp-sdk/open_telemetry/trace.h | 29 +++++ .../open_telemetry/src/metrics.cpp | 105 ++++++++---------- plugins/open_telemetry/src/trace.cpp | 70 ++++++++++++ src/client/driver/driver.cpp | 24 ++++ .../grpc_connections/grpc_connections.cpp | 10 ++ .../grpc_connections/grpc_connections.h | 9 ++ .../impl/internal/grpc_connections/params.h | 7 ++ src/client/metrics/CMakeLists.txt | 9 -- src/client/metrics/metrics.cpp | 31 ------ src/client/query/client.cpp | 18 +-- src/client/query/impl/query_spans.cpp | 54 +++++++-- src/client/query/impl/query_spans.h | 4 +- 22 files changed, 340 insertions(+), 207 deletions(-) delete mode 100644 
open_telemetry/CMakeLists.txt delete mode 100644 open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h create mode 100644 plugins/CMakeLists.txt create mode 100644 plugins/open_telemetry/CMakeLists.txt create mode 100644 plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h rename open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h => plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h (62%) create mode 100644 plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h rename open_telemetry/src/otel.cpp => plugins/open_telemetry/src/metrics.cpp (61%) create mode 100644 plugins/open_telemetry/src/trace.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 991c12d8a93..6df450c510c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,8 @@ project(YDB-CPP-SDK VERSION ${YDB_SDK_VERSION} LANGUAGES C CXX ASM) option(YDB_SDK_INSTALL "Install YDB C++ SDK" Off) option(YDB_SDK_TESTS "Build YDB C++ SDK tests" Off) option(YDB_SDK_EXAMPLES "Build YDB C++ SDK examples" On) +option(YDB_SDK_ENABLE_OTEL_METRICS "Build OpenTelemetry metrics plugin" Off) +option(YDB_SDK_ENABLE_OTEL_TRACE "Build OpenTelemetry trace plugin" Off) set(YDB_SDK_GOOGLE_COMMON_PROTOS_TARGET "" CACHE STRING "Name of cmake target preparing google common proto library") option(YDB_SDK_USE_RAPID_JSON "Search for rapid json library in system" ON) @@ -58,10 +60,7 @@ add_subdirectory(library/cpp) add_subdirectory(include/ydb-cpp-sdk/client) add_subdirectory(src) add_subdirectory(util) - -if (YDB_SDK_HAS_OPEN_TELEMETRY) - add_subdirectory(open_telemetry EXCLUDE_FROM_ALL) -endif() +add_subdirectory(plugins) #_ydb_sdk_validate_public_headers() diff --git a/cmake/external_libs.cmake b/cmake/external_libs.cmake index 76666562f06..8445e4d2fc1 100644 --- a/cmake/external_libs.cmake +++ b/cmake/external_libs.cmake @@ -13,9 +13,12 @@ find_package(base64 REQUIRED) find_package(Brotli 1.1.0 REQUIRED) find_package(jwt-cpp REQUIRED) find_package(double-conversion 
REQUIRED) -find_package(opentelemetry-cpp QUIET) -if (opentelemetry-cpp_FOUND) - set(YDB_SDK_HAS_OPEN_TELEMETRY ON) + +if (YDB_SDK_ENABLE_OTEL_METRICS OR YDB_SDK_ENABLE_OTEL_TRACE) + find_package(opentelemetry-cpp QUIET) + if (NOT opentelemetry-cpp_FOUND) + message(FATAL_ERROR "Dependency 'opentelemetry-cpp' was not found.") + endif() endif() # RapidJSON diff --git a/include/ydb-cpp-sdk/client/driver/driver.h b/include/ydb-cpp-sdk/client/driver/driver.h index 72aa008ccca..8f373e9b25c 100644 --- a/include/ydb-cpp-sdk/client/driver/driver.h +++ b/include/ydb-cpp-sdk/client/driver/driver.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,18 @@ class TDriverConfig { //! If not set, default executor will be used. TDriverConfig& SetExecutor(std::shared_ptr executor); + //! Set external metrics exporter implementation. + TDriverConfig& SetMetricExporter(std::shared_ptr exporter); + + //! Set external tracing exporter implementation. + TDriverConfig& SetTraceExporter(std::shared_ptr exporter); + + //! Get configured metrics exporter implementation. + std::shared_ptr GetMetricExporter() const; + + //! Get configured tracing exporter implementation. 
+ std::shared_ptr GetTraceExporter() const; + private: class TImpl; std::shared_ptr Impl_; diff --git a/include/ydb-cpp-sdk/client/metrics/metrics.h b/include/ydb-cpp-sdk/client/metrics/metrics.h index d3dc5eb2c19..0fce1f081bf 100644 --- a/include/ydb-cpp-sdk/client/metrics/metrics.h +++ b/include/ydb-cpp-sdk/client/metrics/metrics.h @@ -1,11 +1,6 @@ #pragma once -#include - #include -#include -#include -#include namespace NYdb::inline V3::NMetrics { @@ -67,15 +62,4 @@ class ITraceProvider { virtual std::shared_ptr GetTracer(const std::string& name) = 0; }; -class IMetricsApi : public IExtensionApi { -public: - static IMetricsApi* Create(TDriver driver); -public: - virtual ~IMetricsApi() = default; - virtual void SetMetricRegistry(std::shared_ptr registry) = 0; - virtual void SetTraceProvider(std::shared_ptr provider) = 0; - virtual std::shared_ptr GetMetricRegistry() const = 0; - virtual std::shared_ptr GetTraceProvider() const = 0; -}; - } // namespace NYdb::NMetrics diff --git a/open_telemetry/CMakeLists.txt b/open_telemetry/CMakeLists.txt deleted file mode 100644 index aa0cee855e1..00000000000 --- a/open_telemetry/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -_ydb_sdk_add_library(open_telemetry) - -target_sources(open_telemetry PRIVATE - src/otel.cpp -) - -target_include_directories(open_telemetry PUBLIC - $ - $ -) - -target_link_libraries(open_telemetry PUBLIC - client-metrics - opentelemetry-cpp::api - opentelemetry-cpp::metrics - opentelemetry-cpp::trace -) - -_ydb_sdk_make_client_component(OpenTelemetry open_telemetry) -_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h b/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h deleted file mode 100644 index b5683d6d41b..00000000000 --- a/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include -#include - -namespace NYdb::inline V3::NMetrics { 
- -class TOtelExtension : public IExtension { -public: - using IApi = IMetricsApi; - - struct TParams { - opentelemetry::nostd::shared_ptr MeterProvider; - opentelemetry::nostd::shared_ptr TracerProvider; - }; - - TOtelExtension(const TParams& params, IApi* api) { - if (params.MeterProvider) { - api->SetMetricRegistry(std::make_shared(params.MeterProvider)); - } - if (params.TracerProvider) { - api->SetTraceProvider(std::make_shared(params.TracerProvider)); - } - } -}; - -inline void AddOpenTelemetry(TDriver& driver - , opentelemetry::nostd::shared_ptr meterProvider - , opentelemetry::nostd::shared_ptr tracerProvider -) { - driver.AddExtension({meterProvider, tracerProvider}); -} - -} // namespace NYdb::NMetrics diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt new file mode 100644 index 00000000000..836ed903825 --- /dev/null +++ b/plugins/CMakeLists.txt @@ -0,0 +1,3 @@ +if (YDB_SDK_ENABLE_OTEL_METRICS OR YDB_SDK_ENABLE_OTEL_TRACE) + add_subdirectory(open_telemetry EXCLUDE_FROM_ALL) +endif() diff --git a/plugins/open_telemetry/CMakeLists.txt b/plugins/open_telemetry/CMakeLists.txt new file mode 100644 index 00000000000..d005708d422 --- /dev/null +++ b/plugins/open_telemetry/CMakeLists.txt @@ -0,0 +1,36 @@ +if (YDB_SDK_ENABLE_OTEL_METRICS) + _ydb_sdk_add_library(open_telemetry_metrics) + target_sources(open_telemetry_metrics PRIVATE + src/metrics.cpp + ) + target_include_directories(open_telemetry_metrics PUBLIC + $ + $ + ) + target_link_libraries(open_telemetry_metrics PUBLIC + client-metrics + client-resources + opentelemetry-cpp::api + opentelemetry-cpp::metrics + ) + _ydb_sdk_make_client_component(OpenTelemetryMetrics open_telemetry_metrics) +endif() + +if (YDB_SDK_ENABLE_OTEL_TRACE) + _ydb_sdk_add_library(open_telemetry_trace) + target_sources(open_telemetry_trace PRIVATE + src/trace.cpp + ) + target_include_directories(open_telemetry_trace PUBLIC + $ + $ + ) + target_link_libraries(open_telemetry_trace PUBLIC + client-metrics + 
opentelemetry-cpp::api + opentelemetry-cpp::trace + ) + _ydb_sdk_make_client_component(OpenTelemetryTrace open_telemetry_trace) +endif() + +_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h new file mode 100644 index 00000000000..b0db9ea7d7c --- /dev/null +++ b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace NYdb::inline V3::NMetrics { + +inline void AddOpenTelemetry(TDriverConfig& config + , opentelemetry::nostd::shared_ptr meterProvider + , opentelemetry::nostd::shared_ptr tracerProvider +) { + AddOpenTelemetryMetrics(config, std::move(meterProvider)); + AddOpenTelemetryTrace(config, std::move(tracerProvider)); +} + +} // namespace NYdb::NMetrics diff --git a/open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h similarity index 62% rename from open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h rename to plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h index b1d23c92fa6..5e9e9e77dea 100644 --- a/open_telemetry/include/ydb-cpp-sdk/open_telemetry/otel.h +++ b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include -#include namespace NYdb::inline V3::NMetrics { @@ -16,18 +16,21 @@ class TOtelMetricRegistry : public IMetricRegistry { std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) override; private: + void ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets); + opentelemetry::nostd::shared_ptr MeterProvider_; opentelemetry::nostd::shared_ptr Meter_; + std::mutex HistogramViewsLock_; + std::unordered_set HistogramViews_; }; -class TOtelTraceProvider : public 
ITraceProvider { -public: - TOtelTraceProvider(opentelemetry::nostd::shared_ptr tracerProvider); - - std::shared_ptr GetTracer(const std::string& name) override; - -private: - opentelemetry::nostd::shared_ptr TracerProvider_; -}; +inline void AddOpenTelemetryMetrics( + TDriverConfig& config, + opentelemetry::nostd::shared_ptr meterProvider) +{ + if (meterProvider) { + config.SetMetricExporter(std::make_shared(std::move(meterProvider))); + } +} } // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h new file mode 100644 index 00000000000..3ba2e146fd9 --- /dev/null +++ b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include + +namespace NYdb::inline V3::NMetrics { + +class TOtelTraceProvider : public ITraceProvider { +public: + TOtelTraceProvider(opentelemetry::nostd::shared_ptr tracerProvider); + + std::shared_ptr GetTracer(const std::string& name) override; + +private: + opentelemetry::nostd::shared_ptr TracerProvider_; +}; + +inline void AddOpenTelemetryTrace( + TDriverConfig& config, + opentelemetry::nostd::shared_ptr tracerProvider) +{ + if (tracerProvider) { + config.SetTraceExporter(std::make_shared(std::move(tracerProvider))); + } +} + +} // namespace NYdb::NMetrics diff --git a/open_telemetry/src/otel.cpp b/plugins/open_telemetry/src/metrics.cpp similarity index 61% rename from open_telemetry/src/otel.cpp rename to plugins/open_telemetry/src/metrics.cpp index b12a08c1b7b..65850fd08b3 100644 --- a/open_telemetry/src/otel.cpp +++ b/plugins/open_telemetry/src/metrics.cpp @@ -1,10 +1,12 @@ -#include +#include +#include -#include -#include #include -#include #include +#include +#include +#include +#include namespace NYdb::inline V3::NMetrics { @@ -71,61 +73,53 @@ class TOtelHistogram : public IHistogram { TLabels Labels_; }; -trace::SpanKind MapSpanKind(ESpanKind kind) 
{ - switch (kind) { - case ESpanKind::INTERNAL: return trace::SpanKind::kInternal; - case ESpanKind::SERVER: return trace::SpanKind::kServer; - case ESpanKind::CLIENT: return trace::SpanKind::kClient; - case ESpanKind::PRODUCER: return trace::SpanKind::kProducer; - case ESpanKind::CONSUMER: return trace::SpanKind::kConsumer; - } - return trace::SpanKind::kInternal; -} +} // namespace -class TOtelSpan : public ISpan { -public: - TOtelSpan(nostd::shared_ptr span) - : Span_(std::move(span)) - {} +TOtelMetricRegistry::TOtelMetricRegistry(nostd::shared_ptr meterProvider) + : MeterProvider_(std::move(meterProvider)) + , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", GetSdkSemver())) +{} - void End() override { - Span_->End(); +void TOtelMetricRegistry::ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets) { + if (buckets.empty()) { + return; } - void SetAttribute(const std::string& key, const std::string& value) override { - Span_->SetAttribute(key, value); + auto* sdkProvider = dynamic_cast(MeterProvider_.get()); + if (!sdkProvider) { + return; } - void SetAttribute(const std::string& key, int64_t value) override { - Span_->SetAttribute(key, value); + { + std::lock_guard lock(HistogramViewsLock_); + if (!HistogramViews_.insert(name).second) { + return; + } } -private: - nostd::shared_ptr Span_; -}; - -class TOtelTracer : public ITracer { -public: - TOtelTracer(nostd::shared_ptr tracer) - : Tracer_(std::move(tracer)) - {} - - std::shared_ptr StartSpan(const std::string& name, ESpanKind kind) override { - trace::StartSpanOptions options; - options.kind = MapSpanKind(kind); - return std::make_shared(Tracer_->StartSpan(name, options)); - } - -private: - nostd::shared_ptr Tracer_; -}; - -} // namespace - -TOtelMetricRegistry::TOtelMetricRegistry(nostd::shared_ptr meterProvider) - : MeterProvider_(std::move(meterProvider)) - , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", "1.0.0")) -{} + auto selector = std::make_unique( + 
sdk::metrics::InstrumentType::kHistogram, + name, + "" + ); + auto meterSelector = std::make_unique( + "ydb-cpp-sdk", + GetSdkSemver(), + {} + ); + + auto histogramConfig = std::make_shared(); + histogramConfig->boundaries_ = buckets; + + auto view = std::make_unique( + {}, + {}, + sdk::metrics::AggregationType::kHistogram, + histogramConfig + ); + + sdkProvider->AddView(std::move(selector), std::move(meterSelector), std::move(view)); +} std::shared_ptr TOtelMetricRegistry::Counter(const std::string& name, const TLabels& labels) { auto counter = Meter_->CreateUInt64Counter(name); @@ -138,16 +132,9 @@ std::shared_ptr TOtelMetricRegistry::Gauge(const std::string& name, cons } std::shared_ptr TOtelMetricRegistry::Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels) { + ConfigureHistogramBuckets(name, buckets); auto histogram = Meter_->CreateDoubleHistogram(name); return std::make_shared(std::move(histogram), labels); } -TOtelTraceProvider::TOtelTraceProvider(nostd::shared_ptr tracerProvider) - : TracerProvider_(std::move(tracerProvider)) -{} - -std::shared_ptr TOtelTraceProvider::GetTracer(const std::string& name) { - return std::make_shared(TracerProvider_->GetTracer(name)); -} - } // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/src/trace.cpp b/plugins/open_telemetry/src/trace.cpp new file mode 100644 index 00000000000..54f04cb84df --- /dev/null +++ b/plugins/open_telemetry/src/trace.cpp @@ -0,0 +1,70 @@ +#include + +#include + +namespace NYdb::inline V3::NMetrics { + +namespace { + +using namespace opentelemetry; + +trace::SpanKind MapSpanKind(ESpanKind kind) { + switch (kind) { + case ESpanKind::INTERNAL: return trace::SpanKind::kInternal; + case ESpanKind::SERVER: return trace::SpanKind::kServer; + case ESpanKind::CLIENT: return trace::SpanKind::kClient; + case ESpanKind::PRODUCER: return trace::SpanKind::kProducer; + case ESpanKind::CONSUMER: return trace::SpanKind::kConsumer; + } + return 
trace::SpanKind::kInternal; +} + +class TOtelSpan : public ISpan { +public: + TOtelSpan(nostd::shared_ptr span) + : Span_(std::move(span)) + {} + + void End() override { + Span_->End(); + } + + void SetAttribute(const std::string& key, const std::string& value) override { + Span_->SetAttribute(key, value); + } + + void SetAttribute(const std::string& key, int64_t value) override { + Span_->SetAttribute(key, value); + } + +private: + nostd::shared_ptr Span_; +}; + +class TOtelTracer : public ITracer { +public: + TOtelTracer(nostd::shared_ptr tracer) + : Tracer_(std::move(tracer)) + {} + + std::shared_ptr StartSpan(const std::string& name, ESpanKind kind) override { + trace::StartSpanOptions options; + options.kind = MapSpanKind(kind); + return std::make_shared(Tracer_->StartSpan(name, options)); + } + +private: + nostd::shared_ptr Tracer_; +}; + +} // namespace + +TOtelTraceProvider::TOtelTraceProvider(nostd::shared_ptr tracerProvider) + : TracerProvider_(std::move(tracerProvider)) +{} + +std::shared_ptr TOtelTraceProvider::GetTracer(const std::string& name) { + return std::make_shared(TracerProvider_->GetTracer(name)); +} + +} // namespace NYdb::NMetrics diff --git a/src/client/driver/driver.cpp b/src/client/driver/driver.cpp index 207c67b6d5f..63aeac8aea5 100644 --- a/src/client/driver/driver.cpp +++ b/src/client/driver/driver.cpp @@ -51,6 +51,8 @@ class TDriverConfig::TImpl : public IConnectionsParams { uint64_t GetMaxMessageSize() const override { return MaxMessageSize; } const TLog& GetLog() const override { return Log; } std::shared_ptr GetExecutor() const override { return Executor; } + std::shared_ptr GetMetricExporter() const override { return MetricExporter; } + std::shared_ptr GetTraceExporter() const override { return TraceExporter; } std::string Endpoint; size_t NetworkThreadsNum = 2; @@ -80,6 +82,8 @@ class TDriverConfig::TImpl : public IConnectionsParams { uint64_t MaxMessageSize = 0; TLog Log; // Null by default. 
std::shared_ptr Executor; + std::shared_ptr MetricExporter; + std::shared_ptr TraceExporter; }; TDriverConfig::TDriverConfig(const std::string& connectionString) @@ -229,6 +233,24 @@ TDriverConfig& TDriverConfig::SetExecutor(std::shared_ptr executor) { return *this; } +TDriverConfig& TDriverConfig::SetMetricExporter(std::shared_ptr exporter) { + Impl_->MetricExporter = std::move(exporter); + return *this; +} + +TDriverConfig& TDriverConfig::SetTraceExporter(std::shared_ptr exporter) { + Impl_->TraceExporter = std::move(exporter); + return *this; +} + +std::shared_ptr TDriverConfig::GetMetricExporter() const { + return Impl_->MetricExporter; +} + +std::shared_ptr TDriverConfig::GetTraceExporter() const { + return Impl_->TraceExporter; +} + //////////////////////////////////////////////////////////////////////////////// std::shared_ptr CreateInternalInterface(const TDriver connection) { @@ -280,6 +302,8 @@ TDriverConfig TDriver::GetConfig() const { config.SetMaxOutboundMessageSize(Impl_->MaxOutboundMessageSize_); config.SetMaxMessageSize(Impl_->MaxMessageSize_); config.Impl_->Log = Impl_->Log; + config.SetMetricExporter(Impl_->GetMetricExporter()); + config.SetTraceExporter(Impl_->GetTraceExporter()); return config; } diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.cpp b/src/client/impl/internal/grpc_connections/grpc_connections.cpp index 48e170d28c6..32645964933 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.cpp +++ b/src/client/impl/internal/grpc_connections/grpc_connections.cpp @@ -167,6 +167,8 @@ TGRpcConnectionsImpl::TGRpcConnectionsImpl(std::shared_ptr p #ifndef YDB_GRPC_BYPASS_CHANNEL_POOL , ChannelPool_(TcpKeepAliveSettings_, params->GetSocketIdleTimeout()) #endif + , MetricExporter_(params->GetMetricExporter()) + , TraceExporter_(params->GetTraceExporter()) , NetworkThreadsNum_(params->GetNetworkThreadsNum()) , UsePerChannelTcpConnection_(params->GetUsePerChannelTcpConnection()) , 
GRpcClientLow_(NetworkThreadsNum_) @@ -434,6 +436,14 @@ void TGRpcConnectionsImpl::RegisterExtensionApi(IExtensionApi* api) { ExtensionApis_.emplace_back(api); } +std::shared_ptr TGRpcConnectionsImpl::GetMetricExporter() const { + return MetricExporter_; +} + +std::shared_ptr TGRpcConnectionsImpl::GetTraceExporter() const { + return TraceExporter_; +} + void TGRpcConnectionsImpl::SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb) { std::lock_guard lock(ExtensionsLock_); DiscoveryMutatorCb = std::move(cb); diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.h b/src/client/impl/internal/grpc_connections/grpc_connections.h index 15cddb4e6ec..14db28028d2 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.h +++ b/src/client/impl/internal/grpc_connections/grpc_connections.h @@ -18,6 +18,11 @@ namespace NYdb::inline V3 { +namespace NMetrics { + class IMetricRegistry; + class ITraceProvider; +} // namespace NMetrics + constexpr TDeadline::Duration GRPC_KEEP_ALIVE_TIMEOUT_FOR_DISCOVERY = std::chrono::seconds(10); constexpr TDeadline::Duration INITIAL_DEFERRED_CALL_DELAY = std::chrono::milliseconds(10); // The delay before first deferred service call constexpr TDeadline::Duration GET_ENDPOINTS_TIMEOUT = std::chrono::seconds(10); // Time wait for ListEndpoints request, after this time we pass error to client @@ -581,6 +586,8 @@ class TGRpcConnectionsImpl ::NMonitoring::TMetricRegistry* GetMetricRegistry() override; void RegisterExtension(IExtension* extension); void RegisterExtensionApi(IExtensionApi* api); + std::shared_ptr GetMetricExporter() const; + std::shared_ptr GetTraceExporter() const; template T* GetExtensionApi() { @@ -726,6 +733,8 @@ class TGRpcConnectionsImpl std::vector> Extensions_; std::vector> ExtensionApis_; + std::shared_ptr MetricExporter_; + std::shared_ptr TraceExporter_; IDiscoveryMutatorApi::TMutatorCb DiscoveryMutatorCb; diff --git a/src/client/impl/internal/grpc_connections/params.h 
b/src/client/impl/internal/grpc_connections/params.h index 2bc9f4567c5..e6aa5e87dde 100644 --- a/src/client/impl/internal/grpc_connections/params.h +++ b/src/client/impl/internal/grpc_connections/params.h @@ -11,6 +11,11 @@ namespace NYdb::inline V3 { +namespace NMetrics { + class IMetricRegistry; + class ITraceProvider; +} // namespace NMetrics + class IConnectionsParams { public: virtual ~IConnectionsParams() = default; @@ -36,6 +41,8 @@ class IConnectionsParams { virtual uint64_t GetMaxOutboundMessageSize() const = 0; virtual uint64_t GetMaxMessageSize() const = 0; virtual std::shared_ptr GetExecutor() const = 0; + virtual std::shared_ptr GetMetricExporter() const = 0; + virtual std::shared_ptr GetTraceExporter() const = 0; }; } // namespace NYdb diff --git a/src/client/metrics/CMakeLists.txt b/src/client/metrics/CMakeLists.txt index ce8526b5e7e..03915de6b5e 100644 --- a/src/client/metrics/CMakeLists.txt +++ b/src/client/metrics/CMakeLists.txt @@ -4,13 +4,4 @@ target_sources(client-metrics PRIVATE metrics.cpp ) -target_include_directories(client-metrics PUBLIC - $ - $ -) - -target_link_libraries(client-metrics PUBLIC - client-extension_common -) - _ydb_sdk_make_client_component(Metrics client-metrics) diff --git a/src/client/metrics/metrics.cpp b/src/client/metrics/metrics.cpp index 836d01f2071..341917291bb 100644 --- a/src/client/metrics/metrics.cpp +++ b/src/client/metrics/metrics.cpp @@ -1,32 +1 @@ #include - -namespace NYdb::inline V3::NMetrics { - -class TMetricsApiImpl : public IMetricsApi { -public: - void SetMetricRegistry(std::shared_ptr registry) override { - Registry_ = std::move(registry); - } - - void SetTraceProvider(std::shared_ptr provider) override { - TraceProvider_ = std::move(provider); - } - - std::shared_ptr GetMetricRegistry() const override { - return Registry_; - } - - std::shared_ptr GetTraceProvider() const override { - return TraceProvider_; - } - -private: - std::shared_ptr Registry_; - std::shared_ptr TraceProvider_; -}; - 
-IMetricsApi* IMetricsApi::Create(TDriver driver) { - return new TMetricsApiImpl(); -} - -} // namespace NYdb::NMetrics diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index ca1453e0faf..80a3d6ed00f 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -70,10 +70,8 @@ class TQueryClient::TImpl: public TClientImplCommon, public SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query")); SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Query")); - if (auto metricsApi = Connections_->GetExtensionApi()) { - if (auto traceProvider = metricsApi->GetTraceProvider()) { - Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-query"); - } + if (auto traceProvider = Connections_->GetTraceExporter()) { + Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-query"); } } @@ -471,7 +469,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public void ReplyError(TStatus status) override { TSession session; - if (Span) Span->End(status.GetStatus()); + if (Span) { + Span->End(status.GetStatus()); + } ScheduleReply(TCreateSessionResult(std::move(status), std::move(session))); } @@ -484,7 +484,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public ) ); - if (Span) Span->End(EStatus::SUCCESS); + if (Span) { + Span->End(EStatus::SUCCESS); + } ScheduleReply(std::move(val)); } @@ -493,7 +495,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public [promise{std::move(Promise)}, span = Span](TAsyncCreateSessionResult future) mutable { auto val = future.ExtractValue(); - if (span) span->End(val.GetStatus()); + if (span) { + span->End(val.GetStatus()); + } promise.SetValue(std::move(val)); }); } diff --git a/src/client/query/impl/query_spans.cpp b/src/client/query/impl/query_spans.cpp index c612f442663..fc1329aacd0 100644 --- a/src/client/query/impl/query_spans.cpp +++ b/src/client/query/impl/query_spans.cpp @@ -21,34 +21,64 @@ void ParseEndpoint(const std::string& endpoint, 
std::string& host, int& port) { } } +void SafeLogSpanError(const char* message) noexcept { + try { + try { + Cerr << "TQuerySpan: " << message << ": " << CurrentExceptionMessage() << Endl; + return; + } catch (...) { + } + Cerr << "TQuerySpan: " << message << ": (unknown)" << Endl; + } catch (...) { + } +} + } // namespace TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint) { - if (!tracer) return; + if (!tracer) { + return; + } std::string host; int port; ParseEndpoint(endpoint, host, port); - Span_ = tracer->StartSpan("ydb." + operationName, NMetrics::ESpanKind::CLIENT); - Span_->SetAttribute("db.system.name", "ydb"); - Span_->SetAttribute("server.address", host); - Span_->SetAttribute("server.port", static_cast(port)); + try { + Span_ = tracer->StartSpan("ydb." + operationName, NMetrics::ESpanKind::CLIENT); + if (!Span_) { + return; + } + Span_->SetAttribute("db.system.name", "ydb"); + Span_->SetAttribute("server.address", host); + Span_->SetAttribute("server.port", static_cast(port)); + } catch (...) { + SafeLogSpanError("failed to initialize span"); + Span_.reset(); + } } -TQuerySpan::~TQuerySpan() { +TQuerySpan::~TQuerySpan() noexcept { if (Span_) { - Span_->End(); + try { + Span_->End(); + } catch (...) { + SafeLogSpanError("failed to end span"); + } } } -void TQuerySpan::End(EStatus status) { +void TQuerySpan::End(EStatus status) noexcept { if (Span_) { - Span_->SetAttribute("db.response.status_code", static_cast(status)); - if (status != EStatus::SUCCESS) { - Span_->SetAttribute("error.type", ToString(status)); + try { + Span_->SetAttribute("db.response.status_code", static_cast(status)); + if (status != EStatus::SUCCESS) { + Span_->SetAttribute("error.type", ToString(status)); + } + Span_->End(); + } catch (...) 
{ + SafeLogSpanError("failed to finalize span"); } - Span_->End(); Span_.reset(); } } diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h index d34e263d4db..ca0b6853954 100644 --- a/src/client/query/impl/query_spans.h +++ b/src/client/query/impl/query_spans.h @@ -12,9 +12,9 @@ namespace NYdb::inline V3::NQuery { class TQuerySpan { public: TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint); - ~TQuerySpan(); + ~TQuerySpan() noexcept; - void End(EStatus status); + void End(EStatus status) noexcept; private: std::shared_ptr Span_; From 0e31bc16c1158e5598a44b58e79926a84b2142a4 Mon Sep 17 00:00:00 2001 From: maladetska Date: Mon, 2 Mar 2026 15:12:38 +0300 Subject: [PATCH 05/17] add includes --- include/ydb-cpp-sdk/client/metrics/metrics.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/ydb-cpp-sdk/client/metrics/metrics.h b/include/ydb-cpp-sdk/client/metrics/metrics.h index 0fce1f081bf..ea06c28f9a0 100644 --- a/include/ydb-cpp-sdk/client/metrics/metrics.h +++ b/include/ydb-cpp-sdk/client/metrics/metrics.h @@ -1,6 +1,10 @@ #pragma once +#include #include +#include +#include +#include namespace NYdb::inline V3::NMetrics { From 23414c1339f8ef5373687b27c4c0e0260a059a9a Mon Sep 17 00:00:00 2001 From: maladetska Date: Mon, 16 Mar 2026 01:55:33 +0800 Subject: [PATCH 06/17] fixes and add metric tests --- include/ydb-cpp-sdk/client/driver/driver.h | 17 +- include/ydb-cpp-sdk/client/metrics/metrics.h | 28 -- include/ydb-cpp-sdk/client/trace/trace.h | 39 +++ plugins/CMakeLists.txt | 5 +- plugins/metrics/CMakeLists.txt | 3 + plugins/metrics/otel/CMakeLists.txt | 17 ++ .../ydb-cpp-sdk/open_telemetry/metrics.h | 16 + plugins/metrics/otel/src/metrics.cpp | 156 ++++++++++ plugins/open_telemetry/CMakeLists.txt | 36 --- .../ydb-cpp-sdk/open_telemetry/extension.h | 16 - .../ydb-cpp-sdk/open_telemetry/metrics.h | 36 --- .../ydb-cpp-sdk/open_telemetry/trace.h | 29 -- 
plugins/open_telemetry/src/metrics.cpp | 140 --------- plugins/trace/CMakeLists.txt | 3 + plugins/trace/otel/CMakeLists.txt | 16 + .../ydb-cpp-sdk/open_telemetry/trace.h | 16 + .../otel}/src/trace.cpp | 39 ++- src/client/CMakeLists.txt | 1 + src/client/driver/driver.cpp | 28 +- .../grpc_connections/grpc_connections.cpp | 12 +- .../grpc_connections/grpc_connections.h | 19 +- .../impl/internal/grpc_connections/params.h | 4 +- src/client/metrics/CMakeLists.txt | 2 +- src/client/query/CMakeLists.txt | 3 +- src/client/query/client.cpp | 48 ++- src/client/query/impl/CMakeLists.txt | 1 + src/client/query/impl/query_metrics.cpp | 71 +++++ src/client/query/impl/query_metrics.h | 28 ++ src/client/query/impl/query_spans.cpp | 71 ++++- src/client/query/impl/query_spans.h | 7 +- src/client/trace/CMakeLists.txt | 7 + src/client/trace/trace.cpp | 1 + tests/common/fake_metric_registry.h | 122 ++++++++ tests/integration/CMakeLists.txt | 1 + tests/integration/metrics/CMakeLists.txt | 12 + tests/integration/metrics/main.cpp | 273 ++++++++++++++++++ tests/unit/client/CMakeLists.txt | 13 + tests/unit/client/query/query_metrics_ut.cpp | 190 ++++++++++++ 38 files changed, 1162 insertions(+), 364 deletions(-) create mode 100644 include/ydb-cpp-sdk/client/trace/trace.h create mode 100644 plugins/metrics/CMakeLists.txt create mode 100644 plugins/metrics/otel/CMakeLists.txt create mode 100644 plugins/metrics/otel/include/ydb-cpp-sdk/open_telemetry/metrics.h create mode 100644 plugins/metrics/otel/src/metrics.cpp delete mode 100644 plugins/open_telemetry/CMakeLists.txt delete mode 100644 plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h delete mode 100644 plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h delete mode 100644 plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h delete mode 100644 plugins/open_telemetry/src/metrics.cpp create mode 100644 plugins/trace/CMakeLists.txt create mode 100644 plugins/trace/otel/CMakeLists.txt create mode 
100644 plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h rename plugins/{open_telemetry => trace/otel}/src/trace.cpp (55%) create mode 100644 src/client/query/impl/query_metrics.cpp create mode 100644 src/client/query/impl/query_metrics.h create mode 100644 src/client/trace/CMakeLists.txt create mode 100644 src/client/trace/trace.cpp create mode 100644 tests/common/fake_metric_registry.h create mode 100644 tests/integration/metrics/CMakeLists.txt create mode 100644 tests/integration/metrics/main.cpp create mode 100644 tests/unit/client/query/query_metrics_ut.cpp diff --git a/include/ydb-cpp-sdk/client/driver/driver.h b/include/ydb-cpp-sdk/client/driver/driver.h index 8f373e9b25c..20fa52d5e60 100644 --- a/include/ydb-cpp-sdk/client/driver/driver.h +++ b/include/ydb-cpp-sdk/client/driver/driver.h @@ -3,10 +3,11 @@ #include "fwd.h" #include +#include +#include #include #include #include -#include #include #include #include @@ -154,17 +155,11 @@ class TDriverConfig { //! If not set, default executor will be used. TDriverConfig& SetExecutor(std::shared_ptr executor); - //! Set external metrics exporter implementation. - TDriverConfig& SetMetricExporter(std::shared_ptr exporter); - - //! Set external tracing exporter implementation. - TDriverConfig& SetTraceExporter(std::shared_ptr exporter); - - //! Get configured metrics exporter implementation. - std::shared_ptr GetMetricExporter() const; + //! Set external metrics registry implementation. + TDriverConfig& SetMetricRegistry(std::shared_ptr registry); - //! Get configured tracing exporter implementation. - std::shared_ptr GetTraceExporter() const; + //! Set external trace provider implementation. 
+ TDriverConfig& SetTraceProvider(std::shared_ptr provider); private: class TImpl; diff --git a/include/ydb-cpp-sdk/client/metrics/metrics.h b/include/ydb-cpp-sdk/client/metrics/metrics.h index ea06c28f9a0..7e2b0b903dd 100644 --- a/include/ydb-cpp-sdk/client/metrics/metrics.h +++ b/include/ydb-cpp-sdk/client/metrics/metrics.h @@ -38,32 +38,4 @@ class IMetricRegistry { virtual std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) = 0; }; -enum class ESpanKind { - INTERNAL, - SERVER, - CLIENT, - PRODUCER, - CONSUMER -}; - -class ISpan { -public: - virtual ~ISpan() = default; - virtual void End() = 0; - virtual void SetAttribute(const std::string& key, const std::string& value) = 0; - virtual void SetAttribute(const std::string& key, int64_t value) = 0; -}; - -class ITracer { -public: - virtual ~ITracer() = default; - virtual std::shared_ptr StartSpan(const std::string& name, ESpanKind kind = ESpanKind::INTERNAL) = 0; -}; - -class ITraceProvider { -public: - virtual ~ITraceProvider() = default; - virtual std::shared_ptr GetTracer(const std::string& name) = 0; -}; - } // namespace NYdb::NMetrics diff --git a/include/ydb-cpp-sdk/client/trace/trace.h b/include/ydb-cpp-sdk/client/trace/trace.h new file mode 100644 index 00000000000..b86297146a9 --- /dev/null +++ b/include/ydb-cpp-sdk/client/trace/trace.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include + +namespace NYdb::inline V3::NMetrics { + +enum class ESpanKind { + INTERNAL, + SERVER, + CLIENT, + PRODUCER, + CONSUMER +}; + +class ISpan { +public: + virtual ~ISpan() = default; + virtual void End() = 0; + virtual void SetAttribute(const std::string& key, const std::string& value) = 0; + virtual void SetAttribute(const std::string& key, int64_t value) = 0; + virtual void AddEvent(const std::string& name, const std::map& attributes = {}) = 0; +}; + +class ITracer { +public: + virtual ~ITracer() = default; + virtual std::shared_ptr 
StartSpan(const std::string& name, ESpanKind kind = ESpanKind::INTERNAL) = 0; +}; + +class ITraceProvider { +public: + virtual ~ITraceProvider() = default; + virtual std::shared_ptr GetTracer(const std::string& name) = 0; +}; + +} // namespace NYdb::NMetrics diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 836ed903825..0d232800455 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -1,3 +1,2 @@ -if (YDB_SDK_ENABLE_OTEL_METRICS OR YDB_SDK_ENABLE_OTEL_TRACE) - add_subdirectory(open_telemetry EXCLUDE_FROM_ALL) -endif() +add_subdirectory(metrics) +add_subdirectory(trace) diff --git a/plugins/metrics/CMakeLists.txt b/plugins/metrics/CMakeLists.txt new file mode 100644 index 00000000000..6d50a5111e7 --- /dev/null +++ b/plugins/metrics/CMakeLists.txt @@ -0,0 +1,3 @@ +if (YDB_SDK_ENABLE_OTEL_METRICS) + add_subdirectory(otel EXCLUDE_FROM_ALL) +endif() diff --git a/plugins/metrics/otel/CMakeLists.txt b/plugins/metrics/otel/CMakeLists.txt new file mode 100644 index 00000000000..e26b1931984 --- /dev/null +++ b/plugins/metrics/otel/CMakeLists.txt @@ -0,0 +1,17 @@ +_ydb_sdk_add_library(open_telemetry_metrics) +target_sources(open_telemetry_metrics PRIVATE + src/metrics.cpp +) +target_include_directories(open_telemetry_metrics PUBLIC + $ + $ +) +target_link_libraries(open_telemetry_metrics PUBLIC + client-metrics + client-resources + opentelemetry-cpp::api + opentelemetry-cpp::metrics +) +_ydb_sdk_make_client_component(OpenTelemetryMetrics open_telemetry_metrics) + +_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/plugins/metrics/otel/include/ydb-cpp-sdk/open_telemetry/metrics.h b/plugins/metrics/otel/include/ydb-cpp-sdk/open_telemetry/metrics.h new file mode 100644 index 00000000000..054d040f97d --- /dev/null +++ b/plugins/metrics/otel/include/ydb-cpp-sdk/open_telemetry/metrics.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +#include + +namespace opentelemetry::metrics { +class MeterProvider; +} + +namespace 
NYdb::inline V3::NMetrics { + +std::shared_ptr CreateOtelMetricRegistry( + opentelemetry::nostd::shared_ptr meterProvider); + +} // namespace NYdb::NMetrics diff --git a/plugins/metrics/otel/src/metrics.cpp b/plugins/metrics/otel/src/metrics.cpp new file mode 100644 index 00000000000..6b9f14be362 --- /dev/null +++ b/plugins/metrics/otel/src/metrics.cpp @@ -0,0 +1,156 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace NYdb::inline V3::NMetrics { + +namespace { + +using namespace opentelemetry; + +common::KeyValueIterableView MakeAttributes(const TLabels& labels) { + return common::KeyValueIterableView(labels); +} + +class TOtelCounter : public ICounter { +public: + TOtelCounter(nostd::shared_ptr> counter, const TLabels& labels) + : Counter_(std::move(counter)) + , Labels_(labels) + {} + + void Inc() override { + Counter_->Add(1, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + } + +private: + nostd::shared_ptr> Counter_; + TLabels Labels_; +}; + +class TOtelUpDownCounterGauge : public IGauge { +public: + TOtelUpDownCounterGauge(nostd::shared_ptr> counter, const TLabels& labels) + : Counter_(std::move(counter)) + , Labels_(labels) + {} + + void Add(double delta) override { + Counter_->Add(delta, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + Value_ += delta; + } + + void Set(double value) override { + Counter_->Add(value - Value_, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + Value_ = value; + } + +private: + nostd::shared_ptr> Counter_; + TLabels Labels_; + double Value_ = 0; +}; + +class TOtelHistogram : public IHistogram { +public: + TOtelHistogram(nostd::shared_ptr> histogram, const TLabels& labels) + : Histogram_(std::move(histogram)) + , Labels_(labels) + {} + + void Record(double value) override { + Histogram_->Record(value, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); + } + +private: + nostd::shared_ptr> Histogram_; + TLabels 
Labels_; +}; + +class TOtelMetricRegistry : public IMetricRegistry { +public: + TOtelMetricRegistry(nostd::shared_ptr meterProvider) + : MeterProvider_(std::move(meterProvider)) + , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", GetSdkSemver())) + {} + + std::shared_ptr Counter(const std::string& name, const TLabels& labels) override { + auto counter = Meter_->CreateUInt64Counter(name); + return std::make_shared(std::move(counter), labels); + } + + std::shared_ptr Gauge(const std::string& name, const TLabels& labels) override { + auto counter = Meter_->CreateDoubleUpDownCounter(name); + return std::make_shared(std::move(counter), labels); + } + + std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels) override { + ConfigureHistogramBuckets(name, buckets); + auto histogram = Meter_->CreateDoubleHistogram(name); + return std::make_shared(std::move(histogram), labels); + } + +private: + void ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets) { + if (buckets.empty()) { + return; + } + + auto* sdkProvider = dynamic_cast(MeterProvider_.get()); + if (!sdkProvider) { + return; + } + + { + std::lock_guard lock(HistogramViewsLock_); + if (!HistogramViews_.insert(name).second) { + return; + } + } + + auto selector = std::make_unique( + sdk::metrics::InstrumentType::kHistogram, + name, + "" + ); + auto meterSelector = std::make_unique( + "ydb-cpp-sdk", + GetSdkSemver(), + {} + ); + + auto histogramConfig = std::make_shared(); + histogramConfig->boundaries_ = buckets; + + auto view = std::make_unique( + {}, + {}, + sdk::metrics::AggregationType::kHistogram, + histogramConfig + ); + + sdkProvider->AddView(std::move(selector), std::move(meterSelector), std::move(view)); + } + + nostd::shared_ptr MeterProvider_; + nostd::shared_ptr Meter_; + std::mutex HistogramViewsLock_; + std::unordered_set HistogramViews_; +}; + +} // namespace + +std::shared_ptr CreateOtelMetricRegistry( + 
opentelemetry::nostd::shared_ptr meterProvider) +{ + return std::make_shared(std::move(meterProvider)); +} + +} // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/CMakeLists.txt b/plugins/open_telemetry/CMakeLists.txt deleted file mode 100644 index d005708d422..00000000000 --- a/plugins/open_telemetry/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -if (YDB_SDK_ENABLE_OTEL_METRICS) - _ydb_sdk_add_library(open_telemetry_metrics) - target_sources(open_telemetry_metrics PRIVATE - src/metrics.cpp - ) - target_include_directories(open_telemetry_metrics PUBLIC - $ - $ - ) - target_link_libraries(open_telemetry_metrics PUBLIC - client-metrics - client-resources - opentelemetry-cpp::api - opentelemetry-cpp::metrics - ) - _ydb_sdk_make_client_component(OpenTelemetryMetrics open_telemetry_metrics) -endif() - -if (YDB_SDK_ENABLE_OTEL_TRACE) - _ydb_sdk_add_library(open_telemetry_trace) - target_sources(open_telemetry_trace PRIVATE - src/trace.cpp - ) - target_include_directories(open_telemetry_trace PUBLIC - $ - $ - ) - target_link_libraries(open_telemetry_trace PUBLIC - client-metrics - opentelemetry-cpp::api - opentelemetry-cpp::trace - ) - _ydb_sdk_make_client_component(OpenTelemetryTrace open_telemetry_trace) -endif() - -_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h deleted file mode 100644 index b0db9ea7d7c..00000000000 --- a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/extension.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include -#include - -namespace NYdb::inline V3::NMetrics { - -inline void AddOpenTelemetry(TDriverConfig& config - , opentelemetry::nostd::shared_ptr meterProvider - , opentelemetry::nostd::shared_ptr tracerProvider -) { - AddOpenTelemetryMetrics(config, std::move(meterProvider)); - AddOpenTelemetryTrace(config, std::move(tracerProvider)); -} - 
-} // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h deleted file mode 100644 index 5e9e9e77dea..00000000000 --- a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/metrics.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace NYdb::inline V3::NMetrics { - -class TOtelMetricRegistry : public IMetricRegistry { -public: - TOtelMetricRegistry(opentelemetry::nostd::shared_ptr meterProvider); - - std::shared_ptr Counter(const std::string& name, const TLabels& labels = {}) override; - std::shared_ptr Gauge(const std::string& name, const TLabels& labels = {}) override; - std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) override; - -private: - void ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets); - - opentelemetry::nostd::shared_ptr MeterProvider_; - opentelemetry::nostd::shared_ptr Meter_; - std::mutex HistogramViewsLock_; - std::unordered_set HistogramViews_; -}; - -inline void AddOpenTelemetryMetrics( - TDriverConfig& config, - opentelemetry::nostd::shared_ptr meterProvider) -{ - if (meterProvider) { - config.SetMetricExporter(std::make_shared(std::move(meterProvider))); - } -} - -} // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h b/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h deleted file mode 100644 index 3ba2e146fd9..00000000000 --- a/plugins/open_telemetry/include/ydb-cpp-sdk/open_telemetry/trace.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace NYdb::inline V3::NMetrics { - -class TOtelTraceProvider : public ITraceProvider { -public: - TOtelTraceProvider(opentelemetry::nostd::shared_ptr tracerProvider); - - std::shared_ptr GetTracer(const std::string& name) override; - -private: - 
opentelemetry::nostd::shared_ptr TracerProvider_; -}; - -inline void AddOpenTelemetryTrace( - TDriverConfig& config, - opentelemetry::nostd::shared_ptr tracerProvider) -{ - if (tracerProvider) { - config.SetTraceExporter(std::make_shared(std::move(tracerProvider))); - } -} - -} // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/src/metrics.cpp b/plugins/open_telemetry/src/metrics.cpp deleted file mode 100644 index 65850fd08b3..00000000000 --- a/plugins/open_telemetry/src/metrics.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace NYdb::inline V3::NMetrics { - -namespace { - -using namespace opentelemetry; - -common::KeyValueIterableView MakeAttributes(const TLabels& labels) { - return common::KeyValueIterableView(labels); -} - -class TOtelCounter : public ICounter { -public: - TOtelCounter(nostd::shared_ptr> counter, const TLabels& labels) - : Counter_(std::move(counter)) - , Labels_(labels) - {} - - void Inc() override { - Counter_->Add(1, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); - } - -private: - nostd::shared_ptr> Counter_; - TLabels Labels_; -}; - -class TOtelUpDownCounterGauge : public IGauge { -public: - TOtelUpDownCounterGauge(nostd::shared_ptr> counter, const TLabels& labels) - : Counter_(std::move(counter)) - , Labels_(labels) - {} - - void Add(double delta) override { - Counter_->Add(delta, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); - Value_ += delta; - } - - void Set(double value) override { - Counter_->Add(value - Value_, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); - Value_ = value; - } - -private: - nostd::shared_ptr> Counter_; - TLabels Labels_; - double Value_ = 0; -}; - -class TOtelHistogram : public IHistogram { -public: - TOtelHistogram(nostd::shared_ptr> histogram, const TLabels& labels) - : Histogram_(std::move(histogram)) - , Labels_(labels) - {} - - void Record(double value) override { - 
Histogram_->Record(value, MakeAttributes(Labels_), context::RuntimeContext::GetCurrent()); - } - -private: - nostd::shared_ptr> Histogram_; - TLabels Labels_; -}; - -} // namespace - -TOtelMetricRegistry::TOtelMetricRegistry(nostd::shared_ptr meterProvider) - : MeterProvider_(std::move(meterProvider)) - , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", GetSdkSemver())) -{} - -void TOtelMetricRegistry::ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets) { - if (buckets.empty()) { - return; - } - - auto* sdkProvider = dynamic_cast(MeterProvider_.get()); - if (!sdkProvider) { - return; - } - - { - std::lock_guard lock(HistogramViewsLock_); - if (!HistogramViews_.insert(name).second) { - return; - } - } - - auto selector = std::make_unique( - sdk::metrics::InstrumentType::kHistogram, - name, - "" - ); - auto meterSelector = std::make_unique( - "ydb-cpp-sdk", - GetSdkSemver(), - {} - ); - - auto histogramConfig = std::make_shared(); - histogramConfig->boundaries_ = buckets; - - auto view = std::make_unique( - {}, - {}, - sdk::metrics::AggregationType::kHistogram, - histogramConfig - ); - - sdkProvider->AddView(std::move(selector), std::move(meterSelector), std::move(view)); -} - -std::shared_ptr TOtelMetricRegistry::Counter(const std::string& name, const TLabels& labels) { - auto counter = Meter_->CreateUInt64Counter(name); - return std::make_shared(std::move(counter), labels); -} - -std::shared_ptr TOtelMetricRegistry::Gauge(const std::string& name, const TLabels& labels) { - auto counter = Meter_->CreateDoubleUpDownCounter(name); - return std::make_shared(std::move(counter), labels); -} - -std::shared_ptr TOtelMetricRegistry::Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels) { - ConfigureHistogramBuckets(name, buckets); - auto histogram = Meter_->CreateDoubleHistogram(name); - return std::make_shared(std::move(histogram), labels); -} - -} // namespace NYdb::NMetrics diff --git a/plugins/trace/CMakeLists.txt 
b/plugins/trace/CMakeLists.txt new file mode 100644 index 00000000000..ef231ab7103 --- /dev/null +++ b/plugins/trace/CMakeLists.txt @@ -0,0 +1,3 @@ +if (YDB_SDK_ENABLE_OTEL_TRACE) + add_subdirectory(otel EXCLUDE_FROM_ALL) +endif() diff --git a/plugins/trace/otel/CMakeLists.txt b/plugins/trace/otel/CMakeLists.txt new file mode 100644 index 00000000000..6816d8ff7c6 --- /dev/null +++ b/plugins/trace/otel/CMakeLists.txt @@ -0,0 +1,16 @@ +_ydb_sdk_add_library(open_telemetry_trace) +target_sources(open_telemetry_trace PRIVATE + src/trace.cpp +) +target_include_directories(open_telemetry_trace PUBLIC + $ + $ +) +target_link_libraries(open_telemetry_trace PUBLIC + client-trace + opentelemetry-cpp::api + opentelemetry-cpp::trace +) +_ydb_sdk_make_client_component(OpenTelemetryTrace open_telemetry_trace) + +_ydb_sdk_install_headers(${CMAKE_INSTALL_INCLUDEDIR} DIRECTORY include/) diff --git a/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h b/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h new file mode 100644 index 00000000000..9bdc12fb25f --- /dev/null +++ b/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +#include + +namespace opentelemetry::trace { +class TracerProvider; +} + +namespace NYdb::inline V3::NMetrics { + +std::shared_ptr CreateOtelTraceProvider( + opentelemetry::nostd::shared_ptr tracerProvider); + +} // namespace NYdb::NMetrics diff --git a/plugins/open_telemetry/src/trace.cpp b/plugins/trace/otel/src/trace.cpp similarity index 55% rename from plugins/open_telemetry/src/trace.cpp rename to plugins/trace/otel/src/trace.cpp index 54f04cb84df..7cac3f4c1cb 100644 --- a/plugins/open_telemetry/src/trace.cpp +++ b/plugins/trace/otel/src/trace.cpp @@ -1,6 +1,8 @@ #include +#include #include +#include namespace NYdb::inline V3::NMetrics { @@ -37,6 +39,19 @@ class TOtelSpan : public ISpan { Span_->SetAttribute(key, value); } + void AddEvent(const std::string& name, const std::map& 
attributes) override { + if (attributes.empty()) { + Span_->AddEvent(name); + } else { + std::vector> attrs; + attrs.reserve(attributes.size()); + for (const auto& [k, v] : attributes) { + attrs.emplace_back(nostd::string_view(k), common::AttributeValue(nostd::string_view(v))); + } + Span_->AddEvent(name, attrs); + } + } + private: nostd::shared_ptr Span_; }; @@ -57,14 +72,26 @@ class TOtelTracer : public ITracer { nostd::shared_ptr Tracer_; }; -} // namespace +class TOtelTraceProvider : public ITraceProvider { +public: + TOtelTraceProvider(nostd::shared_ptr tracerProvider) + : TracerProvider_(std::move(tracerProvider)) + {} -TOtelTraceProvider::TOtelTraceProvider(nostd::shared_ptr tracerProvider) - : TracerProvider_(std::move(tracerProvider)) -{} + std::shared_ptr GetTracer(const std::string& name) override { + return std::make_shared(TracerProvider_->GetTracer(name)); + } + +private: + nostd::shared_ptr TracerProvider_; +}; + +} // namespace -std::shared_ptr TOtelTraceProvider::GetTracer(const std::string& name) { - return std::make_shared(TracerProvider_->GetTracer(name)); +std::shared_ptr CreateOtelTraceProvider( + opentelemetry::nostd::shared_ptr tracerProvider) +{ + return std::make_shared(std::move(tracerProvider)); } } // namespace NYdb::NMetrics diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 65167cddbd0..ce5e4938058 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -26,5 +26,6 @@ add_subdirectory(scheme) add_subdirectory(ss_tasks) add_subdirectory(table) add_subdirectory(topic) +add_subdirectory(trace) add_subdirectory(types) add_subdirectory(value) diff --git a/src/client/driver/driver.cpp b/src/client/driver/driver.cpp index 63aeac8aea5..c0ef98756fe 100644 --- a/src/client/driver/driver.cpp +++ b/src/client/driver/driver.cpp @@ -51,8 +51,8 @@ class TDriverConfig::TImpl : public IConnectionsParams { uint64_t GetMaxMessageSize() const override { return MaxMessageSize; } const TLog& GetLog() const override { 
return Log; } std::shared_ptr GetExecutor() const override { return Executor; } - std::shared_ptr GetMetricExporter() const override { return MetricExporter; } - std::shared_ptr GetTraceExporter() const override { return TraceExporter; } + std::shared_ptr GetExternalMetricRegistry() const override { return MetricRegistry; } + std::shared_ptr GetTraceProvider() const override { return TraceProvider; } std::string Endpoint; size_t NetworkThreadsNum = 2; @@ -82,8 +82,8 @@ class TDriverConfig::TImpl : public IConnectionsParams { uint64_t MaxMessageSize = 0; TLog Log; // Null by default. std::shared_ptr Executor; - std::shared_ptr MetricExporter; - std::shared_ptr TraceExporter; + std::shared_ptr MetricRegistry; + std::shared_ptr TraceProvider; }; TDriverConfig::TDriverConfig(const std::string& connectionString) @@ -233,24 +233,16 @@ TDriverConfig& TDriverConfig::SetExecutor(std::shared_ptr executor) { return *this; } -TDriverConfig& TDriverConfig::SetMetricExporter(std::shared_ptr exporter) { - Impl_->MetricExporter = std::move(exporter); +TDriverConfig& TDriverConfig::SetMetricRegistry(std::shared_ptr registry) { + Impl_->MetricRegistry = std::move(registry); return *this; } -TDriverConfig& TDriverConfig::SetTraceExporter(std::shared_ptr exporter) { - Impl_->TraceExporter = std::move(exporter); +TDriverConfig& TDriverConfig::SetTraceProvider(std::shared_ptr provider) { + Impl_->TraceProvider = std::move(provider); return *this; } -std::shared_ptr TDriverConfig::GetMetricExporter() const { - return Impl_->MetricExporter; -} - -std::shared_ptr TDriverConfig::GetTraceExporter() const { - return Impl_->TraceExporter; -} - //////////////////////////////////////////////////////////////////////////////// std::shared_ptr CreateInternalInterface(const TDriver connection) { @@ -302,8 +294,8 @@ TDriverConfig TDriver::GetConfig() const { config.SetMaxOutboundMessageSize(Impl_->MaxOutboundMessageSize_); config.SetMaxMessageSize(Impl_->MaxMessageSize_); config.Impl_->Log = 
Impl_->Log; - config.SetMetricExporter(Impl_->GetMetricExporter()); - config.SetTraceExporter(Impl_->GetTraceExporter()); + config.SetMetricRegistry(Impl_->GetExternalMetricRegistry()); + config.SetTraceProvider(Impl_->GetTraceProvider()); return config; } diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.cpp b/src/client/impl/internal/grpc_connections/grpc_connections.cpp index 32645964933..757d5b777b7 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.cpp +++ b/src/client/impl/internal/grpc_connections/grpc_connections.cpp @@ -167,8 +167,8 @@ TGRpcConnectionsImpl::TGRpcConnectionsImpl(std::shared_ptr p #ifndef YDB_GRPC_BYPASS_CHANNEL_POOL , ChannelPool_(TcpKeepAliveSettings_, params->GetSocketIdleTimeout()) #endif - , MetricExporter_(params->GetMetricExporter()) - , TraceExporter_(params->GetTraceExporter()) + , MetricRegistry_(params->GetExternalMetricRegistry()) + , TraceProvider_(params->GetTraceProvider()) , NetworkThreadsNum_(params->GetNetworkThreadsNum()) , UsePerChannelTcpConnection_(params->GetUsePerChannelTcpConnection()) , GRpcClientLow_(NetworkThreadsNum_) @@ -436,12 +436,12 @@ void TGRpcConnectionsImpl::RegisterExtensionApi(IExtensionApi* api) { ExtensionApis_.emplace_back(api); } -std::shared_ptr TGRpcConnectionsImpl::GetMetricExporter() const { - return MetricExporter_; +std::shared_ptr TGRpcConnectionsImpl::GetExternalMetricRegistry() const { + return MetricRegistry_; } -std::shared_ptr TGRpcConnectionsImpl::GetTraceExporter() const { - return TraceExporter_; +std::shared_ptr TGRpcConnectionsImpl::GetTraceProvider() const { + return TraceProvider_; } void TGRpcConnectionsImpl::SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb) { diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.h b/src/client/impl/internal/grpc_connections/grpc_connections.h index 14db28028d2..84a25162912 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.h +++ 
b/src/client/impl/internal/grpc_connections/grpc_connections.h @@ -586,19 +586,8 @@ class TGRpcConnectionsImpl ::NMonitoring::TMetricRegistry* GetMetricRegistry() override; void RegisterExtension(IExtension* extension); void RegisterExtensionApi(IExtensionApi* api); - std::shared_ptr GetMetricExporter() const; - std::shared_ptr GetTraceExporter() const; - - template - T* GetExtensionApi() { - std::lock_guard lock(ExtensionsLock_); - for (const auto& api : ExtensionApis_) { - if (auto ptr = dynamic_cast(api.get())) { - return ptr; - } - } - return nullptr; - } + std::shared_ptr GetExternalMetricRegistry() const; + std::shared_ptr GetTraceProvider() const; void SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb); const TLog& GetLog() const override; @@ -733,8 +722,8 @@ class TGRpcConnectionsImpl std::vector> Extensions_; std::vector> ExtensionApis_; - std::shared_ptr MetricExporter_; - std::shared_ptr TraceExporter_; + std::shared_ptr MetricRegistry_; + std::shared_ptr TraceProvider_; IDiscoveryMutatorApi::TMutatorCb DiscoveryMutatorCb; diff --git a/src/client/impl/internal/grpc_connections/params.h b/src/client/impl/internal/grpc_connections/params.h index e6aa5e87dde..1e827d3343b 100644 --- a/src/client/impl/internal/grpc_connections/params.h +++ b/src/client/impl/internal/grpc_connections/params.h @@ -41,8 +41,8 @@ class IConnectionsParams { virtual uint64_t GetMaxOutboundMessageSize() const = 0; virtual uint64_t GetMaxMessageSize() const = 0; virtual std::shared_ptr GetExecutor() const = 0; - virtual std::shared_ptr GetMetricExporter() const = 0; - virtual std::shared_ptr GetTraceExporter() const = 0; + virtual std::shared_ptr GetExternalMetricRegistry() const = 0; + virtual std::shared_ptr GetTraceProvider() const = 0; }; } // namespace NYdb diff --git a/src/client/metrics/CMakeLists.txt b/src/client/metrics/CMakeLists.txt index 03915de6b5e..e681a846b26 100644 --- a/src/client/metrics/CMakeLists.txt +++ b/src/client/metrics/CMakeLists.txt @@ -1,7 +1,7 @@ 
_ydb_sdk_add_library(client-metrics) target_sources(client-metrics PRIVATE - metrics.cpp + metrics.cpp ) _ydb_sdk_make_client_component(Metrics client-metrics) diff --git a/src/client/query/CMakeLists.txt b/src/client/query/CMakeLists.txt index f1395ff107b..3cc7401200b 100644 --- a/src/client/query/CMakeLists.txt +++ b/src/client/query/CMakeLists.txt @@ -7,11 +7,12 @@ target_link_libraries(client-ydb_query PUBLIC impl-internal-make_request impl-session impl-internal-retry - client-metrics client-ydb_common_client client-ydb_driver client-ydb_query-impl client-ydb_result + client-metrics + client-trace client-types-operation api-protos api-grpc diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index 80a3d6ed00f..0fc3f75ec88 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -15,9 +15,10 @@ #include #include #include +#include #include #include -#include +#include #include @@ -70,9 +71,10 @@ class TQueryClient::TImpl: public TClientImplCommon, public SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query")); SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Query")); - if (auto traceProvider = Connections_->GetTraceExporter()) { + if (auto traceProvider = Connections_->GetTraceProvider()) { Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-query"); } + MetricRegistry_ = Connections_->GetExternalMetricRegistry(); } ~TImpl() { @@ -102,16 +104,21 @@ class TQueryClient::TImpl: public TClientImplCommon, public CollectParamsSize(params ? 
¶ms->GetProtoMap() : nullptr); auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint); + span->SetQueryText(query); + auto metrics = std::make_shared(MetricRegistry_, "ExecuteQuery"); return TExecQueryImpl::ExecuteQuery( Connections_, DbDriverState_, query, txControl, params, settings, session) - .Apply([span](TAsyncExecuteQueryResult future) { + .Apply([span, metrics](TAsyncExecuteQueryResult future) { try { auto result = future.GetValue(); + span->SetPeerEndpoint(result.GetEndpoint()); span->End(result.GetStatus()); + metrics->End(result.GetStatus()); return result; } catch (...) { span->End(EStatus::CLIENT_INTERNAL_ERROR); + metrics->End(EStatus::CLIENT_INTERNAL_ERROR); throw; } }); @@ -182,10 +189,12 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint); + auto metrics = std::make_shared(MetricRegistry_, "Rollback"); - auto responseCb = [promise, session, span] + auto responseCb = [promise, session, span, metrics] (Ydb::Query::RollbackTransactionResponse* response, TPlainStatus status) mutable { try { + span->SetPeerEndpoint(status.Endpoint); if (response) { NYdb::NIssue::TIssues opIssues; NYdb::NIssue::IssuesFromMessage(response->issues(), opIssues); @@ -193,14 +202,17 @@ class TQueryClient::TImpl: public TClientImplCommon, public status.Endpoint, std::move(status.Metadata)}); span->End(rollbackTxStatus.GetStatus()); + metrics->End(rollbackTxStatus.GetStatus()); promise.SetValue(std::move(rollbackTxStatus)); } else { span->End(status.Status); + metrics->End(status.Status); promise.SetValue(TStatus(std::move(status))); } } catch (...) 
{ span->End(EStatus::CLIENT_INTERNAL_ERROR); + metrics->End(EStatus::CLIENT_INTERNAL_ERROR); promise.SetException(std::current_exception()); } }; @@ -229,10 +241,12 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint); + auto metrics = std::make_shared(MetricRegistry_, "Commit"); - auto responseCb = [promise, session, span] + auto responseCb = [promise, session, span, metrics] (Ydb::Query::CommitTransactionResponse* response, TPlainStatus status) mutable { try { + span->SetPeerEndpoint(status.Endpoint); if (response) { NYdb::NIssue::TIssues opIssues; NYdb::NIssue::IssuesFromMessage(response->issues(), opIssues); @@ -240,15 +254,18 @@ class TQueryClient::TImpl: public TClientImplCommon, public status.Endpoint, std::move(status.Metadata)}); span->End(commitTxStatus.GetStatus()); + metrics->End(commitTxStatus.GetStatus()); TCommitTransactionResult commitTxResult(std::move(commitTxStatus)); promise.SetValue(std::move(commitTxResult)); } else { span->End(status.Status); + metrics->End(status.Status); promise.SetValue(TCommitTransactionResult(TStatus(std::move(status)))); } } catch (...) 
{ span->End(EStatus::CLIENT_INTERNAL_ERROR); + metrics->End(EStatus::CLIENT_INTERNAL_ERROR); promise.SetException(std::current_exception()); } }; @@ -456,11 +473,13 @@ class TQueryClient::TImpl: public TClientImplCommon, public TAsyncCreateSessionResult GetSession(const TCreateSessionSettings& settings) { class TQueryClientGetSessionCtx : public NSessionPool::IGetSessionCtx { public: - TQueryClientGetSessionCtx(std::shared_ptr client, const TCreateSessionSettings& settings, std::shared_ptr span) + TQueryClientGetSessionCtx(std::shared_ptr client, const TCreateSessionSettings& settings, + std::shared_ptr span, std::shared_ptr metrics) : Promise(NThreading::NewPromise()) , Client(client) , RpcSettings(TRpcRequestSettings::Make(settings)) , Span(span) + , Metrics(metrics) {} TAsyncCreateSessionResult GetFuture() { @@ -472,6 +491,9 @@ class TQueryClient::TImpl: public TClientImplCommon, public if (Span) { Span->End(status.GetStatus()); } + if (Metrics) { + Metrics->End(status.GetStatus()); + } ScheduleReply(TCreateSessionResult(std::move(status), std::move(session))); } @@ -487,17 +509,24 @@ class TQueryClient::TImpl: public TClientImplCommon, public if (Span) { Span->End(EStatus::SUCCESS); } + if (Metrics) { + Metrics->End(EStatus::SUCCESS); + } ScheduleReply(std::move(val)); } void ReplyNewSession() override { Client->CreateAttachedSession(RpcSettings).Subscribe( - [promise{std::move(Promise)}, span = Span](TAsyncCreateSessionResult future) mutable + [promise{std::move(Promise)}, span = Span, metrics = Metrics](TAsyncCreateSessionResult future) mutable { auto val = future.ExtractValue(); if (span) { + span->SetPeerEndpoint(val.GetEndpoint()); span->End(val.GetStatus()); } + if (metrics) { + metrics->End(val.GetStatus()); + } promise.SetValue(std::move(val)); }); } @@ -524,10 +553,12 @@ class TQueryClient::TImpl: public TClientImplCommon, public std::shared_ptr Client; const TRpcRequestSettings RpcSettings; std::shared_ptr Span; + std::shared_ptr Metrics; }; auto span 
= std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint); - auto ctx = std::make_unique(shared_from_this(), settings, span); + auto metrics = std::make_shared(MetricRegistry_, "CreateSession"); + auto ctx = std::make_unique(shared_from_this(), settings, span, metrics); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); @@ -597,6 +628,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public private: std::shared_ptr Tracer_; + std::shared_ptr MetricRegistry_; NSdkStats::TStatCollector::TClientRetryOperationStatCollector RetryOperationStatCollector_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> QuerySizeHistogram_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> ParamsSizeHistogram_; diff --git a/src/client/query/impl/CMakeLists.txt b/src/client/query/impl/CMakeLists.txt index 70f93b6d68d..c6c290795fe 100644 --- a/src/client/query/impl/CMakeLists.txt +++ b/src/client/query/impl/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(client-ydb_query-impl PUBLIC target_sources(client-ydb_query-impl PRIVATE exec_query.cpp client_session.cpp + query_metrics.cpp query_spans.cpp ) diff --git a/src/client/query/impl/query_metrics.cpp b/src/client/query/impl/query_metrics.cpp new file mode 100644 index 00000000000..f314f3d8b96 --- /dev/null +++ b/src/client/query/impl/query_metrics.cpp @@ -0,0 +1,71 @@ +#include "query_metrics.h" + +#include + +namespace NYdb::inline V3::NQuery { + +namespace { + +void SafeLogMetricsError(const char* message) noexcept { + try { + try { + std::cerr << "TQueryMetrics: " << message << ": " << CurrentExceptionMessage() << std::endl; + return; + } catch (...) { + } + std::cerr << "TQueryMetrics: " << message << ": (unknown)" << std::endl; + } catch (...) 
{ + } +} + +} // namespace + +static const std::vector LatencyBuckets = { + 1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 30000 +}; + +TQueryMetrics::TQueryMetrics(std::shared_ptr registry, const std::string& operationName) { + if (!registry) { + return; + } + + try { + NMetrics::TLabels labels = {{"operation", operationName}}; + RequestCounter_ = registry->Counter("ydb.query.requests", labels); + ErrorCounter_ = registry->Counter("ydb.query.errors", labels); + LatencyHistogram_ = registry->Histogram("ydb.query.latency_ms", LatencyBuckets, labels); + + RequestCounter_->Inc(); + StartTime_ = TInstant::Now(); + } catch (...) { + SafeLogMetricsError("failed to initialize metrics"); + RequestCounter_.reset(); + ErrorCounter_.reset(); + LatencyHistogram_.reset(); + } +} + +TQueryMetrics::~TQueryMetrics() noexcept { + End(EStatus::CLIENT_INTERNAL_ERROR); +} + +void TQueryMetrics::End(EStatus status) noexcept { + if (Ended_) { + return; + } + Ended_ = true; + + try { + if (LatencyHistogram_) { + auto durationMs = (TInstant::Now() - StartTime_).MilliSeconds(); + LatencyHistogram_->Record(static_cast(durationMs)); + } + if (status != EStatus::SUCCESS && ErrorCounter_) { + ErrorCounter_->Inc(); + } + } catch (...) 
{ + SafeLogMetricsError("failed to record metrics"); + } +} + +} // namespace NYdb::NQuery diff --git a/src/client/query/impl/query_metrics.h b/src/client/query/impl/query_metrics.h new file mode 100644 index 00000000000..807472e2434 --- /dev/null +++ b/src/client/query/impl/query_metrics.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include + +#include +#include + +namespace NYdb::inline V3::NQuery { + +class TQueryMetrics { +public: + TQueryMetrics(std::shared_ptr registry, const std::string& operationName); + ~TQueryMetrics() noexcept; + + void End(EStatus status) noexcept; + +private: + std::shared_ptr RequestCounter_; + std::shared_ptr ErrorCounter_; + std::shared_ptr LatencyHistogram_; + TInstant StartTime_; + bool Ended_ = false; +}; + +} // namespace NYdb::NQuery diff --git a/src/client/query/impl/query_spans.cpp b/src/client/query/impl/query_spans.cpp index fc1329aacd0..4bbd4d2250b 100644 --- a/src/client/query/impl/query_spans.cpp +++ b/src/client/query/impl/query_spans.cpp @@ -6,29 +6,49 @@ namespace NYdb::inline V3::NQuery { namespace { +constexpr int DefaultGrpcPort = 2135; + void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { - auto pos = endpoint.find(':'); + port = DefaultGrpcPort; + + if (endpoint.empty()) { + host = endpoint; + return; + } + + // IPv6 bracket notation: [addr]:port + if (endpoint.front() == '[') { + auto bracketEnd = endpoint.find(']'); + if (bracketEnd != std::string::npos) { + host = endpoint.substr(1, bracketEnd - 1); + if (bracketEnd + 2 < endpoint.size() && endpoint[bracketEnd + 1] == ':') { + try { + port = std::stoi(endpoint.substr(bracketEnd + 2)); + } catch (...) {} + } + return; + } + } + + auto pos = endpoint.rfind(':'); if (pos != std::string::npos) { host = endpoint.substr(0, pos); try { port = std::stoi(endpoint.substr(pos + 1)); - } catch (...) { - port = 2135; - } + } catch (...) 
{} } else { host = endpoint; - port = 2135; } } void SafeLogSpanError(const char* message) noexcept { try { try { - Cerr << "TQuerySpan: " << message << ": " << CurrentExceptionMessage() << Endl; + std::cerr << "TQuerySpan: " << message << ": " << CurrentExceptionMessage() << std::endl; return; } catch (...) { } - Cerr << "TQuerySpan: " << message << ": (unknown)" << Endl; + std::cerr << "TQuerySpan: " << message << ": (unknown)" << std::endl; } catch (...) { } } @@ -68,6 +88,43 @@ TQuerySpan::~TQuerySpan() noexcept { } } +void TQuerySpan::SetPeerEndpoint(const std::string& endpoint) noexcept { + if (!Span_ || endpoint.empty()) { + return; + } + try { + std::string host; + int port; + ParseEndpoint(endpoint, host, port); + Span_->SetAttribute("network.peer.address", host); + Span_->SetAttribute("network.peer.port", static_cast(port)); + } catch (...) { + SafeLogSpanError("failed to set peer endpoint"); + } +} + +void TQuerySpan::SetQueryText(const std::string& query) noexcept { + if (!Span_ || query.empty()) { + return; + } + try { + Span_->SetAttribute("db.query.text", query); + } catch (...) { + SafeLogSpanError("failed to set query text"); + } +} + +void TQuerySpan::AddEvent(const std::string& name, const std::map& attributes) noexcept { + if (!Span_) { + return; + } + try { + Span_->AddEvent(name, attributes); + } catch (...) 
{ + SafeLogSpanError("failed to add event"); + } +} + void TQuerySpan::End(EStatus status) noexcept { if (Span_) { try { diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h index ca0b6853954..75fd0fa830e 100644 --- a/src/client/query/impl/query_spans.h +++ b/src/client/query/impl/query_spans.h @@ -1,9 +1,10 @@ #pragma once -#include +#include #include #include +#include #include #include @@ -14,6 +15,10 @@ class TQuerySpan { TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint); ~TQuerySpan() noexcept; + void SetPeerEndpoint(const std::string& endpoint) noexcept; + void SetQueryText(const std::string& query) noexcept; + void AddEvent(const std::string& name, const std::map& attributes = {}) noexcept; + void End(EStatus status) noexcept; private: diff --git a/src/client/trace/CMakeLists.txt b/src/client/trace/CMakeLists.txt new file mode 100644 index 00000000000..86a8f8d4208 --- /dev/null +++ b/src/client/trace/CMakeLists.txt @@ -0,0 +1,7 @@ +_ydb_sdk_add_library(client-trace) + +target_sources(client-trace PRIVATE + trace.cpp +) + +_ydb_sdk_make_client_component(Trace client-trace) diff --git a/src/client/trace/trace.cpp b/src/client/trace/trace.cpp new file mode 100644 index 00000000000..6bf5bc664f0 --- /dev/null +++ b/src/client/trace/trace.cpp @@ -0,0 +1 @@ +#include diff --git a/tests/common/fake_metric_registry.h b/tests/common/fake_metric_registry.h new file mode 100644 index 00000000000..60ff1414633 --- /dev/null +++ b/tests/common/fake_metric_registry.h @@ -0,0 +1,122 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace NYdb::NTests { + +class TFakeCounter : public NMetrics::ICounter { +public: + void Inc() override { + Count_.fetch_add(1, std::memory_order_relaxed); + } + + int64_t Get() const { + return Count_.load(std::memory_order_relaxed); + } + +private: + std::atomic Count_{0}; +}; + +class TFakeHistogram : public NMetrics::IHistogram { 
+public: + void Record(double value) override { + std::lock_guard lock(Mutex_); + Values_.push_back(value); + } + + std::vector GetValues() const { + std::lock_guard lock(Mutex_); + return Values_; + } + + size_t Count() const { + std::lock_guard lock(Mutex_); + return Values_.size(); + } + +private: + mutable std::mutex Mutex_; + std::vector Values_; +}; + +class TFakeGauge : public NMetrics::IGauge { +public: + void Add(double delta) override { Value_ += delta; } + void Set(double value) override { Value_ = value; } + double Get() const { return Value_; } + +private: + double Value_ = 0.0; +}; + +struct TMetricKey { + std::string Name; + NMetrics::TLabels Labels; + + bool operator==(const TMetricKey& other) const = default; + bool operator<(const TMetricKey& other) const { + if (Name != other.Name) return Name < other.Name; + return Labels < other.Labels; + } +}; + +class TFakeMetricRegistry : public NMetrics::IMetricRegistry { +public: + std::shared_ptr Counter(const std::string& name, const NMetrics::TLabels& labels) override { + std::lock_guard lock(Mutex_); + auto key = TMetricKey{name, labels}; + auto it = Counters_.find(key); + if (it != Counters_.end()) { + return it->second; + } + auto counter = std::make_shared(); + Counters_[key] = counter; + return counter; + } + + std::shared_ptr Gauge(const std::string& name, const NMetrics::TLabels& labels) override { + std::lock_guard lock(Mutex_); + auto key = TMetricKey{name, labels}; + auto gauge = std::make_shared(); + Gauges_[key] = gauge; + return gauge; + } + + std::shared_ptr Histogram(const std::string& name, const std::vector& /*buckets*/, const NMetrics::TLabels& labels) override { + std::lock_guard lock(Mutex_); + auto key = TMetricKey{name, labels}; + auto it = Histograms_.find(key); + if (it != Histograms_.end()) { + return it->second; + } + auto histogram = std::make_shared(); + Histograms_[key] = histogram; + return histogram; + } + + std::shared_ptr GetCounter(const std::string& name, const 
NMetrics::TLabels& labels = {}) const { + std::lock_guard lock(Mutex_); + auto it = Counters_.find(TMetricKey{name, labels}); + return it != Counters_.end() ? it->second : nullptr; + } + + std::shared_ptr GetHistogram(const std::string& name, const NMetrics::TLabels& labels = {}) const { + std::lock_guard lock(Mutex_); + auto it = Histograms_.find(TMetricKey{name, labels}); + return it != Histograms_.end() ? it->second : nullptr; + } + +private: + mutable std::mutex Mutex_; + std::map> Counters_; + std::map> Gauges_; + std::map> Histograms_; +}; + +} // namespace NYdb::NTests diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index d5a1d709245..8aa28839a63 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(auth) add_subdirectory(basic_example) add_subdirectory(bulk_upsert) +add_subdirectory(metrics) add_subdirectory(server_restart) add_subdirectory(sessions) add_subdirectory(sessions_pool) diff --git a/tests/integration/metrics/CMakeLists.txt b/tests/integration/metrics/CMakeLists.txt new file mode 100644 index 00000000000..6c9bb8b3abd --- /dev/null +++ b/tests/integration/metrics/CMakeLists.txt @@ -0,0 +1,12 @@ +add_ydb_test(NAME metrics_it GTEST + INCLUDE_DIRS + ${YDB_SDK_SOURCE_DIR} + SOURCES + main.cpp + LINK_LIBRARIES + yutil + YDB-CPP-SDK::Query + client-metrics + LABELS + integration +) diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp new file mode 100644 index 00000000000..cab8f71cdc4 --- /dev/null +++ b/tests/integration/metrics/main.cpp @@ -0,0 +1,273 @@ +#include +#include +#include + +#include + +using namespace NYdb; +using namespace NYdb::NQuery; +using namespace NYdb::NTests; + +namespace { + +struct TRunArgs { + TDriver Driver; + std::shared_ptr Registry; +}; + +TRunArgs MakeRunArgs() { + std::string endpoint = std::getenv("YDB_ENDPOINT"); + std::string database = std::getenv("YDB_DATABASE"); + + auto registry = 
std::make_shared(); + + auto driverConfig = TDriverConfig() + .SetEndpoint(endpoint) + .SetDatabase(database) + .SetAuthToken(std::getenv("YDB_TOKEN") ? std::getenv("YDB_TOKEN") : "") + .SetMetricRegistry(registry); + + TDriver driver(driverConfig); + return {driver, registry}; +} + +std::shared_ptr GetCounter( + const std::shared_ptr& registry, + const std::string& name, + const std::string& operation) +{ + return registry->GetCounter(name, {{"operation", operation}}); +} + +std::shared_ptr GetHistogram( + const std::shared_ptr& registry, + const std::string& name, + const std::string& operation) +{ + return registry->GetHistogram(name, {{"operation", operation}}); +} + +} // namespace + +TEST(QueryMetricsIntegration, ExecuteQuerySuccessRecordsMetrics) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto session = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); + + auto result = session.GetSession().ExecuteQuery( + "SELECT 1;", + TTxControl::BeginTx().CommitTx() + ).ExtractValueSync(); + ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); + + auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + ASSERT_NE(requests, nullptr) << "ExecuteQuery request counter not created"; + EXPECT_GE(requests->Get(), 1); + + auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); + + auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); + ASSERT_NE(latency, nullptr) << "ExecuteQuery latency histogram not created"; + EXPECT_GE(latency->Count(), 1u); + for (double v : latency->GetValues()) { + EXPECT_GE(v, 0.0); + } + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, ExecuteQueryErrorRecordsErrorMetric) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto session = 
client.GetSession().ExtractValueSync(); + ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); + + auto result = session.GetSession().ExecuteQuery( + "INVALID SQL QUERY !!!", + TTxControl::BeginTx().CommitTx() + ).ExtractValueSync(); + EXPECT_NE(result.GetStatus(), EStatus::SUCCESS); + + auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + ASSERT_NE(requests, nullptr); + EXPECT_GE(requests->Get(), 1); + + auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_GE(errors->Get(), 1); + + auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); + ASSERT_NE(latency, nullptr); + EXPECT_GE(latency->Count(), 1u); + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, CreateSessionRecordsMetrics) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto session = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); + + auto requests = GetCounter(registry, "ydb.query.requests", "CreateSession"); + ASSERT_NE(requests, nullptr) << "CreateSession request counter not created"; + EXPECT_GE(requests->Get(), 1); + + auto latency = GetHistogram(registry, "ydb.query.latency_ms", "CreateSession"); + ASSERT_NE(latency, nullptr) << "CreateSession latency histogram not created"; + EXPECT_GE(latency->Count(), 1u); + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, CommitTransactionRecordsMetrics) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto sessionResult = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(sessionResult.IsSuccess()) << sessionResult.GetIssues().ToString(); + auto session = sessionResult.GetSession(); + + auto beginResult = session.BeginTransaction(TTxSettings::SerializableRW()).ExtractValueSync(); + ASSERT_TRUE(beginResult.IsSuccess()) << beginResult.GetIssues().ToString(); + auto tx = beginResult.GetTransaction(); + + 
auto execResult = tx.GetSession().ExecuteQuery( + "SELECT 1;", + TTxControl::Tx(tx) + ).ExtractValueSync(); + ASSERT_EQ(execResult.GetStatus(), EStatus::SUCCESS) << execResult.GetIssues().ToString(); + + if (execResult.GetTransaction()) { + auto commitResult = execResult.GetTransaction()->Commit().ExtractValueSync(); + ASSERT_TRUE(commitResult.IsSuccess()) << commitResult.GetIssues().ToString(); + + auto commitRequests = GetCounter(registry, "ydb.query.requests", "Commit"); + ASSERT_NE(commitRequests, nullptr) << "Commit request counter not created"; + EXPECT_GE(commitRequests->Get(), 1); + + auto commitLatency = GetHistogram(registry, "ydb.query.latency_ms", "Commit"); + ASSERT_NE(commitLatency, nullptr); + EXPECT_GE(commitLatency->Count(), 1u); + } + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, RollbackTransactionRecordsMetrics) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto sessionResult = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(sessionResult.IsSuccess()) << sessionResult.GetIssues().ToString(); + auto session = sessionResult.GetSession(); + + auto beginResult = session.BeginTransaction(TTxSettings::SerializableRW()).ExtractValueSync(); + ASSERT_TRUE(beginResult.IsSuccess()) << beginResult.GetIssues().ToString(); + auto tx = beginResult.GetTransaction(); + + auto rollbackResult = tx.Rollback().ExtractValueSync(); + ASSERT_TRUE(rollbackResult.IsSuccess()) << rollbackResult.GetIssues().ToString(); + + auto rollbackRequests = GetCounter(registry, "ydb.query.requests", "Rollback"); + ASSERT_NE(rollbackRequests, nullptr) << "Rollback request counter not created"; + EXPECT_GE(rollbackRequests->Get(), 1); + + auto rollbackErrors = GetCounter(registry, "ydb.query.errors", "Rollback"); + ASSERT_NE(rollbackErrors, nullptr); + EXPECT_EQ(rollbackErrors->Get(), 0); + + auto rollbackLatency = GetHistogram(registry, "ydb.query.latency_ms", "Rollback"); + ASSERT_NE(rollbackLatency, nullptr); + 
EXPECT_GE(rollbackLatency->Count(), 1u); + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, MultipleQueriesAccumulateMetrics) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto sessionResult = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(sessionResult.IsSuccess()) << sessionResult.GetIssues().ToString(); + auto session = sessionResult.GetSession(); + + const int numQueries = 5; + for (int i = 0; i < numQueries; ++i) { + auto result = session.ExecuteQuery( + "SELECT 1;", + TTxControl::BeginTx().CommitTx() + ).ExtractValueSync(); + ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); + } + + auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + ASSERT_NE(requests, nullptr); + EXPECT_EQ(requests->Get(), numQueries); + + auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); + + auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); + ASSERT_NE(latency, nullptr); + EXPECT_EQ(latency->Count(), static_cast(numQueries)); + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, NoRegistryDoesNotBreakOperations) { + std::string endpoint = std::getenv("YDB_ENDPOINT"); + std::string database = std::getenv("YDB_DATABASE"); + + auto driverConfig = TDriverConfig() + .SetEndpoint(endpoint) + .SetDatabase(database) + .SetAuthToken(std::getenv("YDB_TOKEN") ? 
std::getenv("YDB_TOKEN") : ""); + + TDriver driver(driverConfig); + TQueryClient client(driver); + + auto session = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); + + auto result = session.GetSession().ExecuteQuery( + "SELECT 1;", + TTxControl::BeginTx().CommitTx() + ).ExtractValueSync(); + EXPECT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); + + driver.Stop(true); +} + +TEST(QueryMetricsIntegration, LatencyValuesAreRealistic) { + auto [driver, registry] = MakeRunArgs(); + TQueryClient client(driver); + + auto sessionResult = client.GetSession().ExtractValueSync(); + ASSERT_TRUE(sessionResult.IsSuccess()) << sessionResult.GetIssues().ToString(); + auto session = sessionResult.GetSession(); + + auto result = session.ExecuteQuery( + "SELECT 1;", + TTxControl::BeginTx().CommitTx() + ).ExtractValueSync(); + ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); + + auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); + ASSERT_NE(latency, nullptr); + ASSERT_GE(latency->Count(), 1u); + + for (double v : latency->GetValues()) { + EXPECT_GE(v, 0.0) << "Latency must be non-negative"; + EXPECT_LT(v, 30000.0) << "Latency > 30s is unrealistic for SELECT 1"; + } + + driver.Stop(true); +} diff --git a/tests/unit/client/CMakeLists.txt b/tests/unit/client/CMakeLists.txt index 03b0a17c386..2ad5a38e01f 100644 --- a/tests/unit/client/CMakeLists.txt +++ b/tests/unit/client/CMakeLists.txt @@ -100,3 +100,16 @@ add_ydb_test(NAME client-ydb_value_ut GTEST LABELS unit ) + +add_ydb_test(NAME client-ydb_query_metrics_ut GTEST + INCLUDE_DIRS + ${YDB_SDK_SOURCE_DIR} + SOURCES + query/query_metrics_ut.cpp + LINK_LIBRARIES + yutil + client-ydb_query-impl + client-metrics + LABELS + unit +) diff --git a/tests/unit/client/query/query_metrics_ut.cpp b/tests/unit/client/query/query_metrics_ut.cpp new file mode 100644 index 00000000000..20c681b7eca --- 
/dev/null +++ b/tests/unit/client/query/query_metrics_ut.cpp @@ -0,0 +1,190 @@ +#include +#include + +#include + +using namespace NYdb; +using namespace NYdb::NQuery; +using namespace NYdb::NMetrics; +using namespace NYdb::NTests; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +class QueryMetricsTest : public ::testing::Test { +protected: + void SetUp() override { + Registry = std::make_shared(); + } + + std::shared_ptr RequestCounter(const std::string& op) { + return Registry->GetCounter("ydb.query.requests", {{"operation", op}}); + } + + std::shared_ptr ErrorCounter(const std::string& op) { + return Registry->GetCounter("ydb.query.errors", {{"operation", op}}); + } + + std::shared_ptr LatencyHistogram(const std::string& op) { + return Registry->GetHistogram("ydb.query.latency_ms", {{"operation", op}}); + } + + std::shared_ptr Registry; +}; + +TEST_F(QueryMetricsTest, RequestCounterIncrementedOnConstruction) { + TQueryMetrics metrics(Registry, "ExecuteQuery"); + + auto counter = RequestCounter("ExecuteQuery"); + ASSERT_NE(counter, nullptr); + EXPECT_EQ(counter->Get(), 1); +} + +TEST_F(QueryMetricsTest, SuccessDoesNotIncrementErrorCounter) { + { + TQueryMetrics metrics(Registry, "ExecuteQuery"); + metrics.End(EStatus::SUCCESS); + } + + auto errors = ErrorCounter("ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); +} + +TEST_F(QueryMetricsTest, FailureIncrementsErrorCounter) { + { + TQueryMetrics metrics(Registry, "Commit"); + metrics.End(EStatus::UNAVAILABLE); + } + + auto errors = ErrorCounter("Commit"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 1); +} + +TEST_F(QueryMetricsTest, LatencyRecordedOnEnd) { + { + TQueryMetrics metrics(Registry, "Rollback"); + metrics.End(EStatus::SUCCESS); + } + + auto hist = LatencyHistogram("Rollback"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); + 
EXPECT_GE(hist->GetValues()[0], 0.0); +} + +TEST_F(QueryMetricsTest, DoubleEndIsIdempotent) { + TQueryMetrics metrics(Registry, "ExecuteQuery"); + metrics.End(EStatus::SUCCESS); + metrics.End(EStatus::INTERNAL_ERROR); + + auto errors = ErrorCounter("ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); + + auto hist = LatencyHistogram("ExecuteQuery"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); +} + +TEST_F(QueryMetricsTest, DestructorCallsEndWithClientInternalError) { + { + TQueryMetrics metrics(Registry, "CreateSession"); + } + + auto requests = RequestCounter("CreateSession"); + ASSERT_NE(requests, nullptr); + EXPECT_EQ(requests->Get(), 1); + + auto errors = ErrorCounter("CreateSession"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 1); + + auto hist = LatencyHistogram("CreateSession"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); +} + +TEST_F(QueryMetricsTest, NullRegistryDoesNotCrash) { + EXPECT_NO_THROW({ + TQueryMetrics metrics(nullptr, "ExecuteQuery"); + metrics.End(EStatus::SUCCESS); + }); +} + +TEST_F(QueryMetricsTest, CorrectMetricNamesAndLabels) { + TQueryMetrics metrics(Registry, "ExecuteQuery"); + metrics.End(EStatus::SUCCESS); + + EXPECT_NE(Registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteQuery"}}), nullptr); + EXPECT_NE(Registry->GetCounter("ydb.query.errors", {{"operation", "ExecuteQuery"}}), nullptr); + EXPECT_NE(Registry->GetHistogram("ydb.query.latency_ms", {{"operation", "ExecuteQuery"}}), nullptr); + + EXPECT_EQ(Registry->GetCounter("ydb.query.requests", {{"operation", "Commit"}}), nullptr); +} + +TEST_F(QueryMetricsTest, DifferentOperationsHaveSeparateMetrics) { + { + TQueryMetrics m1(Registry, "ExecuteQuery"); + m1.End(EStatus::SUCCESS); + } + { + TQueryMetrics m2(Registry, "Commit"); + m2.End(EStatus::OVERLOADED); + } + + auto execRequests = RequestCounter("ExecuteQuery"); + auto commitRequests = RequestCounter("Commit"); + ASSERT_NE(execRequests, nullptr); + 
ASSERT_NE(commitRequests, nullptr); + EXPECT_EQ(execRequests->Get(), 1); + EXPECT_EQ(commitRequests->Get(), 1); + + auto execErrors = ErrorCounter("ExecuteQuery"); + auto commitErrors = ErrorCounter("Commit"); + EXPECT_EQ(execErrors->Get(), 0); + EXPECT_EQ(commitErrors->Get(), 1); +} + +TEST_F(QueryMetricsTest, MultipleRequestsAccumulate) { + for (int i = 0; i < 5; ++i) { + TQueryMetrics metrics(Registry, "ExecuteQuery"); + metrics.End(i % 2 == 0 ? EStatus::SUCCESS : EStatus::TIMEOUT); + } + + auto requests = RequestCounter("ExecuteQuery"); + ASSERT_NE(requests, nullptr); + EXPECT_EQ(requests->Get(), 5); + + auto errors = ErrorCounter("ExecuteQuery"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 2); + + auto hist = LatencyHistogram("ExecuteQuery"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 5u); +} + +TEST_F(QueryMetricsTest, AllErrorStatusesIncrementErrorCounter) { + std::vector errorStatuses = { + EStatus::BAD_REQUEST, + EStatus::UNAUTHORIZED, + EStatus::INTERNAL_ERROR, + EStatus::UNAVAILABLE, + EStatus::OVERLOADED, + EStatus::TIMEOUT, + EStatus::NOT_FOUND, + EStatus::CLIENT_INTERNAL_ERROR, + }; + + for (auto status : errorStatuses) { + TQueryMetrics metrics(Registry, "Rollback"); + metrics.End(status); + } + + auto errors = ErrorCounter("Rollback"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); +} From aa80faab259c06902eca1787038f047dc5889eb7 Mon Sep 17 00:00:00 2001 From: maladetska Date: Fri, 27 Mar 2026 22:20:48 +0300 Subject: [PATCH 07/17] add table metrics --- src/client/impl/CMakeLists.txt | 1 + src/client/impl/observability/CMakeLists.txt | 12 + .../impl/observability/client_metrics.cpp | 116 +++++++++ .../impl/observability/client_metrics.h | 28 +++ src/client/query/impl/CMakeLists.txt | 5 +- src/client/query/impl/query_metrics.cpp | 71 ------ src/client/query/impl/query_metrics.h | 24 +- src/client/table/impl/CMakeLists.txt | 2 + src/client/table/impl/table_metrics.h | 14 ++ 
tests/integration/metrics/main.cpp | 60 ++--- tests/unit/client/CMakeLists.txt | 6 +- .../observability/client_metrics_ut.cpp | 229 ++++++++++++++++++ tests/unit/client/query/query_metrics_ut.cpp | 190 --------------- 13 files changed, 446 insertions(+), 312 deletions(-) create mode 100644 src/client/impl/observability/CMakeLists.txt create mode 100644 src/client/impl/observability/client_metrics.cpp create mode 100644 src/client/impl/observability/client_metrics.h delete mode 100644 src/client/query/impl/query_metrics.cpp create mode 100644 src/client/table/impl/table_metrics.h create mode 100644 tests/unit/client/observability/client_metrics_ut.cpp delete mode 100644 tests/unit/client/query/query_metrics_ut.cpp diff --git a/src/client/impl/CMakeLists.txt b/src/client/impl/CMakeLists.txt index 9e04f134b37..8dfc3fa865b 100644 --- a/src/client/impl/CMakeLists.txt +++ b/src/client/impl/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(endpoints) add_subdirectory(executor) add_subdirectory(internal) +add_subdirectory(observability) add_subdirectory(session) add_subdirectory(stats) diff --git a/src/client/impl/observability/CMakeLists.txt b/src/client/impl/observability/CMakeLists.txt new file mode 100644 index 00000000000..961d2821559 --- /dev/null +++ b/src/client/impl/observability/CMakeLists.txt @@ -0,0 +1,12 @@ +_ydb_sdk_add_library(impl-observability) + +target_link_libraries(impl-observability PUBLIC + yutil + client-metrics +) + +target_sources(impl-observability PRIVATE + client_metrics.cpp +) + +_ydb_sdk_install_targets(TARGETS impl-observability) diff --git a/src/client/impl/observability/client_metrics.cpp b/src/client/impl/observability/client_metrics.cpp new file mode 100644 index 00000000000..efa9b739517 --- /dev/null +++ b/src/client/impl/observability/client_metrics.cpp @@ -0,0 +1,116 @@ +#include "client_metrics.h" + +#include + +namespace NYdb::inline V3::NObservability { + +namespace { + +void SafeLogMetricsError(const char* /*message*/) noexcept { + 
try { + try { + std::rethrow_exception(std::current_exception()); + } catch (const std::exception&) { + return; + } catch (...) { + } + } catch (...) { + } +} + +std::string StatusToString(EStatus status) { + switch (status) { + case EStatus::SUCCESS: return "SUCCESS"; + case EStatus::BAD_REQUEST: return "BAD_REQUEST"; + case EStatus::UNAUTHORIZED: return "UNAUTHORIZED"; + case EStatus::INTERNAL_ERROR: return "INTERNAL_ERROR"; + case EStatus::ABORTED: return "ABORTED"; + case EStatus::UNAVAILABLE: return "UNAVAILABLE"; + case EStatus::OVERLOADED: return "OVERLOADED"; + case EStatus::SCHEME_ERROR: return "SCHEME_ERROR"; + case EStatus::GENERIC_ERROR: return "GENERIC_ERROR"; + case EStatus::TIMEOUT: return "TIMEOUT"; + case EStatus::BAD_SESSION: return "BAD_SESSION"; + case EStatus::PRECONDITION_FAILED: return "PRECONDITION_FAILED"; + case EStatus::ALREADY_EXISTS: return "ALREADY_EXISTS"; + case EStatus::NOT_FOUND: return "NOT_FOUND"; + case EStatus::SESSION_EXPIRED: return "SESSION_EXPIRED"; + case EStatus::CANCELLED: return "CANCELLED"; + case EStatus::UNDETERMINED: return "UNDETERMINED"; + case EStatus::UNSUPPORTED: return "UNSUPPORTED"; + case EStatus::SESSION_BUSY: return "SESSION_BUSY"; + case EStatus::EXTERNAL_ERROR: return "EXTERNAL_ERROR"; + case EStatus::TRANSPORT_UNAVAILABLE: return "TRANSPORT_UNAVAILABLE"; + case EStatus::CLIENT_RESOURCE_EXHAUSTED:return "CLIENT_RESOURCE_EXHAUSTED"; + case EStatus::CLIENT_DEADLINE_EXCEEDED: return "CLIENT_DEADLINE_EXCEEDED"; + case EStatus::CLIENT_INTERNAL_ERROR: return "CLIENT_INTERNAL_ERROR"; + case EStatus::CLIENT_CANCELLED: return "CLIENT_CANCELLED"; + case EStatus::CLIENT_UNAUTHENTICATED: return "CLIENT_UNAUTHENTICATED"; + case EStatus::CLIENT_CALL_UNIMPLEMENTED:return "CLIENT_CALL_UNIMPLEMENTED"; + case EStatus::CLIENT_OUT_OF_RANGE: return "CLIENT_OUT_OF_RANGE"; + case EStatus::CLIENT_DISCOVERY_FAILED: return "CLIENT_DISCOVERY_FAILED"; + case EStatus::CLIENT_LIMITS_REACHED: return "CLIENT_LIMITS_REACHED"; + default: 
return "STATUS_UNDEFINED"; + } +} + +} // namespace + +static const std::vector DurationBucketsSec = { + 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 +}; + +TClientMetrics::TClientMetrics(std::shared_ptr registry, + const std::string& prefix, const std::string& operationName) +{ + if (!registry) { + return; + } + + try { + NMetrics::TLabels labels = {{"operation", operationName}}; + RequestCounter_ = registry->Counter(prefix + ".requests", labels); + ErrorCounter_ = registry->Counter(prefix + ".errors", labels); + + NMetrics::TLabels durationLabels = { + {"db.system.name", "ydb"}, + {"db.operation.name", operationName}, + }; + DurationHistogram_ = registry->Histogram("db.client.operation.duration", DurationBucketsSec, durationLabels); + + RequestCounter_->Inc(); + StartTime_ = std::chrono::steady_clock::now(); + } catch (...) { + SafeLogMetricsError("failed to initialize metrics"); + RequestCounter_.reset(); + ErrorCounter_.reset(); + DurationHistogram_.reset(); + } +} + +TClientMetrics::~TClientMetrics() noexcept { + End(EStatus::CLIENT_INTERNAL_ERROR); +} + +void TClientMetrics::End(EStatus status) noexcept { + if (Ended_) { + return; + } + Ended_ = true; + + try { + if (DurationHistogram_) { + auto elapsed = std::chrono::steady_clock::now() - StartTime_; + double durationSec = std::chrono::duration(elapsed).count(); + DurationHistogram_->Record(durationSec); + } + + if (status != EStatus::SUCCESS && ErrorCounter_) { + ErrorCounter_->Inc(); + } + } catch (...) 
{ + SafeLogMetricsError("failed to record metrics"); + } +} + +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/client_metrics.h b/src/client/impl/observability/client_metrics.h new file mode 100644 index 00000000000..bce81a958f2 --- /dev/null +++ b/src/client/impl/observability/client_metrics.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace NYdb::inline V3::NObservability { + +class TClientMetrics { +public: + TClientMetrics(std::shared_ptr registry, + const std::string& prefix, const std::string& operationName); + ~TClientMetrics() noexcept; + + void End(EStatus status) noexcept; + +private: + std::shared_ptr RequestCounter_; + std::shared_ptr ErrorCounter_; + std::shared_ptr DurationHistogram_; + std::chrono::steady_clock::time_point StartTime_; + bool Ended_ = false; +}; + +} // namespace NYdb::NObservability diff --git a/src/client/query/impl/CMakeLists.txt b/src/client/query/impl/CMakeLists.txt index c6c290795fe..fdc46b50f95 100644 --- a/src/client/query/impl/CMakeLists.txt +++ b/src/client/query/impl/CMakeLists.txt @@ -9,10 +9,13 @@ target_link_libraries(client-ydb_query-impl PUBLIC client-ydb_result ) +target_link_libraries(client-ydb_query-impl PUBLIC + impl-observability +) + target_sources(client-ydb_query-impl PRIVATE exec_query.cpp client_session.cpp - query_metrics.cpp query_spans.cpp ) diff --git a/src/client/query/impl/query_metrics.cpp b/src/client/query/impl/query_metrics.cpp deleted file mode 100644 index f314f3d8b96..00000000000 --- a/src/client/query/impl/query_metrics.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "query_metrics.h" - -#include - -namespace NYdb::inline V3::NQuery { - -namespace { - -void SafeLogMetricsError(const char* message) noexcept { - try { - try { - std::cerr << "TQueryMetrics: " << message << ": " << CurrentExceptionMessage() << std::endl; - return; - } catch (...) 
{ - } - std::cerr << "TQueryMetrics: " << message << ": (unknown)" << std::endl; - } catch (...) { - } -} - -} // namespace - -static const std::vector LatencyBuckets = { - 1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 30000 -}; - -TQueryMetrics::TQueryMetrics(std::shared_ptr registry, const std::string& operationName) { - if (!registry) { - return; - } - - try { - NMetrics::TLabels labels = {{"operation", operationName}}; - RequestCounter_ = registry->Counter("ydb.query.requests", labels); - ErrorCounter_ = registry->Counter("ydb.query.errors", labels); - LatencyHistogram_ = registry->Histogram("ydb.query.latency_ms", LatencyBuckets, labels); - - RequestCounter_->Inc(); - StartTime_ = TInstant::Now(); - } catch (...) { - SafeLogMetricsError("failed to initialize metrics"); - RequestCounter_.reset(); - ErrorCounter_.reset(); - LatencyHistogram_.reset(); - } -} - -TQueryMetrics::~TQueryMetrics() noexcept { - End(EStatus::CLIENT_INTERNAL_ERROR); -} - -void TQueryMetrics::End(EStatus status) noexcept { - if (Ended_) { - return; - } - Ended_ = true; - - try { - if (LatencyHistogram_) { - auto durationMs = (TInstant::Now() - StartTime_).MilliSeconds(); - LatencyHistogram_->Record(static_cast(durationMs)); - } - if (status != EStatus::SUCCESS && ErrorCounter_) { - ErrorCounter_->Inc(); - } - } catch (...) 
{ - SafeLogMetricsError("failed to record metrics"); - } -} - -} // namespace NYdb::NQuery diff --git a/src/client/query/impl/query_metrics.h b/src/client/query/impl/query_metrics.h index 807472e2434..841e3212f14 100644 --- a/src/client/query/impl/query_metrics.h +++ b/src/client/query/impl/query_metrics.h @@ -1,28 +1,14 @@ #pragma once -#include -#include - -#include - -#include -#include +#include namespace NYdb::inline V3::NQuery { -class TQueryMetrics { +class TQueryMetrics : public NObservability::TClientMetrics { public: - TQueryMetrics(std::shared_ptr registry, const std::string& operationName); - ~TQueryMetrics() noexcept; - - void End(EStatus status) noexcept; - -private: - std::shared_ptr RequestCounter_; - std::shared_ptr ErrorCounter_; - std::shared_ptr LatencyHistogram_; - TInstant StartTime_; - bool Ended_ = false; + TQueryMetrics(std::shared_ptr registry, const std::string& operationName) + : TClientMetrics(std::move(registry), "ydb.query", operationName) + {} }; } // namespace NYdb::NQuery diff --git a/src/client/table/impl/CMakeLists.txt b/src/client/table/impl/CMakeLists.txt index 8f53d386fc6..8ecfe4ead87 100644 --- a/src/client/table/impl/CMakeLists.txt +++ b/src/client/table/impl/CMakeLists.txt @@ -10,6 +10,8 @@ target_link_libraries(client-ydb_table-impl client-impl-ydb_endpoints impl-session client-ydb_table-query_stats + client-metrics + impl-observability PRIVATE OpenSSL::SSL ) diff --git a/src/client/table/impl/table_metrics.h b/src/client/table/impl/table_metrics.h new file mode 100644 index 00000000000..5bf6128a6ea --- /dev/null +++ b/src/client/table/impl/table_metrics.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace NYdb::inline V3::NTable { + +class TTableMetrics : public NObservability::TClientMetrics { +public: + TTableMetrics(std::shared_ptr registry, const std::string& operationName) + : TClientMetrics(std::move(registry), "ydb.table", operationName) + {} +}; + +} // namespace NYdb::NTable diff --git 
a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index cab8f71cdc4..21c2398954c 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -39,12 +39,14 @@ std::shared_ptr GetCounter( return registry->GetCounter(name, {{"operation", operation}}); } -std::shared_ptr GetHistogram( +std::shared_ptr GetDuration( const std::shared_ptr& registry, - const std::string& name, const std::string& operation) { - return registry->GetHistogram(name, {{"operation", operation}}); + return registry->GetHistogram("db.client.operation.duration", { + {"db.system.name", "ydb"}, + {"db.operation.name", operation}, + }); } } // namespace @@ -70,10 +72,10 @@ TEST(QueryMetricsIntegration, ExecuteQuerySuccessRecordsMetrics) { ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 0); - auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); - ASSERT_NE(latency, nullptr) << "ExecuteQuery latency histogram not created"; - EXPECT_GE(latency->Count(), 1u); - for (double v : latency->GetValues()) { + auto duration = GetDuration(registry, "ExecuteQuery"); + ASSERT_NE(duration, nullptr) << "ExecuteQuery duration histogram not created"; + EXPECT_GE(duration->Count(), 1u); + for (double v : duration->GetValues()) { EXPECT_GE(v, 0.0); } @@ -101,9 +103,9 @@ TEST(QueryMetricsIntegration, ExecuteQueryErrorRecordsErrorMetric) { ASSERT_NE(errors, nullptr); EXPECT_GE(errors->Get(), 1); - auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); - ASSERT_NE(latency, nullptr); - EXPECT_GE(latency->Count(), 1u); + auto duration = GetDuration(registry, "ExecuteQuery"); + ASSERT_NE(duration, nullptr); + EXPECT_GE(duration->Count(), 1u); driver.Stop(true); } @@ -119,9 +121,9 @@ TEST(QueryMetricsIntegration, CreateSessionRecordsMetrics) { ASSERT_NE(requests, nullptr) << "CreateSession request counter not created"; EXPECT_GE(requests->Get(), 1); - auto latency = GetHistogram(registry, "ydb.query.latency_ms", 
"CreateSession"); - ASSERT_NE(latency, nullptr) << "CreateSession latency histogram not created"; - EXPECT_GE(latency->Count(), 1u); + auto duration = GetDuration(registry, "CreateSession"); + ASSERT_NE(duration, nullptr) << "CreateSession duration histogram not created"; + EXPECT_GE(duration->Count(), 1u); driver.Stop(true); } @@ -152,9 +154,9 @@ TEST(QueryMetricsIntegration, CommitTransactionRecordsMetrics) { ASSERT_NE(commitRequests, nullptr) << "Commit request counter not created"; EXPECT_GE(commitRequests->Get(), 1); - auto commitLatency = GetHistogram(registry, "ydb.query.latency_ms", "Commit"); - ASSERT_NE(commitLatency, nullptr); - EXPECT_GE(commitLatency->Count(), 1u); + auto commitDuration = GetDuration(registry, "Commit"); + ASSERT_NE(commitDuration, nullptr); + EXPECT_GE(commitDuration->Count(), 1u); } driver.Stop(true); @@ -183,9 +185,9 @@ TEST(QueryMetricsIntegration, RollbackTransactionRecordsMetrics) { ASSERT_NE(rollbackErrors, nullptr); EXPECT_EQ(rollbackErrors->Get(), 0); - auto rollbackLatency = GetHistogram(registry, "ydb.query.latency_ms", "Rollback"); - ASSERT_NE(rollbackLatency, nullptr); - EXPECT_GE(rollbackLatency->Count(), 1u); + auto rollbackDuration = GetDuration(registry, "Rollback"); + ASSERT_NE(rollbackDuration, nullptr); + EXPECT_GE(rollbackDuration->Count(), 1u); driver.Stop(true); } @@ -215,9 +217,9 @@ TEST(QueryMetricsIntegration, MultipleQueriesAccumulateMetrics) { ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 0); - auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); - ASSERT_NE(latency, nullptr); - EXPECT_EQ(latency->Count(), static_cast(numQueries)); + auto duration = GetDuration(registry, "ExecuteQuery"); + ASSERT_NE(duration, nullptr); + EXPECT_EQ(duration->Count(), static_cast(numQueries)); driver.Stop(true); } @@ -246,7 +248,7 @@ TEST(QueryMetricsIntegration, NoRegistryDoesNotBreakOperations) { driver.Stop(true); } -TEST(QueryMetricsIntegration, LatencyValuesAreRealistic) { 
+TEST(QueryMetricsIntegration, DurationValuesAreRealistic) { auto [driver, registry] = MakeRunArgs(); TQueryClient client(driver); @@ -260,13 +262,13 @@ TEST(QueryMetricsIntegration, LatencyValuesAreRealistic) { ).ExtractValueSync(); ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); - auto latency = GetHistogram(registry, "ydb.query.latency_ms", "ExecuteQuery"); - ASSERT_NE(latency, nullptr); - ASSERT_GE(latency->Count(), 1u); + auto duration = GetDuration(registry, "ExecuteQuery"); + ASSERT_NE(duration, nullptr); + ASSERT_GE(duration->Count(), 1u); - for (double v : latency->GetValues()) { - EXPECT_GE(v, 0.0) << "Latency must be non-negative"; - EXPECT_LT(v, 30000.0) << "Latency > 30s is unrealistic for SELECT 1"; + for (double v : duration->GetValues()) { + EXPECT_GE(v, 0.0) << "Duration must be non-negative"; + EXPECT_LT(v, 30.0) << "Duration > 30s is unrealistic for SELECT 1"; } driver.Stop(true); diff --git a/tests/unit/client/CMakeLists.txt b/tests/unit/client/CMakeLists.txt index 2ad5a38e01f..de86c3fe274 100644 --- a/tests/unit/client/CMakeLists.txt +++ b/tests/unit/client/CMakeLists.txt @@ -101,14 +101,16 @@ add_ydb_test(NAME client-ydb_value_ut GTEST unit ) -add_ydb_test(NAME client-ydb_query_metrics_ut GTEST +add_ydb_test(NAME client-ydb_metrics_ut GTEST INCLUDE_DIRS ${YDB_SDK_SOURCE_DIR} SOURCES - query/query_metrics_ut.cpp + observability/client_metrics_ut.cpp LINK_LIBRARIES yutil + impl-observability client-ydb_query-impl + client-ydb_table-impl client-metrics LABELS unit diff --git a/tests/unit/client/observability/client_metrics_ut.cpp b/tests/unit/client/observability/client_metrics_ut.cpp new file mode 100644 index 00000000000..3dbedfa801d --- /dev/null +++ b/tests/unit/client/observability/client_metrics_ut.cpp @@ -0,0 +1,229 @@ +#include +#include +#include +#include + +#include + +using namespace NYdb; +using namespace NYdb::NObservability; +using namespace NYdb::NMetrics; +using namespace NYdb::NTests; + +// 
--------------------------------------------------------------------------- +// TClientMetrics (shared logic) +// --------------------------------------------------------------------------- + +class ClientMetricsTest : public ::testing::Test { +protected: + void SetUp() override { + Registry = std::make_shared(); + } + + std::shared_ptr RequestCounter(const std::string& op) { + return Registry->GetCounter(Prefix + ".requests", {{"operation", op}}); + } + + std::shared_ptr ErrorCounter(const std::string& op) { + return Registry->GetCounter(Prefix + ".errors", {{"operation", op}}); + } + + std::shared_ptr DurationHistogram(const std::string& op) { + return Registry->GetHistogram("db.client.operation.duration", { + {"db.system.name", "ydb"}, + {"db.operation.name", op}, + }); + } + + const std::string Prefix = "ydb.test"; + std::shared_ptr Registry; +}; + +TEST_F(ClientMetricsTest, RequestCounterIncrementedOnConstruction) { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + + auto counter = RequestCounter("DoSomething"); + ASSERT_NE(counter, nullptr); + EXPECT_EQ(counter->Get(), 1); +} + +TEST_F(ClientMetricsTest, SuccessDoesNotIncrementErrorCounter) { + { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + metrics.End(EStatus::SUCCESS); + } + + auto errors = ErrorCounter("DoSomething"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); +} + +TEST_F(ClientMetricsTest, FailureIncrementsErrorCounter) { + { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + metrics.End(EStatus::UNAVAILABLE); + } + + auto errors = ErrorCounter("DoSomething"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 1); +} + +TEST_F(ClientMetricsTest, DurationRecordedOnEnd) { + { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + metrics.End(EStatus::SUCCESS); + } + + auto hist = DurationHistogram("DoSomething"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); + EXPECT_GE(hist->GetValues()[0], 0.0); +} + 
+TEST_F(ClientMetricsTest, DurationIsInSeconds) { + { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + metrics.End(EStatus::SUCCESS); + } + + auto hist = DurationHistogram("DoSomething"); + ASSERT_NE(hist, nullptr); + EXPECT_LT(hist->GetValues()[0], 1.0); +} + +TEST_F(ClientMetricsTest, DoubleEndIsIdempotent) { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + metrics.End(EStatus::SUCCESS); + metrics.End(EStatus::INTERNAL_ERROR); + + auto errors = ErrorCounter("DoSomething"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 0); + + auto hist = DurationHistogram("DoSomething"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); +} + +TEST_F(ClientMetricsTest, DestructorCallsEndWithClientInternalError) { + { + TClientMetrics metrics(Registry, Prefix, "DoSomething"); + } + + auto requests = RequestCounter("DoSomething"); + ASSERT_NE(requests, nullptr); + EXPECT_EQ(requests->Get(), 1); + + auto errors = ErrorCounter("DoSomething"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), 1); + + auto hist = DurationHistogram("DoSomething"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->Count(), 1u); +} + +TEST_F(ClientMetricsTest, NullRegistryDoesNotCrash) { + EXPECT_NO_THROW({ + TClientMetrics metrics(nullptr, Prefix, "DoSomething"); + metrics.End(EStatus::SUCCESS); + }); +} + +TEST_F(ClientMetricsTest, DifferentOperationsHaveSeparateMetrics) { + { + TClientMetrics m1(Registry, Prefix, "OpA"); + m1.End(EStatus::SUCCESS); + } + { + TClientMetrics m2(Registry, Prefix, "OpB"); + m2.End(EStatus::OVERLOADED); + } + + EXPECT_EQ(RequestCounter("OpA")->Get(), 1); + EXPECT_EQ(RequestCounter("OpB")->Get(), 1); + EXPECT_EQ(ErrorCounter("OpA")->Get(), 0); + EXPECT_EQ(ErrorCounter("OpB")->Get(), 1); + EXPECT_EQ(DurationHistogram("OpA")->Count(), 1u); + EXPECT_EQ(DurationHistogram("OpB")->Count(), 1u); +} + +TEST_F(ClientMetricsTest, MultipleRequestsAccumulate) { + for (int i = 0; i < 5; ++i) { + TClientMetrics metrics(Registry, Prefix, 
"Op"); + metrics.End(i % 2 == 0 ? EStatus::SUCCESS : EStatus::TIMEOUT); + } + + EXPECT_EQ(RequestCounter("Op")->Get(), 5); + EXPECT_EQ(ErrorCounter("Op")->Get(), 2); + EXPECT_EQ(DurationHistogram("Op")->Count(), 5u); +} + +TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { + std::vector errorStatuses = { + EStatus::BAD_REQUEST, + EStatus::UNAUTHORIZED, + EStatus::INTERNAL_ERROR, + EStatus::UNAVAILABLE, + EStatus::OVERLOADED, + EStatus::TIMEOUT, + EStatus::NOT_FOUND, + EStatus::CLIENT_INTERNAL_ERROR, + }; + + for (auto status : errorStatuses) { + TClientMetrics metrics(Registry, Prefix, "Op"); + metrics.End(status); + } + + auto errors = ErrorCounter("Op"); + ASSERT_NE(errors, nullptr); + EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); +} + +TEST_F(ClientMetricsTest, PrefixAppliedToCounterNames) { + TClientMetrics metrics(Registry, "ydb.custom", "Op"); + metrics.End(EStatus::SUCCESS); + + EXPECT_NE(Registry->GetCounter("ydb.custom.requests", {{"operation", "Op"}}), nullptr); + EXPECT_NE(Registry->GetCounter("ydb.custom.errors", {{"operation", "Op"}}), nullptr); + + EXPECT_EQ(Registry->GetCounter("ydb.test.requests", {{"operation", "Op"}}), nullptr); +} + +// --------------------------------------------------------------------------- +// TQueryMetrics prefix +// --------------------------------------------------------------------------- + +TEST(QueryMetricsTest, UsesQueryPrefix) { + auto registry = std::make_shared(); + + NQuery::TQueryMetrics metrics(registry, "ExecuteQuery"); + metrics.End(EStatus::SUCCESS); + + EXPECT_NE(registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteQuery"}}), nullptr); + EXPECT_NE(registry->GetCounter("ydb.query.errors", {{"operation", "ExecuteQuery"}}), nullptr); + EXPECT_NE(registry->GetHistogram("db.client.operation.duration", { + {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteQuery"}}), nullptr); + + EXPECT_EQ(registry->GetCounter("ydb.table.requests", {{"operation", 
"ExecuteQuery"}}), nullptr); +} + +// --------------------------------------------------------------------------- +// TTableMetrics prefix +// --------------------------------------------------------------------------- + +TEST(TableMetricsTest, UsesTablePrefix) { + auto registry = std::make_shared(); + + NTable::TTableMetrics metrics(registry, "ExecuteDataQuery"); + metrics.End(EStatus::SUCCESS); + + EXPECT_NE(registry->GetCounter("ydb.table.requests", {{"operation", "ExecuteDataQuery"}}), nullptr); + EXPECT_NE(registry->GetCounter("ydb.table.errors", {{"operation", "ExecuteDataQuery"}}), nullptr); + EXPECT_NE(registry->GetHistogram("db.client.operation.duration", { + {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteDataQuery"}}), nullptr); + + EXPECT_EQ(registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteDataQuery"}}), nullptr); +} diff --git a/tests/unit/client/query/query_metrics_ut.cpp b/tests/unit/client/query/query_metrics_ut.cpp deleted file mode 100644 index 20c681b7eca..00000000000 --- a/tests/unit/client/query/query_metrics_ut.cpp +++ /dev/null @@ -1,190 +0,0 @@ -#include -#include - -#include - -using namespace NYdb; -using namespace NYdb::NQuery; -using namespace NYdb::NMetrics; -using namespace NYdb::NTests; - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -class QueryMetricsTest : public ::testing::Test { -protected: - void SetUp() override { - Registry = std::make_shared(); - } - - std::shared_ptr RequestCounter(const std::string& op) { - return Registry->GetCounter("ydb.query.requests", {{"operation", op}}); - } - - std::shared_ptr ErrorCounter(const std::string& op) { - return Registry->GetCounter("ydb.query.errors", {{"operation", op}}); - } - - std::shared_ptr LatencyHistogram(const std::string& op) { - return Registry->GetHistogram("ydb.query.latency_ms", {{"operation", op}}); - } - - std::shared_ptr 
Registry; -}; - -TEST_F(QueryMetricsTest, RequestCounterIncrementedOnConstruction) { - TQueryMetrics metrics(Registry, "ExecuteQuery"); - - auto counter = RequestCounter("ExecuteQuery"); - ASSERT_NE(counter, nullptr); - EXPECT_EQ(counter->Get(), 1); -} - -TEST_F(QueryMetricsTest, SuccessDoesNotIncrementErrorCounter) { - { - TQueryMetrics metrics(Registry, "ExecuteQuery"); - metrics.End(EStatus::SUCCESS); - } - - auto errors = ErrorCounter("ExecuteQuery"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), 0); -} - -TEST_F(QueryMetricsTest, FailureIncrementsErrorCounter) { - { - TQueryMetrics metrics(Registry, "Commit"); - metrics.End(EStatus::UNAVAILABLE); - } - - auto errors = ErrorCounter("Commit"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), 1); -} - -TEST_F(QueryMetricsTest, LatencyRecordedOnEnd) { - { - TQueryMetrics metrics(Registry, "Rollback"); - metrics.End(EStatus::SUCCESS); - } - - auto hist = LatencyHistogram("Rollback"); - ASSERT_NE(hist, nullptr); - EXPECT_EQ(hist->Count(), 1u); - EXPECT_GE(hist->GetValues()[0], 0.0); -} - -TEST_F(QueryMetricsTest, DoubleEndIsIdempotent) { - TQueryMetrics metrics(Registry, "ExecuteQuery"); - metrics.End(EStatus::SUCCESS); - metrics.End(EStatus::INTERNAL_ERROR); - - auto errors = ErrorCounter("ExecuteQuery"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), 0); - - auto hist = LatencyHistogram("ExecuteQuery"); - ASSERT_NE(hist, nullptr); - EXPECT_EQ(hist->Count(), 1u); -} - -TEST_F(QueryMetricsTest, DestructorCallsEndWithClientInternalError) { - { - TQueryMetrics metrics(Registry, "CreateSession"); - } - - auto requests = RequestCounter("CreateSession"); - ASSERT_NE(requests, nullptr); - EXPECT_EQ(requests->Get(), 1); - - auto errors = ErrorCounter("CreateSession"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), 1); - - auto hist = LatencyHistogram("CreateSession"); - ASSERT_NE(hist, nullptr); - EXPECT_EQ(hist->Count(), 1u); -} - -TEST_F(QueryMetricsTest, NullRegistryDoesNotCrash) { 
- EXPECT_NO_THROW({ - TQueryMetrics metrics(nullptr, "ExecuteQuery"); - metrics.End(EStatus::SUCCESS); - }); -} - -TEST_F(QueryMetricsTest, CorrectMetricNamesAndLabels) { - TQueryMetrics metrics(Registry, "ExecuteQuery"); - metrics.End(EStatus::SUCCESS); - - EXPECT_NE(Registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteQuery"}}), nullptr); - EXPECT_NE(Registry->GetCounter("ydb.query.errors", {{"operation", "ExecuteQuery"}}), nullptr); - EXPECT_NE(Registry->GetHistogram("ydb.query.latency_ms", {{"operation", "ExecuteQuery"}}), nullptr); - - EXPECT_EQ(Registry->GetCounter("ydb.query.requests", {{"operation", "Commit"}}), nullptr); -} - -TEST_F(QueryMetricsTest, DifferentOperationsHaveSeparateMetrics) { - { - TQueryMetrics m1(Registry, "ExecuteQuery"); - m1.End(EStatus::SUCCESS); - } - { - TQueryMetrics m2(Registry, "Commit"); - m2.End(EStatus::OVERLOADED); - } - - auto execRequests = RequestCounter("ExecuteQuery"); - auto commitRequests = RequestCounter("Commit"); - ASSERT_NE(execRequests, nullptr); - ASSERT_NE(commitRequests, nullptr); - EXPECT_EQ(execRequests->Get(), 1); - EXPECT_EQ(commitRequests->Get(), 1); - - auto execErrors = ErrorCounter("ExecuteQuery"); - auto commitErrors = ErrorCounter("Commit"); - EXPECT_EQ(execErrors->Get(), 0); - EXPECT_EQ(commitErrors->Get(), 1); -} - -TEST_F(QueryMetricsTest, MultipleRequestsAccumulate) { - for (int i = 0; i < 5; ++i) { - TQueryMetrics metrics(Registry, "ExecuteQuery"); - metrics.End(i % 2 == 0 ? 
EStatus::SUCCESS : EStatus::TIMEOUT); - } - - auto requests = RequestCounter("ExecuteQuery"); - ASSERT_NE(requests, nullptr); - EXPECT_EQ(requests->Get(), 5); - - auto errors = ErrorCounter("ExecuteQuery"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), 2); - - auto hist = LatencyHistogram("ExecuteQuery"); - ASSERT_NE(hist, nullptr); - EXPECT_EQ(hist->Count(), 5u); -} - -TEST_F(QueryMetricsTest, AllErrorStatusesIncrementErrorCounter) { - std::vector errorStatuses = { - EStatus::BAD_REQUEST, - EStatus::UNAUTHORIZED, - EStatus::INTERNAL_ERROR, - EStatus::UNAVAILABLE, - EStatus::OVERLOADED, - EStatus::TIMEOUT, - EStatus::NOT_FOUND, - EStatus::CLIENT_INTERNAL_ERROR, - }; - - for (auto status : errorStatuses) { - TQueryMetrics metrics(Registry, "Rollback"); - metrics.End(status); - } - - auto errors = ErrorCounter("Rollback"); - ASSERT_NE(errors, nullptr); - EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); -} From b46d38449ee4c183b0902449883be8545b0db4fd Mon Sep 17 00:00:00 2001 From: maladetska Date: Sun, 29 Mar 2026 23:43:25 +0300 Subject: [PATCH 08/17] fix semconv --- include/ydb-cpp-sdk/client/metrics/metrics.h | 22 ++- plugins/metrics/otel/src/metrics.cpp | 31 ++-- .../impl/observability/client_metrics.cpp | 83 +++++----- .../impl/observability/client_metrics.h | 8 +- src/client/query/client.cpp | 8 +- src/client/query/impl/query_metrics.h | 2 +- src/client/query/impl/query_spans.cpp | 37 +++-- src/client/query/impl/query_spans.h | 9 +- src/client/table/impl/CMakeLists.txt | 1 + src/client/table/impl/table_client.cpp | 25 ++- src/client/table/impl/table_client.h | 14 +- src/client/table/impl/table_metrics.h | 2 +- src/client/table/impl/table_spans.cpp | 116 ++++++++++++++ src/client/table/impl/table_spans.h | 31 ++++ tests/common/fake_metric_registry.h | 19 ++- tests/integration/metrics/main.cpp | 54 ++++--- .../observability/client_metrics_ut.cpp | 144 +++++++++++------- 17 files changed, 440 insertions(+), 166 deletions(-) create mode 100644 
src/client/table/impl/table_spans.cpp create mode 100644 src/client/table/impl/table_spans.h diff --git a/include/ydb-cpp-sdk/client/metrics/metrics.h b/include/ydb-cpp-sdk/client/metrics/metrics.h index 7e2b0b903dd..5faa930ed50 100644 --- a/include/ydb-cpp-sdk/client/metrics/metrics.h +++ b/include/ydb-cpp-sdk/client/metrics/metrics.h @@ -33,9 +33,25 @@ class IMetricRegistry { public: virtual ~IMetricRegistry() = default; - virtual std::shared_ptr Counter(const std::string& name, const TLabels& labels = {}) = 0; - virtual std::shared_ptr Gauge(const std::string& name, const TLabels& labels = {}) = 0; - virtual std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels = {}) = 0; + virtual std::shared_ptr Counter( + const std::string& name, + const TLabels& labels = {}, + const std::string& description = {}, + const std::string& unit = {} + ) = 0; + virtual std::shared_ptr Gauge( + const std::string& name, + const TLabels& labels = {}, + const std::string& description = {}, + const std::string& unit = {} + ) = 0; + virtual std::shared_ptr Histogram( + const std::string& name, + const std::vector& buckets, + const TLabels& labels = {}, + const std::string& description = {}, + const std::string& unit = {} + ) = 0; }; } // namespace NYdb::NMetrics diff --git a/plugins/metrics/otel/src/metrics.cpp b/plugins/metrics/otel/src/metrics.cpp index 6b9f14be362..af07af89bb8 100644 --- a/plugins/metrics/otel/src/metrics.cpp +++ b/plugins/metrics/otel/src/metrics.cpp @@ -81,24 +81,37 @@ class TOtelMetricRegistry : public IMetricRegistry { , Meter_(MeterProvider_->GetMeter("ydb-cpp-sdk", GetSdkSemver())) {} - std::shared_ptr Counter(const std::string& name, const TLabels& labels) override { - auto counter = Meter_->CreateUInt64Counter(name); + std::shared_ptr Counter(const std::string& name + , const TLabels& labels + , const std::string& description + , const std::string& unit + ) override { + auto counter = 
Meter_->CreateUInt64Counter(name, description, unit); return std::make_shared(std::move(counter), labels); } - std::shared_ptr Gauge(const std::string& name, const TLabels& labels) override { - auto counter = Meter_->CreateDoubleUpDownCounter(name); + std::shared_ptr Gauge(const std::string& name + , const TLabels& labels + , const std::string& description + , const std::string& unit + ) override { + auto counter = Meter_->CreateDoubleUpDownCounter(name, description, unit); return std::make_shared(std::move(counter), labels); } - std::shared_ptr Histogram(const std::string& name, const std::vector& buckets, const TLabels& labels) override { - ConfigureHistogramBuckets(name, buckets); - auto histogram = Meter_->CreateDoubleHistogram(name); + std::shared_ptr Histogram(const std::string& name + , const std::vector& buckets + , const TLabels& labels + , const std::string& description + , const std::string& unit + ) override { + ConfigureHistogramBuckets(name, unit, buckets); + auto histogram = Meter_->CreateDoubleHistogram(name, description, unit); return std::make_shared(std::move(histogram), labels); } private: - void ConfigureHistogramBuckets(const std::string& name, const std::vector& buckets) { + void ConfigureHistogramBuckets(const std::string& name, const std::string& unit, const std::vector& buckets) { if (buckets.empty()) { return; } @@ -118,7 +131,7 @@ class TOtelMetricRegistry : public IMetricRegistry { auto selector = std::make_unique( sdk::metrics::InstrumentType::kHistogram, name, - "" + unit ); auto meterSelector = std::make_unique( "ydb-cpp-sdk", diff --git a/src/client/impl/observability/client_metrics.cpp b/src/client/impl/observability/client_metrics.cpp index efa9b739517..f66605bc772 100644 --- a/src/client/impl/observability/client_metrics.cpp +++ b/src/client/impl/observability/client_metrics.cpp @@ -1,6 +1,7 @@ #include "client_metrics.h" #include +#include namespace NYdb::inline V3::NObservability { @@ -18,65 +19,32 @@ void 
SafeLogMetricsError(const char* /*message*/) noexcept { } } -std::string StatusToString(EStatus status) { - switch (status) { - case EStatus::SUCCESS: return "SUCCESS"; - case EStatus::BAD_REQUEST: return "BAD_REQUEST"; - case EStatus::UNAUTHORIZED: return "UNAUTHORIZED"; - case EStatus::INTERNAL_ERROR: return "INTERNAL_ERROR"; - case EStatus::ABORTED: return "ABORTED"; - case EStatus::UNAVAILABLE: return "UNAVAILABLE"; - case EStatus::OVERLOADED: return "OVERLOADED"; - case EStatus::SCHEME_ERROR: return "SCHEME_ERROR"; - case EStatus::GENERIC_ERROR: return "GENERIC_ERROR"; - case EStatus::TIMEOUT: return "TIMEOUT"; - case EStatus::BAD_SESSION: return "BAD_SESSION"; - case EStatus::PRECONDITION_FAILED: return "PRECONDITION_FAILED"; - case EStatus::ALREADY_EXISTS: return "ALREADY_EXISTS"; - case EStatus::NOT_FOUND: return "NOT_FOUND"; - case EStatus::SESSION_EXPIRED: return "SESSION_EXPIRED"; - case EStatus::CANCELLED: return "CANCELLED"; - case EStatus::UNDETERMINED: return "UNDETERMINED"; - case EStatus::UNSUPPORTED: return "UNSUPPORTED"; - case EStatus::SESSION_BUSY: return "SESSION_BUSY"; - case EStatus::EXTERNAL_ERROR: return "EXTERNAL_ERROR"; - case EStatus::TRANSPORT_UNAVAILABLE: return "TRANSPORT_UNAVAILABLE"; - case EStatus::CLIENT_RESOURCE_EXHAUSTED:return "CLIENT_RESOURCE_EXHAUSTED"; - case EStatus::CLIENT_DEADLINE_EXCEEDED: return "CLIENT_DEADLINE_EXCEEDED"; - case EStatus::CLIENT_INTERNAL_ERROR: return "CLIENT_INTERNAL_ERROR"; - case EStatus::CLIENT_CANCELLED: return "CLIENT_CANCELLED"; - case EStatus::CLIENT_UNAUTHENTICATED: return "CLIENT_UNAUTHENTICATED"; - case EStatus::CLIENT_CALL_UNIMPLEMENTED:return "CLIENT_CALL_UNIMPLEMENTED"; - case EStatus::CLIENT_OUT_OF_RANGE: return "CLIENT_OUT_OF_RANGE"; - case EStatus::CLIENT_DISCOVERY_FAILED: return "CLIENT_DISCOVERY_FAILED"; - case EStatus::CLIENT_LIMITS_REACHED: return "CLIENT_LIMITS_REACHED"; - default: return "STATUS_UNDEFINED"; - } -} - } // namespace static const std::vector DurationBucketsSec = { 
0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 }; -TClientMetrics::TClientMetrics(std::shared_ptr registry, - const std::string& prefix, const std::string& operationName) +static constexpr const char* RequestsDescription = "Number of database client operations started."; +static constexpr const char* ErrorsDescription = "Number of database client operations that failed."; +static constexpr const char* DurationDescription = "Duration of database client operations."; + +TClientMetrics::TClientMetrics(std::shared_ptr registry + , const std::string& operationName +) : Registry_(std::move(registry)) + , OperationName_(operationName) { - if (!registry) { + if (!Registry_) { return; } try { - NMetrics::TLabels labels = {{"operation", operationName}}; - RequestCounter_ = registry->Counter(prefix + ".requests", labels); - ErrorCounter_ = registry->Counter(prefix + ".errors", labels); - - NMetrics::TLabels durationLabels = { - {"db.system.name", "ydb"}, + NMetrics::TLabels labels = { + {"db.system.name", "other_sql"}, {"db.operation.name", operationName}, }; - DurationHistogram_ = registry->Histogram("db.client.operation.duration", DurationBucketsSec, durationLabels); + RequestCounter_ = Registry_->Counter("db.client.operation.requests", labels, RequestsDescription, "{operation}"); + ErrorCounter_ = Registry_->Counter("db.client.operation.errors", labels, ErrorsDescription, "{error}"); RequestCounter_->Inc(); StartTime_ = std::chrono::steady_clock::now(); @@ -84,7 +52,7 @@ TClientMetrics::TClientMetrics(std::shared_ptr regist SafeLogMetricsError("failed to initialize metrics"); RequestCounter_.reset(); ErrorCounter_.reset(); - DurationHistogram_.reset(); + Registry_.reset(); } } @@ -99,10 +67,27 @@ void TClientMetrics::End(EStatus status) noexcept { Ended_ = true; try { - if (DurationHistogram_) { + const std::string statusCode = ToString(status); + if (Registry_) { auto elapsed = std::chrono::steady_clock::now() - StartTime_; double durationSec = 
std::chrono::duration(elapsed).count(); - DurationHistogram_->Record(durationSec); + NMetrics::TLabels durationLabels = { + {"db.system.name", "other_sql"}, + {"db.operation.name", OperationName_}, + {"db.response.status_code", statusCode}, + }; + if (status != EStatus::SUCCESS) { + durationLabels["error.type"] = statusCode; + } + auto durationHistogram = Registry_->Histogram( + "db.client.operation.duration", + DurationBucketsSec, + durationLabels, + DurationDescription, + "s"); + if (durationHistogram) { + durationHistogram->Record(durationSec); + } } if (status != EStatus::SUCCESS && ErrorCounter_) { diff --git a/src/client/impl/observability/client_metrics.h b/src/client/impl/observability/client_metrics.h index bce81a958f2..e12a30cad60 100644 --- a/src/client/impl/observability/client_metrics.h +++ b/src/client/impl/observability/client_metrics.h @@ -11,16 +11,18 @@ namespace NYdb::inline V3::NObservability { class TClientMetrics { public: - TClientMetrics(std::shared_ptr registry, - const std::string& prefix, const std::string& operationName); + TClientMetrics(std::shared_ptr registry + , const std::string& operationName + ); ~TClientMetrics() noexcept; void End(EStatus status) noexcept; private: + std::shared_ptr Registry_; + std::string OperationName_; std::shared_ptr RequestCounter_; std::shared_ptr ErrorCounter_; - std::shared_ptr DurationHistogram_; std::chrono::steady_clock::time_point StartTime_; bool Ended_ = false; }; diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index 0fc3f75ec88..8c627089170 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -103,7 +103,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public CollectQuerySize(query); CollectParamsSize(params ? 
&params->GetProtoMap() : nullptr); - auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint); + auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); span->SetQueryText(query); auto metrics = std::make_shared(MetricRegistry_, "ExecuteQuery"); @@ -188,7 +188,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint); + auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto metrics = std::make_shared(MetricRegistry_, "Rollback"); auto responseCb = [promise, session, span, metrics] @@ -240,7 +240,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint); + auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto metrics = std::make_shared(MetricRegistry_, "Commit"); auto responseCb = [promise, session, span, metrics] @@ -556,7 +556,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public std::shared_ptr Metrics; }; - auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint); + auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto metrics = std::make_shared(MetricRegistry_, "CreateSession"); auto ctx = std::make_unique(shared_from_this(), settings, span, metrics); auto future = ctx->GetFuture(); diff --git a/src/client/query/impl/query_metrics.h b/src/client/query/impl/query_metrics.h index 841e3212f14..2bd284f76f3 100644 --- a/src/client/query/impl/query_metrics.h +++ b/src/client/query/impl/query_metrics.h @@ -7,7 +7,7 @@ namespace NYdb::inline V3::NQuery { class TQueryMetrics : public
NObservability::TClientMetrics { public: TQueryMetrics(std::shared_ptr registry, const std::string& operationName) - : TClientMetrics(std::move(registry), "ydb.query", operationName) + : TClientMetrics(std::move(registry), operationName) {} }; diff --git a/src/client/query/impl/query_spans.cpp b/src/client/query/impl/query_spans.cpp index 4bbd4d2250b..372b4464fae 100644 --- a/src/client/query/impl/query_spans.cpp +++ b/src/client/query/impl/query_spans.cpp @@ -1,7 +1,11 @@ #include "query_spans.h" +#include + #include +#include + namespace NYdb::inline V3::NQuery { namespace { @@ -16,7 +20,6 @@ void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { return; } - // IPv6 bracket notation: [addr]:port if (endpoint.front() == '[') { auto bracketEnd = endpoint.find(']'); if (bracketEnd != std::string::npos) { @@ -41,21 +44,26 @@ void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { } } -void SafeLogSpanError(const char* message) noexcept { +void SafeLogSpanError(TLog& log, const char* message) noexcept { try { try { - std::cerr << "TQuerySpan: " << message << ": " << CurrentExceptionMessage() << std::endl; + std::rethrow_exception(std::current_exception()); + } catch (const std::exception& e) { + LOG_LAZY(log, TLOG_ERR, std::string("TQuerySpan: ") + message + ": " + e.what()); return; } catch (...) { } - std::cerr << "TQuerySpan: " << message << ": (unknown)" << std::endl; + LOG_LAZY(log, TLOG_ERR, std::string("TQuerySpan: ") + message + ": (unknown)"); } catch (...) 
{ } } } // namespace -TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint) { +TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::string& operationName, + const std::string& endpoint, const TLog& log) + : Log_(log) +{ if (!tracer) { return; } @@ -65,15 +73,16 @@ TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::str ParseEndpoint(endpoint, host, port); try { - Span_ = tracer->StartSpan("ydb." + operationName, NMetrics::ESpanKind::CLIENT); + Span_ = tracer->StartSpan(operationName, NMetrics::ESpanKind::CLIENT); if (!Span_) { return; } - Span_->SetAttribute("db.system.name", "ydb"); + Span_->SetAttribute("db.system.name", "other_sql"); + Span_->SetAttribute("db.operation.name", operationName); Span_->SetAttribute("server.address", host); Span_->SetAttribute("server.port", static_cast(port)); } catch (...) { - SafeLogSpanError("failed to initialize span"); + SafeLogSpanError(Log_, "failed to initialize span"); Span_.reset(); } } @@ -83,7 +92,7 @@ TQuerySpan::~TQuerySpan() noexcept { try { Span_->End(); } catch (...) { - SafeLogSpanError("failed to end span"); + SafeLogSpanError(Log_, "failed to end span"); } } } @@ -99,7 +108,7 @@ void TQuerySpan::SetPeerEndpoint(const std::string& endpoint) noexcept { Span_->SetAttribute("network.peer.address", host); Span_->SetAttribute("network.peer.port", static_cast(port)); } catch (...) { - SafeLogSpanError("failed to set peer endpoint"); + SafeLogSpanError(Log_, "failed to set peer endpoint"); } } @@ -110,7 +119,7 @@ void TQuerySpan::SetQueryText(const std::string& query) noexcept { try { Span_->SetAttribute("db.query.text", query); } catch (...) { - SafeLogSpanError("failed to set query text"); + SafeLogSpanError(Log_, "failed to set query text"); } } @@ -121,20 +130,20 @@ void TQuerySpan::AddEvent(const std::string& name, const std::mapAddEvent(name, attributes); } catch (...) 
{ - SafeLogSpanError("failed to add event"); + SafeLogSpanError(Log_, "failed to add event"); } } void TQuerySpan::End(EStatus status) noexcept { if (Span_) { try { - Span_->SetAttribute("db.response.status_code", static_cast(status)); + Span_->SetAttribute("db.response.status_code", ToString(status)); if (status != EStatus::SUCCESS) { Span_->SetAttribute("error.type", ToString(status)); } Span_->End(); } catch (...) { - SafeLogSpanError("failed to finalize span"); + SafeLogSpanError(Log_, "failed to finalize span"); } Span_.reset(); } diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h index 75fd0fa830e..dc2e6e554e9 100644 --- a/src/client/query/impl/query_spans.h +++ b/src/client/query/impl/query_spans.h @@ -4,6 +4,8 @@ #include #include +#include + #include #include #include @@ -12,7 +14,11 @@ namespace NYdb::inline V3::NQuery { class TQuerySpan { public: - TQuerySpan(std::shared_ptr tracer, const std::string& operationName, const std::string& endpoint); + TQuerySpan(std::shared_ptr tracer + , const std::string& operationName + , const std::string& endpoint + , const TLog& log + ); ~TQuerySpan() noexcept; void SetPeerEndpoint(const std::string& endpoint) noexcept; @@ -22,6 +28,7 @@ class TQuerySpan { void End(EStatus status) noexcept; private: + TLog Log_; std::shared_ptr Span_; }; diff --git a/src/client/table/impl/CMakeLists.txt b/src/client/table/impl/CMakeLists.txt index 8ecfe4ead87..b0427de6eda 100644 --- a/src/client/table/impl/CMakeLists.txt +++ b/src/client/table/impl/CMakeLists.txt @@ -21,6 +21,7 @@ target_sources(client-ydb_table-impl PRIVATE data_query.cpp readers.cpp request_migrator.cpp + table_spans.cpp table_client.cpp transaction.cpp ) diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index 4df9e91e24e..ceb8950769b 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -22,6 +22,12 @@ 
TTableClient::TImpl::TImpl(std::shared_ptr&& connections, , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { + MetricRegistry_ = Connections_->GetExternalMetricRegistry(); + + if (auto traceProvider = Connections_->GetTraceProvider()) { + Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-table"); + } + if (!DbDriverState_->StatCollector.IsCollecting()) { return; } @@ -378,8 +384,10 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio auto createSessionPromise = NewPromise(); auto self = shared_from_this(); + auto metrics = std::make_shared(MetricRegistry_, "CreateSession"); + auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto createSessionExtractor = [createSessionPromise, self, standalone] + auto createSessionExtractor = [createSessionPromise, self, standalone, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { Ydb::Table::CreateSessionResult result; if (any) { @@ -392,10 +400,11 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio } self->DbDriverState_->StatCollector.IncSessionsOnHost(status.Endpoint); } else { - // We do not use SessionStatusInterception for CreateSession request session.SessionImpl_->MarkBroken(); } TCreateSessionResult val(TStatus(std::move(status)), std::move(session)); + metrics->End(val.GetStatus()); + span->End(val.GetStatus()); createSessionPromise.SetValue(std::move(val)); }; @@ -759,11 +768,21 @@ TAsyncStatus TTableClient::TImpl::ExecuteSchemeQuery(const TSession& session, co request.set_session_id(TStringType{session.GetId()}); request.set_yql_text(TStringType{query}); - return RunSimple( + auto metrics = std::make_shared(MetricRegistry_, "ExecuteSchemeQuery"); + auto span = std::make_shared(Tracer_, "ExecuteSchemeQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + + auto future = RunSimple( std::move(request), 
&Ydb::Table::V1::TableService::Stub::AsyncExecuteSchemeQuery, rpcSettings ); + + return future.Apply([metrics, span](NThreading::TFuture f) mutable { + auto status = f.ExtractValue(); + metrics->End(status.GetStatus()); + span->End(status.GetStatus()); + return status; + }); } TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSession& session, const TTxSettings& txSettings, diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index 8fe71287f36..0d38fd1006f 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -17,6 +17,8 @@ #include "data_query.h" #include "request_migrator.h" #include "readers.h" +#include "table_metrics.h" +#include "table_spans.h" #include @@ -237,6 +239,10 @@ class TTableClient::TImpl: public TClientImplCommon, public auto promise = NewPromise(); bool keepInCache = settings.KeepInQueryCache_ && settings.KeepInQueryCache_.value(); + auto metrics = std::make_shared(MetricRegistry_, "ExecuteDataQuery"); + auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + + // We don't want to delay call of TSession dtor, so we can't capture it by copy // otherwise we break session pool and other clients logic. 
// Same problem with TDataQuery and TTransaction @@ -246,7 +252,7 @@ class TTableClient::TImpl: public TClientImplCommon, public // - capture pointer // - call free just before SetValue call auto sessionPtr = new TSession(session); - auto extractor = [promise, sessionPtr, query, fromCache, keepInCache] + auto extractor = [promise, sessionPtr, query, fromCache, keepInCache, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { std::vector res; std::optional tx; @@ -285,6 +291,9 @@ class TTableClient::TImpl: public TClientImplCommon, public TDataQueryResult dataQueryResult(TStatus(std::move(status)), std::move(res), tx, dataQuery, fromCache, queryStats); + metrics->End(dataQueryResult.GetStatus()); + span->End(dataQueryResult.GetStatus()); + delete sessionPtr; tx.reset(); dataQuery.reset(); @@ -326,6 +335,9 @@ class TTableClient::TImpl: public TClientImplCommon, public NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> ParamsSizeHistogram; NSdkStats::TAtomicCounter<::NMonitoring::TRate> SessionRemovedDueBalancing; + std::shared_ptr MetricRegistry_; + std::shared_ptr Tracer_; + private: NSessionPool::TSessionPool SessionPool_; TRequestMigrator RequestMigrator_; diff --git a/src/client/table/impl/table_metrics.h b/src/client/table/impl/table_metrics.h index 5bf6128a6ea..83f9deafdb4 100644 --- a/src/client/table/impl/table_metrics.h +++ b/src/client/table/impl/table_metrics.h @@ -7,7 +7,7 @@ namespace NYdb::inline V3::NTable { class TTableMetrics : public NObservability::TClientMetrics { public: TTableMetrics(std::shared_ptr registry, const std::string& operationName) - : TClientMetrics(std::move(registry), "ydb.table", operationName) + : TClientMetrics(std::move(registry), operationName) {} }; diff --git a/src/client/table/impl/table_spans.cpp b/src/client/table/impl/table_spans.cpp new file mode 100644 index 00000000000..e476c38eccb --- /dev/null +++ b/src/client/table/impl/table_spans.cpp @@ -0,0 +1,116 @@ +#include "table_spans.h" + 
+#include + +#include + +#include + +namespace NYdb::inline V3::NTable { + +namespace { + +constexpr int DefaultGrpcPort = 2135; + +void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { + port = DefaultGrpcPort; + + if (endpoint.empty()) { + host = endpoint; + return; + } + + if (endpoint.front() == '[') { + auto bracketEnd = endpoint.find(']'); + if (bracketEnd != std::string::npos) { + host = endpoint.substr(1, bracketEnd - 1); + if (bracketEnd + 2 < endpoint.size() && endpoint[bracketEnd + 1] == ':') { + try { + port = std::stoi(endpoint.substr(bracketEnd + 2)); + } catch (...) {} + } + return; + } + } + + auto pos = endpoint.rfind(':'); + if (pos != std::string::npos) { + host = endpoint.substr(0, pos); + try { + port = std::stoi(endpoint.substr(pos + 1)); + } catch (...) {} + } else { + host = endpoint; + } +} + +void SafeLogSpanError(TLog& log, const char* message) noexcept { + try { + try { + std::rethrow_exception(std::current_exception()); + } catch (const std::exception& e) { + LOG_LAZY(log, TLOG_ERR, std::string("TTableSpan: ") + message + ": " + e.what()); + return; + } catch (...) { + } + LOG_LAZY(log, TLOG_ERR, std::string("TTableSpan: ") + message + ": (unknown)"); + } catch (...) { + } +} + +} // namespace + +TTableSpan::TTableSpan(std::shared_ptr tracer + , const std::string& operationName + , const std::string& endpoint + , const TLog& log +) : Log_(log) { + if (!tracer) { + return; + } + + std::string host; + int port; + ParseEndpoint(endpoint, host, port); + + try { + Span_ = tracer->StartSpan(operationName, NMetrics::ESpanKind::CLIENT); + if (!Span_) { + return; + } + Span_->SetAttribute("db.system.name", "other_sql"); + Span_->SetAttribute("db.operation.name", operationName); + Span_->SetAttribute("server.address", host); + Span_->SetAttribute("server.port", static_cast(port)); + } catch (...) 
{ + SafeLogSpanError(Log_, "failed to initialize span"); + Span_.reset(); + } +} + +TTableSpan::~TTableSpan() noexcept { + if (Span_) { + try { + Span_->End(); + } catch (...) { + SafeLogSpanError(Log_, "failed to end span"); + } + } +} + +void TTableSpan::End(EStatus status) noexcept { + if (Span_) { + try { + Span_->SetAttribute("db.response.status_code", ToString(status)); + if (status != EStatus::SUCCESS) { + Span_->SetAttribute("error.type", ToString(status)); + } + Span_->End(); + } catch (...) { + SafeLogSpanError(Log_, "failed to finalize span"); + } + Span_.reset(); + } +} + +} // namespace NYdb::NTable diff --git a/src/client/table/impl/table_spans.h b/src/client/table/impl/table_spans.h new file mode 100644 index 00000000000..5f1ccab6dc1 --- /dev/null +++ b/src/client/table/impl/table_spans.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include + +namespace NYdb::inline V3::NTable { + +class TTableSpan { +public: + TTableSpan(std::shared_ptr tracer + , const std::string& operationName + , const std::string& endpoint + , const TLog& log + ); + ~TTableSpan() noexcept; + + void End(EStatus status) noexcept; + +private: + TLog Log_; + std::shared_ptr Span_; +}; + +} // namespace NYdb::NTable diff --git a/tests/common/fake_metric_registry.h b/tests/common/fake_metric_registry.h index 60ff1414633..032234f080f 100644 --- a/tests/common/fake_metric_registry.h +++ b/tests/common/fake_metric_registry.h @@ -68,7 +68,11 @@ struct TMetricKey { class TFakeMetricRegistry : public NMetrics::IMetricRegistry { public: - std::shared_ptr Counter(const std::string& name, const NMetrics::TLabels& labels) override { + std::shared_ptr Counter(const std::string& name + , const NMetrics::TLabels& labels + , const std::string& /*description*/ + , const std::string& /*unit*/ + ) override { std::lock_guard lock(Mutex_); auto key = TMetricKey{name, labels}; auto it = Counters_.find(key); @@ -80,7 +84,11 @@ class 
TFakeMetricRegistry : public NMetrics::IMetricRegistry { return counter; } - std::shared_ptr Gauge(const std::string& name, const NMetrics::TLabels& labels) override { + std::shared_ptr Gauge(const std::string& name + , const NMetrics::TLabels& labels + , const std::string& /*description*/ + , const std::string& /*unit*/ + ) override { std::lock_guard lock(Mutex_); auto key = TMetricKey{name, labels}; auto gauge = std::make_shared(); @@ -88,7 +96,12 @@ class TFakeMetricRegistry : public NMetrics::IMetricRegistry { return gauge; } - std::shared_ptr Histogram(const std::string& name, const std::vector& /*buckets*/, const NMetrics::TLabels& labels) override { + std::shared_ptr Histogram(const std::string& name + , const std::vector& /*buckets*/ + , const NMetrics::TLabels& labels + , const std::string& /*description*/ + , const std::string& /*unit*/ + ) override { std::lock_guard lock(Mutex_); auto key = TMetricKey{name, labels}; auto it = Histograms_.find(key); diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index 21c2398954c..fec3aab583b 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -36,17 +37,26 @@ std::shared_ptr GetCounter( const std::string& name, const std::string& operation) { - return registry->GetCounter(name, {{"operation", operation}}); + return registry->GetCounter(name, { + {"db.system.name", "other_sql"}, + {"db.operation.name", operation}, + }); } std::shared_ptr GetDuration( const std::shared_ptr& registry, - const std::string& operation) + const std::string& operation, + EStatus status) { - return registry->GetHistogram("db.client.operation.duration", { - {"db.system.name", "ydb"}, + NMetrics::TLabels labels = { + {"db.system.name", "other_sql"}, {"db.operation.name", operation}, - }); + {"db.response.status_code", ToString(status)}, + }; + if (status != EStatus::SUCCESS) { + labels["error.type"] = 
ToString(status); + } + return registry->GetHistogram("db.client.operation.duration", labels); } } // namespace @@ -64,15 +74,15 @@ TEST(QueryMetricsIntegration, ExecuteQuerySuccessRecordsMetrics) { ).ExtractValueSync(); ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); - auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + auto requests = GetCounter(registry, "db.client.operation.requests", "ExecuteQuery"); ASSERT_NE(requests, nullptr) << "ExecuteQuery request counter not created"; EXPECT_GE(requests->Get(), 1); - auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + auto errors = GetCounter(registry, "db.client.operation.errors", "ExecuteQuery"); ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 0); - auto duration = GetDuration(registry, "ExecuteQuery"); + auto duration = GetDuration(registry, "ExecuteQuery", EStatus::SUCCESS); ASSERT_NE(duration, nullptr) << "ExecuteQuery duration histogram not created"; EXPECT_GE(duration->Count(), 1u); for (double v : duration->GetValues()) { @@ -95,15 +105,15 @@ TEST(QueryMetricsIntegration, ExecuteQueryErrorRecordsErrorMetric) { ).ExtractValueSync(); EXPECT_NE(result.GetStatus(), EStatus::SUCCESS); - auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + auto requests = GetCounter(registry, "db.client.operation.requests", "ExecuteQuery"); ASSERT_NE(requests, nullptr); EXPECT_GE(requests->Get(), 1); - auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + auto errors = GetCounter(registry, "db.client.operation.errors", "ExecuteQuery"); ASSERT_NE(errors, nullptr); EXPECT_GE(errors->Get(), 1); - auto duration = GetDuration(registry, "ExecuteQuery"); + auto duration = GetDuration(registry, "ExecuteQuery", result.GetStatus()); ASSERT_NE(duration, nullptr); EXPECT_GE(duration->Count(), 1u); @@ -117,11 +127,11 @@ TEST(QueryMetricsIntegration, CreateSessionRecordsMetrics) { auto session = 
client.GetSession().ExtractValueSync(); ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); - auto requests = GetCounter(registry, "ydb.query.requests", "CreateSession"); + auto requests = GetCounter(registry, "db.client.operation.requests", "CreateSession"); ASSERT_NE(requests, nullptr) << "CreateSession request counter not created"; EXPECT_GE(requests->Get(), 1); - auto duration = GetDuration(registry, "CreateSession"); + auto duration = GetDuration(registry, "CreateSession", EStatus::SUCCESS); ASSERT_NE(duration, nullptr) << "CreateSession duration histogram not created"; EXPECT_GE(duration->Count(), 1u); @@ -150,11 +160,11 @@ TEST(QueryMetricsIntegration, CommitTransactionRecordsMetrics) { auto commitResult = execResult.GetTransaction()->Commit().ExtractValueSync(); ASSERT_TRUE(commitResult.IsSuccess()) << commitResult.GetIssues().ToString(); - auto commitRequests = GetCounter(registry, "ydb.query.requests", "Commit"); + auto commitRequests = GetCounter(registry, "db.client.operation.requests", "Commit"); ASSERT_NE(commitRequests, nullptr) << "Commit request counter not created"; EXPECT_GE(commitRequests->Get(), 1); - auto commitDuration = GetDuration(registry, "Commit"); + auto commitDuration = GetDuration(registry, "Commit", EStatus::SUCCESS); ASSERT_NE(commitDuration, nullptr); EXPECT_GE(commitDuration->Count(), 1u); } @@ -177,15 +187,15 @@ TEST(QueryMetricsIntegration, RollbackTransactionRecordsMetrics) { auto rollbackResult = tx.Rollback().ExtractValueSync(); ASSERT_TRUE(rollbackResult.IsSuccess()) << rollbackResult.GetIssues().ToString(); - auto rollbackRequests = GetCounter(registry, "ydb.query.requests", "Rollback"); + auto rollbackRequests = GetCounter(registry, "db.client.operation.requests", "Rollback"); ASSERT_NE(rollbackRequests, nullptr) << "Rollback request counter not created"; EXPECT_GE(rollbackRequests->Get(), 1); - auto rollbackErrors = GetCounter(registry, "ydb.query.errors", "Rollback"); + auto rollbackErrors = 
GetCounter(registry, "db.client.operation.errors", "Rollback"); ASSERT_NE(rollbackErrors, nullptr); EXPECT_EQ(rollbackErrors->Get(), 0); - auto rollbackDuration = GetDuration(registry, "Rollback"); + auto rollbackDuration = GetDuration(registry, "Rollback", EStatus::SUCCESS); ASSERT_NE(rollbackDuration, nullptr); EXPECT_GE(rollbackDuration->Count(), 1u); @@ -209,15 +219,15 @@ TEST(QueryMetricsIntegration, MultipleQueriesAccumulateMetrics) { ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); } - auto requests = GetCounter(registry, "ydb.query.requests", "ExecuteQuery"); + auto requests = GetCounter(registry, "db.client.operation.requests", "ExecuteQuery"); ASSERT_NE(requests, nullptr); EXPECT_EQ(requests->Get(), numQueries); - auto errors = GetCounter(registry, "ydb.query.errors", "ExecuteQuery"); + auto errors = GetCounter(registry, "db.client.operation.errors", "ExecuteQuery"); ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 0); - auto duration = GetDuration(registry, "ExecuteQuery"); + auto duration = GetDuration(registry, "ExecuteQuery", EStatus::SUCCESS); ASSERT_NE(duration, nullptr); EXPECT_EQ(duration->Count(), static_cast(numQueries)); @@ -262,7 +272,7 @@ TEST(QueryMetricsIntegration, DurationValuesAreRealistic) { ).ExtractValueSync(); ASSERT_EQ(result.GetStatus(), EStatus::SUCCESS) << result.GetIssues().ToString(); - auto duration = GetDuration(registry, "ExecuteQuery"); + auto duration = GetDuration(registry, "ExecuteQuery", EStatus::SUCCESS); ASSERT_NE(duration, nullptr); ASSERT_GE(duration->Count(), 1u); diff --git a/tests/unit/client/observability/client_metrics_ut.cpp b/tests/unit/client/observability/client_metrics_ut.cpp index 3dbedfa801d..209ce1db0d3 100644 --- a/tests/unit/client/observability/client_metrics_ut.cpp +++ b/tests/unit/client/observability/client_metrics_ut.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -21,26 +22,36 @@ class ClientMetricsTest : public ::testing::Test { } 
std::shared_ptr RequestCounter(const std::string& op) { - return Registry->GetCounter(Prefix + ".requests", {{"operation", op}}); + return Registry->GetCounter("db.client.operation.requests", { + {"db.system.name", "other_sql"}, + {"db.operation.name", op}, + }); } std::shared_ptr ErrorCounter(const std::string& op) { - return Registry->GetCounter(Prefix + ".errors", {{"operation", op}}); + return Registry->GetCounter("db.client.operation.errors", { + {"db.system.name", "other_sql"}, + {"db.operation.name", op}, + }); } - std::shared_ptr DurationHistogram(const std::string& op) { - return Registry->GetHistogram("db.client.operation.duration", { - {"db.system.name", "ydb"}, + std::shared_ptr DurationHistogram(const std::string& op, EStatus status) { + TLabels labels = { + {"db.system.name", "other_sql"}, {"db.operation.name", op}, - }); + {"db.response.status_code", ToString(status)}, + }; + if (status != EStatus::SUCCESS) { + labels["error.type"] = ToString(status); + } + return Registry->GetHistogram("db.client.operation.duration", labels); } - const std::string Prefix = "ydb.test"; std::shared_ptr Registry; }; TEST_F(ClientMetricsTest, RequestCounterIncrementedOnConstruction) { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); auto counter = RequestCounter("DoSomething"); ASSERT_NE(counter, nullptr); @@ -49,7 +60,7 @@ TEST_F(ClientMetricsTest, RequestCounterIncrementedOnConstruction) { TEST_F(ClientMetricsTest, SuccessDoesNotIncrementErrorCounter) { { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); metrics.End(EStatus::SUCCESS); } @@ -60,7 +71,7 @@ TEST_F(ClientMetricsTest, SuccessDoesNotIncrementErrorCounter) { TEST_F(ClientMetricsTest, FailureIncrementsErrorCounter) { { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); metrics.End(EStatus::UNAVAILABLE); } @@ -71,11 +82,11 @@ 
TEST_F(ClientMetricsTest, FailureIncrementsErrorCounter) { TEST_F(ClientMetricsTest, DurationRecordedOnEnd) { { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); metrics.End(EStatus::SUCCESS); } - auto hist = DurationHistogram("DoSomething"); + auto hist = DurationHistogram("DoSomething", EStatus::SUCCESS); ASSERT_NE(hist, nullptr); EXPECT_EQ(hist->Count(), 1u); EXPECT_GE(hist->GetValues()[0], 0.0); @@ -83,17 +94,17 @@ TEST_F(ClientMetricsTest, DurationRecordedOnEnd) { TEST_F(ClientMetricsTest, DurationIsInSeconds) { { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); metrics.End(EStatus::SUCCESS); } - auto hist = DurationHistogram("DoSomething"); + auto hist = DurationHistogram("DoSomething", EStatus::SUCCESS); ASSERT_NE(hist, nullptr); EXPECT_LT(hist->GetValues()[0], 1.0); } TEST_F(ClientMetricsTest, DoubleEndIsIdempotent) { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); metrics.End(EStatus::SUCCESS); metrics.End(EStatus::INTERNAL_ERROR); @@ -101,14 +112,14 @@ TEST_F(ClientMetricsTest, DoubleEndIsIdempotent) { ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 0); - auto hist = DurationHistogram("DoSomething"); + auto hist = DurationHistogram("DoSomething", EStatus::SUCCESS); ASSERT_NE(hist, nullptr); EXPECT_EQ(hist->Count(), 1u); } TEST_F(ClientMetricsTest, DestructorCallsEndWithClientInternalError) { { - TClientMetrics metrics(Registry, Prefix, "DoSomething"); + TClientMetrics metrics(Registry, "DoSomething"); } auto requests = RequestCounter("DoSomething"); @@ -119,25 +130,25 @@ TEST_F(ClientMetricsTest, DestructorCallsEndWithClientInternalError) { ASSERT_NE(errors, nullptr); EXPECT_EQ(errors->Get(), 1); - auto hist = DurationHistogram("DoSomething"); + auto hist = DurationHistogram("DoSomething", EStatus::CLIENT_INTERNAL_ERROR); ASSERT_NE(hist, nullptr); 
EXPECT_EQ(hist->Count(), 1u); } TEST_F(ClientMetricsTest, NullRegistryDoesNotCrash) { EXPECT_NO_THROW({ - TClientMetrics metrics(nullptr, Prefix, "DoSomething"); + TClientMetrics metrics(nullptr, "DoSomething"); metrics.End(EStatus::SUCCESS); }); } TEST_F(ClientMetricsTest, DifferentOperationsHaveSeparateMetrics) { { - TClientMetrics m1(Registry, Prefix, "OpA"); + TClientMetrics m1(Registry, "OpA"); m1.End(EStatus::SUCCESS); } { - TClientMetrics m2(Registry, Prefix, "OpB"); + TClientMetrics m2(Registry, "OpB"); m2.End(EStatus::OVERLOADED); } @@ -145,19 +156,20 @@ TEST_F(ClientMetricsTest, DifferentOperationsHaveSeparateMetrics) { EXPECT_EQ(RequestCounter("OpB")->Get(), 1); EXPECT_EQ(ErrorCounter("OpA")->Get(), 0); EXPECT_EQ(ErrorCounter("OpB")->Get(), 1); - EXPECT_EQ(DurationHistogram("OpA")->Count(), 1u); - EXPECT_EQ(DurationHistogram("OpB")->Count(), 1u); + EXPECT_EQ(DurationHistogram("OpA", EStatus::SUCCESS)->Count(), 1u); + EXPECT_EQ(DurationHistogram("OpB", EStatus::OVERLOADED)->Count(), 1u); } TEST_F(ClientMetricsTest, MultipleRequestsAccumulate) { for (int i = 0; i < 5; ++i) { - TClientMetrics metrics(Registry, Prefix, "Op"); + TClientMetrics metrics(Registry, "Op"); metrics.End(i % 2 == 0 ? 
EStatus::SUCCESS : EStatus::TIMEOUT); } EXPECT_EQ(RequestCounter("Op")->Get(), 5); EXPECT_EQ(ErrorCounter("Op")->Get(), 2); - EXPECT_EQ(DurationHistogram("Op")->Count(), 5u); + EXPECT_EQ(DurationHistogram("Op", EStatus::SUCCESS)->Count(), 3u); + EXPECT_EQ(DurationHistogram("Op", EStatus::TIMEOUT)->Count(), 2u); } TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { @@ -173,7 +185,7 @@ TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { }; for (auto status : errorStatuses) { - TClientMetrics metrics(Registry, Prefix, "Op"); + TClientMetrics metrics(Registry, "Op"); metrics.End(status); } @@ -182,48 +194,76 @@ TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); } -TEST_F(ClientMetricsTest, PrefixAppliedToCounterNames) { - TClientMetrics metrics(Registry, "ydb.custom", "Op"); - metrics.End(EStatus::SUCCESS); - - EXPECT_NE(Registry->GetCounter("ydb.custom.requests", {{"operation", "Op"}}), nullptr); - EXPECT_NE(Registry->GetCounter("ydb.custom.errors", {{"operation", "Op"}}), nullptr); - - EXPECT_EQ(Registry->GetCounter("ydb.test.requests", {{"operation", "Op"}}), nullptr); -} - // --------------------------------------------------------------------------- -// TQueryMetrics prefix +// TQueryMetrics // --------------------------------------------------------------------------- -TEST(QueryMetricsTest, UsesQueryPrefix) { +TEST(QueryMetricsTest, UsesOtelStandardMetrics) { auto registry = std::make_shared(); NQuery::TQueryMetrics metrics(registry, "ExecuteQuery"); metrics.End(EStatus::SUCCESS); - EXPECT_NE(registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteQuery"}}), nullptr); - EXPECT_NE(registry->GetCounter("ydb.query.errors", {{"operation", "ExecuteQuery"}}), nullptr); - EXPECT_NE(registry->GetHistogram("db.client.operation.duration", { - {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteQuery"}}), nullptr); - - 
EXPECT_EQ(registry->GetCounter("ydb.table.requests", {{"operation", "ExecuteQuery"}}), nullptr); + EXPECT_NE( + registry->GetCounter( + "db.client.operation.requests", + {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteQuery"}} + ), + nullptr + ); + EXPECT_NE( + registry->GetCounter( + "db.client.operation.errors", + {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteQuery"}} + ), + nullptr + ); + EXPECT_NE( + registry->GetHistogram( + "db.client.operation.duration", + { + {"db.system.name", "other_sql"}, + {"db.operation.name", "ExecuteQuery"}, + {"db.response.status_code", ToString(EStatus::SUCCESS)}, + } + ), + nullptr + ); } // --------------------------------------------------------------------------- -// TTableMetrics prefix +// TTableMetrics // --------------------------------------------------------------------------- -TEST(TableMetricsTest, UsesTablePrefix) { +TEST(TableMetricsTest, UsesOtelStandardMetrics) { auto registry = std::make_shared(); NTable::TTableMetrics metrics(registry, "ExecuteDataQuery"); metrics.End(EStatus::SUCCESS); - EXPECT_NE(registry->GetCounter("ydb.table.requests", {{"operation", "ExecuteDataQuery"}}), nullptr); - EXPECT_NE(registry->GetCounter("ydb.table.errors", {{"operation", "ExecuteDataQuery"}}), nullptr); - EXPECT_NE(registry->GetHistogram("db.client.operation.duration", { - {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteDataQuery"}}), nullptr); - - EXPECT_EQ(registry->GetCounter("ydb.query.requests", {{"operation", "ExecuteDataQuery"}}), nullptr); + EXPECT_NE( + registry->GetCounter( + "db.client.operation.requests", + {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteDataQuery"}} + ), + nullptr + ); + EXPECT_NE( + registry->GetCounter( + "db.client.operation.errors", + {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteDataQuery"}} + ), + nullptr + ); + EXPECT_NE( + registry->GetHistogram( + "db.client.operation.duration", + { + {"db.system.name", "other_sql"}, 
+ {"db.operation.name", "ExecuteDataQuery"}, + {"db.response.status_code", ToString(EStatus::SUCCESS)}, + } + ), + nullptr + ); } From fe6450e15363e94ad359982f8ade98eda9920b76 Mon Sep 17 00:00:00 2001 From: maladetska Date: Tue, 31 Mar 2026 19:08:44 +0300 Subject: [PATCH 09/17] fix namespaces, CMakeLists --- cmake/external_libs.cmake | 5 +- include/ydb-cpp-sdk/client/driver/driver.h | 2 +- include/ydb-cpp-sdk/client/trace/trace.h | 4 +- .../ydb-cpp-sdk/open_telemetry/trace.h | 4 +- plugins/trace/otel/src/trace.cpp | 4 +- src/client/driver/driver.cpp | 6 +- .../grpc_connections/grpc_connections.cpp | 2 +- .../grpc_connections/grpc_connections.h | 9 +- .../impl/internal/grpc_connections/params.h | 7 +- src/client/impl/observability/CMakeLists.txt | 3 +- ...ient_metrics.cpp => operation_metrics.cpp} | 25 ++-- .../{client_metrics.h => operation_metrics.h} | 10 +- .../observability/operation_span.cpp} | 42 +++---- .../impl/observability/operation_span.h | 34 +++++ src/client/query/CMakeLists.txt | 1 - src/client/query/client.cpp | 11 +- src/client/query/impl/CMakeLists.txt | 1 - src/client/query/impl/query_metrics.h | 10 +- src/client/query/impl/query_spans.h | 28 +---- src/client/table/impl/CMakeLists.txt | 1 - src/client/table/impl/table_client.cpp | 45 +++++-- src/client/table/impl/table_client.h | 9 +- src/client/table/impl/table_metrics.h | 10 +- src/client/table/impl/table_spans.cpp | 116 ------------------ src/client/table/impl/table_spans.h | 24 +--- tests/unit/client/CMakeLists.txt | 2 +- ...etrics_ut.cpp => operation_metrics_ut.cpp} | 66 +++++----- 27 files changed, 204 insertions(+), 277 deletions(-) rename src/client/impl/observability/{client_metrics.cpp => operation_metrics.cpp} (78%) rename src/client/impl/observability/{client_metrics.h => operation_metrics.h} (75%) rename src/client/{query/impl/query_spans.cpp => impl/observability/operation_span.cpp} (71%) create mode 100644 src/client/impl/observability/operation_span.h delete mode 100644 
src/client/table/impl/table_spans.cpp rename tests/unit/client/observability/{client_metrics_ut.cpp => operation_metrics_ut.cpp} (76%) diff --git a/cmake/external_libs.cmake b/cmake/external_libs.cmake index 8445e4d2fc1..9d2500bffb4 100644 --- a/cmake/external_libs.cmake +++ b/cmake/external_libs.cmake @@ -15,10 +15,7 @@ find_package(jwt-cpp REQUIRED) find_package(double-conversion REQUIRED) if (YDB_SDK_ENABLE_OTEL_METRICS OR YDB_SDK_ENABLE_OTEL_TRACE) - find_package(opentelemetry-cpp QUIET) - if (NOT opentelemetry-cpp_FOUND) - message(FATAL_ERROR "Dependency 'opentelemetry-cpp' was not found.") - endif() + find_package(opentelemetry-cpp REQUIRED) endif() # RapidJSON diff --git a/include/ydb-cpp-sdk/client/driver/driver.h b/include/ydb-cpp-sdk/client/driver/driver.h index 20fa52d5e60..8d5ab1fac2b 100644 --- a/include/ydb-cpp-sdk/client/driver/driver.h +++ b/include/ydb-cpp-sdk/client/driver/driver.h @@ -159,7 +159,7 @@ class TDriverConfig { TDriverConfig& SetMetricRegistry(std::shared_ptr registry); //! Set external trace provider implementation. 
- TDriverConfig& SetTraceProvider(std::shared_ptr provider); + TDriverConfig& SetTraceProvider(std::shared_ptr provider); private: class TImpl; diff --git a/include/ydb-cpp-sdk/client/trace/trace.h b/include/ydb-cpp-sdk/client/trace/trace.h index b86297146a9..117f4220b39 100644 --- a/include/ydb-cpp-sdk/client/trace/trace.h +++ b/include/ydb-cpp-sdk/client/trace/trace.h @@ -5,7 +5,7 @@ #include #include -namespace NYdb::inline V3::NMetrics { +namespace NYdb::inline V3::NTrace { enum class ESpanKind { INTERNAL, @@ -36,4 +36,4 @@ class ITraceProvider { virtual std::shared_ptr GetTracer(const std::string& name) = 0; }; -} // namespace NYdb::NMetrics +} // namespace NYdb::NTrace diff --git a/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h b/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h index 9bdc12fb25f..1736954771c 100644 --- a/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h +++ b/plugins/trace/otel/include/ydb-cpp-sdk/open_telemetry/trace.h @@ -8,9 +8,9 @@ namespace opentelemetry::trace { class TracerProvider; } -namespace NYdb::inline V3::NMetrics { +namespace NYdb::inline V3::NTrace { std::shared_ptr CreateOtelTraceProvider( opentelemetry::nostd::shared_ptr tracerProvider); -} // namespace NYdb::NMetrics +} // namespace NYdb::NTrace diff --git a/plugins/trace/otel/src/trace.cpp b/plugins/trace/otel/src/trace.cpp index 7cac3f4c1cb..41b1df64793 100644 --- a/plugins/trace/otel/src/trace.cpp +++ b/plugins/trace/otel/src/trace.cpp @@ -4,7 +4,7 @@ #include #include -namespace NYdb::inline V3::NMetrics { +namespace NYdb::inline V3::NTrace { namespace { @@ -94,4 +94,4 @@ std::shared_ptr CreateOtelTraceProvider( return std::make_shared(std::move(tracerProvider)); } -} // namespace NYdb::NMetrics +} // namespace NYdb::NTrace diff --git a/src/client/driver/driver.cpp b/src/client/driver/driver.cpp index c0ef98756fe..7bdf3a79bbc 100644 --- a/src/client/driver/driver.cpp +++ b/src/client/driver/driver.cpp @@ -52,7 +52,7 @@ class 
TDriverConfig::TImpl : public IConnectionsParams { const TLog& GetLog() const override { return Log; } std::shared_ptr GetExecutor() const override { return Executor; } std::shared_ptr GetExternalMetricRegistry() const override { return MetricRegistry; } - std::shared_ptr GetTraceProvider() const override { return TraceProvider; } + std::shared_ptr GetTraceProvider() const override { return TraceProvider; } std::string Endpoint; size_t NetworkThreadsNum = 2; @@ -83,7 +83,7 @@ class TDriverConfig::TImpl : public IConnectionsParams { TLog Log; // Null by default. std::shared_ptr Executor; std::shared_ptr MetricRegistry; - std::shared_ptr TraceProvider; + std::shared_ptr TraceProvider; }; TDriverConfig::TDriverConfig(const std::string& connectionString) @@ -238,7 +238,7 @@ TDriverConfig& TDriverConfig::SetMetricRegistry(std::shared_ptr provider) { +TDriverConfig& TDriverConfig::SetTraceProvider(std::shared_ptr provider) { Impl_->TraceProvider = std::move(provider); return *this; } diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.cpp b/src/client/impl/internal/grpc_connections/grpc_connections.cpp index 757d5b777b7..09fb75687ee 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.cpp +++ b/src/client/impl/internal/grpc_connections/grpc_connections.cpp @@ -440,7 +440,7 @@ std::shared_ptr TGRpcConnectionsImpl::GetExternalMetr return MetricRegistry_; } -std::shared_ptr TGRpcConnectionsImpl::GetTraceProvider() const { +std::shared_ptr TGRpcConnectionsImpl::GetTraceProvider() const { return TraceProvider_; } diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.h b/src/client/impl/internal/grpc_connections/grpc_connections.h index 84a25162912..8d2386b30e3 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.h +++ b/src/client/impl/internal/grpc_connections/grpc_connections.h @@ -20,9 +20,12 @@ namespace NYdb::inline V3 { namespace NMetrics { class IMetricRegistry; - class ITraceProvider; } // 
namespace NMetrics +namespace NTrace { + class ITraceProvider; +} // namespace NTrace + constexpr TDeadline::Duration GRPC_KEEP_ALIVE_TIMEOUT_FOR_DISCOVERY = std::chrono::seconds(10); constexpr TDeadline::Duration INITIAL_DEFERRED_CALL_DELAY = std::chrono::milliseconds(10); // The delay before first deferred service call constexpr TDeadline::Duration GET_ENDPOINTS_TIMEOUT = std::chrono::seconds(10); // Time wait for ListEndpoints request, after this time we pass error to client @@ -587,7 +590,7 @@ class TGRpcConnectionsImpl void RegisterExtension(IExtension* extension); void RegisterExtensionApi(IExtensionApi* api); std::shared_ptr GetExternalMetricRegistry() const; - std::shared_ptr GetTraceProvider() const; + std::shared_ptr GetTraceProvider() const; void SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb); const TLog& GetLog() const override; @@ -723,7 +726,7 @@ class TGRpcConnectionsImpl std::vector> Extensions_; std::vector> ExtensionApis_; std::shared_ptr MetricRegistry_; - std::shared_ptr TraceProvider_; + std::shared_ptr TraceProvider_; IDiscoveryMutatorApi::TMutatorCb DiscoveryMutatorCb; diff --git a/src/client/impl/internal/grpc_connections/params.h b/src/client/impl/internal/grpc_connections/params.h index 1e827d3343b..50c90211603 100644 --- a/src/client/impl/internal/grpc_connections/params.h +++ b/src/client/impl/internal/grpc_connections/params.h @@ -13,9 +13,12 @@ namespace NYdb::inline V3 { namespace NMetrics { class IMetricRegistry; - class ITraceProvider; } // namespace NMetrics +namespace NTrace { + class ITraceProvider; +} // namespace NTrace + class IConnectionsParams { public: virtual ~IConnectionsParams() = default; @@ -42,7 +45,7 @@ class IConnectionsParams { virtual uint64_t GetMaxMessageSize() const = 0; virtual std::shared_ptr GetExecutor() const = 0; virtual std::shared_ptr GetExternalMetricRegistry() const = 0; - virtual std::shared_ptr GetTraceProvider() const = 0; + virtual std::shared_ptr GetTraceProvider() const = 0; }; } // 
namespace NYdb diff --git a/src/client/impl/observability/CMakeLists.txt b/src/client/impl/observability/CMakeLists.txt index 961d2821559..33389f3ce2e 100644 --- a/src/client/impl/observability/CMakeLists.txt +++ b/src/client/impl/observability/CMakeLists.txt @@ -6,7 +6,8 @@ target_link_libraries(impl-observability PUBLIC ) target_sources(impl-observability PRIVATE - client_metrics.cpp + operation_metrics.cpp + operation_span.cpp ) _ydb_sdk_install_targets(TARGETS impl-observability) diff --git a/src/client/impl/observability/client_metrics.cpp b/src/client/impl/observability/operation_metrics.cpp similarity index 78% rename from src/client/impl/observability/client_metrics.cpp rename to src/client/impl/observability/operation_metrics.cpp index f66605bc772..f8aec18ffe5 100644 --- a/src/client/impl/observability/client_metrics.cpp +++ b/src/client/impl/observability/operation_metrics.cpp @@ -1,20 +1,25 @@ -#include "client_metrics.h" +#include "operation_metrics.h" + +#include -#include #include +#include + namespace NYdb::inline V3::NObservability { namespace { -void SafeLogMetricsError(const char* /*message*/) noexcept { +void SafeLogMetricsError(TLog& log, const char* message) noexcept { try { try { std::rethrow_exception(std::current_exception()); - } catch (const std::exception&) { + } catch (const std::exception& e) { + LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": " + e.what()); return; } catch (...) { } + LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": (unknown)"); } catch (...) 
{ } } @@ -29,10 +34,12 @@ static constexpr const char* RequestsDescription = "Number of database client op static constexpr const char* ErrorsDescription = "Number of database client operations that failed."; static constexpr const char* DurationDescription = "Duration of database client operations."; -TClientMetrics::TClientMetrics(std::shared_ptr registry +TOperationMetrics::TOperationMetrics(std::shared_ptr registry , const std::string& operationName + , const TLog& log ) : Registry_(std::move(registry)) , OperationName_(operationName) + , Log_(log) { if (!Registry_) { return; @@ -49,18 +56,18 @@ TClientMetrics::TClientMetrics(std::shared_ptr regist RequestCounter_->Inc(); StartTime_ = std::chrono::steady_clock::now(); } catch (...) { - SafeLogMetricsError("failed to initialize metrics"); + SafeLogMetricsError(Log_, "failed to initialize metrics"); RequestCounter_.reset(); ErrorCounter_.reset(); Registry_.reset(); } } -TClientMetrics::~TClientMetrics() noexcept { +TOperationMetrics::~TOperationMetrics() noexcept { End(EStatus::CLIENT_INTERNAL_ERROR); } -void TClientMetrics::End(EStatus status) noexcept { +void TOperationMetrics::End(EStatus status) noexcept { if (Ended_) { return; } @@ -94,7 +101,7 @@ void TClientMetrics::End(EStatus status) noexcept { ErrorCounter_->Inc(); } } catch (...) 
{ - SafeLogMetricsError("failed to record metrics"); + SafeLogMetricsError(Log_, "failed to record metrics"); } } diff --git a/src/client/impl/observability/client_metrics.h b/src/client/impl/observability/operation_metrics.h similarity index 75% rename from src/client/impl/observability/client_metrics.h rename to src/client/impl/observability/operation_metrics.h index e12a30cad60..86f37d0ce7c 100644 --- a/src/client/impl/observability/client_metrics.h +++ b/src/client/impl/observability/operation_metrics.h @@ -3,18 +3,21 @@ #include #include +#include + #include #include #include namespace NYdb::inline V3::NObservability { -class TClientMetrics { +class TOperationMetrics { public: - TClientMetrics(std::shared_ptr registry + TOperationMetrics(std::shared_ptr registry , const std::string& operationName + , const TLog& log ); - ~TClientMetrics() noexcept; + ~TOperationMetrics() noexcept; void End(EStatus status) noexcept; @@ -25,6 +28,7 @@ class TClientMetrics { std::shared_ptr ErrorCounter_; std::chrono::steady_clock::time_point StartTime_; bool Ended_ = false; + TLog Log_; }; } // namespace NYdb::NObservability diff --git a/src/client/query/impl/query_spans.cpp b/src/client/impl/observability/operation_span.cpp similarity index 71% rename from src/client/query/impl/query_spans.cpp rename to src/client/impl/observability/operation_span.cpp index 372b4464fae..300d4ff9801 100644 --- a/src/client/query/impl/query_spans.cpp +++ b/src/client/impl/observability/operation_span.cpp @@ -1,4 +1,4 @@ -#include "query_spans.h" +#include "operation_span.h" #include @@ -6,7 +6,7 @@ #include -namespace NYdb::inline V3::NQuery { +namespace NYdb::inline V3::NObservability { namespace { @@ -49,21 +49,22 @@ void SafeLogSpanError(TLog& log, const char* message) noexcept { try { std::rethrow_exception(std::current_exception()); } catch (const std::exception& e) { - LOG_LAZY(log, TLOG_ERR, std::string("TQuerySpan: ") + message + ": " + e.what()); + LOG_LAZY(log, TLOG_ERR, 
std::string("TOperationSpan: ") + message + ": " + e.what()); return; } catch (...) { } - LOG_LAZY(log, TLOG_ERR, std::string("TQuerySpan: ") + message + ": (unknown)"); + LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": (unknown)"); } catch (...) { } } } // namespace -TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::string& operationName, - const std::string& endpoint, const TLog& log) - : Log_(log) -{ +TOperationSpan::TOperationSpan(std::shared_ptr tracer + , const std::string& operationName + , const std::string& endpoint + , const TLog& log +) : Log_(log) { if (!tracer) { return; } @@ -73,7 +74,7 @@ TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::str ParseEndpoint(endpoint, host, port); try { - Span_ = tracer->StartSpan(operationName, NMetrics::ESpanKind::CLIENT); + Span_ = tracer->StartSpan(operationName, NTrace::ESpanKind::CLIENT); if (!Span_) { return; } @@ -87,7 +88,7 @@ TQuerySpan::TQuerySpan(std::shared_ptr tracer, const std::str } } -TQuerySpan::~TQuerySpan() noexcept { +TOperationSpan::~TOperationSpan() noexcept { if (Span_) { try { Span_->End(); @@ -97,7 +98,7 @@ TQuerySpan::~TQuerySpan() noexcept { } } -void TQuerySpan::SetPeerEndpoint(const std::string& endpoint) noexcept { +void TOperationSpan::SetPeerEndpoint(const std::string& endpoint) noexcept { if (!Span_ || endpoint.empty()) { return; } @@ -112,18 +113,7 @@ void TQuerySpan::SetPeerEndpoint(const std::string& endpoint) noexcept { } } -void TQuerySpan::SetQueryText(const std::string& query) noexcept { - if (!Span_ || query.empty()) { - return; - } - try { - Span_->SetAttribute("db.query.text", query); - } catch (...) 
{ - SafeLogSpanError(Log_, "failed to set query text"); - } -} - -void TQuerySpan::AddEvent(const std::string& name, const std::map& attributes) noexcept { +void TOperationSpan::AddEvent(const std::string& name, const std::map& attributes) noexcept { if (!Span_) { return; } @@ -134,10 +124,10 @@ void TQuerySpan::AddEvent(const std::string& name, const std::mapSetAttribute("db.response.status_code", ToString(status)); + Span_->SetAttribute("db.response.status_code", static_cast(status)); if (status != EStatus::SUCCESS) { Span_->SetAttribute("error.type", ToString(status)); } @@ -149,4 +139,4 @@ void TQuerySpan::End(EStatus status) noexcept { } } -} // namespace NYdb::NQuery +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/operation_span.h b/src/client/impl/observability/operation_span.h new file mode 100644 index 00000000000..f2925c945e4 --- /dev/null +++ b/src/client/impl/observability/operation_span.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include + +namespace NYdb::inline V3::NObservability { + +class TOperationSpan { +public: + TOperationSpan(std::shared_ptr tracer + , const std::string& operationName + , const std::string& endpoint + , const TLog& log + ); + ~TOperationSpan() noexcept; + + void SetPeerEndpoint(const std::string& endpoint) noexcept; + void AddEvent(const std::string& name, const std::map& attributes = {}) noexcept; + + void End(EStatus status) noexcept; + +private: + TLog Log_; + std::shared_ptr Span_; +}; + +} // namespace NYdb::NObservability diff --git a/src/client/query/CMakeLists.txt b/src/client/query/CMakeLists.txt index 3cc7401200b..bc159ea87ab 100644 --- a/src/client/query/CMakeLists.txt +++ b/src/client/query/CMakeLists.txt @@ -12,7 +12,6 @@ target_link_libraries(client-ydb_query PUBLIC client-ydb_query-impl client-ydb_result client-metrics - client-trace client-types-operation api-protos api-grpc diff --git a/src/client/query/client.cpp 
b/src/client/query/client.cpp index 8c627089170..176ab6ca977 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -104,8 +104,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public CollectParamsSize(params ? ¶ms->GetProtoMap() : nullptr); auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - span->SetQueryText(query); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteQuery"); + auto metrics = std::make_shared(MetricRegistry_, "ExecuteQuery", DbDriverState_->Log); return TExecQueryImpl::ExecuteQuery( Connections_, DbDriverState_, query, txControl, params, settings, session) @@ -189,7 +188,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "Rollback"); + auto metrics = std::make_shared(MetricRegistry_, "Rollback", DbDriverState_->Log); auto responseCb = [promise, session, span, metrics] (Ydb::Query::RollbackTransactionResponse* response, TPlainStatus status) mutable { @@ -241,7 +240,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "Commit"); + auto metrics = std::make_shared(MetricRegistry_, "Commit", DbDriverState_->Log); auto responseCb = [promise, session, span, metrics] (Ydb::Query::CommitTransactionResponse* response, TPlainStatus status) mutable { @@ -557,7 +556,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public }; auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "CreateSession"); + auto metrics = 
std::make_shared(MetricRegistry_, "CreateSession", DbDriverState_->Log); auto ctx = std::make_unique(shared_from_this(), settings, span, metrics); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); @@ -627,7 +626,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public } private: - std::shared_ptr Tracer_; + std::shared_ptr Tracer_; std::shared_ptr MetricRegistry_; NSdkStats::TStatCollector::TClientRetryOperationStatCollector RetryOperationStatCollector_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> QuerySizeHistogram_; diff --git a/src/client/query/impl/CMakeLists.txt b/src/client/query/impl/CMakeLists.txt index fdc46b50f95..d33258b7afd 100644 --- a/src/client/query/impl/CMakeLists.txt +++ b/src/client/query/impl/CMakeLists.txt @@ -16,7 +16,6 @@ target_link_libraries(client-ydb_query-impl PUBLIC target_sources(client-ydb_query-impl PRIVATE exec_query.cpp client_session.cpp - query_spans.cpp ) _ydb_sdk_install_targets(TARGETS client-ydb_query-impl) diff --git a/src/client/query/impl/query_metrics.h b/src/client/query/impl/query_metrics.h index 2bd284f76f3..a03338d6c13 100644 --- a/src/client/query/impl/query_metrics.h +++ b/src/client/query/impl/query_metrics.h @@ -1,13 +1,15 @@ #pragma once -#include +#include namespace NYdb::inline V3::NQuery { -class TQueryMetrics : public NObservability::TClientMetrics { +class TQueryMetrics : public NObservability::TOperationMetrics { public: - TQueryMetrics(std::shared_ptr registry, const std::string& operationName) - : TClientMetrics(std::move(registry), operationName) + TQueryMetrics(std::shared_ptr registry + , const std::string& operationName + , const TLog& log + ) : TOperationMetrics(std::move(registry), operationName, log) {} }; diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h index dc2e6e554e9..61b7f75ade8 100644 --- a/src/client/query/impl/query_spans.h +++ b/src/client/query/impl/query_spans.h @@ -1,35 +1,17 @@ #pragma once -#include 
-#include -#include - -#include - -#include -#include -#include +#include namespace NYdb::inline V3::NQuery { -class TQuerySpan { +class TQuerySpan : public NObservability::TOperationSpan { public: - TQuerySpan(std::shared_ptr tracer + TQuerySpan(std::shared_ptr tracer , const std::string& operationName , const std::string& endpoint , const TLog& log - ); - ~TQuerySpan() noexcept; - - void SetPeerEndpoint(const std::string& endpoint) noexcept; - void SetQueryText(const std::string& query) noexcept; - void AddEvent(const std::string& name, const std::map& attributes = {}) noexcept; - - void End(EStatus status) noexcept; - -private: - TLog Log_; - std::shared_ptr Span_; + ) : TOperationSpan(std::move(tracer), operationName, endpoint, log) + {} }; } // namespace NYdb::NQuery diff --git a/src/client/table/impl/CMakeLists.txt b/src/client/table/impl/CMakeLists.txt index b0427de6eda..8ecfe4ead87 100644 --- a/src/client/table/impl/CMakeLists.txt +++ b/src/client/table/impl/CMakeLists.txt @@ -21,7 +21,6 @@ target_sources(client-ydb_table-impl PRIVATE data_query.cpp readers.cpp request_migrator.cpp - table_spans.cpp table_client.cpp transaction.cpp ) diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index ceb8950769b..f30411c0980 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -384,7 +384,7 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio auto createSessionPromise = NewPromise(); auto self = shared_from_this(); - auto metrics = std::make_shared(MetricRegistry_, "CreateSession"); + auto metrics = std::make_shared(MetricRegistry_, "CreateSession", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto createSessionExtractor = [createSessionPromise, self, standalone, metrics, span] @@ -400,6 +400,7 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const 
TCreateSessio } self->DbDriverState_->StatCollector.IncSessionsOnHost(status.Endpoint); } else { + // We do not use SessionStatusInterception for CreateSession request session.SessionImpl_->MarkBroken(); } TCreateSessionResult val(TStatus(std::move(status)), std::move(session)); @@ -768,7 +769,7 @@ TAsyncStatus TTableClient::TImpl::ExecuteSchemeQuery(const TSession& session, co request.set_session_id(TStringType{session.GetId()}); request.set_yql_text(TStringType{query}); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteSchemeQuery"); + auto metrics = std::make_shared(MetricRegistry_, "ExecuteSchemeQuery", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "ExecuteSchemeQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto future = RunSimple( @@ -795,9 +796,12 @@ TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSessio request.set_session_id(TStringType{session.GetId()}); SetTxSettings(txSettings, request.mutable_tx_settings()); + auto metrics = std::make_shared(MetricRegistry_, "BeginTransaction", DbDriverState_->Log); + auto span = std::make_shared(Tracer_, "BeginTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto promise = NewPromise(); - auto extractor = [promise, session] + auto extractor = [promise, session, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { std::string txId; if (any) { @@ -808,6 +812,8 @@ TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSessio TBeginTransactionResult beginTxResult(TStatus(std::move(status)), TTransaction(session, txId)); + span->End(beginTxResult.GetStatus()); + metrics->End(beginTxResult.GetStatus()); promise.SetValue(std::move(beginTxResult)); }; @@ -834,9 +840,12 @@ TAsyncCommitTransactionResult TTableClient::TImpl::CommitTransaction(const TSess request.set_tx_id(TStringType{txId}); request.set_collect_stats(GetStatsCollectionMode(settings.CollectQueryStats_)); + auto span = 
std::make_shared(Tracer_, "CommitTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(MetricRegistry_, "CommitTransaction", DbDriverState_->Log); + auto promise = NewPromise(); - auto extractor = [promise] + auto extractor = [promise, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { std::optional queryStats; if (any) { @@ -849,6 +858,8 @@ TAsyncCommitTransactionResult TTableClient::TImpl::CommitTransaction(const TSess } TCommitTransactionResult commitTxResult(TStatus(std::move(status)), queryStats); + span->End(commitTxResult.GetStatus()); + metrics->End(commitTxResult.GetStatus()); promise.SetValue(std::move(commitTxResult)); }; @@ -874,11 +885,21 @@ TAsyncStatus TTableClient::TImpl::RollbackTransaction(const TSession& session, c request.set_session_id(TStringType{session.GetId()}); request.set_tx_id(TStringType{txId}); - return RunSimple( + auto span = std::make_shared(Tracer_, "RollbackTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(MetricRegistry_, "RollbackTransaction", DbDriverState_->Log); + + auto future = RunSimple( std::move(request), &Ydb::Table::V1::TableService::Stub::AsyncRollbackTransaction, rpcSettings ); + + return future.Apply([metrics, span](TAsyncStatus future) { + auto status = future.GetValue(); + span->End(status.GetStatus()); + metrics->End(status.GetStatus()); + return status; + }); } TAsyncExplainDataQueryResult TTableClient::TImpl::ExplainDataQuery(const TSession& session, const std::string& query, @@ -1147,10 +1168,15 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, *mutable_rows->mutable_type() = rows.GetType().GetProto(); } + auto metrics = std::make_shared(MetricRegistry_, "BulkUpsert", DbDriverState_->Log); + auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto promise = NewPromise(); - auto extractor = 
[promise](google::protobuf::Any* any, TPlainStatus status) mutable { + auto extractor = [promise, metrics, span](google::protobuf::Any* any, TPlainStatus status) mutable { Y_UNUSED(any); TBulkUpsertResult val(TStatus(std::move(status))); + span->End(val.GetStatus()); + metrics->End(val.GetStatus()); promise.SetValue(std::move(val)); }; @@ -1193,12 +1219,17 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, } request.set_data(TStringType{data}); + auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(MetricRegistry_, "BulkUpsert", DbDriverState_->Log); + auto promise = NewPromise(); - auto extractor = [promise] + auto extractor = [promise, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { Y_UNUSED(any); TBulkUpsertResult val(TStatus(std::move(status))); + span->End(val.GetStatus()); + metrics->End(val.GetStatus()); promise.SetValue(std::move(val)); }; diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index 0d38fd1006f..7a8902dcb15 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -239,10 +239,10 @@ class TTableClient::TImpl: public TClientImplCommon, public auto promise = NewPromise(); bool keepInCache = settings.KeepInQueryCache_ && settings.KeepInQueryCache_.value(); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteDataQuery"); + auto metrics = std::make_shared(MetricRegistry_, "ExecuteDataQuery", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - + // We don't want to delay call of TSession dtor, so we can't capture it by copy // otherwise we break session pool and other clients logic. 
// Same problem with TDataQuery and TTransaction @@ -335,10 +335,9 @@ class TTableClient::TImpl: public TClientImplCommon, public NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> ParamsSizeHistogram; NSdkStats::TAtomicCounter<::NMonitoring::TRate> SessionRemovedDueBalancing; - std::shared_ptr MetricRegistry_; - std::shared_ptr Tracer_; - private: + std::shared_ptr Tracer_; + std::shared_ptr MetricRegistry_; NSessionPool::TSessionPool SessionPool_; TRequestMigrator RequestMigrator_; static const TKeepAliveSettings KeepAliveSettings; diff --git a/src/client/table/impl/table_metrics.h b/src/client/table/impl/table_metrics.h index 83f9deafdb4..8b8f32b2882 100644 --- a/src/client/table/impl/table_metrics.h +++ b/src/client/table/impl/table_metrics.h @@ -1,13 +1,15 @@ #pragma once -#include +#include namespace NYdb::inline V3::NTable { -class TTableMetrics : public NObservability::TClientMetrics { +class TTableMetrics : public NObservability::TOperationMetrics { public: - TTableMetrics(std::shared_ptr registry, const std::string& operationName) - : TClientMetrics(std::move(registry), operationName) + TTableMetrics(std::shared_ptr registry + , const std::string& operationName + , const TLog& log + ) : TOperationMetrics(std::move(registry), operationName, log) {} }; diff --git a/src/client/table/impl/table_spans.cpp b/src/client/table/impl/table_spans.cpp deleted file mode 100644 index e476c38eccb..00000000000 --- a/src/client/table/impl/table_spans.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "table_spans.h" - -#include - -#include - -#include - -namespace NYdb::inline V3::NTable { - -namespace { - -constexpr int DefaultGrpcPort = 2135; - -void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { - port = DefaultGrpcPort; - - if (endpoint.empty()) { - host = endpoint; - return; - } - - if (endpoint.front() == '[') { - auto bracketEnd = endpoint.find(']'); - if (bracketEnd != std::string::npos) { - host = endpoint.substr(1, bracketEnd - 1); - 
if (bracketEnd + 2 < endpoint.size() && endpoint[bracketEnd + 1] == ':') { - try { - port = std::stoi(endpoint.substr(bracketEnd + 2)); - } catch (...) {} - } - return; - } - } - - auto pos = endpoint.rfind(':'); - if (pos != std::string::npos) { - host = endpoint.substr(0, pos); - try { - port = std::stoi(endpoint.substr(pos + 1)); - } catch (...) {} - } else { - host = endpoint; - } -} - -void SafeLogSpanError(TLog& log, const char* message) noexcept { - try { - try { - std::rethrow_exception(std::current_exception()); - } catch (const std::exception& e) { - LOG_LAZY(log, TLOG_ERR, std::string("TTableSpan: ") + message + ": " + e.what()); - return; - } catch (...) { - } - LOG_LAZY(log, TLOG_ERR, std::string("TTableSpan: ") + message + ": (unknown)"); - } catch (...) { - } -} - -} // namespace - -TTableSpan::TTableSpan(std::shared_ptr tracer - , const std::string& operationName - , const std::string& endpoint - , const TLog& log -) : Log_(log) { - if (!tracer) { - return; - } - - std::string host; - int port; - ParseEndpoint(endpoint, host, port); - - try { - Span_ = tracer->StartSpan(operationName, NMetrics::ESpanKind::CLIENT); - if (!Span_) { - return; - } - Span_->SetAttribute("db.system.name", "other_sql"); - Span_->SetAttribute("db.operation.name", operationName); - Span_->SetAttribute("server.address", host); - Span_->SetAttribute("server.port", static_cast(port)); - } catch (...) { - SafeLogSpanError(Log_, "failed to initialize span"); - Span_.reset(); - } -} - -TTableSpan::~TTableSpan() noexcept { - if (Span_) { - try { - Span_->End(); - } catch (...) { - SafeLogSpanError(Log_, "failed to end span"); - } - } -} - -void TTableSpan::End(EStatus status) noexcept { - if (Span_) { - try { - Span_->SetAttribute("db.response.status_code", ToString(status)); - if (status != EStatus::SUCCESS) { - Span_->SetAttribute("error.type", ToString(status)); - } - Span_->End(); - } catch (...) 
{ - SafeLogSpanError(Log_, "failed to finalize span"); - } - Span_.reset(); - } -} - -} // namespace NYdb::NTable diff --git a/src/client/table/impl/table_spans.h b/src/client/table/impl/table_spans.h index 5f1ccab6dc1..94f3ea8e87f 100644 --- a/src/client/table/impl/table_spans.h +++ b/src/client/table/impl/table_spans.h @@ -1,31 +1,17 @@ #pragma once -#include -#include -#include - -#include - -#include -#include -#include +#include namespace NYdb::inline V3::NTable { -class TTableSpan { +class TTableSpan : public NObservability::TOperationSpan { public: - TTableSpan(std::shared_ptr tracer + TTableSpan(std::shared_ptr tracer , const std::string& operationName , const std::string& endpoint , const TLog& log - ); - ~TTableSpan() noexcept; - - void End(EStatus status) noexcept; - -private: - TLog Log_; - std::shared_ptr Span_; + ) : TOperationSpan(std::move(tracer), operationName, endpoint, log) + {} }; } // namespace NYdb::NTable diff --git a/tests/unit/client/CMakeLists.txt b/tests/unit/client/CMakeLists.txt index de86c3fe274..d27649e3fd3 100644 --- a/tests/unit/client/CMakeLists.txt +++ b/tests/unit/client/CMakeLists.txt @@ -105,7 +105,7 @@ add_ydb_test(NAME client-ydb_metrics_ut GTEST INCLUDE_DIRS ${YDB_SDK_SOURCE_DIR} SOURCES - observability/client_metrics_ut.cpp + observability/operation_metrics_ut.cpp LINK_LIBRARIES yutil impl-observability diff --git a/tests/unit/client/observability/client_metrics_ut.cpp b/tests/unit/client/observability/operation_metrics_ut.cpp similarity index 76% rename from tests/unit/client/observability/client_metrics_ut.cpp rename to tests/unit/client/observability/operation_metrics_ut.cpp index 209ce1db0d3..7e76e48793f 100644 --- a/tests/unit/client/observability/client_metrics_ut.cpp +++ b/tests/unit/client/observability/operation_metrics_ut.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -12,10 +12,10 @@ using namespace NYdb::NMetrics; using namespace NYdb::NTests; // 
--------------------------------------------------------------------------- -// TClientMetrics (shared logic) +// TOperationMetrics (shared logic) // --------------------------------------------------------------------------- -class ClientMetricsTest : public ::testing::Test { +class OperationMetricsTest : public ::testing::Test { protected: void SetUp() override { Registry = std::make_shared(); @@ -50,17 +50,17 @@ class ClientMetricsTest : public ::testing::Test { std::shared_ptr Registry; }; -TEST_F(ClientMetricsTest, RequestCounterIncrementedOnConstruction) { - TClientMetrics metrics(Registry, "DoSomething"); +TEST_F(OperationMetricsTest, RequestCounterIncrementedOnConstruction) { + TOperationMetrics metrics(Registry, "DoSomething", TLog()); auto counter = RequestCounter("DoSomething"); ASSERT_NE(counter, nullptr); EXPECT_EQ(counter->Get(), 1); } -TEST_F(ClientMetricsTest, SuccessDoesNotIncrementErrorCounter) { +TEST_F(OperationMetricsTest, SuccessDoesNotIncrementErrorCounter) { { - TClientMetrics metrics(Registry, "DoSomething"); + TOperationMetrics metrics(Registry, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -69,9 +69,9 @@ TEST_F(ClientMetricsTest, SuccessDoesNotIncrementErrorCounter) { EXPECT_EQ(errors->Get(), 0); } -TEST_F(ClientMetricsTest, FailureIncrementsErrorCounter) { +TEST_F(OperationMetricsTest, FailureIncrementsErrorCounter) { { - TClientMetrics metrics(Registry, "DoSomething"); + TOperationMetrics metrics(Registry, "DoSomething", TLog()); metrics.End(EStatus::UNAVAILABLE); } @@ -80,9 +80,9 @@ TEST_F(ClientMetricsTest, FailureIncrementsErrorCounter) { EXPECT_EQ(errors->Get(), 1); } -TEST_F(ClientMetricsTest, DurationRecordedOnEnd) { +TEST_F(OperationMetricsTest, DurationRecordedOnEnd) { { - TClientMetrics metrics(Registry, "DoSomething"); + TOperationMetrics metrics(Registry, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -92,9 +92,9 @@ TEST_F(ClientMetricsTest, DurationRecordedOnEnd) { EXPECT_GE(hist->GetValues()[0], 
0.0); } -TEST_F(ClientMetricsTest, DurationIsInSeconds) { +TEST_F(OperationMetricsTest, DurationIsInSeconds) { { - TClientMetrics metrics(Registry, "DoSomething"); + TOperationMetrics metrics(Registry, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -103,8 +103,8 @@ TEST_F(ClientMetricsTest, DurationIsInSeconds) { EXPECT_LT(hist->GetValues()[0], 1.0); } -TEST_F(ClientMetricsTest, DoubleEndIsIdempotent) { - TClientMetrics metrics(Registry, "DoSomething"); +TEST_F(OperationMetricsTest, DoubleEndIsIdempotent) { + TOperationMetrics metrics(Registry, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); metrics.End(EStatus::INTERNAL_ERROR); @@ -117,9 +117,9 @@ TEST_F(ClientMetricsTest, DoubleEndIsIdempotent) { EXPECT_EQ(hist->Count(), 1u); } -TEST_F(ClientMetricsTest, DestructorCallsEndWithClientInternalError) { +TEST_F(OperationMetricsTest, DestructorCallsEndWithClientInternalError) { { - TClientMetrics metrics(Registry, "DoSomething"); + TOperationMetrics metrics(Registry, "DoSomething", TLog()); } auto requests = RequestCounter("DoSomething"); @@ -135,20 +135,20 @@ TEST_F(ClientMetricsTest, DestructorCallsEndWithClientInternalError) { EXPECT_EQ(hist->Count(), 1u); } -TEST_F(ClientMetricsTest, NullRegistryDoesNotCrash) { +TEST_F(OperationMetricsTest, NullRegistryDoesNotCrash) { EXPECT_NO_THROW({ - TClientMetrics metrics(nullptr, "DoSomething"); + TOperationMetrics metrics(nullptr, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); }); } -TEST_F(ClientMetricsTest, DifferentOperationsHaveSeparateMetrics) { +TEST_F(OperationMetricsTest, DifferentOperationsHaveSeparateMetrics) { { - TClientMetrics m1(Registry, "OpA"); + TOperationMetrics m1(Registry, "OpA", TLog()); m1.End(EStatus::SUCCESS); } { - TClientMetrics m2(Registry, "OpB"); + TOperationMetrics m2(Registry, "OpB", TLog()); m2.End(EStatus::OVERLOADED); } @@ -160,9 +160,9 @@ TEST_F(ClientMetricsTest, DifferentOperationsHaveSeparateMetrics) { EXPECT_EQ(DurationHistogram("OpB", 
EStatus::OVERLOADED)->Count(), 1u); } -TEST_F(ClientMetricsTest, MultipleRequestsAccumulate) { +TEST_F(OperationMetricsTest, MultipleRequestsAccumulate) { for (int i = 0; i < 5; ++i) { - TClientMetrics metrics(Registry, "Op"); + TOperationMetrics metrics(Registry, "Op", TLog()); metrics.End(i % 2 == 0 ? EStatus::SUCCESS : EStatus::TIMEOUT); } @@ -172,7 +172,7 @@ TEST_F(ClientMetricsTest, MultipleRequestsAccumulate) { EXPECT_EQ(DurationHistogram("Op", EStatus::TIMEOUT)->Count(), 2u); } -TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { +TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { std::vector errorStatuses = { EStatus::BAD_REQUEST, EStatus::UNAUTHORIZED, @@ -185,7 +185,7 @@ TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { }; for (auto status : errorStatuses) { - TClientMetrics metrics(Registry, "Op"); + TOperationMetrics metrics(Registry, "Op", TLog()); metrics.End(status); } @@ -201,20 +201,26 @@ TEST_F(ClientMetricsTest, AllErrorStatusesIncrementErrorCounter) { TEST(QueryMetricsTest, UsesOtelStandardMetrics) { auto registry = std::make_shared(); - NQuery::TQueryMetrics metrics(registry, "ExecuteQuery"); + NQuery::TQueryMetrics metrics(registry, "ExecuteQuery", TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( registry->GetCounter( "db.client.operation.requests", - {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteQuery"}} + { + {"db.system.name", "other_sql"}, + {"db.operation.name", "ExecuteQuery"} + } ), nullptr ); EXPECT_NE( registry->GetCounter( "db.client.operation.errors", - {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteQuery"}} + { + {"db.system.name", "other_sql"}, + {"db.operation.name", "ExecuteQuery"} + } ), nullptr ); @@ -238,7 +244,7 @@ TEST(QueryMetricsTest, UsesOtelStandardMetrics) { TEST(TableMetricsTest, UsesOtelStandardMetrics) { auto registry = std::make_shared(); - NTable::TTableMetrics metrics(registry, "ExecuteDataQuery"); + NTable::TTableMetrics 
metrics(registry, "ExecuteDataQuery", TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( From 107197428b99ac6078d96ccd40f212284231230d Mon Sep 17 00:00:00 2001 From: maladetska Date: Wed, 1 Apr 2026 14:11:42 +0300 Subject: [PATCH 10/17] unify client operation metrics pipeline --- cmake/external_libs.cmake | 1 + src/client/impl/observability/CMakeLists.txt | 1 + .../impl/observability/operation_metrics.cpp | 64 +++------- .../impl/observability/operation_metrics.h | 11 +- src/client/impl/stats/CMakeLists.txt | 1 + src/client/impl/stats/stats.h | 110 +++++++++++++++++- src/client/query/client.cpp | 16 +-- src/client/query/impl/query_metrics.h | 16 --- src/client/table/impl/table_client.cpp | 24 ++-- src/client/table/impl/table_client.h | 6 +- src/client/table/impl/table_metrics.h | 16 --- .../observability/operation_metrics_ut.cpp | 51 ++++---- 12 files changed, 179 insertions(+), 138 deletions(-) delete mode 100644 src/client/query/impl/query_metrics.h delete mode 100644 src/client/table/impl/table_metrics.h diff --git a/cmake/external_libs.cmake b/cmake/external_libs.cmake index 9d2500bffb4..4560fd662b3 100644 --- a/cmake/external_libs.cmake +++ b/cmake/external_libs.cmake @@ -14,6 +14,7 @@ find_package(Brotli 1.1.0 REQUIRED) find_package(jwt-cpp REQUIRED) find_package(double-conversion REQUIRED) +# OpenTelemetry if (YDB_SDK_ENABLE_OTEL_METRICS OR YDB_SDK_ENABLE_OTEL_TRACE) find_package(opentelemetry-cpp REQUIRED) endif() diff --git a/src/client/impl/observability/CMakeLists.txt b/src/client/impl/observability/CMakeLists.txt index 33389f3ce2e..264c5abd87e 100644 --- a/src/client/impl/observability/CMakeLists.txt +++ b/src/client/impl/observability/CMakeLists.txt @@ -3,6 +3,7 @@ _ydb_sdk_add_library(impl-observability) target_link_libraries(impl-observability PUBLIC yutil client-metrics + client-impl-ydb_stats ) target_sources(impl-observability PRIVATE diff --git a/src/client/impl/observability/operation_metrics.cpp 
b/src/client/impl/observability/operation_metrics.cpp index f8aec18ffe5..9ef3f367c04 100644 --- a/src/client/impl/observability/operation_metrics.cpp +++ b/src/client/impl/observability/operation_metrics.cpp @@ -2,8 +2,6 @@ #include -#include - #include namespace NYdb::inline V3::NObservability { @@ -26,40 +24,22 @@ void SafeLogMetricsError(TLog& log, const char* message) noexcept { } // namespace -static const std::vector DurationBucketsSec = { - 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 -}; - -static constexpr const char* RequestsDescription = "Number of database client operations started."; -static constexpr const char* ErrorsDescription = "Number of database client operations that failed."; -static constexpr const char* DurationDescription = "Duration of database client operations."; - -TOperationMetrics::TOperationMetrics(std::shared_ptr registry +TOperationMetrics::TOperationMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector , const std::string& operationName , const TLog& log -) : Registry_(std::move(registry)) +) : Collector_(operationCollector) , OperationName_(operationName) , Log_(log) { - if (!Registry_) { + if (!Collector_) { return; } - try { - NMetrics::TLabels labels = { - {"db.system.name", "other_sql"}, - {"db.operation.name", operationName}, - }; - RequestCounter_ = Registry_->Counter("db.client.operation.requests", labels, RequestsDescription, "{operation}"); - ErrorCounter_ = Registry_->Counter("db.client.operation.errors", labels, ErrorsDescription, "{error}"); - - RequestCounter_->Inc(); + Collector_->IncRequestCount(operationName); StartTime_ = std::chrono::steady_clock::now(); } catch (...) 
{ SafeLogMetricsError(Log_, "failed to initialize metrics"); - RequestCounter_.reset(); - ErrorCounter_.reset(); - Registry_.reset(); + Collector_ = nullptr; } } @@ -73,33 +53,15 @@ void TOperationMetrics::End(EStatus status) noexcept { } Ended_ = true; - try { - const std::string statusCode = ToString(status); - if (Registry_) { - auto elapsed = std::chrono::steady_clock::now() - StartTime_; - double durationSec = std::chrono::duration(elapsed).count(); - NMetrics::TLabels durationLabels = { - {"db.system.name", "other_sql"}, - {"db.operation.name", OperationName_}, - {"db.response.status_code", statusCode}, - }; - if (status != EStatus::SUCCESS) { - durationLabels["error.type"] = statusCode; - } - auto durationHistogram = Registry_->Histogram( - "db.client.operation.duration", - DurationBucketsSec, - durationLabels, - DurationDescription, - "s"); - if (durationHistogram) { - durationHistogram->Record(durationSec); - } - } + if (!Collector_) { + return; + } - if (status != EStatus::SUCCESS && ErrorCounter_) { - ErrorCounter_->Inc(); - } + try { + auto elapsed = std::chrono::steady_clock::now() - StartTime_; + double durationSec = std::chrono::duration(elapsed).count(); + Collector_->RecordLatency(OperationName_, durationSec, status); + Collector_->IncErrorCount(OperationName_, status); } catch (...) 
{ SafeLogMetricsError(Log_, "failed to record metrics"); } diff --git a/src/client/impl/observability/operation_metrics.h b/src/client/impl/observability/operation_metrics.h index 86f37d0ce7c..70067d8333a 100644 --- a/src/client/impl/observability/operation_metrics.h +++ b/src/client/impl/observability/operation_metrics.h @@ -1,19 +1,18 @@ #pragma once -#include +#include #include #include #include -#include #include namespace NYdb::inline V3::NObservability { class TOperationMetrics { public: - TOperationMetrics(std::shared_ptr registry + TOperationMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector , const std::string& operationName , const TLog& log ); @@ -22,11 +21,9 @@ class TOperationMetrics { void End(EStatus status) noexcept; private: - std::shared_ptr Registry_; + NSdkStats::TStatCollector::TClientOperationStatCollector* Collector_ = nullptr; std::string OperationName_; - std::shared_ptr RequestCounter_; - std::shared_ptr ErrorCounter_; - std::chrono::steady_clock::time_point StartTime_; + std::chrono::steady_clock::time_point StartTime_{}; bool Ended_ = false; TLog Log_; }; diff --git a/src/client/impl/stats/CMakeLists.txt b/src/client/impl/stats/CMakeLists.txt index 498104196cd..15866af4bc6 100644 --- a/src/client/impl/stats/CMakeLists.txt +++ b/src/client/impl/stats/CMakeLists.txt @@ -4,6 +4,7 @@ target_link_libraries(client-impl-ydb_stats PUBLIC yutil grpc-client monlib-metrics + client-metrics ) target_sources(client-impl-ydb_stats PRIVATE diff --git a/src/client/impl/stats/stats.h b/src/client/impl/stats/stats.h index d545764c887..3d11004ebed 100644 --- a/src/client/impl/stats/stats.h +++ b/src/client/impl/stats/stats.h @@ -1,13 +1,16 @@ #pragma once #include +#include #include #include #include #include +#include #include +#include namespace NYdb::inline V3 { namespace NSdkStats { @@ -226,6 +229,101 @@ struct TStatCollector { std::string ClientType_; }; + struct TClientOperationStatCollector { + 
TClientOperationStatCollector() + : MetricRegistry_() + {} + + TClientOperationStatCollector(::NMonitoring::TMetricRegistry* registry, + const std::string& database, + const std::string& clientType) + : MetricRegistry_(registry) + , Database_(database) + , ClientType_(clientType) + {} + + void SetExternalRegistry(const std::shared_ptr& externalRegistry) { + ExternalRegistry_ = externalRegistry; + } + + void IncRequestCount(const std::string& operationName) { + if (auto registry = MetricRegistry_.Get()) { + registry->Rate({ + {"database", Database_}, + {"ydb_client", ClientType_}, + {"operation", operationName}, + {"sensor", "Request/Operations"} + })->Inc(); + } + if (ExternalRegistry_) { + ExternalRegistry_->Counter( + "db.client.operation.requests", + {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, + "Number of database client operations started.", + "{operation}" + )->Inc(); + } + } + + void IncErrorCount(const std::string& operationName, EStatus status) { + if (status == EStatus::SUCCESS) { + return; + } + if (auto registry = MetricRegistry_.Get()) { + registry->Rate({ + {"database", Database_}, + {"ydb_client", ClientType_}, + {"operation", operationName}, + {"status", TStringBuilder() << status}, + {"sensor", "Request/OperationErrors"} + })->Inc(); + } + if (ExternalRegistry_) { + ExternalRegistry_->Counter( + "db.client.operation.errors", + {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, + "Number of database client operations that failed.", + "{error}" + )->Inc(); + } + } + + void RecordLatency(const std::string& operationName, double durationSeconds, EStatus status) { + if (auto registry = MetricRegistry_.Get()) { + registry->HistogramRate({ + {"database", Database_}, + {"ydb_client", ClientType_}, + {"operation", operationName}, + {"sensor", "Request/OperationLatencyMs"} + }, ::NMonitoring::ExponentialHistogram(20, 2, 1))->Record( + static_cast(durationSeconds * 1000.0)); + } + if (ExternalRegistry_) { + 
NMetrics::TLabels labels = { + {"db.system.name", "other_sql"}, + {"db.operation.name", operationName}, + {"db.response.status_code", TStringBuilder() << status}, + }; + if (status != EStatus::SUCCESS) { + labels["error.type"] = TStringBuilder() << status; + } + ExternalRegistry_->Histogram( + "db.client.operation.duration", + {0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10}, + labels, + "Duration of database client operations.", + "s" + )->Record(durationSeconds); + } + } + + private: + TAtomicPointer<::NMonitoring::TMetricRegistry> MetricRegistry_; + std::shared_ptr ExternalRegistry_; + std::string Database_; + std::string ClientType_; + }; + struct TClientStatCollector { TClientStatCollector(::NMonitoring::TRate* cacheMiss = nullptr @@ -233,13 +331,15 @@ struct TStatCollector { , ::NMonitoring::THistogram* paramsSize = nullptr , ::NMonitoring::TRate* sessionRemoved = nullptr , ::NMonitoring::TRate* requestMigrated = nullptr - , TClientRetryOperationStatCollector retryOperationStatCollector = TClientRetryOperationStatCollector()) + , TClientRetryOperationStatCollector retryOperationStatCollector = TClientRetryOperationStatCollector() + , TClientOperationStatCollector operationStatCollector = TClientOperationStatCollector()) : CacheMiss(cacheMiss) , QuerySize(querySize) , ParamsSize(paramsSize) , SessionRemovedDueBalancing(sessionRemoved) , RequestMigrated(requestMigrated) , RetryOperationStatCollector(retryOperationStatCollector) + , OperationStatCollector(operationStatCollector) { } ::NMonitoring::TRate* CacheMiss; @@ -248,6 +348,7 @@ struct TStatCollector { ::NMonitoring::TRate* SessionRemovedDueBalancing; ::NMonitoring::TRate* RequestMigrated; TClientRetryOperationStatCollector RetryOperationStatCollector; + TClientOperationStatCollector OperationStatCollector; }; TStatCollector(const std::string& database, TMetricRegistry* sensorsRegistry) @@ -376,10 +477,13 @@ struct TStatCollector { {"sensor", "Request/ParamsSize"} }, ::NMonitoring::ExponentialHistogram(10, 
2, 32)); return TClientStatCollector(cacheMiss, querySize, paramsSize, sessionRemovedDueBalancing, requestMigrated, - TClientRetryOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType)); + TClientRetryOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType), + TClientOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType)); } - return TClientStatCollector(); + return TClientStatCollector(nullptr, nullptr, nullptr, nullptr, nullptr, + TClientRetryOperationStatCollector(nullptr, Database_, clientType), + TClientOperationStatCollector(nullptr, Database_, clientType)); } bool IsCollecting() { diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index 176ab6ca977..9a5b89583ef 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -14,8 +14,8 @@ #include #include +#include #include -#include #include #include #include @@ -26,6 +26,7 @@ namespace NYdb::inline V3::NQuery { +using TQueryMetrics = NObservability::TOperationMetrics; using TRetryContextResultAsync = NRetry::Async::TRetryContext; using TRetryContextAsync = NRetry::Async::TRetryContext; @@ -74,7 +75,6 @@ class TQueryClient::TImpl: public TClientImplCommon, public if (auto traceProvider = Connections_->GetTraceProvider()) { Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-query"); } - MetricRegistry_ = Connections_->GetExternalMetricRegistry(); } ~TImpl() { @@ -85,6 +85,8 @@ class TQueryClient::TImpl: public TClientImplCommon, public QuerySizeHistogram_.Set(collector.QuerySize); ParamsSizeHistogram_.Set(collector.ParamsSize); RetryOperationStatCollector_ = collector.RetryOperationStatCollector; + OperationStatCollector_ = collector.OperationStatCollector; + OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); } TAsyncExecuteQueryIterator StreamExecuteQuery(const std::string& query, const TTxControl& txControl, @@ -104,7 +106,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public 
CollectParamsSize(params ? ¶ms->GetProtoMap() : nullptr); auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteQuery", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteQuery", DbDriverState_->Log); return TExecQueryImpl::ExecuteQuery( Connections_, DbDriverState_, query, txControl, params, settings, session) @@ -188,7 +190,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "Rollback", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "Rollback", DbDriverState_->Log); auto responseCb = [promise, session, span, metrics] (Ydb::Query::RollbackTransactionResponse* response, TPlainStatus status) mutable { @@ -240,7 +242,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "Commit", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "Commit", DbDriverState_->Log); auto responseCb = [promise, session, span, metrics] (Ydb::Query::CommitTransactionResponse* response, TPlainStatus status) mutable { @@ -556,7 +558,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public }; auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "CreateSession", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "CreateSession", DbDriverState_->Log); auto ctx = std::make_unique(shared_from_this(), settings, span, 
metrics); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); @@ -627,7 +629,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public private: std::shared_ptr Tracer_; - std::shared_ptr MetricRegistry_; + NSdkStats::TStatCollector::TClientOperationStatCollector OperationStatCollector_; NSdkStats::TStatCollector::TClientRetryOperationStatCollector RetryOperationStatCollector_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> QuerySizeHistogram_; NSdkStats::TAtomicHistogram<::NMonitoring::THistogram> ParamsSizeHistogram_; diff --git a/src/client/query/impl/query_metrics.h b/src/client/query/impl/query_metrics.h deleted file mode 100644 index a03338d6c13..00000000000 --- a/src/client/query/impl/query_metrics.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace NYdb::inline V3::NQuery { - -class TQueryMetrics : public NObservability::TOperationMetrics { -public: - TQueryMetrics(std::shared_ptr registry - , const std::string& operationName - , const TLog& log - ) : TOperationMetrics(std::move(registry), operationName, log) - {} -}; - -} // namespace NYdb::NQuery diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index f30411c0980..6db36b92a27 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -6,6 +6,8 @@ namespace NTable { using namespace NThreading; +using TTableMetrics = NObservability::TOperationMetrics; + const TKeepAliveSettings TTableClient::TImpl::KeepAliveSettings = TKeepAliveSettings().ClientTimeout(KEEP_ALIVE_CLIENT_TIMEOUT); @@ -22,7 +24,9 @@ TTableClient::TImpl::TImpl(std::shared_ptr&& connections, , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { - MetricRegistry_ = Connections_->GetExternalMetricRegistry(); + auto clientCollector = DbDriverState_->StatCollector.GetClientStatCollector("Table"); + OperationStatCollector_ = clientCollector.OperationStatCollector; + 
OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); if (auto traceProvider = Connections_->GetTraceProvider()) { Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-table"); @@ -32,7 +36,7 @@ TTableClient::TImpl::TImpl(std::shared_ptr&& connections, return; } - SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Table")); + SetStatCollector(clientCollector); SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Table")); } @@ -384,7 +388,7 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio auto createSessionPromise = NewPromise(); auto self = shared_from_this(); - auto metrics = std::make_shared(MetricRegistry_, "CreateSession", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "CreateSession", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto createSessionExtractor = [createSessionPromise, self, standalone, metrics, span] @@ -769,7 +773,7 @@ TAsyncStatus TTableClient::TImpl::ExecuteSchemeQuery(const TSession& session, co request.set_session_id(TStringType{session.GetId()}); request.set_yql_text(TStringType{query}); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteSchemeQuery", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteSchemeQuery", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "ExecuteSchemeQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto future = RunSimple( @@ -796,7 +800,7 @@ TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSessio request.set_session_id(TStringType{session.GetId()}); SetTxSettings(txSettings, request.mutable_tx_settings()); - auto metrics = std::make_shared(MetricRegistry_, "BeginTransaction", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "BeginTransaction", 
DbDriverState_->Log); auto span = std::make_shared(Tracer_, "BeginTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto promise = NewPromise(); @@ -841,7 +845,7 @@ TAsyncCommitTransactionResult TTableClient::TImpl::CommitTransaction(const TSess request.set_collect_stats(GetStatsCollectionMode(settings.CollectQueryStats_)); auto span = std::make_shared(Tracer_, "CommitTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "CommitTransaction", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "CommitTransaction", DbDriverState_->Log); auto promise = NewPromise(); @@ -886,7 +890,7 @@ TAsyncStatus TTableClient::TImpl::RollbackTransaction(const TSession& session, c request.set_tx_id(TStringType{txId}); auto span = std::make_shared(Tracer_, "RollbackTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "RollbackTransaction", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "RollbackTransaction", DbDriverState_->Log); auto future = RunSimple( std::move(request), @@ -1140,6 +1144,8 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli ParamsSizeHistogram.Set(collector.ParamsSize); RetryOperationStatCollector = collector.RetryOperationStatCollector; SessionRemovedDueBalancing.Set(collector.SessionRemovedDueBalancing); + OperationStatCollector_ = collector.OperationStatCollector; + OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); } TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { @@ -1168,7 +1174,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, *mutable_rows->mutable_type() = rows.GetType().GetProto(); } - auto metrics = std::make_shared(MetricRegistry_, "BulkUpsert", 
DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "BulkUpsert", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto promise = NewPromise(); @@ -1220,7 +1226,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, request.set_data(TStringType{data}); auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(MetricRegistry_, "BulkUpsert", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "BulkUpsert", DbDriverState_->Log); auto promise = NewPromise(); diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index 7a8902dcb15..a82ba14caa1 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #undef INCLUDE_YDB_INTERNAL_H @@ -17,7 +18,6 @@ #include "data_query.h" #include "request_migrator.h" #include "readers.h" -#include "table_metrics.h" #include "table_spans.h" #include @@ -239,7 +239,7 @@ class TTableClient::TImpl: public TClientImplCommon, public auto promise = NewPromise(); bool keepInCache = settings.KeepInQueryCache_ && settings.KeepInQueryCache_.value(); - auto metrics = std::make_shared(MetricRegistry_, "ExecuteDataQuery", DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteDataQuery", DbDriverState_->Log); auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); @@ -337,7 +337,7 @@ class TTableClient::TImpl: public TClientImplCommon, public private: std::shared_ptr Tracer_; - std::shared_ptr MetricRegistry_; + NSdkStats::TStatCollector::TClientOperationStatCollector OperationStatCollector_; NSessionPool::TSessionPool SessionPool_; TRequestMigrator RequestMigrator_; 
static const TKeepAliveSettings KeepAliveSettings; diff --git a/src/client/table/impl/table_metrics.h b/src/client/table/impl/table_metrics.h deleted file mode 100644 index 8b8f32b2882..00000000000 --- a/src/client/table/impl/table_metrics.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace NYdb::inline V3::NTable { - -class TTableMetrics : public NObservability::TOperationMetrics { -public: - TTableMetrics(std::shared_ptr registry - , const std::string& operationName - , const TLog& log - ) : TOperationMetrics(std::move(registry), operationName, log) - {} -}; - -} // namespace NYdb::NTable diff --git a/tests/unit/client/observability/operation_metrics_ut.cpp b/tests/unit/client/observability/operation_metrics_ut.cpp index 7e76e48793f..ddb4f4592da 100644 --- a/tests/unit/client/observability/operation_metrics_ut.cpp +++ b/tests/unit/client/observability/operation_metrics_ut.cpp @@ -1,6 +1,5 @@ #include -#include -#include +#include #include #include @@ -10,6 +9,7 @@ using namespace NYdb; using namespace NYdb::NObservability; using namespace NYdb::NMetrics; using namespace NYdb::NTests; +using namespace NYdb::NSdkStats; // --------------------------------------------------------------------------- // TOperationMetrics (shared logic) @@ -19,6 +19,7 @@ class OperationMetricsTest : public ::testing::Test { protected: void SetUp() override { Registry = std::make_shared(); + OpCollector.SetExternalRegistry(Registry); } std::shared_ptr RequestCounter(const std::string& op) { @@ -47,11 +48,12 @@ class OperationMetricsTest : public ::testing::Test { return Registry->GetHistogram("db.client.operation.duration", labels); } + TStatCollector::TClientOperationStatCollector OpCollector; std::shared_ptr Registry; }; TEST_F(OperationMetricsTest, RequestCounterIncrementedOnConstruction) { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); auto counter = RequestCounter("DoSomething"); 
ASSERT_NE(counter, nullptr); @@ -60,7 +62,7 @@ TEST_F(OperationMetricsTest, RequestCounterIncrementedOnConstruction) { TEST_F(OperationMetricsTest, SuccessDoesNotIncrementErrorCounter) { { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -71,7 +73,7 @@ TEST_F(OperationMetricsTest, SuccessDoesNotIncrementErrorCounter) { TEST_F(OperationMetricsTest, FailureIncrementsErrorCounter) { { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::UNAVAILABLE); } @@ -82,7 +84,7 @@ TEST_F(OperationMetricsTest, FailureIncrementsErrorCounter) { TEST_F(OperationMetricsTest, DurationRecordedOnEnd) { { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -94,7 +96,7 @@ TEST_F(OperationMetricsTest, DurationRecordedOnEnd) { TEST_F(OperationMetricsTest, DurationIsInSeconds) { { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -104,7 +106,7 @@ TEST_F(OperationMetricsTest, DurationIsInSeconds) { } TEST_F(OperationMetricsTest, DoubleEndIsIdempotent) { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); metrics.End(EStatus::INTERNAL_ERROR); @@ -119,7 +121,7 @@ TEST_F(OperationMetricsTest, DoubleEndIsIdempotent) { TEST_F(OperationMetricsTest, DestructorCallsEndWithClientInternalError) { { - TOperationMetrics metrics(Registry, "DoSomething", TLog()); + TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); } auto requests = RequestCounter("DoSomething"); @@ -137,18 +139,19 @@ TEST_F(OperationMetricsTest, DestructorCallsEndWithClientInternalError) { 
TEST_F(OperationMetricsTest, NullRegistryDoesNotCrash) { EXPECT_NO_THROW({ - TOperationMetrics metrics(nullptr, "DoSomething", TLog()); + TStatCollector::TClientOperationStatCollector nullCollector; + TOperationMetrics metrics(&nullCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); }); } TEST_F(OperationMetricsTest, DifferentOperationsHaveSeparateMetrics) { { - TOperationMetrics m1(Registry, "OpA", TLog()); + TOperationMetrics m1(&OpCollector, "OpA", TLog()); m1.End(EStatus::SUCCESS); } { - TOperationMetrics m2(Registry, "OpB", TLog()); + TOperationMetrics m2(&OpCollector, "OpB", TLog()); m2.End(EStatus::OVERLOADED); } @@ -162,7 +165,7 @@ TEST_F(OperationMetricsTest, DifferentOperationsHaveSeparateMetrics) { TEST_F(OperationMetricsTest, MultipleRequestsAccumulate) { for (int i = 0; i < 5; ++i) { - TOperationMetrics metrics(Registry, "Op", TLog()); + TOperationMetrics metrics(&OpCollector, "Op", TLog()); metrics.End(i % 2 == 0 ? EStatus::SUCCESS : EStatus::TIMEOUT); } @@ -185,7 +188,7 @@ TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { }; for (auto status : errorStatuses) { - TOperationMetrics metrics(Registry, "Op", TLog()); + TOperationMetrics metrics(&OpCollector, "Op", TLog()); metrics.End(status); } @@ -194,14 +197,12 @@ TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); } -// --------------------------------------------------------------------------- -// TQueryMetrics -// --------------------------------------------------------------------------- - -TEST(QueryMetricsTest, UsesOtelStandardMetrics) { +TEST(OperationMetricsAliasesTest, QueryOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); + TStatCollector::TClientOperationStatCollector collector; + collector.SetExternalRegistry(registry); - NQuery::TQueryMetrics metrics(registry, "ExecuteQuery", TLog()); + NObservability::TOperationMetrics metrics(&collector, "ExecuteQuery", 
TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( @@ -237,14 +238,12 @@ TEST(QueryMetricsTest, UsesOtelStandardMetrics) { ); } -// --------------------------------------------------------------------------- -// TTableMetrics -// --------------------------------------------------------------------------- - -TEST(TableMetricsTest, UsesOtelStandardMetrics) { +TEST(OperationMetricsAliasesTest, TableOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); + TStatCollector::TClientOperationStatCollector collector; + collector.SetExternalRegistry(registry); - NTable::TTableMetrics metrics(registry, "ExecuteDataQuery", TLog()); + NObservability::TOperationMetrics metrics(&collector, "ExecuteDataQuery", TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( From 5e4ea264d02a0ad15a13972cc600c85e7286e6fd Mon Sep 17 00:00:00 2001 From: maladetska Date: Wed, 1 Apr 2026 14:54:05 +0300 Subject: [PATCH 11/17] add cloned log_lazy --- src/client/impl/internal/common/log_lazy.h | 10 ++++++++++ src/client/impl/observability/operation_metrics.cpp | 2 +- src/client/impl/observability/operation_span.cpp | 2 +- src/client/impl/stats/stats.h | 6 ++++++ 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 src/client/impl/internal/common/log_lazy.h diff --git a/src/client/impl/internal/common/log_lazy.h b/src/client/impl/internal/common/log_lazy.h new file mode 100644 index 00000000000..0635ef2cc89 --- /dev/null +++ b/src/client/impl/internal/common/log_lazy.h @@ -0,0 +1,10 @@ +#pragma once + +#ifdef LOG_LAZY +#error log macro redefinition +#endif + +#define LOG_LAZY(log, priority, message) \ + if (log.IsOpen() && log.FiltrationLevel() >= priority) { \ + log.Write(priority, message); \ + } diff --git a/src/client/impl/observability/operation_metrics.cpp b/src/client/impl/observability/operation_metrics.cpp index 9ef3f367c04..e733a65bbd8 100644 --- a/src/client/impl/observability/operation_metrics.cpp +++ b/src/client/impl/observability/operation_metrics.cpp @@ 
-1,6 +1,6 @@ #include "operation_metrics.h" -#include +#include #include diff --git a/src/client/impl/observability/operation_span.cpp b/src/client/impl/observability/operation_span.cpp index 300d4ff9801..750e52749df 100644 --- a/src/client/impl/observability/operation_span.cpp +++ b/src/client/impl/observability/operation_span.cpp @@ -1,6 +1,6 @@ #include "operation_span.h" -#include +#include #include diff --git a/src/client/impl/stats/stats.h b/src/client/impl/stats/stats.h index 3d11004ebed..504bab61023 100644 --- a/src/client/impl/stats/stats.h +++ b/src/client/impl/stats/stats.h @@ -256,6 +256,12 @@ struct TStatCollector { })->Inc(); } if (ExternalRegistry_) { + ExternalRegistry_->Counter( + "db.client.operation.errors", + {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, + "Number of database client operations that failed.", + "{error}" + ); ExternalRegistry_->Counter( "db.client.operation.requests", {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, From 947b4cac0caa7ef0204a95487ec0741ef293d752 Mon Sep 17 00:00:00 2001 From: maladetska Date: Wed, 1 Apr 2026 17:24:06 +0300 Subject: [PATCH 12/17] fix SafeLogMetricsError, SafeLogSpanError --- .../impl/observability/operation_metrics.cpp | 12 ++++++++---- .../impl/observability/operation_span.cpp | 18 +++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/client/impl/observability/operation_metrics.cpp b/src/client/impl/observability/operation_metrics.cpp index e733a65bbd8..e326658b4ba 100644 --- a/src/client/impl/observability/operation_metrics.cpp +++ b/src/client/impl/observability/operation_metrics.cpp @@ -8,10 +8,14 @@ namespace NYdb::inline V3::NObservability { namespace { -void SafeLogMetricsError(TLog& log, const char* message) noexcept { +void SafeLogMetricsError(TLog& log, const char* message, std::exception_ptr exception) noexcept { try { + if (!exception) { + LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + 
message + ": (no active exception)"); + return; + } try { - std::rethrow_exception(std::current_exception()); + std::rethrow_exception(exception); } catch (const std::exception& e) { LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": " + e.what()); return; @@ -38,7 +42,7 @@ TOperationMetrics::TOperationMetrics(NSdkStats::TStatCollector::TClientOperation Collector_->IncRequestCount(operationName); StartTime_ = std::chrono::steady_clock::now(); } catch (...) { - SafeLogMetricsError(Log_, "failed to initialize metrics"); + SafeLogMetricsError(Log_, "failed to initialize metrics", std::current_exception()); Collector_ = nullptr; } } @@ -63,7 +67,7 @@ void TOperationMetrics::End(EStatus status) noexcept { Collector_->RecordLatency(OperationName_, durationSec, status); Collector_->IncErrorCount(OperationName_, status); } catch (...) { - SafeLogMetricsError(Log_, "failed to record metrics"); + SafeLogMetricsError(Log_, "failed to record metrics", std::current_exception()); } } diff --git a/src/client/impl/observability/operation_span.cpp b/src/client/impl/observability/operation_span.cpp index 750e52749df..57edf997286 100644 --- a/src/client/impl/observability/operation_span.cpp +++ b/src/client/impl/observability/operation_span.cpp @@ -44,10 +44,14 @@ void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { } } -void SafeLogSpanError(TLog& log, const char* message) noexcept { +void SafeLogSpanError(TLog& log, const char* message, std::exception_ptr exception) noexcept { try { + if (!exception) { + LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": (no active exception)"); + return; + } try { - std::rethrow_exception(std::current_exception()); + std::rethrow_exception(exception); } catch (const std::exception& e) { LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": " + e.what()); return; @@ -83,7 +87,7 @@ TOperationSpan::TOperationSpan(std::shared_ptr tracer 
Span_->SetAttribute("server.address", host); Span_->SetAttribute("server.port", static_cast(port)); } catch (...) { - SafeLogSpanError(Log_, "failed to initialize span"); + SafeLogSpanError(Log_, "failed to initialize span", std::current_exception()); Span_.reset(); } } @@ -93,7 +97,7 @@ TOperationSpan::~TOperationSpan() noexcept { try { Span_->End(); } catch (...) { - SafeLogSpanError(Log_, "failed to end span"); + SafeLogSpanError(Log_, "failed to end span", std::current_exception()); } } } @@ -109,7 +113,7 @@ void TOperationSpan::SetPeerEndpoint(const std::string& endpoint) noexcept { Span_->SetAttribute("network.peer.address", host); Span_->SetAttribute("network.peer.port", static_cast(port)); } catch (...) { - SafeLogSpanError(Log_, "failed to set peer endpoint"); + SafeLogSpanError(Log_, "failed to set peer endpoint", std::current_exception()); } } @@ -120,7 +124,7 @@ void TOperationSpan::AddEvent(const std::string& name, const std::mapAddEvent(name, attributes); } catch (...) { - SafeLogSpanError(Log_, "failed to add event"); + SafeLogSpanError(Log_, "failed to add event", std::current_exception()); } } @@ -133,7 +137,7 @@ void TOperationSpan::End(EStatus status) noexcept { } Span_->End(); } catch (...) 
{ - SafeLogSpanError(Log_, "failed to finalize span"); + SafeLogSpanError(Log_, "failed to finalize span", std::current_exception()); } Span_.reset(); } From f6a019eda8a203053d6dec87eeefcf4205c6040c Mon Sep 17 00:00:00 2001 From: maladetska Date: Thu, 2 Apr 2026 10:47:09 +0300 Subject: [PATCH 13/17] Rename TOperationMetrics/TOperationSpan -> TRequestMetrics/TRequestSpan, remove TQuerySpan and TQueryMetrics, modify TClientOperationStatCollector constructor --- src/client/impl/observability/CMakeLists.txt | 4 +- src/client/impl/observability/metrics.cpp | 74 +++++++++++++++++++ .../{operation_metrics.h => metrics.h} | 10 +-- .../impl/observability/operation_metrics.cpp | 74 ------------------- .../{operation_span.cpp => span.cpp} | 38 +++++----- .../{operation_span.h => span.h} | 8 +- src/client/impl/stats/stats.h | 17 +++-- src/client/query/client.cpp | 14 ++-- src/client/query/impl/query_spans.h | 17 ----- src/client/table/impl/table_client.cpp | 11 ++- src/client/table/impl/table_client.h | 8 +- src/client/table/impl/table_spans.h | 17 ----- tests/unit/client/CMakeLists.txt | 2 +- ...peration_metrics_ut.cpp => metrics_ut.cpp} | 68 +++++++++-------- 14 files changed, 163 insertions(+), 199 deletions(-) create mode 100644 src/client/impl/observability/metrics.cpp rename src/client/impl/observability/{operation_metrics.h => metrics.h} (69%) delete mode 100644 src/client/impl/observability/operation_metrics.cpp rename src/client/impl/observability/{operation_span.cpp => span.cpp} (64%) rename src/client/impl/observability/{operation_span.h => span.h} (81%) delete mode 100644 src/client/query/impl/query_spans.h delete mode 100644 src/client/table/impl/table_spans.h rename tests/unit/client/observability/{operation_metrics_ut.cpp => metrics_ut.cpp} (74%) diff --git a/src/client/impl/observability/CMakeLists.txt b/src/client/impl/observability/CMakeLists.txt index 264c5abd87e..d7e89775717 100644 --- a/src/client/impl/observability/CMakeLists.txt +++ 
b/src/client/impl/observability/CMakeLists.txt @@ -7,8 +7,8 @@ target_link_libraries(impl-observability PUBLIC ) target_sources(impl-observability PRIVATE - operation_metrics.cpp - operation_span.cpp + metrics.cpp + span.cpp ) _ydb_sdk_install_targets(TARGETS impl-observability) diff --git a/src/client/impl/observability/metrics.cpp b/src/client/impl/observability/metrics.cpp new file mode 100644 index 00000000000..01a96f2cc58 --- /dev/null +++ b/src/client/impl/observability/metrics.cpp @@ -0,0 +1,74 @@ +#include "metrics.h" + +#include + +#include + +namespace NYdb::inline V3::NObservability { + +namespace { + +void SafeLogRequestMetricsError(TLog& log, const char* message, std::exception_ptr exception) noexcept { + try { + if (!exception) { + LOG_LAZY(log, TLOG_ERR, std::string("TRequestMetrics: ") + message + ": (no active exception)"); + return; + } + try { + std::rethrow_exception(exception); + } catch (const std::exception& e) { + LOG_LAZY(log, TLOG_ERR, std::string("TRequestMetrics: ") + message + ": " + e.what()); + return; + } catch (...) { + } + LOG_LAZY(log, TLOG_ERR, std::string("TRequestMetrics: ") + message + ": (unknown)"); + } catch (...) { + } +} + +} // namespace + +TRequestMetrics::TRequestMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector + , const std::string& requestName + , const TLog& log +) : Collector_(operationCollector) + , RequestName_(requestName) + , Log_(log) +{ + if (!Collector_) { + return; + } + try { + Collector_->IncRequestCount(requestName); + StartTime_ = std::chrono::steady_clock::now(); + } catch (...) 
{ + SafeLogRequestMetricsError(Log_, "failed to initialize metrics", std::current_exception()); + Collector_ = nullptr; + } +} + +TRequestMetrics::~TRequestMetrics() noexcept { + End(EStatus::CLIENT_INTERNAL_ERROR); +} + +void TRequestMetrics::End(EStatus status) noexcept { + if (Ended_) { + return; + } + Ended_ = true; + + if (!Collector_) { + return; + } + + try { + auto elapsed = std::chrono::steady_clock::now() - StartTime_; + double durationSec = std::chrono::duration(elapsed).count(); + Collector_->RecordLatency(RequestName_, durationSec, status); + Collector_->IncErrorCount(RequestName_, status); + } catch (...) { + SafeLogRequestMetricsError(Log_, "failed to record metrics", std::current_exception()); + } +} + +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/operation_metrics.h b/src/client/impl/observability/metrics.h similarity index 69% rename from src/client/impl/observability/operation_metrics.h rename to src/client/impl/observability/metrics.h index 70067d8333a..07c91a08f30 100644 --- a/src/client/impl/observability/operation_metrics.h +++ b/src/client/impl/observability/metrics.h @@ -10,19 +10,19 @@ namespace NYdb::inline V3::NObservability { -class TOperationMetrics { +class TRequestMetrics { public: - TOperationMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector - , const std::string& operationName + TRequestMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector + , const std::string& requestName , const TLog& log ); - ~TOperationMetrics() noexcept; + ~TRequestMetrics() noexcept; void End(EStatus status) noexcept; private: NSdkStats::TStatCollector::TClientOperationStatCollector* Collector_ = nullptr; - std::string OperationName_; + std::string RequestName_; std::chrono::steady_clock::time_point StartTime_{}; bool Ended_ = false; TLog Log_; diff --git a/src/client/impl/observability/operation_metrics.cpp 
b/src/client/impl/observability/operation_metrics.cpp deleted file mode 100644 index e326658b4ba..00000000000 --- a/src/client/impl/observability/operation_metrics.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "operation_metrics.h" - -#include - -#include - -namespace NYdb::inline V3::NObservability { - -namespace { - -void SafeLogMetricsError(TLog& log, const char* message, std::exception_ptr exception) noexcept { - try { - if (!exception) { - LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": (no active exception)"); - return; - } - try { - std::rethrow_exception(exception); - } catch (const std::exception& e) { - LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": " + e.what()); - return; - } catch (...) { - } - LOG_LAZY(log, TLOG_ERR, std::string("TOperationMetrics: ") + message + ": (unknown)"); - } catch (...) { - } -} - -} // namespace - -TOperationMetrics::TOperationMetrics(NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector - , const std::string& operationName - , const TLog& log -) : Collector_(operationCollector) - , OperationName_(operationName) - , Log_(log) -{ - if (!Collector_) { - return; - } - try { - Collector_->IncRequestCount(operationName); - StartTime_ = std::chrono::steady_clock::now(); - } catch (...) { - SafeLogMetricsError(Log_, "failed to initialize metrics", std::current_exception()); - Collector_ = nullptr; - } -} - -TOperationMetrics::~TOperationMetrics() noexcept { - End(EStatus::CLIENT_INTERNAL_ERROR); -} - -void TOperationMetrics::End(EStatus status) noexcept { - if (Ended_) { - return; - } - Ended_ = true; - - if (!Collector_) { - return; - } - - try { - auto elapsed = std::chrono::steady_clock::now() - StartTime_; - double durationSec = std::chrono::duration(elapsed).count(); - Collector_->RecordLatency(OperationName_, durationSec, status); - Collector_->IncErrorCount(OperationName_, status); - } catch (...) 
{ - SafeLogMetricsError(Log_, "failed to record metrics", std::current_exception()); - } -} - -} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/operation_span.cpp b/src/client/impl/observability/span.cpp similarity index 64% rename from src/client/impl/observability/operation_span.cpp rename to src/client/impl/observability/span.cpp index 57edf997286..39eca5bd0ca 100644 --- a/src/client/impl/observability/operation_span.cpp +++ b/src/client/impl/observability/span.cpp @@ -1,4 +1,4 @@ -#include "operation_span.h" +#include "span.h" #include @@ -44,28 +44,28 @@ void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { } } -void SafeLogSpanError(TLog& log, const char* message, std::exception_ptr exception) noexcept { +void SafeLogRequestSpanError(TLog& log, const char* message, std::exception_ptr exception) noexcept { try { if (!exception) { - LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": (no active exception)"); + LOG_LAZY(log, TLOG_ERR, std::string("TRequestSpan: ") + message + ": (no active exception)"); return; } try { std::rethrow_exception(exception); } catch (const std::exception& e) { - LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": " + e.what()); + LOG_LAZY(log, TLOG_ERR, std::string("TRequestSpan: ") + message + ": " + e.what()); return; } catch (...) { } - LOG_LAZY(log, TLOG_ERR, std::string("TOperationSpan: ") + message + ": (unknown)"); + LOG_LAZY(log, TLOG_ERR, std::string("TRequestSpan: ") + message + ": (unknown)"); } catch (...) 
{ } } } // namespace -TOperationSpan::TOperationSpan(std::shared_ptr tracer - , const std::string& operationName +TRequestSpan::TRequestSpan(std::shared_ptr tracer + , const std::string& requestName , const std::string& endpoint , const TLog& log ) : Log_(log) { @@ -78,31 +78,31 @@ TOperationSpan::TOperationSpan(std::shared_ptr tracer ParseEndpoint(endpoint, host, port); try { - Span_ = tracer->StartSpan(operationName, NTrace::ESpanKind::CLIENT); + Span_ = tracer->StartSpan(requestName, NTrace::ESpanKind::CLIENT); if (!Span_) { return; } Span_->SetAttribute("db.system.name", "other_sql"); - Span_->SetAttribute("db.operation.name", operationName); + Span_->SetAttribute("db.operation.name", requestName); Span_->SetAttribute("server.address", host); Span_->SetAttribute("server.port", static_cast(port)); } catch (...) { - SafeLogSpanError(Log_, "failed to initialize span", std::current_exception()); + SafeLogRequestSpanError(Log_, "failed to initialize span", std::current_exception()); Span_.reset(); } } -TOperationSpan::~TOperationSpan() noexcept { +TRequestSpan::~TRequestSpan() noexcept { if (Span_) { try { Span_->End(); } catch (...) { - SafeLogSpanError(Log_, "failed to end span", std::current_exception()); + SafeLogRequestSpanError(Log_, "failed to end span", std::current_exception()); } } } -void TOperationSpan::SetPeerEndpoint(const std::string& endpoint) noexcept { +void TRequestSpan::SetPeerEndpoint(const std::string& endpoint) noexcept { if (!Span_ || endpoint.empty()) { return; } @@ -113,31 +113,31 @@ void TOperationSpan::SetPeerEndpoint(const std::string& endpoint) noexcept { Span_->SetAttribute("network.peer.address", host); Span_->SetAttribute("network.peer.port", static_cast(port)); } catch (...) 
{ - SafeLogSpanError(Log_, "failed to set peer endpoint", std::current_exception()); + SafeLogRequestSpanError(Log_, "failed to set peer endpoint", std::current_exception()); } } -void TOperationSpan::AddEvent(const std::string& name, const std::map& attributes) noexcept { +void TRequestSpan::AddEvent(const std::string& name, const std::map& attributes) noexcept { if (!Span_) { return; } try { Span_->AddEvent(name, attributes); } catch (...) { - SafeLogSpanError(Log_, "failed to add event", std::current_exception()); + SafeLogRequestSpanError(Log_, "failed to add event", std::current_exception()); } } -void TOperationSpan::End(EStatus status) noexcept { +void TRequestSpan::End(EStatus status) noexcept { if (Span_) { try { - Span_->SetAttribute("db.response.status_code", static_cast(status)); + Span_->SetAttribute("db.response.status_code", ToString(status)); if (status != EStatus::SUCCESS) { Span_->SetAttribute("error.type", ToString(status)); } Span_->End(); } catch (...) { - SafeLogSpanError(Log_, "failed to finalize span", std::current_exception()); + SafeLogRequestSpanError(Log_, "failed to finalize span", std::current_exception()); } Span_.reset(); } diff --git a/src/client/impl/observability/operation_span.h b/src/client/impl/observability/span.h similarity index 81% rename from src/client/impl/observability/operation_span.h rename to src/client/impl/observability/span.h index f2925c945e4..09ec8923e62 100644 --- a/src/client/impl/observability/operation_span.h +++ b/src/client/impl/observability/span.h @@ -12,14 +12,14 @@ namespace NYdb::inline V3::NObservability { -class TOperationSpan { +class TRequestSpan { public: - TOperationSpan(std::shared_ptr tracer - , const std::string& operationName + TRequestSpan(std::shared_ptr tracer + , const std::string& requestName , const std::string& endpoint , const TLog& log ); - ~TOperationSpan() noexcept; + ~TRequestSpan() noexcept; void SetPeerEndpoint(const std::string& endpoint) noexcept; void AddEvent(const 
std::string& name, const std::map& attributes = {}) noexcept; diff --git a/src/client/impl/stats/stats.h b/src/client/impl/stats/stats.h index 504bab61023..ee87589be95 100644 --- a/src/client/impl/stats/stats.h +++ b/src/client/impl/stats/stats.h @@ -236,16 +236,14 @@ struct TStatCollector { TClientOperationStatCollector(::NMonitoring::TMetricRegistry* registry, const std::string& database, - const std::string& clientType) + const std::string& clientType, + std::shared_ptr externalRegistry = {}) : MetricRegistry_(registry) + , ExternalRegistry_(std::move(externalRegistry)) , Database_(database) , ClientType_(clientType) {} - void SetExternalRegistry(const std::shared_ptr& externalRegistry) { - ExternalRegistry_ = externalRegistry; - } - void IncRequestCount(const std::string& operationName) { if (auto registry = MetricRegistry_.Get()) { registry->Rate({ @@ -462,7 +460,10 @@ struct TStatCollector { return TSessionPoolStatCollector(); } - TClientStatCollector GetClientStatCollector(const std::string& clientType) { + TClientStatCollector GetClientStatCollector( + const std::string& clientType, + std::shared_ptr externalMetricRegistry = {}) + { if (auto registry = MetricRegistryPtr_.Get()) { ::NMonitoring::TRate* cacheMiss = nullptr; ::NMonitoring::TRate* sessionRemovedDueBalancing = nullptr; @@ -484,12 +485,12 @@ struct TStatCollector { return TClientStatCollector(cacheMiss, querySize, paramsSize, sessionRemovedDueBalancing, requestMigrated, TClientRetryOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType), - TClientOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType)); + TClientOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType, std::move(externalMetricRegistry))); } return TClientStatCollector(nullptr, nullptr, nullptr, nullptr, nullptr, TClientRetryOperationStatCollector(nullptr, Database_, clientType), - TClientOperationStatCollector(nullptr, Database_, clientType)); + TClientOperationStatCollector(nullptr, 
Database_, clientType, std::move(externalMetricRegistry))); } bool IsCollecting() { diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index 9a5b89583ef..e707f391a02 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -14,9 +14,9 @@ #include #include -#include +#include +#include #include -#include #include #include @@ -26,7 +26,8 @@ namespace NYdb::inline V3::NQuery { -using TQueryMetrics = NObservability::TOperationMetrics; +using TQueryMetrics = NObservability::TRequestMetrics; +using TQuerySpan = NObservability::TRequestSpan; using TRetryContextResultAsync = NRetry::Async::TRetryContext; using TRetryContextAsync = NRetry::Async::TRetryContext; @@ -69,7 +70,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { - SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query")); + SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query", Connections_->GetExternalMetricRegistry())); SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Query")); if (auto traceProvider = Connections_->GetTraceProvider()) { @@ -86,7 +87,6 @@ class TQueryClient::TImpl: public TClientImplCommon, public ParamsSizeHistogram_.Set(collector.ParamsSize); RetryOperationStatCollector_ = collector.RetryOperationStatCollector; OperationStatCollector_ = collector.OperationStatCollector; - OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); } TAsyncExecuteQueryIterator StreamExecuteQuery(const std::string& query, const TTxControl& txControl, @@ -557,8 +557,8 @@ class TQueryClient::TImpl: public TClientImplCommon, public std::shared_ptr Metrics; }; - auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "CreateSession", 
DbDriverState_->Log); + auto span = std::make_shared(Tracer_, "GetSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "GetSession", DbDriverState_->Log); auto ctx = std::make_unique(shared_from_this(), settings, span, metrics); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); diff --git a/src/client/query/impl/query_spans.h b/src/client/query/impl/query_spans.h deleted file mode 100644 index 61b7f75ade8..00000000000 --- a/src/client/query/impl/query_spans.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - -namespace NYdb::inline V3::NQuery { - -class TQuerySpan : public NObservability::TOperationSpan { -public: - TQuerySpan(std::shared_ptr tracer - , const std::string& operationName - , const std::string& endpoint - , const TLog& log - ) : TOperationSpan(std::move(tracer), operationName, endpoint, log) - {} -}; - -} // namespace NYdb::NQuery diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index 6db36b92a27..cc2bcf92f3d 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -6,7 +6,8 @@ namespace NTable { using namespace NThreading; -using TTableMetrics = NObservability::TOperationMetrics; +using TTableMetrics = NObservability::TRequestMetrics; +using TTableSpan = NObservability::TRequestSpan; const TKeepAliveSettings TTableClient::TImpl::KeepAliveSettings = TKeepAliveSettings().ClientTimeout(KEEP_ALIVE_CLIENT_TIMEOUT); @@ -24,9 +25,8 @@ TTableClient::TImpl::TImpl(std::shared_ptr&& connections, , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { - auto clientCollector = DbDriverState_->StatCollector.GetClientStatCollector("Table"); + auto clientCollector = DbDriverState_->StatCollector.GetClientStatCollector("Table", Connections_->GetExternalMetricRegistry()); OperationStatCollector_ = clientCollector.OperationStatCollector; - 
OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); if (auto traceProvider = Connections_->GetTraceProvider()) { Tracer_ = traceProvider->GetTracer("ydb-cpp-sdk-table"); @@ -388,8 +388,8 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio auto createSessionPromise = NewPromise(); auto self = shared_from_this(); - auto metrics = std::make_shared(&OperationStatCollector_, "CreateSession", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "CreateSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "GetSession", DbDriverState_->Log); + auto span = std::make_shared(Tracer_, "GetSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); auto createSessionExtractor = [createSessionPromise, self, standalone, metrics, span] (google::protobuf::Any* any, TPlainStatus status) mutable { @@ -1145,7 +1145,6 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli RetryOperationStatCollector = collector.RetryOperationStatCollector; SessionRemovedDueBalancing.Set(collector.SessionRemovedDueBalancing); OperationStatCollector_ = collector.OperationStatCollector; - OperationStatCollector_.SetExternalRegistry(Connections_->GetExternalMetricRegistry()); } TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index a82ba14caa1..7675d6c3e89 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -4,7 +4,8 @@ #include #include #include -#include +#include +#include #include #include #undef INCLUDE_YDB_INTERNAL_H @@ -18,7 +19,6 @@ #include "data_query.h" #include "request_migrator.h" #include "readers.h" -#include "table_spans.h" #include @@ -239,8 +239,8 @@ class TTableClient::TImpl: public 
TClientImplCommon, public auto promise = NewPromise(); bool keepInCache = settings.KeepInQueryCache_ && settings.KeepInQueryCache_.value(); - auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteDataQuery", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteDataQuery", DbDriverState_->Log); + auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); // We don't want to delay call of TSession dtor, so we can't capture it by copy diff --git a/src/client/table/impl/table_spans.h b/src/client/table/impl/table_spans.h deleted file mode 100644 index 94f3ea8e87f..00000000000 --- a/src/client/table/impl/table_spans.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - -namespace NYdb::inline V3::NTable { - -class TTableSpan : public NObservability::TOperationSpan { -public: - TTableSpan(std::shared_ptr tracer - , const std::string& operationName - , const std::string& endpoint - , const TLog& log - ) : TOperationSpan(std::move(tracer), operationName, endpoint, log) - {} -}; - -} // namespace NYdb::NTable diff --git a/tests/unit/client/CMakeLists.txt b/tests/unit/client/CMakeLists.txt index d27649e3fd3..8cbc4b16ab8 100644 --- a/tests/unit/client/CMakeLists.txt +++ b/tests/unit/client/CMakeLists.txt @@ -105,7 +105,7 @@ add_ydb_test(NAME client-ydb_metrics_ut GTEST INCLUDE_DIRS ${YDB_SDK_SOURCE_DIR} SOURCES - observability/operation_metrics_ut.cpp + observability/metrics_ut.cpp LINK_LIBRARIES yutil impl-observability diff --git a/tests/unit/client/observability/operation_metrics_ut.cpp b/tests/unit/client/observability/metrics_ut.cpp similarity index 74% rename from tests/unit/client/observability/operation_metrics_ut.cpp rename to tests/unit/client/observability/metrics_ut.cpp index ddb4f4592da..036eea92168 100644 --- 
a/tests/unit/client/observability/operation_metrics_ut.cpp +++ b/tests/unit/client/observability/metrics_ut.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -12,14 +12,14 @@ using namespace NYdb::NTests; using namespace NYdb::NSdkStats; // --------------------------------------------------------------------------- -// TOperationMetrics (shared logic) +// TRequestMetrics (shared logic) // --------------------------------------------------------------------------- -class OperationMetricsTest : public ::testing::Test { +class RequestMetricsTest : public ::testing::Test { protected: void SetUp() override { Registry = std::make_shared(); - OpCollector.SetExternalRegistry(Registry); + OpCollector = TStatCollector::TClientOperationStatCollector(nullptr, "", "", Registry); } std::shared_ptr RequestCounter(const std::string& op) { @@ -52,17 +52,17 @@ class OperationMetricsTest : public ::testing::Test { std::shared_ptr Registry; }; -TEST_F(OperationMetricsTest, RequestCounterIncrementedOnConstruction) { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); +TEST_F(RequestMetricsTest, RequestCounterIncrementedOnConstruction) { + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); auto counter = RequestCounter("DoSomething"); ASSERT_NE(counter, nullptr); EXPECT_EQ(counter->Get(), 1); } -TEST_F(OperationMetricsTest, SuccessDoesNotIncrementErrorCounter) { +TEST_F(RequestMetricsTest, SuccessDoesNotIncrementErrorCounter) { { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -71,9 +71,9 @@ TEST_F(OperationMetricsTest, SuccessDoesNotIncrementErrorCounter) { EXPECT_EQ(errors->Get(), 0); } -TEST_F(OperationMetricsTest, FailureIncrementsErrorCounter) { +TEST_F(RequestMetricsTest, FailureIncrementsErrorCounter) { { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&OpCollector, 
"DoSomething", TLog()); metrics.End(EStatus::UNAVAILABLE); } @@ -82,9 +82,9 @@ TEST_F(OperationMetricsTest, FailureIncrementsErrorCounter) { EXPECT_EQ(errors->Get(), 1); } -TEST_F(OperationMetricsTest, DurationRecordedOnEnd) { +TEST_F(RequestMetricsTest, DurationRecordedOnEnd) { { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -94,9 +94,9 @@ TEST_F(OperationMetricsTest, DurationRecordedOnEnd) { EXPECT_GE(hist->GetValues()[0], 0.0); } -TEST_F(OperationMetricsTest, DurationIsInSeconds) { +TEST_F(RequestMetricsTest, DurationIsInSeconds) { { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); } @@ -105,8 +105,8 @@ TEST_F(OperationMetricsTest, DurationIsInSeconds) { EXPECT_LT(hist->GetValues()[0], 1.0); } -TEST_F(OperationMetricsTest, DoubleEndIsIdempotent) { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); +TEST_F(RequestMetricsTest, DoubleEndIsIdempotent) { + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); metrics.End(EStatus::INTERNAL_ERROR); @@ -119,9 +119,9 @@ TEST_F(OperationMetricsTest, DoubleEndIsIdempotent) { EXPECT_EQ(hist->Count(), 1u); } -TEST_F(OperationMetricsTest, DestructorCallsEndWithClientInternalError) { +TEST_F(RequestMetricsTest, DestructorCallsEndWithClientInternalError) { { - TOperationMetrics metrics(&OpCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&OpCollector, "DoSomething", TLog()); } auto requests = RequestCounter("DoSomething"); @@ -137,21 +137,21 @@ TEST_F(OperationMetricsTest, DestructorCallsEndWithClientInternalError) { EXPECT_EQ(hist->Count(), 1u); } -TEST_F(OperationMetricsTest, NullRegistryDoesNotCrash) { +TEST_F(RequestMetricsTest, NullRegistryDoesNotCrash) { EXPECT_NO_THROW({ TStatCollector::TClientOperationStatCollector 
nullCollector; - TOperationMetrics metrics(&nullCollector, "DoSomething", TLog()); + TRequestMetrics metrics(&nullCollector, "DoSomething", TLog()); metrics.End(EStatus::SUCCESS); }); } -TEST_F(OperationMetricsTest, DifferentOperationsHaveSeparateMetrics) { +TEST_F(RequestMetricsTest, DifferentOperationsHaveSeparateMetrics) { { - TOperationMetrics m1(&OpCollector, "OpA", TLog()); + TRequestMetrics m1(&OpCollector, "OpA", TLog()); m1.End(EStatus::SUCCESS); } { - TOperationMetrics m2(&OpCollector, "OpB", TLog()); + TRequestMetrics m2(&OpCollector, "OpB", TLog()); m2.End(EStatus::OVERLOADED); } @@ -163,9 +163,9 @@ TEST_F(OperationMetricsTest, DifferentOperationsHaveSeparateMetrics) { EXPECT_EQ(DurationHistogram("OpB", EStatus::OVERLOADED)->Count(), 1u); } -TEST_F(OperationMetricsTest, MultipleRequestsAccumulate) { +TEST_F(RequestMetricsTest, MultipleRequestsAccumulate) { for (int i = 0; i < 5; ++i) { - TOperationMetrics metrics(&OpCollector, "Op", TLog()); + TRequestMetrics metrics(&OpCollector, "Op", TLog()); metrics.End(i % 2 == 0 ? 
EStatus::SUCCESS : EStatus::TIMEOUT); } @@ -175,7 +175,7 @@ TEST_F(OperationMetricsTest, MultipleRequestsAccumulate) { EXPECT_EQ(DurationHistogram("Op", EStatus::TIMEOUT)->Count(), 2u); } -TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { +TEST_F(RequestMetricsTest, AllErrorStatusesIncrementErrorCounter) { std::vector errorStatuses = { EStatus::BAD_REQUEST, EStatus::UNAUTHORIZED, @@ -188,7 +188,7 @@ TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { }; for (auto status : errorStatuses) { - TOperationMetrics metrics(&OpCollector, "Op", TLog()); + TRequestMetrics metrics(&OpCollector, "Op", TLog()); metrics.End(status); } @@ -197,12 +197,11 @@ TEST_F(OperationMetricsTest, AllErrorStatusesIncrementErrorCounter) { EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); } -TEST(OperationMetricsAliasesTest, QueryOperationsUseOtelStandardMetrics) { +TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); - TStatCollector::TClientOperationStatCollector collector; - collector.SetExternalRegistry(registry); + TStatCollector::TClientOperationStatCollector collector(nullptr, "", "", registry); - NObservability::TOperationMetrics metrics(&collector, "ExecuteQuery", TLog()); + NObservability::TRequestMetrics metrics(&collector, "ExecuteQuery", TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( @@ -238,12 +237,11 @@ TEST(OperationMetricsAliasesTest, QueryOperationsUseOtelStandardMetrics) { ); } -TEST(OperationMetricsAliasesTest, TableOperationsUseOtelStandardMetrics) { +TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); - TStatCollector::TClientOperationStatCollector collector; - collector.SetExternalRegistry(registry); + TStatCollector::TClientOperationStatCollector collector(nullptr, "", "", registry); - NObservability::TOperationMetrics metrics(&collector, "ExecuteDataQuery", TLog()); + 
NObservability::TRequestMetrics metrics(&collector, "ExecuteDataQuery", TLog()); metrics.End(EStatus::SUCCESS); EXPECT_NE( From 5f8a13e37a4089cd5227fec0129274fda9403cf8 Mon Sep 17 00:00:00 2001 From: maladetska Date: Thu, 2 Apr 2026 22:02:54 +0300 Subject: [PATCH 14/17] Create TRequestObservation, add ydbClientType to avoid metrics confusion, other_sql -> ydb --- .../impl/internal/common/ydb_client_api.h | 25 +++++ src/client/impl/observability/CMakeLists.txt | 1 + src/client/impl/observability/observation.cpp | 34 ++++++ src/client/impl/observability/observation.h | 30 ++++++ src/client/impl/observability/span.cpp | 5 +- src/client/impl/observability/span.h | 1 + src/client/impl/stats/CMakeLists.txt | 1 + src/client/impl/stats/stats.h | 32 ++++-- src/client/query/client.cpp | 100 ++++++++---------- src/client/table/impl/table_client.cpp | 61 ++++------- src/client/table/impl/table_client.h | 23 ++-- tests/integration/metrics/main.cpp | 6 +- .../unit/client/observability/metrics_ut.cpp | 41 ++++--- 13 files changed, 231 insertions(+), 129 deletions(-) create mode 100644 src/client/impl/internal/common/ydb_client_api.h create mode 100644 src/client/impl/observability/observation.cpp create mode 100644 src/client/impl/observability/observation.h diff --git a/src/client/impl/internal/common/ydb_client_api.h b/src/client/impl/internal/common/ydb_client_api.h new file mode 100644 index 00000000000..e9d789d044b --- /dev/null +++ b/src/client/impl/internal/common/ydb_client_api.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace NYdb::inline V3::NObservability { + +inline std::string YdbClientApiAttributeValue(const std::string& clientType) { + if (clientType == "Query") { + return "query"; + } + if (clientType == "Table") { + return "table"; + } + if (clientType.empty()) { + return "unspecified"; + } + std::string out; + out.reserve(clientType.size()); + for (unsigned char ch : clientType) { + out.push_back(static_cast(::tolower(static_cast(ch)))); + } + return 
out; +} + +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/CMakeLists.txt b/src/client/impl/observability/CMakeLists.txt index d7e89775717..ea060beda47 100644 --- a/src/client/impl/observability/CMakeLists.txt +++ b/src/client/impl/observability/CMakeLists.txt @@ -8,6 +8,7 @@ target_link_libraries(impl-observability PUBLIC target_sources(impl-observability PRIVATE metrics.cpp + observation.cpp span.cpp ) diff --git a/src/client/impl/observability/observation.cpp b/src/client/impl/observability/observation.cpp new file mode 100644 index 00000000000..9a27534eb5c --- /dev/null +++ b/src/client/impl/observability/observation.cpp @@ -0,0 +1,34 @@ +#include "observation.h" + +namespace NYdb::inline V3::NObservability { + +TRequestObservation::TRequestObservation(const std::string& ydbClientType + , NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector + , std::shared_ptr tracer + , const std::string& operationName + , const std::string& discoveryEndpoint + , const TLog& log +) : Span_(std::make_shared(std::move(tracer), operationName, discoveryEndpoint, log, ydbClientType)) + , Metrics_(std::make_shared(operationCollector, operationName, log)) +{} + +void TRequestObservation::SetPeerEndpoint(const std::string& endpoint) noexcept { + if (Span_) { + Span_->SetPeerEndpoint(endpoint); + } +} + +void TRequestObservation::End(EStatus status) noexcept { + if (Span_) { + Span_->End(status); + } + if (Metrics_) { + Metrics_->End(status); + } +} + +void TRequestObservation::EndWithClientInternalError() noexcept { + End(EStatus::CLIENT_INTERNAL_ERROR); +} + +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/observation.h b/src/client/impl/observability/observation.h new file mode 100644 index 00000000000..92610d11c0b --- /dev/null +++ b/src/client/impl/observability/observation.h @@ -0,0 +1,30 @@ +#pragma once + +#include "metrics.h" +#include "span.h" + +#include +#include + +namespace NYdb::inline 
V3::NObservability { + +class TRequestObservation { +public: + TRequestObservation(const std::string& ydbClientType + , NSdkStats::TStatCollector::TClientOperationStatCollector* operationCollector + , std::shared_ptr tracer + , const std::string& operationName + , const std::string& discoveryEndpoint + , const TLog& log + ); + + void SetPeerEndpoint(const std::string& endpoint) noexcept; + void End(EStatus status) noexcept; + void EndWithClientInternalError() noexcept; + +private: + std::shared_ptr Span_; + std::shared_ptr Metrics_; +}; + +} // namespace NYdb::NObservability diff --git a/src/client/impl/observability/span.cpp b/src/client/impl/observability/span.cpp index 39eca5bd0ca..635550e2380 100644 --- a/src/client/impl/observability/span.cpp +++ b/src/client/impl/observability/span.cpp @@ -1,6 +1,7 @@ #include "span.h" #include +#include #include @@ -68,6 +69,7 @@ TRequestSpan::TRequestSpan(std::shared_ptr tracer , const std::string& requestName , const std::string& endpoint , const TLog& log + , const std::string& ydbClientType ) : Log_(log) { if (!tracer) { return; @@ -82,8 +84,9 @@ TRequestSpan::TRequestSpan(std::shared_ptr tracer if (!Span_) { return; } - Span_->SetAttribute("db.system.name", "other_sql"); + Span_->SetAttribute("db.system.name", "ydb"); Span_->SetAttribute("db.operation.name", requestName); + Span_->SetAttribute("ydb.client.api", YdbClientApiAttributeValue(ydbClientType)); Span_->SetAttribute("server.address", host); Span_->SetAttribute("server.port", static_cast(port)); } catch (...) 
{ diff --git a/src/client/impl/observability/span.h b/src/client/impl/observability/span.h index 09ec8923e62..3c07423b68b 100644 --- a/src/client/impl/observability/span.h +++ b/src/client/impl/observability/span.h @@ -18,6 +18,7 @@ class TRequestSpan { , const std::string& requestName , const std::string& endpoint , const TLog& log + , const std::string& ydbClientType = {} ); ~TRequestSpan() noexcept; diff --git a/src/client/impl/stats/CMakeLists.txt b/src/client/impl/stats/CMakeLists.txt index 15866af4bc6..b3ebda44404 100644 --- a/src/client/impl/stats/CMakeLists.txt +++ b/src/client/impl/stats/CMakeLists.txt @@ -5,6 +5,7 @@ target_link_libraries(client-impl-ydb_stats PUBLIC grpc-client monlib-metrics client-metrics + impl-internal-common ) target_sources(client-impl-ydb_stats PRIVATE diff --git a/src/client/impl/stats/stats.h b/src/client/impl/stats/stats.h index ee87589be95..276ef9cd1d2 100644 --- a/src/client/impl/stats/stats.h +++ b/src/client/impl/stats/stats.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -254,18 +255,24 @@ struct TStatCollector { })->Inc(); } if (ExternalRegistry_) { - ExternalRegistry_->Counter( - "db.client.operation.errors", - {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, - "Number of database client operations that failed.", - "{error}" - ); + const std::string clientApi = NObservability::YdbClientApiAttributeValue(ClientType_); + NMetrics::TLabels labels = { + {"db.system.name", "ydb"}, + {"db.operation.name", operationName}, + {"ydb.client.api", clientApi}, + }; ExternalRegistry_->Counter( "db.client.operation.requests", - {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, + labels, "Number of database client operations started.", "{operation}" )->Inc(); + ExternalRegistry_->Counter( + "db.client.operation.errors", + labels, + "Number of database client operations that failed.", + "{error}" + ); } } @@ -283,9 +290,15 @@ struct TStatCollector { })->Inc(); } 
if (ExternalRegistry_) { + const std::string clientApi = NObservability::YdbClientApiAttributeValue(ClientType_); + NMetrics::TLabels labels = { + {"db.system.name", "ydb"}, + {"db.operation.name", operationName}, + {"ydb.client.api", clientApi}, + }; ExternalRegistry_->Counter( "db.client.operation.errors", - {{"db.system.name", "other_sql"}, {"db.operation.name", operationName}}, + labels, "Number of database client operations that failed.", "{error}" )->Inc(); @@ -304,8 +317,9 @@ struct TStatCollector { } if (ExternalRegistry_) { NMetrics::TLabels labels = { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", operationName}, + {"ydb.client.api", NObservability::YdbClientApiAttributeValue(ClientType_)}, {"db.response.status_code", TStringBuilder() << status}, }; if (status != EStatus::SUCCESS) { diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index e707f391a02..bceefe3f56b 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -14,8 +14,7 @@ #include #include -#include -#include +#include #include #include #include @@ -26,8 +25,7 @@ namespace NYdb::inline V3::NQuery { -using TQueryMetrics = NObservability::TRequestMetrics; -using TQuerySpan = NObservability::TRequestSpan; +using TQueryObservation = NObservability::TRequestObservation; using TRetryContextResultAsync = NRetry::Async::TRetryContext; using TRetryContextAsync = NRetry::Async::TRetryContext; @@ -105,21 +103,18 @@ class TQueryClient::TImpl: public TClientImplCommon, public CollectQuerySize(query); CollectParamsSize(params ? 
¶ms->GetProtoMap() : nullptr); - auto span = std::make_shared(Tracer_, "ExecuteQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteQuery", DbDriverState_->Log); + auto obs = MakeObservation("ExecuteQuery"); return TExecQueryImpl::ExecuteQuery( Connections_, DbDriverState_, query, txControl, params, settings, session) - .Apply([span, metrics](TAsyncExecuteQueryResult future) { + .Apply([obs](TAsyncExecuteQueryResult future) { try { auto result = future.GetValue(); - span->SetPeerEndpoint(result.GetEndpoint()); - span->End(result.GetStatus()); - metrics->End(result.GetStatus()); + obs->SetPeerEndpoint(result.GetEndpoint()); + obs->End(result.GetStatus()); return result; } catch (...) { - span->End(EStatus::CLIENT_INTERNAL_ERROR); - metrics->End(EStatus::CLIENT_INTERNAL_ERROR); + obs->EndWithClientInternalError(); throw; } }); @@ -189,31 +184,27 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto span = std::make_shared(Tracer_, "Rollback", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "Rollback", DbDriverState_->Log); + auto obs = MakeObservation("Rollback"); - auto responseCb = [promise, session, span, metrics] + auto responseCb = [promise, session, obs] (Ydb::Query::RollbackTransactionResponse* response, TPlainStatus status) mutable { try { - span->SetPeerEndpoint(status.Endpoint); + obs->SetPeerEndpoint(status.Endpoint); if (response) { NYdb::NIssue::TIssues opIssues; NYdb::NIssue::IssuesFromMessage(response->issues(), opIssues); TStatus rollbackTxStatus(TPlainStatus{static_cast(response->status()), std::move(opIssues), status.Endpoint, std::move(status.Metadata)}); - span->End(rollbackTxStatus.GetStatus()); - metrics->End(rollbackTxStatus.GetStatus()); + obs->End(rollbackTxStatus.GetStatus()); promise.SetValue(std::move(rollbackTxStatus)); } else { - 
span->End(status.Status); - metrics->End(status.Status); + obs->End(status.Status); promise.SetValue(TStatus(std::move(status))); } } catch (...) { - span->End(EStatus::CLIENT_INTERNAL_ERROR); - metrics->End(EStatus::CLIENT_INTERNAL_ERROR); + obs->EndWithClientInternalError(); promise.SetException(std::current_exception()); } }; @@ -241,32 +232,28 @@ class TQueryClient::TImpl: public TClientImplCommon, public auto promise = NThreading::NewPromise(); - auto span = std::make_shared(Tracer_, "Commit", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "Commit", DbDriverState_->Log); + auto obs = MakeObservation("Commit"); - auto responseCb = [promise, session, span, metrics] + auto responseCb = [promise, session, obs] (Ydb::Query::CommitTransactionResponse* response, TPlainStatus status) mutable { try { - span->SetPeerEndpoint(status.Endpoint); + obs->SetPeerEndpoint(status.Endpoint); if (response) { NYdb::NIssue::TIssues opIssues; NYdb::NIssue::IssuesFromMessage(response->issues(), opIssues); TStatus commitTxStatus(TPlainStatus{static_cast(response->status()), std::move(opIssues), status.Endpoint, std::move(status.Metadata)}); - span->End(commitTxStatus.GetStatus()); - metrics->End(commitTxStatus.GetStatus()); + obs->End(commitTxStatus.GetStatus()); TCommitTransactionResult commitTxResult(std::move(commitTxStatus)); promise.SetValue(std::move(commitTxResult)); } else { - span->End(status.Status); - metrics->End(status.Status); + obs->End(status.Status); promise.SetValue(TCommitTransactionResult(TStatus(std::move(status)))); } } catch (...) 
{ - span->End(EStatus::CLIENT_INTERNAL_ERROR); - metrics->End(EStatus::CLIENT_INTERNAL_ERROR); + obs->EndWithClientInternalError(); promise.SetException(std::current_exception()); } }; @@ -475,12 +462,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public class TQueryClientGetSessionCtx : public NSessionPool::IGetSessionCtx { public: TQueryClientGetSessionCtx(std::shared_ptr client, const TCreateSessionSettings& settings, - std::shared_ptr span, std::shared_ptr metrics) + std::shared_ptr observation) : Promise(NThreading::NewPromise()) , Client(client) , RpcSettings(TRpcRequestSettings::Make(settings)) - , Span(span) - , Metrics(metrics) + , Observation(std::move(observation)) {} TAsyncCreateSessionResult GetFuture() { @@ -489,11 +475,8 @@ class TQueryClient::TImpl: public TClientImplCommon, public void ReplyError(TStatus status) override { TSession session; - if (Span) { - Span->End(status.GetStatus()); - } - if (Metrics) { - Metrics->End(status.GetStatus()); + if (Observation) { + Observation->End(status.GetStatus()); } ScheduleReply(TCreateSessionResult(std::move(status), std::move(session))); } @@ -507,26 +490,20 @@ class TQueryClient::TImpl: public TClientImplCommon, public ) ); - if (Span) { - Span->End(EStatus::SUCCESS); - } - if (Metrics) { - Metrics->End(EStatus::SUCCESS); + if (Observation) { + Observation->End(EStatus::SUCCESS); } ScheduleReply(std::move(val)); } void ReplyNewSession() override { Client->CreateAttachedSession(RpcSettings).Subscribe( - [promise{std::move(Promise)}, span = Span, metrics = Metrics](TAsyncCreateSessionResult future) mutable + [promise{std::move(Promise)}, obs = Observation](TAsyncCreateSessionResult future) mutable { auto val = future.ExtractValue(); - if (span) { - span->SetPeerEndpoint(val.GetEndpoint()); - span->End(val.GetStatus()); - } - if (metrics) { - metrics->End(val.GetStatus()); + if (obs) { + obs->SetPeerEndpoint(val.GetEndpoint()); + obs->End(val.GetStatus()); } promise.SetValue(std::move(val)); }); 
@@ -553,13 +530,11 @@ class TQueryClient::TImpl: public TClientImplCommon, public NThreading::TPromise Promise; std::shared_ptr Client; const TRpcRequestSettings RpcSettings; - std::shared_ptr Span; - std::shared_ptr Metrics; + std::shared_ptr Observation; }; - auto span = std::make_shared(Tracer_, "GetSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "GetSession", DbDriverState_->Log); - auto ctx = std::make_unique(shared_from_this(), settings, span, metrics); + auto obs = MakeObservation("GetSession"); + auto ctx = std::make_unique(shared_from_this(), settings, obs); auto future = ctx->GetFuture(); SessionPool_.GetSession(std::move(ctx)); @@ -628,6 +603,17 @@ class TQueryClient::TImpl: public TClientImplCommon, public } private: + std::shared_ptr MakeObservation(const std::string& operationName) { + return std::make_shared( + "Query", + &OperationStatCollector_, + Tracer_, + operationName, + DbDriverState_->DiscoveryEndpoint, + DbDriverState_->Log + ); + } + std::shared_ptr Tracer_; NSdkStats::TStatCollector::TClientOperationStatCollector OperationStatCollector_; NSdkStats::TStatCollector::TClientRetryOperationStatCollector RetryOperationStatCollector_; diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index cc2bcf92f3d..28dc7c79489 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -6,9 +6,6 @@ namespace NTable { using namespace NThreading; -using TTableMetrics = NObservability::TRequestMetrics; -using TTableSpan = NObservability::TRequestSpan; - const TKeepAliveSettings TTableClient::TImpl::KeepAliveSettings = TKeepAliveSettings().ClientTimeout(KEEP_ALIVE_CLIENT_TIMEOUT); @@ -388,10 +385,9 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio auto createSessionPromise = NewPromise(); auto self = shared_from_this(); - auto metrics = 
std::make_shared(&OperationStatCollector_, "GetSession", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "GetSession", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto obs = MakeObservation("GetSession"); - auto createSessionExtractor = [createSessionPromise, self, standalone, metrics, span] + auto createSessionExtractor = [createSessionPromise, self, standalone, obs] (google::protobuf::Any* any, TPlainStatus status) mutable { Ydb::Table::CreateSessionResult result; if (any) { @@ -408,8 +404,7 @@ TAsyncCreateSessionResult TTableClient::TImpl::CreateSession(const TCreateSessio session.SessionImpl_->MarkBroken(); } TCreateSessionResult val(TStatus(std::move(status)), std::move(session)); - metrics->End(val.GetStatus()); - span->End(val.GetStatus()); + obs->End(val.GetStatus()); createSessionPromise.SetValue(std::move(val)); }; @@ -773,8 +768,7 @@ TAsyncStatus TTableClient::TImpl::ExecuteSchemeQuery(const TSession& session, co request.set_session_id(TStringType{session.GetId()}); request.set_yql_text(TStringType{query}); - auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteSchemeQuery", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "ExecuteSchemeQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto obs = MakeObservation("ExecuteSchemeQuery"); auto future = RunSimple( std::move(request), @@ -782,10 +776,9 @@ TAsyncStatus TTableClient::TImpl::ExecuteSchemeQuery(const TSession& session, co rpcSettings ); - return future.Apply([metrics, span](NThreading::TFuture f) mutable { + return future.Apply([obs](NThreading::TFuture f) mutable { auto status = f.ExtractValue(); - metrics->End(status.GetStatus()); - span->End(status.GetStatus()); + obs->End(status.GetStatus()); return status; }); } @@ -800,12 +793,11 @@ TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSessio request.set_session_id(TStringType{session.GetId()}); SetTxSettings(txSettings, 
request.mutable_tx_settings()); - auto metrics = std::make_shared(&OperationStatCollector_, "BeginTransaction", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "BeginTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto obs = MakeObservation("BeginTransaction"); auto promise = NewPromise(); - auto extractor = [promise, session, metrics, span] + auto extractor = [promise, session, obs] (google::protobuf::Any* any, TPlainStatus status) mutable { std::string txId; if (any) { @@ -816,8 +808,7 @@ TAsyncBeginTransactionResult TTableClient::TImpl::BeginTransaction(const TSessio TBeginTransactionResult beginTxResult(TStatus(std::move(status)), TTransaction(session, txId)); - span->End(beginTxResult.GetStatus()); - metrics->End(beginTxResult.GetStatus()); + obs->End(beginTxResult.GetStatus()); promise.SetValue(std::move(beginTxResult)); }; @@ -844,12 +835,11 @@ TAsyncCommitTransactionResult TTableClient::TImpl::CommitTransaction(const TSess request.set_tx_id(TStringType{txId}); request.set_collect_stats(GetStatsCollectionMode(settings.CollectQueryStats_)); - auto span = std::make_shared(Tracer_, "CommitTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "CommitTransaction", DbDriverState_->Log); + auto obs = MakeObservation("CommitTransaction"); auto promise = NewPromise(); - auto extractor = [promise, metrics, span] + auto extractor = [promise, obs] (google::protobuf::Any* any, TPlainStatus status) mutable { std::optional queryStats; if (any) { @@ -862,8 +852,7 @@ TAsyncCommitTransactionResult TTableClient::TImpl::CommitTransaction(const TSess } TCommitTransactionResult commitTxResult(TStatus(std::move(status)), queryStats); - span->End(commitTxResult.GetStatus()); - metrics->End(commitTxResult.GetStatus()); + obs->End(commitTxResult.GetStatus()); promise.SetValue(std::move(commitTxResult)); }; @@ -889,8 +878,7 @@ TAsyncStatus 
TTableClient::TImpl::RollbackTransaction(const TSession& session, c request.set_session_id(TStringType{session.GetId()}); request.set_tx_id(TStringType{txId}); - auto span = std::make_shared(Tracer_, "RollbackTransaction", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto metrics = std::make_shared(&OperationStatCollector_, "RollbackTransaction", DbDriverState_->Log); + auto obs = MakeObservation("RollbackTransaction"); auto future = RunSimple( std::move(request), @@ -898,10 +886,9 @@ TAsyncStatus TTableClient::TImpl::RollbackTransaction(const TSession& session, c rpcSettings ); - return future.Apply([metrics, span](TAsyncStatus future) { - auto status = future.GetValue(); - span->End(status.GetStatus()); - metrics->End(status.GetStatus()); + return future.Apply([obs](TAsyncStatus fut) { + auto status = fut.GetValue(); + obs->End(status.GetStatus()); return status; }); } @@ -1173,15 +1160,13 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, *mutable_rows->mutable_type() = rows.GetType().GetProto(); } - auto metrics = std::make_shared(&OperationStatCollector_, "BulkUpsert", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); + auto obs = MakeObservation("BulkUpsert"); auto promise = NewPromise(); - auto extractor = [promise, metrics, span](google::protobuf::Any* any, TPlainStatus status) mutable { + auto extractor = [promise, obs](google::protobuf::Any* any, TPlainStatus status) mutable { Y_UNUSED(any); TBulkUpsertResult val(TStatus(std::move(status))); - span->End(val.GetStatus()); - metrics->End(val.GetStatus()); + obs->End(val.GetStatus()); promise.SetValue(std::move(val)); }; @@ -1224,17 +1209,15 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, } request.set_data(TStringType{data}); - auto span = std::make_shared(Tracer_, "BulkUpsert", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - auto 
metrics = std::make_shared(&OperationStatCollector_, "BulkUpsert", DbDriverState_->Log); + auto obs = MakeObservation("BulkUpsert"); auto promise = NewPromise(); - auto extractor = [promise, metrics, span] + auto extractor = [promise, obs] (google::protobuf::Any* any, TPlainStatus status) mutable { Y_UNUSED(any); TBulkUpsertResult val(TStatus(std::move(status))); - span->End(val.GetStatus()); - metrics->End(val.GetStatus()); + obs->End(val.GetStatus()); promise.SetValue(std::move(val)); }; diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index 7675d6c3e89..c53c73f5533 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -4,8 +4,7 @@ #include #include #include -#include -#include +#include #include #include #undef INCLUDE_YDB_INTERNAL_H @@ -239,9 +238,7 @@ class TTableClient::TImpl: public TClientImplCommon, public auto promise = NewPromise(); bool keepInCache = settings.KeepInQueryCache_ && settings.KeepInQueryCache_.value(); - auto metrics = std::make_shared(&OperationStatCollector_, "ExecuteDataQuery", DbDriverState_->Log); - auto span = std::make_shared(Tracer_, "ExecuteDataQuery", DbDriverState_->DiscoveryEndpoint, DbDriverState_->Log); - + auto obs = MakeObservation("ExecuteDataQuery"); // We don't want to delay call of TSession dtor, so we can't capture it by copy // otherwise we break session pool and other clients logic. 
@@ -252,7 +249,7 @@ class TTableClient::TImpl: public TClientImplCommon, public // - capture pointer // - call free just before SetValue call auto sessionPtr = new TSession(session); - auto extractor = [promise, sessionPtr, query, fromCache, keepInCache, metrics, span] + auto extractor = [promise, sessionPtr, query, fromCache, keepInCache, obs] (google::protobuf::Any* any, TPlainStatus status) mutable { std::vector res; std::optional tx; @@ -291,8 +288,7 @@ class TTableClient::TImpl: public TClientImplCommon, public TDataQueryResult dataQueryResult(TStatus(std::move(status)), std::move(res), tx, dataQuery, fromCache, queryStats); - metrics->End(dataQueryResult.GetStatus()); - span->End(dataQueryResult.GetStatus()); + obs->End(dataQueryResult.GetStatus()); delete sessionPtr; tx.reset(); @@ -341,6 +337,17 @@ class TTableClient::TImpl: public TClientImplCommon, public NSessionPool::TSessionPool SessionPool_; TRequestMigrator RequestMigrator_; static const TKeepAliveSettings KeepAliveSettings; + + std::shared_ptr MakeObservation(const std::string& operationName) { + return std::make_shared( + "Table", + &OperationStatCollector_, + Tracer_, + operationName, + DbDriverState_->DiscoveryEndpoint, + DbDriverState_->Log + ); + } }; } diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index fec3aab583b..4e52a7acef5 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -38,8 +38,9 @@ std::shared_ptr GetCounter( const std::string& operation) { return registry->GetCounter(name, { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", operation}, + {"ydb.client.api", "query"}, }); } @@ -49,8 +50,9 @@ std::shared_ptr GetDuration( EStatus status) { NMetrics::TLabels labels = { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", operation}, + {"ydb.client.api", "query"}, {"db.response.status_code", ToString(status)}, }; if (status != 
EStatus::SUCCESS) { diff --git a/tests/unit/client/observability/metrics_ut.cpp b/tests/unit/client/observability/metrics_ut.cpp index 036eea92168..0509858d59d 100644 --- a/tests/unit/client/observability/metrics_ut.cpp +++ b/tests/unit/client/observability/metrics_ut.cpp @@ -24,22 +24,25 @@ class RequestMetricsTest : public ::testing::Test { std::shared_ptr RequestCounter(const std::string& op) { return Registry->GetCounter("db.client.operation.requests", { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", op}, + {"ydb.client.api", "unspecified"}, }); } std::shared_ptr ErrorCounter(const std::string& op) { return Registry->GetCounter("db.client.operation.errors", { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", op}, + {"ydb.client.api", "unspecified"}, }); } std::shared_ptr DurationHistogram(const std::string& op, EStatus status) { TLabels labels = { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", op}, + {"ydb.client.api", "unspecified"}, {"db.response.status_code", ToString(status)}, }; if (status != EStatus::SUCCESS) { @@ -199,7 +202,7 @@ TEST_F(RequestMetricsTest, AllErrorStatusesIncrementErrorCounter) { TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); - TStatCollector::TClientOperationStatCollector collector(nullptr, "", "", registry); + TStatCollector::TClientOperationStatCollector collector(nullptr, "", "Query", registry); NObservability::TRequestMetrics metrics(&collector, "ExecuteQuery", TLog()); metrics.End(EStatus::SUCCESS); @@ -208,8 +211,9 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { registry->GetCounter( "db.client.operation.requests", { - {"db.system.name", "other_sql"}, - {"db.operation.name", "ExecuteQuery"} + {"db.system.name", "ydb"}, + {"db.operation.name", "ExecuteQuery"}, + {"ydb.client.api", "query"}, } ), nullptr @@ -218,8 +222,9 
@@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { registry->GetCounter( "db.client.operation.errors", { - {"db.system.name", "other_sql"}, - {"db.operation.name", "ExecuteQuery"} + {"db.system.name", "ydb"}, + {"db.operation.name", "ExecuteQuery"}, + {"ydb.client.api", "query"}, } ), nullptr @@ -228,8 +233,9 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { registry->GetHistogram( "db.client.operation.duration", { - {"db.system.name", "other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteQuery"}, + {"ydb.client.api", "query"}, {"db.response.status_code", ToString(EStatus::SUCCESS)}, } ), @@ -239,7 +245,7 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); - TStatCollector::TClientOperationStatCollector collector(nullptr, "", "", registry); + TStatCollector::TClientOperationStatCollector collector(nullptr, "", "Table", registry); NObservability::TRequestMetrics metrics(&collector, "ExecuteDataQuery", TLog()); metrics.End(EStatus::SUCCESS); @@ -247,14 +253,22 @@ TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { EXPECT_NE( registry->GetCounter( "db.client.operation.requests", - {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteDataQuery"}} + { + {"db.system.name", "ydb"}, + {"db.operation.name", "ExecuteDataQuery"}, + {"ydb.client.api", "table"} + } ), nullptr ); EXPECT_NE( registry->GetCounter( "db.client.operation.errors", - {{"db.system.name", "other_sql"}, {"db.operation.name", "ExecuteDataQuery"}} + { + {"db.system.name", "ydb"}, + {"db.operation.name", "ExecuteDataQuery"}, + {"ydb.client.api", "table"} + } ), nullptr ); @@ -262,8 +276,9 @@ TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { registry->GetHistogram( "db.client.operation.duration", { - {"db.system.name", 
"other_sql"}, + {"db.system.name", "ydb"}, {"db.operation.name", "ExecuteDataQuery"}, + {"ydb.client.api", "table"}, {"db.response.status_code", ToString(EStatus::SUCCESS)}, } ), From 94cbdd045dd9e5e5bdcfee6e573c2ad5c92a0027 Mon Sep 17 00:00:00 2001 From: maladetska Date: Fri, 3 Apr 2026 10:43:50 +0300 Subject: [PATCH 15/17] fix test --- tests/integration/metrics/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index 4e52a7acef5..33d52dfccde 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -129,11 +129,11 @@ TEST(QueryMetricsIntegration, CreateSessionRecordsMetrics) { auto session = client.GetSession().ExtractValueSync(); ASSERT_TRUE(session.IsSuccess()) << session.GetIssues().ToString(); - auto requests = GetCounter(registry, "db.client.operation.requests", "CreateSession"); + auto requests = GetCounter(registry, "db.client.operation.requests", "GetSession"); ASSERT_NE(requests, nullptr) << "CreateSession request counter not created"; EXPECT_GE(requests->Get(), 1); - auto duration = GetDuration(registry, "CreateSession", EStatus::SUCCESS); + auto duration = GetDuration(registry, "GetSession", EStatus::SUCCESS); ASSERT_NE(duration, nullptr) << "CreateSession duration histogram not created"; EXPECT_GE(duration->Count(), 1u); From 75c777db32ef2dbea13f6e54a7767999c390b4c2 Mon Sep 17 00:00:00 2001 From: maladetska Date: Wed, 8 Apr 2026 14:34:42 +0300 Subject: [PATCH 16/17] Add db.namespace, add ExternalMetricRegistry to TClientStatCollector, make attributes freecase --- .../impl/internal/common/ydb_client_api.h | 25 ----- .../impl/internal/db_driver_state/state.cpp | 2 +- .../grpc_connections/grpc_connections.h | 2 +- .../impl/internal/internal_client/client.h | 5 + src/client/impl/observability/observation.cpp | 3 +- src/client/impl/observability/observation.h | 1 + src/client/impl/observability/span.cpp | 7 +- 
src/client/impl/observability/span.h | 1 + src/client/impl/stats/CMakeLists.txt | 1 - src/client/impl/stats/stats.h | 31 ++++--- src/client/query/client.cpp | 3 +- src/client/table/impl/table_client.cpp | 2 +- src/client/table/impl/table_client.h | 1 + tests/integration/metrics/main.cpp | 4 +- .../unit/client/observability/metrics_ut.cpp | 92 +++++++++++++++++-- 15 files changed, 124 insertions(+), 56 deletions(-) delete mode 100644 src/client/impl/internal/common/ydb_client_api.h diff --git a/src/client/impl/internal/common/ydb_client_api.h b/src/client/impl/internal/common/ydb_client_api.h deleted file mode 100644 index e9d789d044b..00000000000 --- a/src/client/impl/internal/common/ydb_client_api.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -namespace NYdb::inline V3::NObservability { - -inline std::string YdbClientApiAttributeValue(const std::string& clientType) { - if (clientType == "Query") { - return "query"; - } - if (clientType == "Table") { - return "table"; - } - if (clientType.empty()) { - return "unspecified"; - } - std::string out; - out.reserve(clientType.size()); - for (unsigned char ch : clientType) { - out.push_back(static_cast(::tolower(static_cast(ch)))); - } - return out; -} - -} // namespace NYdb::NObservability diff --git a/src/client/impl/internal/db_driver_state/state.cpp b/src/client/impl/internal/db_driver_state/state.cpp index 9a836fc527a..e41a47869fc 100644 --- a/src/client/impl/internal/db_driver_state/state.cpp +++ b/src/client/impl/internal/db_driver_state/state.cpp @@ -44,7 +44,7 @@ TDbDriverState::TDbDriverState( auto self = shared_from_this(); return client->GetEndpoints(self); }, client) - , StatCollector(database, client->GetMetricRegistry()) + , StatCollector(database, client->GetMetricRegistry(), client->GetExternalMetricRegistry()) , Log(Client->GetLog()) , DiscoveryCompletedPromise(NThreading::NewPromise()) { diff --git a/src/client/impl/internal/grpc_connections/grpc_connections.h 
b/src/client/impl/internal/grpc_connections/grpc_connections.h index 8d2386b30e3..c6ef8686c36 100644 --- a/src/client/impl/internal/grpc_connections/grpc_connections.h +++ b/src/client/impl/internal/grpc_connections/grpc_connections.h @@ -589,7 +589,7 @@ class TGRpcConnectionsImpl ::NMonitoring::TMetricRegistry* GetMetricRegistry() override; void RegisterExtension(IExtension* extension); void RegisterExtensionApi(IExtensionApi* api); - std::shared_ptr GetExternalMetricRegistry() const; + std::shared_ptr GetExternalMetricRegistry() const override; std::shared_ptr GetTraceProvider() const; void SetDiscoveryMutator(IDiscoveryMutatorApi::TMutatorCb&& cb); diff --git a/src/client/impl/internal/internal_client/client.h b/src/client/impl/internal/internal_client/client.h index 3e52f984480..406a8b7103c 100644 --- a/src/client/impl/internal/internal_client/client.h +++ b/src/client/impl/internal/internal_client/client.h @@ -14,6 +14,10 @@ namespace NMonitoring { class TMetricRegistry; } +namespace NYdb::inline V3::NMetrics { + class IMetricRegistry; +} + namespace NYdb::inline V3 { class TDbDriverState; @@ -29,6 +33,7 @@ class IInternalClient { virtual TBalancingPolicy::TImpl GetBalancingSettings() const = 0; virtual bool StartStatCollecting(::NMonitoring::IMetricRegistry* sensorsRegistry) = 0; virtual ::NMonitoring::TMetricRegistry* GetMetricRegistry() = 0; + virtual std::shared_ptr GetExternalMetricRegistry() const = 0; virtual const TLog& GetLog() const = 0; }; diff --git a/src/client/impl/observability/observation.cpp b/src/client/impl/observability/observation.cpp index 9a27534eb5c..7483087bcfd 100644 --- a/src/client/impl/observability/observation.cpp +++ b/src/client/impl/observability/observation.cpp @@ -7,8 +7,9 @@ TRequestObservation::TRequestObservation(const std::string& ydbClientType , std::shared_ptr tracer , const std::string& operationName , const std::string& discoveryEndpoint + , const std::string& database , const TLog& log -) : 
Span_(std::make_shared(std::move(tracer), operationName, discoveryEndpoint, log, ydbClientType)) +) : Span_(std::make_shared(std::move(tracer), operationName, discoveryEndpoint, database, log, ydbClientType)) , Metrics_(std::make_shared(operationCollector, operationName, log)) {} diff --git a/src/client/impl/observability/observation.h b/src/client/impl/observability/observation.h index 92610d11c0b..544b0c4baff 100644 --- a/src/client/impl/observability/observation.h +++ b/src/client/impl/observability/observation.h @@ -15,6 +15,7 @@ class TRequestObservation { , std::shared_ptr tracer , const std::string& operationName , const std::string& discoveryEndpoint + , const std::string& database , const TLog& log ); diff --git a/src/client/impl/observability/span.cpp b/src/client/impl/observability/span.cpp index 635550e2380..46805563fee 100644 --- a/src/client/impl/observability/span.cpp +++ b/src/client/impl/observability/span.cpp @@ -1,7 +1,6 @@ #include "span.h" #include -#include #include @@ -13,6 +12,10 @@ namespace { constexpr int DefaultGrpcPort = 2135; +std::string YdbClientApiAttributeValue(const std::string& clientType) noexcept { + return clientType.empty() ? 
std::string("Unspecified") : clientType; +} + void ParseEndpoint(const std::string& endpoint, std::string& host, int& port) { port = DefaultGrpcPort; @@ -68,6 +71,7 @@ void SafeLogRequestSpanError(TLog& log, const char* message, std::exception_ptr TRequestSpan::TRequestSpan(std::shared_ptr tracer , const std::string& requestName , const std::string& endpoint + , const std::string& database , const TLog& log , const std::string& ydbClientType ) : Log_(log) { @@ -85,6 +89,7 @@ TRequestSpan::TRequestSpan(std::shared_ptr tracer return; } Span_->SetAttribute("db.system.name", "ydb"); + Span_->SetAttribute("db.namespace", database); Span_->SetAttribute("db.operation.name", requestName); Span_->SetAttribute("ydb.client.api", YdbClientApiAttributeValue(ydbClientType)); Span_->SetAttribute("server.address", host); diff --git a/src/client/impl/observability/span.h b/src/client/impl/observability/span.h index 3c07423b68b..9b69a1ac65d 100644 --- a/src/client/impl/observability/span.h +++ b/src/client/impl/observability/span.h @@ -17,6 +17,7 @@ class TRequestSpan { TRequestSpan(std::shared_ptr tracer , const std::string& requestName , const std::string& endpoint + , const std::string& database , const TLog& log , const std::string& ydbClientType = {} ); diff --git a/src/client/impl/stats/CMakeLists.txt b/src/client/impl/stats/CMakeLists.txt index b3ebda44404..15866af4bc6 100644 --- a/src/client/impl/stats/CMakeLists.txt +++ b/src/client/impl/stats/CMakeLists.txt @@ -5,7 +5,6 @@ target_link_libraries(client-impl-ydb_stats PUBLIC grpc-client monlib-metrics client-metrics - impl-internal-common ) target_sources(client-impl-ydb_stats PRIVATE diff --git a/src/client/impl/stats/stats.h b/src/client/impl/stats/stats.h index 276ef9cd1d2..b2e61c0ace8 100644 --- a/src/client/impl/stats/stats.h +++ b/src/client/impl/stats/stats.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -16,6 +15,10 @@ namespace NYdb::inline V3 { namespace NSdkStats { +inline std::string 
YdbClientApiAttributeValue(const std::string& clientType) { + return clientType.empty() ? std::string("Unspecified") : clientType; +} + // works only for case normal (foo_bar) underscore inline std::string UnderscoreToUpperCamel(const std::string& in) { @@ -255,9 +258,10 @@ struct TStatCollector { })->Inc(); } if (ExternalRegistry_) { - const std::string clientApi = NObservability::YdbClientApiAttributeValue(ClientType_); + const std::string clientApi = YdbClientApiAttributeValue(ClientType_); NMetrics::TLabels labels = { {"db.system.name", "ydb"}, + {"db.namespace", Database_}, {"db.operation.name", operationName}, {"ydb.client.api", clientApi}, }; @@ -290,9 +294,10 @@ struct TStatCollector { })->Inc(); } if (ExternalRegistry_) { - const std::string clientApi = NObservability::YdbClientApiAttributeValue(ClientType_); + const std::string clientApi = YdbClientApiAttributeValue(ClientType_); NMetrics::TLabels labels = { {"db.system.name", "ydb"}, + {"db.namespace", Database_}, {"db.operation.name", operationName}, {"ydb.client.api", clientApi}, }; @@ -318,8 +323,9 @@ struct TStatCollector { if (ExternalRegistry_) { NMetrics::TLabels labels = { {"db.system.name", "ydb"}, + {"db.namespace", Database_}, {"db.operation.name", operationName}, - {"ydb.client.api", NObservability::YdbClientApiAttributeValue(ClientType_)}, + {"ydb.client.api", YdbClientApiAttributeValue(ClientType_)}, {"db.response.status_code", TStringBuilder() << status}, }; if (status != EStatus::SUCCESS) { @@ -369,9 +375,12 @@ struct TStatCollector { TClientOperationStatCollector OperationStatCollector; }; - TStatCollector(const std::string& database, TMetricRegistry* sensorsRegistry) - : Database_(database) + TStatCollector(const std::string& database + , TMetricRegistry* sensorsRegistry + , std::shared_ptr externalMetricRegistry = {} + ) : Database_(database) , DatabaseLabel_({"database", database}) + , ExternalMetricRegistry_(std::move(externalMetricRegistry)) { if (sensorsRegistry) { 
SetMetricRegistry(sensorsRegistry); @@ -474,10 +483,7 @@ struct TStatCollector { return TSessionPoolStatCollector(); } - TClientStatCollector GetClientStatCollector( - const std::string& clientType, - std::shared_ptr externalMetricRegistry = {}) - { + TClientStatCollector GetClientStatCollector(const std::string& clientType) { if (auto registry = MetricRegistryPtr_.Get()) { ::NMonitoring::TRate* cacheMiss = nullptr; ::NMonitoring::TRate* sessionRemovedDueBalancing = nullptr; @@ -499,12 +505,12 @@ struct TStatCollector { return TClientStatCollector(cacheMiss, querySize, paramsSize, sessionRemovedDueBalancing, requestMigrated, TClientRetryOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType), - TClientOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType, std::move(externalMetricRegistry))); + TClientOperationStatCollector(MetricRegistryPtr_.Get(), Database_, clientType, ExternalMetricRegistry_)); } return TClientStatCollector(nullptr, nullptr, nullptr, nullptr, nullptr, TClientRetryOperationStatCollector(nullptr, Database_, clientType), - TClientOperationStatCollector(nullptr, Database_, clientType, std::move(externalMetricRegistry))); + TClientOperationStatCollector(nullptr, Database_, clientType, ExternalMetricRegistry_)); } bool IsCollecting() { @@ -522,6 +528,7 @@ struct TStatCollector { private: const std::string Database_; const ::NMonitoring::TLabel DatabaseLabel_; + std::shared_ptr ExternalMetricRegistry_; TAtomicPointer MetricRegistryPtr_; TAtomicCounter<::NMonitoring::TRate> DiscoveryDuePessimization_; TAtomicCounter<::NMonitoring::TRate> DiscoveryDueExpiration_; diff --git a/src/client/query/client.cpp b/src/client/query/client.cpp index bceefe3f56b..a6db0b273db 100644 --- a/src/client/query/client.cpp +++ b/src/client/query/client.cpp @@ -68,7 +68,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { - 
SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query", Connections_->GetExternalMetricRegistry())); + SetStatCollector(DbDriverState_->StatCollector.GetClientStatCollector("Query")); SessionPool_.SetStatCollector(DbDriverState_->StatCollector.GetSessionPoolStatCollector("Query")); if (auto traceProvider = Connections_->GetTraceProvider()) { @@ -610,6 +610,7 @@ class TQueryClient::TImpl: public TClientImplCommon, public Tracer_, operationName, DbDriverState_->DiscoveryEndpoint, + DbDriverState_->Database, DbDriverState_->Log ); } diff --git a/src/client/table/impl/table_client.cpp b/src/client/table/impl/table_client.cpp index 28dc7c79489..e33b71bfda8 100644 --- a/src/client/table/impl/table_client.cpp +++ b/src/client/table/impl/table_client.cpp @@ -22,7 +22,7 @@ TTableClient::TImpl::TImpl(std::shared_ptr&& connections, , Settings_(settings) , SessionPool_(Settings_.SessionPoolSettings_.MaxActiveSessions_) { - auto clientCollector = DbDriverState_->StatCollector.GetClientStatCollector("Table", Connections_->GetExternalMetricRegistry()); + auto clientCollector = DbDriverState_->StatCollector.GetClientStatCollector("Table"); OperationStatCollector_ = clientCollector.OperationStatCollector; if (auto traceProvider = Connections_->GetTraceProvider()) { diff --git a/src/client/table/impl/table_client.h b/src/client/table/impl/table_client.h index c53c73f5533..fcfe970a75b 100644 --- a/src/client/table/impl/table_client.h +++ b/src/client/table/impl/table_client.h @@ -345,6 +345,7 @@ class TTableClient::TImpl: public TClientImplCommon, public Tracer_, operationName, DbDriverState_->DiscoveryEndpoint, + DbDriverState_->Database, DbDriverState_->Log ); } diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index 33d52dfccde..13c57e09e72 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -40,7 +40,7 @@ std::shared_ptr GetCounter( return registry->GetCounter(name, { 
{"db.system.name", "ydb"}, {"db.operation.name", operation}, - {"ydb.client.api", "query"}, + {"ydb.client.api", "Query"}, }); } @@ -52,7 +52,7 @@ std::shared_ptr GetDuration( NMetrics::TLabels labels = { {"db.system.name", "ydb"}, {"db.operation.name", operation}, - {"ydb.client.api", "query"}, + {"ydb.client.api", "Query"}, {"db.response.status_code", ToString(status)}, }; if (status != EStatus::SUCCESS) { diff --git a/tests/unit/client/observability/metrics_ut.cpp b/tests/unit/client/observability/metrics_ut.cpp index 0509858d59d..d2e26c266dd 100644 --- a/tests/unit/client/observability/metrics_ut.cpp +++ b/tests/unit/client/observability/metrics_ut.cpp @@ -11,6 +11,10 @@ using namespace NYdb::NMetrics; using namespace NYdb::NTests; using namespace NYdb::NSdkStats; +namespace { + constexpr const char kTestDbNamespace[] = "/Root/testdb"; +} // namespace + // --------------------------------------------------------------------------- // TRequestMetrics (shared logic) // --------------------------------------------------------------------------- @@ -19,30 +23,34 @@ class RequestMetricsTest : public ::testing::Test { protected: void SetUp() override { Registry = std::make_shared(); - OpCollector = TStatCollector::TClientOperationStatCollector(nullptr, "", "", Registry); + OpCollector = TStatCollector::TClientOperationStatCollector( + nullptr, kTestDbNamespace, "", Registry); } std::shared_ptr RequestCounter(const std::string& op) { return Registry->GetCounter("db.client.operation.requests", { {"db.system.name", "ydb"}, + {"db.namespace", kTestDbNamespace}, {"db.operation.name", op}, - {"ydb.client.api", "unspecified"}, + {"ydb.client.api", "Unspecified"}, }); } std::shared_ptr ErrorCounter(const std::string& op) { return Registry->GetCounter("db.client.operation.errors", { {"db.system.name", "ydb"}, + {"db.namespace", kTestDbNamespace}, {"db.operation.name", op}, - {"ydb.client.api", "unspecified"}, + {"ydb.client.api", "Unspecified"}, }); } std::shared_ptr 
DurationHistogram(const std::string& op, EStatus status) { TLabels labels = { {"db.system.name", "ydb"}, + {"db.namespace", kTestDbNamespace}, {"db.operation.name", op}, - {"ydb.client.api", "unspecified"}, + {"ydb.client.api", "Unspecified"}, {"db.response.status_code", ToString(status)}, }; if (status != EStatus::SUCCESS) { @@ -200,6 +208,64 @@ TEST_F(RequestMetricsTest, AllErrorStatusesIncrementErrorCounter) { EXPECT_EQ(errors->Get(), static_cast(errorStatuses.size())); } +TEST(RequestMetricsDbNamespaceTest, DifferentNamespacesAreSeparateMetricSeries) { + auto registry = std::make_shared(); + TStatCollector::TClientOperationStatCollector collectorA(nullptr, "/db/alpha", "", registry); + TStatCollector::TClientOperationStatCollector collectorB(nullptr, "/db/beta", "", registry); + + { + TRequestMetrics m(&collectorA, "GetSession", TLog()); + m.End(EStatus::SUCCESS); + } + { + TRequestMetrics m(&collectorB, "GetSession", TLog()); + m.End(EStatus::SUCCESS); + } + + auto labelsAlpha = [](const char* op) { + return NMetrics::TLabels{ + {"db.system.name", "ydb"}, + {"db.namespace", "/db/alpha"}, + {"db.operation.name", op}, + {"ydb.client.api", "Unspecified"}, + }; + }; + auto labelsBeta = [](const char* op) { + return NMetrics::TLabels{ + {"db.system.name", "ydb"}, + {"db.namespace", "/db/beta"}, + {"db.operation.name", op}, + {"ydb.client.api", "Unspecified"}, + }; + }; + + auto reqAlpha = registry->GetCounter("db.client.operation.requests", labelsAlpha("GetSession")); + auto reqBeta = registry->GetCounter("db.client.operation.requests", labelsBeta("GetSession")); + ASSERT_NE(reqAlpha, nullptr); + ASSERT_NE(reqBeta, nullptr); + EXPECT_EQ(reqAlpha->Get(), 1); + EXPECT_EQ(reqBeta->Get(), 1); + + auto durAlpha = registry->GetHistogram( + "db.client.operation.duration", + [&] { + auto l = labelsAlpha("GetSession"); + l["db.response.status_code"] = ToString(EStatus::SUCCESS); + return l; + }()); + auto durBeta = registry->GetHistogram( + "db.client.operation.duration", + 
[&] { + auto l = labelsBeta("GetSession"); + l["db.response.status_code"] = ToString(EStatus::SUCCESS); + return l; + }()); + ASSERT_NE(durAlpha, nullptr); + ASSERT_NE(durBeta, nullptr); + EXPECT_EQ(durAlpha->Count(), 1u); + EXPECT_EQ(durBeta->Count(), 1u); +} + TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { auto registry = std::make_shared(); TStatCollector::TClientOperationStatCollector collector(nullptr, "", "Query", registry); @@ -212,8 +278,9 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { "db.client.operation.requests", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteQuery"}, - {"ydb.client.api", "query"}, + {"ydb.client.api", "Query"}, } ), nullptr @@ -223,8 +290,9 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { "db.client.operation.errors", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteQuery"}, - {"ydb.client.api", "query"}, + {"ydb.client.api", "Query"}, } ), nullptr @@ -234,8 +302,9 @@ TEST(RequestMetricsClientAliasesTest, QueryOperationsUseOtelStandardMetrics) { "db.client.operation.duration", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteQuery"}, - {"ydb.client.api", "query"}, + {"ydb.client.api", "Query"}, {"db.response.status_code", ToString(EStatus::SUCCESS)}, } ), @@ -255,8 +324,9 @@ TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { "db.client.operation.requests", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteDataQuery"}, - {"ydb.client.api", "table"} + {"ydb.client.api", "Table"}, } ), nullptr @@ -266,8 +336,9 @@ TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { "db.client.operation.errors", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteDataQuery"}, - {"ydb.client.api", "table"} + {"ydb.client.api", "Table"}, } ), 
nullptr @@ -277,8 +348,9 @@ TEST(RequestMetricsClientAliasesTest, TableOperationsUseOtelStandardMetrics) { "db.client.operation.duration", { {"db.system.name", "ydb"}, + {"db.namespace", ""}, {"db.operation.name", "ExecuteDataQuery"}, - {"ydb.client.api", "table"}, + {"ydb.client.api", "Table"}, {"db.response.status_code", ToString(EStatus::SUCCESS)}, } ), From 0f0d6551995735f8802ba70c03b4db4382f2456c Mon Sep 17 00:00:00 2001 From: maladetska Date: Thu, 9 Apr 2026 01:06:31 +0300 Subject: [PATCH 17/17] fix test --- tests/integration/metrics/main.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/integration/metrics/main.cpp b/tests/integration/metrics/main.cpp index 13c57e09e72..b933bcf0539 100644 --- a/tests/integration/metrics/main.cpp +++ b/tests/integration/metrics/main.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include using namespace NYdb; @@ -11,21 +13,26 @@ using namespace NYdb::NTests; namespace { +std::string GetEnvOrEmpty(const char* name) { + const char* value = std::getenv(name); + return value ? std::string(value) : std::string(); +} + struct TRunArgs { TDriver Driver; std::shared_ptr Registry; }; TRunArgs MakeRunArgs() { - std::string endpoint = std::getenv("YDB_ENDPOINT"); - std::string database = std::getenv("YDB_DATABASE"); + std::string endpoint = GetEnvOrEmpty("YDB_ENDPOINT"); + std::string database = GetEnvOrEmpty("YDB_DATABASE"); auto registry = std::make_shared(); auto driverConfig = TDriverConfig() .SetEndpoint(endpoint) .SetDatabase(database) - .SetAuthToken(std::getenv("YDB_TOKEN") ? 
std::getenv("YDB_TOKEN") : "") + .SetAuthToken(GetEnvOrEmpty("YDB_TOKEN")) .SetMetricRegistry(registry); TDriver driver(driverConfig); @@ -39,6 +46,7 @@ std::shared_ptr GetCounter( { return registry->GetCounter(name, { {"db.system.name", "ydb"}, + {"db.namespace", GetEnvOrEmpty("YDB_DATABASE")}, {"db.operation.name", operation}, {"ydb.client.api", "Query"}, }); @@ -51,6 +59,7 @@ std::shared_ptr GetDuration( { NMetrics::TLabels labels = { {"db.system.name", "ydb"}, + {"db.namespace", GetEnvOrEmpty("YDB_DATABASE")}, {"db.operation.name", operation}, {"ydb.client.api", "Query"}, {"db.response.status_code", ToString(status)}, @@ -237,13 +246,13 @@ TEST(QueryMetricsIntegration, MultipleQueriesAccumulateMetrics) { } TEST(QueryMetricsIntegration, NoRegistryDoesNotBreakOperations) { - std::string endpoint = std::getenv("YDB_ENDPOINT"); - std::string database = std::getenv("YDB_DATABASE"); + std::string endpoint = GetEnvOrEmpty("YDB_ENDPOINT"); + std::string database = GetEnvOrEmpty("YDB_DATABASE"); auto driverConfig = TDriverConfig() .SetEndpoint(endpoint) .SetDatabase(database) - .SetAuthToken(std::getenv("YDB_TOKEN") ? std::getenv("YDB_TOKEN") : ""); + .SetAuthToken(GetEnvOrEmpty("YDB_TOKEN")); TDriver driver(driverConfig); TQueryClient client(driver);