diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 98d679777..9928a0eae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,15 +35,17 @@ jobs: packages: "libsqlite3-dev" run_tests: true - - name: "Base + Admin API" - cmake_flags: "-DCMAKE_BUILD_TYPE=Release -DENABLE_ADMIN_API=ON" + - name: "Base + Admin API + OTEL" + cmake_flags: "-DCMAKE_BUILD_TYPE=Release -DENABLE_ADMIN_API=ON -DWITH_OTEL=ON -DCMAKE_PREFIX_PATH=/opt/opentelemetry-cpp" packages: "" run_tests: true + with_otel: true - - name: "YAML + Admin API" - cmake_flags: "-DCMAKE_BUILD_TYPE=Release -DHAVE_YAML=ON -DENABLE_ADMIN_API=ON" + - name: "YAML + Admin API + OTEL" + cmake_flags: "-DCMAKE_BUILD_TYPE=Release -DHAVE_YAML=ON -DENABLE_ADMIN_API=ON -DWITH_OTEL=ON -DCMAKE_PREFIX_PATH=/opt/opentelemetry-cpp" packages: "libyaml-cpp-dev" run_tests: true + with_otel: true steps: - uses: actions/checkout@v4 @@ -52,6 +54,7 @@ jobs: - name: Install base dependencies run: | + sudo add-apt-repository universe -y sudo apt-get update sudo apt-get install -y \ build-essential \ @@ -72,6 +75,36 @@ jobs: run: | sudo apt-get install -y ${{ matrix.config.packages }} + - name: Install opentelemetry-cpp runtime dependencies + if: matrix.config.with_otel + run: | + sudo apt-get install -y libprotobuf-dev + + - name: Cache opentelemetry-cpp + if: matrix.config.with_otel + id: cache-otel + uses: actions/cache@v4 + with: + path: /opt/opentelemetry-cpp + key: otel-cpp-1.24.0-ubuntu-x64 + + - name: Build opentelemetry-cpp + if: matrix.config.with_otel && steps.cache-otel.outputs.cache-hit != 'true' + run: | + sudo apt-get install -y libgrpc++-dev protobuf-compiler-grpc || true + wget -q https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v1.24.0.tar.gz + tar xzf v1.24.0.tar.gz + cmake -S opentelemetry-cpp-1.24.0 -B otel-build \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/opt/opentelemetry-cpp \ + -DWITH_OTLP_HTTP=ON \ + -DWITH_OTLP_GRPC=OFF \ + 
-DBUILD_TESTING=OFF \ + -DWITH_BENCHMARK=OFF \ + -DWITH_EXAMPLES=OFF + cmake --build otel-build -j$(nproc) + sudo cmake --install otel-build + - name: Configure CMake run: | mkdir -p build @@ -271,7 +304,15 @@ jobs: git wget + - name: Cache Cygwin libraries + id: cache-cygwin-libs + uses: actions/cache@v4 + with: + path: C:\cygwin\usr\local + key: cygwin-gtest-1.14.0 + - name: Build and install googletest from source + if: steps.cache-cygwin-libs.outputs.cache-hit != 'true' shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr '{0}' run: | cd /tmp @@ -330,7 +371,15 @@ jobs: git wget + - name: Cache Cygwin libraries + id: cache-cygwin-libs + uses: actions/cache@v4 + with: + path: C:\cygwin\usr\local + key: cygwin-gtest-1.14.0-yaml-0.7.0 + - name: Build and install googletest from source + if: steps.cache-cygwin-libs.outputs.cache-hit != 'true' shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr '{0}' run: | cd /tmp @@ -343,6 +392,7 @@ jobs: make install - name: Build and install yaml-cpp from source + if: steps.cache-cygwin-libs.outputs.cache-hit != 'true' shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr '{0}' run: | cd /tmp diff --git a/CLAUDE.md b/CLAUDE.md index 1c2cfaf12..642d2d69e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -351,10 +351,22 @@ Use separate build directories for different CMake configurations to avoid lengt build/ - default build (without optional features) build_sqlite/ - build with -DHAVE_SQLITE=ON build_debug/ - debug build with -DCMAKE_BUILD_TYPE=Debug +build_otel/ - build with -DWITH_OTEL=ON (requires vcpkg) build_test/ - test data and converted worlds (not for compilation) ``` **Always warn the user when changing build directories or running cmake/make in a different directory.** +### OpenTelemetry Build (WITH_OTEL) +opentelemetry-cpp is installed via vcpkg at `~/repos/vcpkg`. Always pass the toolchain file and prefix path: +```bash +cmake -S . 
-B build_otel \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_OTEL=ON \ + -DCMAKE_TOOLCHAIN_FILE=~/repos/vcpkg/scripts/buildsystems/vcpkg.cmake \ + -DCMAKE_PREFIX_PATH=~/repos/vcpkg/installed/x64-linux +make -C build_otel -j$(($(nproc)/2)) +``` + ### File Encoding - CRITICAL **Proper workflow for editing KOI8-R files:** diff --git a/CMakeLists.txt b/CMakeLists.txt index 4794ba42d..ee271a607 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,6 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(SOURCES src/engine/structs/blocking_queue.cpp - src/engine/db/influxdb.cpp src/engine/core/heartbeat.cpp src/engine/core/heartbeat_commands.cpp src/gameplay/abilities/abilities_rollsystem.cpp @@ -98,6 +97,8 @@ set(SOURCES src/utils/levenshtein.cpp src/gameplay/mechanics/liquid.cpp src/utils/logger.cpp + src/utils/logging/file_log_sender.cpp + src/utils/logging/log_manager.cpp src/gameplay/magic/magic.cpp src/gameplay/magic/magic_items.cpp src/gameplay/magic/magic_rooms.cpp @@ -158,6 +159,8 @@ set(SOURCES src/engine/structs/flags.hpp src/utils/id_converter.cpp src/utils/utils_time.cpp + src/utils/tracing/trace_manager.cpp + src/engine/observability/otel_trace_sender.cpp src/utils/thread_pool.cpp src/gameplay/mechanics/title.cpp src/gameplay/statistics/top.cpp @@ -461,7 +464,12 @@ set(SOURCES src/engine/db/player_index.cpp src/gameplay/skills/addshot.cpp src/gameplay/classes/mob_classes_info.cpp - src/gameplay/classes/recalc_mob_params_by_vnum.cpp) + src/gameplay/classes/recalc_mob_params_by_vnum.cpp + src/engine/observability/otel_provider.cpp + src/engine/observability/otel_traces.cpp + src/engine/observability/otel_metrics.cpp + src/engine/observability/otel_helpers.cpp + src/engine/observability/otel_log_sender.cpp) @@ -511,7 +519,6 @@ set(HEADERS src/administration/accounts.h src/engine/core/action_targeting.h src/engine/structs/blocking_queue.h - src/engine/db/influxdb.h src/engine/core/heartbeat_commands.h src/gameplay/mechanics/weather.h 
src/gameplay/core/game_limits.h @@ -605,6 +612,9 @@ set(HEADERS src/utils/levenshtein.h src/gameplay/mechanics/liquid.h src/utils/logger.h + src/utils/logging/log_sender.h + src/utils/logging/file_log_sender.h + src/utils/logging/log_manager.h src/gameplay/magic/magic.h src/gameplay/magic/magic_items.h src/gameplay/magic/magic_rooms.h @@ -662,6 +672,10 @@ set(HEADERS src/engine/core/sysdep.h src/engine/network/telnet.h src/utils/utils_time.h + src/utils/tracing/trace_sender.h + src/utils/tracing/noop_trace_sender.h + src/utils/tracing/trace_manager.h + src/engine/observability/otel_trace_sender.h src/gameplay/mechanics/title.h src/gameplay/statistics/top.h src/utils/utils.h @@ -898,6 +912,12 @@ set(HEADERS src/gameplay/mechanics/tutelar.h src/gameplay/skills/addshot.h src/engine/db/player_index.h + src/engine/observability/otel_provider.h + src/engine/observability/otel_traces.h + src/engine/observability/otel_metrics.h + src/engine/observability/otel_helpers.h + src/engine/observability/otel_log_sender.h + src/utils/logging/log_sender.h src/gameplay/classes/recalc_mob_params_by_vnum.h) # Build types @@ -923,6 +943,10 @@ set(CMAKE_EXE_LINKER_FLAGS_RELEASE "") set(CMAKE_EXE_LINKER_FLAGS_TEST "") set(CMAKE_EXE_LINKER_FLAGS_FASTTEST "") +if (UNIX AND NOT APPLE AND NOT CYGWIN) + add_link_options(-fuse-ld=gold) +endif () + ## Look for required components # Python # You can specify preferred Python version using cmake command line parameter. 
Like this: @@ -1204,6 +1228,30 @@ else () message(STATUS "SQLite is turned off.") endif () +# OpenTelemetry support (the option is declared here, before its first use below) +option(WITH_OTEL "Enable OpenTelemetry integration" OFF) +if (WITH_OTEL) + message(STATUS "OpenTelemetry integration: ENABLED") + + # Find OpenTelemetry (vcpkg toolchain or an install located via CMAKE_PREFIX_PATH) + find_package(opentelemetry-cpp CONFIG REQUIRED) + + # Define WITH_OTEL for preprocessor + add_definitions(-DWITH_OTEL) + + # Link OpenTelemetry libraries + target_link_libraries(circle.library + opentelemetry-cpp::api + opentelemetry-cpp::sdk + opentelemetry-cpp::ext + opentelemetry-cpp::otlp_http_exporter + opentelemetry-cpp::otlp_http_metric_exporter + opentelemetry-cpp::otlp_http_log_record_exporter + opentelemetry-cpp::resources + ) +else() + message(STATUS "OpenTelemetry integration: DISABLED") +endif() # YAML support if (HAVE_YAML) # Try to find yaml-cpp via CMake config first @@ -1330,7 +1378,7 @@ if (UNIX AND NOT CYGWIN) set(DEFAULT_WITH_ASAN YES) else () set(DEFAULT_WITH_ASAN NO) -endif () +endif() option(WITH_ASAN "Compile with ASAN" ${DEFAULT_WITH_ASAN}) if (WITH_ASAN) @@ -1450,6 +1498,8 @@ if (BUILD_TESTS) add_subdirectory(tests) endif () + +# vim: set ts=4 sw=4 ai tw=0 noet syntax=cmake : # ============================================================================= # Data directories setup for running server from build directory diff --git a/lib/misc/configuration.xml b/lib/misc/configuration.xml index af1d6489d..15cd05434 100644 --- a/lib/misc/configuration.xml +++ b/lib/misc/configuration.xml @@ -114,4 +114,40 @@ --> + + + true + + + http://localhost:4318/v1/metrics + + + http://localhost:4318/v1/traces + + + http://localhost:4318/v1/logs + + + + bylins-${host}-${port} + 1.0.0 + + + duplicate + + \ No newline at end of file diff --git a/src/engine/core/comm.cpp b/src/engine/core/comm.cpp index 58e407408..f2ac1962b 100644 --- a/src/engine/core/comm.cpp +++ b/src/engine/core/comm.cpp @@ -37,6 +37,8 @@ #include "engine/db/world_characters.h" #include
"engine/entities/entities_constants.h" #include "administration/shutdown_parameters.h" +#include "engine/observability/otel_provider.h" +#include "utils/timestamp.h" #include "external_trigger.h" #include "handler.h" #include "gameplay/clans/house.h" @@ -704,10 +706,10 @@ int main_function(int argc, char **argv) { * Moved here to distinguish command line options and to show up * in the log if stderr is redirected to a file. */ - printf("%s\r\n", circlemud_version); - printf("%s\r\n", DG_SCRIPT_VERSION); + printf("[%s] %s\r\n", utils::NowTs().c_str(), circlemud_version); + printf("[%s] %s\r\n", utils::NowTs().c_str(), DG_SCRIPT_VERSION); if (getcwd(cwd, sizeof(cwd))) {}; - printf("Current directory '%s' using '%s' as data directory.\r\n", cwd, dir); + printf("[%s] Current directory '%s' using '%s' as data directory.\r\n", utils::NowTs().c_str(), cwd, dir); { std::string config_path = std::string(dir) + "/misc/configuration.xml"; runtime_config.load(config_path.c_str()); @@ -720,18 +722,19 @@ int main_function(int argc, char **argv) { // directories are created in the working directory (next to the binary), // not inside the data directory. 
runtime_config.setup_logs(); + runtime_config.setup_telemetry(port); logfile = runtime_config.logs(SYSLOG).handle(); if (chdir(dir) < 0) { perror("\r\nSYSERR: Fatal error changing to data directory"); exit(1); } log_code_date(); - printf("Code version %s, revision: %s\r\n", build_datetime, revision); + printf("[%s] Code version %s, revision: %s\r\n", utils::NowTs().c_str(), build_datetime, revision); if (scheck) { GameLoader::BootWorld(); printf("Done."); } else { - printf("Running game on port %d.\r\n", port); + printf("[%s] Running game on port %d.\r\n", utils::NowTs().c_str(), port); // стль и буст юзаются уже немало где, а про их экспешены никто не думает // пока хотя бы стльные ловить и просто логировать факт того, что мы вышли @@ -759,6 +762,11 @@ void stop_game(ush_int port) { log("Opening mother connection."); mother_desc = init_socket(port); + if (mother_desc < 0) { + log("SYSERR: Failed to bind to port %d. Server cannot start.", port); + log("Please check if another instance is running or if you have permission to use this port."); + exit(1); + } #ifdef ENABLE_ADMIN_API if (runtime_config.admin_api_enabled()) { @@ -826,6 +834,9 @@ void stop_game(ush_int port) { game_loop(mother_desc); #endif + // Shutdown OTEL providers to flush remaining telemetry + observability::OtelProvider::Instance().Shutdown(); + FlushPlayerIndex(); // храны надо сейвить до Crash_save_all_rent(), иначе будем брать бабло у чара при записи @@ -1009,9 +1020,9 @@ socket_t init_socket(ush_int port) { sa.sin_addr = *(get_bind_addr()); if (bind(s, (struct sockaddr *) &sa, sizeof(sa)) < 0) { - perror("SYSERR: bind"); + log("SYSERR: bind() failed - port %d is already in use or permission denied", port); CLOSE_SOCKET(s); - exit(1); + return -1; } nonblock(s); listen(s, 5); @@ -1393,7 +1404,7 @@ void game_loop(int epoll, socket_t mother_desc) void game_loop(socket_t mother_desc) #endif { - printf("Game started.\n"); + printf("[%s] Game started.\n", utils::NowTs().c_str()); #ifdef HAS_EPOLL 
struct epoll_event *events; @@ -2178,8 +2189,7 @@ RETSIGTYPE checkpointing(int/* sig*/) { } RETSIGTYPE hupsig(int/* sig*/) { - log("SYSERR: Received SIGHUP, SIGINT, or SIGTERM. Shutting down..."); - exit(1); // perhaps something more elegant should substituted + shutdown_parameters.shutdown_now(); } #endif // CIRCLE_UNIX diff --git a/src/engine/core/config.cpp b/src/engine/core/config.cpp index d9a896ad5..f5834210a 100644 --- a/src/engine/core/config.cpp +++ b/src/engine/core/config.cpp @@ -22,7 +22,29 @@ #include "engine/structs/meta_enum.h" #if CIRCLE_UNIX +#include +#include + +namespace { +inline std::string NowTs() { + auto now = std::chrono::system_clock::now(); + auto t = std::chrono::system_clock::to_time_t(now); + auto ms = std::chrono::duration_cast(now.time_since_epoch()) % 1000; + struct tm tm_buf; + localtime_r(&t, &tm_buf); + char result[32]; + std::snprintf(result, sizeof(result), "%04d-%02d-%02d %02d:%02d:%02d.%03lld", + tm_buf.tm_year + 1900, tm_buf.tm_mon + 1, tm_buf.tm_mday, + tm_buf.tm_hour, tm_buf.tm_min, tm_buf.tm_sec, (long long)ms.count()); + return result; +} +} // anonymous namespace +#ifdef WITH_OTEL +#include "engine/observability/otel_provider.h" +#endif +using ETelemetryLogMode = RuntimeConfiguration::ETelemetryLogMode; #include +#include #endif #include @@ -481,20 +503,20 @@ void RuntimeConfiguration::setup_logs() { if (logs(stream).filename().empty()) { handle(stream, stderr); - puts("Using file descriptor for logging."); + printf("[%s] Using file descriptor for logging.\n", NowTs().c_str()); continue; } if (!runtime_config.open_log(stream)) //s_fp { - puts("SYSERR: Couldn't open anything to log to, giving up."); + printf("[%s] SYSERR: Couldn't open anything to log to, giving up.\n", NowTs().c_str()); exit(1); } } setup_converters(); - printf("Bylins server will use %schronous output into syslog file.\n", + printf("[%s] Bylins server will use %schronous output into syslog file.\n", NowTs().c_str(), output_thread() ? 
"asyn" : "syn"); } @@ -720,6 +742,13 @@ RuntimeConfiguration::RuntimeConfiguration() : m_msdp_debug(false), m_changelog_file_name(Boards::constants::CHANGELOG_FILE_NAME), m_changelog_format(Boards::constants::loader_formats::GIT), + m_telemetry_enabled(false), + m_telemetry_metrics_endpoint("http://localhost:4318/v1/metrics"), + m_telemetry_traces_endpoint("http://localhost:4318/v1/traces"), + m_telemetry_logs_endpoint("http://localhost:4318/v1/logs"), + m_telemetry_service_name("bylins-mud"), + m_telemetry_service_version("1.0.0"), + m_telemetry_log_mode(ETelemetryLogMode::kFileOnly), m_yaml_threads(0) { } @@ -740,6 +769,8 @@ void RuntimeConfiguration::load_from_file(const char *filename) { load_boards_configuration(&root); load_external_triggers(&root); load_statistics_configuration(&root); + load_telemetry_configuration_impl(&root); + load_telemetry_configuration(&root); load_world_loader_configuration(&root); #ifdef ENABLE_ADMIN_API load_admin_api_configuration(&root); @@ -782,8 +813,8 @@ bool CLogInfo::open() { setvbuf(handle, m_buffer, buffered(), BUFFER_SIZE); m_handle = handle; - printf("Using log file '%s' with %s buffering. Opening in %s mode.\n", - filename().c_str(), + printf("[%s] Using log file '%s' with %s buffering. 
Opening in %s mode.\n", + NowTs().c_str(), filename().c_str(), NAME_BY_ITEM(buffered()).c_str(), NAME_BY_ITEM(this->mode()).c_str()); return true; @@ -796,6 +827,111 @@ bool CLogInfo::open() { RuntimeConfiguration runtime_config; +void RuntimeConfiguration::load_telemetry_configuration_impl(const pugi::xml_node *root) { + const auto telemetry = root->child("telemetry"); + if (!telemetry) { + return; + } + + const auto enabled = telemetry.child("enabled"); + if (enabled) { + const std::string value = enabled.child_value(); + m_telemetry_enabled = (value == "true" || value == "1"); + } + + const auto otlp = telemetry.child("otlp"); + if (otlp) { + const auto metrics = otlp.child("metrics"); + if (metrics) { + const auto endpoint = metrics.child("endpoint"); + if (endpoint) { + m_telemetry_metrics_endpoint = endpoint.child_value(); + } + } + + const auto traces = otlp.child("traces"); + if (traces) { + const auto endpoint = traces.child("endpoint"); + if (endpoint) { + m_telemetry_traces_endpoint = endpoint.child_value(); + } + } + + const auto logs_otlp = otlp.child("logs_otlp"); + if (logs_otlp) { + const auto endpoint = logs_otlp.child("endpoint"); + if (endpoint) { + m_telemetry_logs_endpoint = endpoint.child_value(); + } + } + } + + const auto service = telemetry.child("service"); + if (service) { + const auto name = service.child("name"); + if (name) { + m_telemetry_service_name = name.child_value(); + } + const auto version = service.child("version"); + if (version) { + m_telemetry_service_version = version.child_value(); + } + } + + const auto logs = telemetry.child("logs"); + if (logs) { + const auto mode = logs.child("mode"); + if (mode) { + const std::string mode_str = mode.child_value(); + if (mode_str == "file-only") { + m_telemetry_log_mode = ETelemetryLogMode::kFileOnly; + } else if (mode_str == "otel-only") { + m_telemetry_log_mode = ETelemetryLogMode::kOtelOnly; + } else if (mode_str == "duplicate") { + m_telemetry_log_mode = 
ETelemetryLogMode::kDuplicate; + } + } + } +} + +void RuntimeConfiguration::load_telemetry_configuration(const pugi::xml_node *) { + // OtelProvider is initialized later via setup_telemetry(port) + // once the game port is known from command-line arguments. +} + +void RuntimeConfiguration::setup_telemetry(int port) { +#ifdef WITH_OTEL + if (!m_telemetry_enabled) { + return; + } + + // Interpolate variables in service name: ${port}, ${host}, ${version} + char hostname[256] = "unknown"; + gethostname(hostname, sizeof(hostname) - 1); // leave the zero-initialised last byte: a truncated name need not be NUL-terminated + + auto replace_all = [](std::string str, const std::string &var, const std::string &val) { + // Resume after each inserted value so a value that contains its own placeholder cannot loop forever. + for (auto pos = str.find(var); pos != std::string::npos; pos = str.find(var, pos + val.size())) { + str.replace(pos, var.size(), val); + } + return str; + }; + + std::string name = m_telemetry_service_name; + name = replace_all(name, "${port}", std::to_string(port)); + name = replace_all(name, "${host}", hostname); + name = replace_all(name, "${version}", m_telemetry_service_version); + + observability::OtelProvider::Instance().Initialize( + m_telemetry_metrics_endpoint, + m_telemetry_traces_endpoint, + m_telemetry_logs_endpoint, + name, + m_telemetry_service_version); +#else + (void)port; +#endif +} // vim: ts=4 sw=4 tw=0 noet syntax=cpp : #ifdef ENABLE_ADMIN_API diff --git a/src/engine/core/config.h b/src/engine/core/config.h index fab67821b..b93d19d58 100644 --- a/src/engine/core/config.h +++ b/src/engine/core/config.h @@ -161,6 +161,7 @@ class RuntimeConfiguration { auto output_queue_size() const { return m_output_queue_size; } void setup_logs(); + void setup_telemetry(int port); auto syslog_converter() const { return m_syslog_converter; } void enable_logging() { m_logging_enabled = true; } @@ -177,6 +178,16 @@ class RuntimeConfiguration { const auto &statistics() const { return m_statistics; } + bool telemetry_enabled() const { return m_telemetry_enabled; } + const std::string &telemetry_metrics_endpoint() const { return m_telemetry_metrics_endpoint; } +
const std::string &telemetry_traces_endpoint() const { return m_telemetry_traces_endpoint; } + const std::string &telemetry_logs_endpoint() const { return m_telemetry_logs_endpoint; } + const std::string &telemetry_service_name() const { return m_telemetry_service_name; } + const std::string &telemetry_service_version() const { return m_telemetry_service_version; } + enum class ETelemetryLogMode { kFileOnly, kOtelOnly, kDuplicate, kUndefined }; + ETelemetryLogMode telemetry_log_mode() const { return m_telemetry_log_mode; } + + void load_telemetry_configuration(const pugi::xml_node *root); size_t yaml_threads() const { return m_yaml_threads; } #ifdef ENABLE_ADMIN_API @@ -203,6 +214,7 @@ class RuntimeConfiguration { void load_boards_configuration(const pugi::xml_node *root); void load_external_triggers(const pugi::xml_node *root); void load_statistics_configuration(const pugi::xml_node *root); + void load_telemetry_configuration_impl(const pugi::xml_node *root); void load_world_loader_configuration(const pugi::xml_node *root); #ifdef ENABLE_ADMIN_API void load_admin_api_configuration(const pugi::xml_node *root); @@ -222,6 +234,14 @@ class RuntimeConfiguration { std::string m_external_reboot_trigger_file_name; StatisticsConfiguration m_statistics; + + bool m_telemetry_enabled; + std::string m_telemetry_metrics_endpoint; + std::string m_telemetry_traces_endpoint; + std::string m_telemetry_logs_endpoint; + std::string m_telemetry_service_name; + std::string m_telemetry_service_version; + ETelemetryLogMode m_telemetry_log_mode; size_t m_yaml_threads; diff --git a/src/engine/core/heartbeat.cpp b/src/engine/core/heartbeat.cpp index ad781dbf1..4efc07ce2 100644 --- a/src/engine/core/heartbeat.cpp +++ b/src/engine/core/heartbeat.cpp @@ -26,6 +26,9 @@ #include "gameplay/mechanics/corpse.h" #include "engine/db/global_objects.h" #include "engine/ui/cmd_god/do_set_all.h" +#include "engine/observability/otel_traces.h" +#include "engine/observability/otel_metrics.h" +#include 
"engine/observability/otel_helpers.h" #include "gameplay/statistics/money_drop.h" #include "gameplay/mechanics/weather.h" #include "utils/utils_time.h" @@ -33,6 +36,7 @@ #include "gameplay/communication/check_invoice.h" #include "gameplay/mechanics/depot.h" #include "gameplay/statistics/spell_usage.h" +#include "utils/tracing/trace_manager.h" #if defined WITH_SCRIPTING #include "scripting.hpp" @@ -546,9 +550,36 @@ Heartbeat::Heartbeat() : m_global_pulse_number(0) { } +void Heartbeat::record_metrics(const pulse_label_t &label, double execution_time_sec, int missed_pulses) { + for (const auto& [step_index, step_time] : label) { + if (step_index < m_steps.size()) { + std::map step_attrs; + step_attrs["step"] = m_steps[step_index].name(); + observability::OtelMetrics::RecordHistogram("heartbeat.step.duration", step_time, step_attrs); + } + } + + std::map pulse_attrs; + pulse_attrs["pulse_mod"] = std::to_string(pulse_number() % 25); + observability::OtelMetrics::RecordHistogram("heartbeat.total.duration", execution_time_sec, pulse_attrs); + + if (missed_pulses > 0) { + observability::OtelMetrics::RecordCounter("heartbeat.missed_pulses_total", missed_pulses); + } +} + void Heartbeat::operator()(const int missed_pulses) { pulse_label_t label; + // Capture current pulse numbers BEFORE advance + const auto current_heartbeat_number = global_pulse_number(); + const auto current_pulse_number = pulse_number(); + + // Create trace span for this pulse + char span_name[64]; + snprintf(span_name, sizeof(span_name), "Heartbeat #%lu pulse #%d", current_heartbeat_number, current_pulse_number); + auto pulse_span = tracing::TraceManager::Instance().StartSpan(span_name); + utils::CExecutionTimer timer; pulse(missed_pulses, label); const auto execution_time = timer.delta(); @@ -561,12 +592,15 @@ void Heartbeat::operator()(const int missed_pulses) { mudlog(tmpbuf, LGH, kLvlImmortal, SYSLOG, true); } m_measurements.add(label, pulse_number(), execution_time.count()); - if 
(GlobalObjects::stats_sender().ready()) { - influxdb::Record record("heartbeat"); - record.add_tag("pulse", pulse_number()); - record.add_field("duration", execution_time.count()); - GlobalObjects::stats_sender().send(record); - } + record_metrics(label, execution_time.count(), missed_pulses); + + // Close parent span + pulse_span->SetAttribute("heartbeat_number", static_cast(current_heartbeat_number)); + pulse_span->SetAttribute("pulse_number", static_cast(current_pulse_number)); + pulse_span->SetAttribute("execution_time_seconds", execution_time.count()); + pulse_span->SetAttribute("missed_pulses", static_cast(missed_pulses)); + pulse_span->SetAttribute("steps_executed", static_cast(label.size())); + pulse_span->End(); } long long Heartbeat::period() const { @@ -618,7 +652,11 @@ void Heartbeat::pulse(const int missed_pulses, pulse_label_t &label) { if (0 == (m_pulse_number + step.offset()) % step.modulo()) { utils::CExecutionTimer timer; - + + // Create child span for this step + auto step_span = tracing::TraceManager::Instance().StartSpan(step.name()); + step_span->SetAttribute("step_index", static_cast(i)); + step_span->SetAttribute("step_modulo", static_cast(step.modulo())); step.action()->perform(pulse_number(), missed_pulses); const auto execution_time = timer.delta().count(); if (step.modulo() >= kSecsPerMudHour * kPassesPerSec) { @@ -630,6 +668,8 @@ void Heartbeat::pulse(const int missed_pulses, pulse_label_t &label) { log("HeartBeat memory resize, step:(%s), memory used: virt (%d kB) phys (%d kB)", step.name().c_str(), vmem_used, pmem_used); // mudlog(buf, CMP, kLvlGreatGod, SYSLOG, true); } + step_span->SetAttribute("execution_time_seconds", execution_time); + step_span->End(); label.emplace(i, execution_time); m_executed_steps.insert(i); step.add_measurement(i, pulse_number(), execution_time); diff --git a/src/engine/core/heartbeat.h b/src/engine/core/heartbeat.h index 34543e184..e5a12e760 100644 --- a/src/engine/core/heartbeat.h +++ 
b/src/engine/core/heartbeat.h @@ -206,6 +206,7 @@ class Heartbeat { void advance_pulse_numbers(); void pulse(const int missed_pulses, pulse_label_t &label); + void record_metrics(const pulse_label_t &label, double execution_time_sec, int missed_pulses); steps_t m_steps; pulse_t m_pulse_number; diff --git a/src/engine/db/db.cpp b/src/engine/db/db.cpp index f752d3022..368452f65 100644 --- a/src/engine/db/db.cpp +++ b/src/engine/db/db.cpp @@ -37,12 +37,17 @@ #include "gameplay/mechanics/noob.h" #include "obj_prototypes.h" #include "engine/olc/olc.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" #include "gameplay/communication/offtop.h" #include "gameplay/communication/parcel.h" #include "administration/privilege.h" #include "gameplay/mechanics/sets_drop.h" #include "gameplay/mechanics/stable_objs.h" #include "gameplay/economics/shop_ext.h" +#include "engine/observability/otel_metrics.h" +#include "engine/observability/otel_traces.h" #include "gameplay/mechanics/stuff.h" #include "gameplay/mechanics/title.h" #include "gameplay/statistics/top.h" @@ -1932,6 +1937,11 @@ void ZoneUpdate() { struct reset_q_element *update_u, *temp; static int timer = 0; utils::CExecutionTimer timer_count; + // OpenTelemetry: Track zone updates + auto zone_span = tracing::TraceManager::Instance().StartSpan("Zone Update"); + observability::ScopedMetric zone_metric("zone.update.duration"); + + int zones_reset_count = 0; if (((++timer * kPulseZone) / kPassesPerSec) >= 60) // one minute has passed { /* @@ -1992,6 +2002,14 @@ void ZoneUpdate() { ss << zone_table[it].vnum << " "; if (zone_table[it].vnum < dungeons::kZoneStartDungeons) { ResetZone(it); + zones_reset_count++; + + // OpenTelemetry: Record zone reset + std::map attrs; + attrs["zone_vnum"] = std::to_string(zone_table[it].vnum); + attrs["reset_mode"] = std::to_string(zone_table[it].reset_mode); + + 
observability::OtelMetrics::RecordCounter("zone.reset.total", 1, attrs); } else { log("Закрываю брошенный dungeon %d", it); dungeons::DungeonReset(it); @@ -2017,6 +2035,9 @@ void ZoneUpdate() { if (k >= kZonesReset) break; } + + // OpenTelemetry: Record total zones reset + zone_span->SetAttribute("zones_reset_count", static_cast(zones_reset_count)); } bool CanBeReset(ZoneRnum zone) { @@ -2307,18 +2328,13 @@ class ZoneReset { void ZoneReset::Reset() { utils::CExecutionTimer timer; - if (GlobalObjects::stats_sender().ready()) { - ResetZoneEssential(); - const auto execution_time = timer.delta(); + ResetZoneEssential(); + const auto execution_time = timer.delta(); - influxdb::Record record("zone_reset"); - record.add_tag("pulse", GlobalObjects::heartbeat().pulse_number()); - record.add_tag("zone", zone_table[m_zone_rnum].vnum); - record.add_field("duration", execution_time.count()); - GlobalObjects::stats_sender().send(record); - } else { - ResetZoneEssential(); - } + std::map attrs; + attrs["pulse"] = std::to_string(GlobalObjects::heartbeat().pulse_number()); + attrs["zone"] = std::to_string(zone_table[m_zone_rnum].vnum); + observability::OtelMetrics::RecordHistogram("zone.reset.duration", execution_time.count(), attrs); } bool ZoneReset::HandleZoneCmdQ(const MobRnum rnum) const { @@ -2345,18 +2361,11 @@ bool ZoneReset::HandleZoneCmdQ(const MobRnum rnum) const { const auto execution_time = overall_timer.delta(); - if (GlobalObjects::stats_sender().ready()) { - influxdb::Record record("Q_command"); - - record.add_tag("pulse", GlobalObjects::heartbeat().pulse_number()); - record.add_tag("zone", zone_table[m_zone_rnum].vnum); - record.add_tag("rnum", rnum); - - record.add_field("duration", execution_time.count()); - record.add_field("extract", extract_time.count()); - record.add_field("get_mobs", get_mobs_time.count()); - GlobalObjects::stats_sender().send(record); - } + std::map attrs; + attrs["pulse"] = std::to_string(GlobalObjects::heartbeat().pulse_number()); + 
attrs["zone"] = std::to_string(zone_table[m_zone_rnum].vnum); + attrs["rnum"] = std::to_string(rnum); + observability::OtelMetrics::RecordHistogram("zone.command.Q.duration", execution_time.count(), attrs); return extracted; } @@ -2912,6 +2921,12 @@ void SetGodSkills(CharData *ch) { // по умолчанию reboot = 0 (пользуется только при ребуте) int LoadPlayerCharacter(const char *name, CharData *char_element, int load_flags) { - const auto player_i = char_element->load_char_ascii(name, load_flags); + // OpenTelemetry: create the span and duration metric before the load call so the load itself is inside them + auto load_span = tracing::TraceManager::Instance().StartSpan("Load Player"); + load_span->SetAttribute("character_name", std::string(name)); + + observability::ScopedMetric load_metric("player.load.duration"); + + const auto player_i = char_element->load_char_ascii(name, load_flags); if (player_i > -1) { char_element->set_pfilepos(player_i); } diff --git a/src/engine/db/global_objects.cpp b/src/engine/db/global_objects.cpp index 8f95fdc06..1add04bf3 100644 --- a/src/engine/db/global_objects.cpp +++ b/src/engine/db/global_objects.cpp @@ -3,6 +3,7 @@ #include #include "administration/ban.h" +#include "utils/logging/log_manager.h" namespace { // This struct defines order of creating and destroying global objects @@ -12,6 +13,9 @@ struct GlobalObjectsStorage { /// This object should be destroyed last because it serves all output operations. So I define it first. std::shared_ptr output_thread; + /// Declared before other game objects so it is destroyed after them (C++ reverse-init order).
+ std::unique_ptr log_manager; + celebrates::CelebrateList mono_celebrates; celebrates::CelebrateList poly_celebrates; celebrates::CelebrateList real_celebrates; @@ -46,7 +50,6 @@ struct GlobalObjectsStorage { InspectRequestDeque inspect_request_deque; BanList *ban; Heartbeat heartbeat; - std::shared_ptr stats_sender; ZoneTable zone_table; DailyQuest::DailyQuestMap daily_quests; Strengthening strengthening; @@ -55,6 +58,7 @@ struct GlobalObjectsStorage { }; GlobalObjectsStorage::GlobalObjectsStorage() : + log_manager(std::make_unique()), ban(nullptr) { } @@ -170,13 +174,8 @@ Heartbeat &GlobalObjects::heartbeat() { return global_objects().heartbeat; } -influxdb::Sender &GlobalObjects::stats_sender() { - if (!global_objects().stats_sender) { - global_objects().stats_sender = std::make_shared( - runtime_config.statistics().host(), runtime_config.statistics().port()); - } - - return *global_objects().stats_sender; +observability::OtelProvider &GlobalObjects::otel_provider() { + return observability::OtelProvider::Instance(); } OutputThread &GlobalObjects::output_thread() { @@ -251,6 +250,9 @@ obj2triggers_t &GlobalObjects::obj_triggers() { return global_objects().obj2triggers; } +logging::LogManager &GlobalObjects::log_manager() { + return *global_objects().log_manager; +} RoomDescriptions &GlobalObjects::descriptions() { return global_objects().room_descriptions; } diff --git a/src/engine/db/global_objects.h b/src/engine/db/global_objects.h index 37dafacb2..d105ff4ff 100644 --- a/src/engine/db/global_objects.h +++ b/src/engine/db/global_objects.h @@ -15,9 +15,11 @@ #include "engine/ui/cmd_god/do_inspect.h" #include "engine/scripting/dg_event.h" #include "gameplay/economics/shops_implementation.h" +#include "engine/observability/otel_provider.h" +#include "utils/logging/log_manager.h" #include "world_objects.h" #include "world_characters.h" -#include "influxdb.h" +#include "engine/observability/otel_provider.h" #include "engine/entities/zone.h" #include 
"gameplay/quests/daily_quest.h" #include "gameplay/skills/skills_info.h" @@ -70,8 +72,9 @@ class GlobalObjects { static SetAllInspReqListType &setall_inspect_list(); static BanList *&ban(); static Heartbeat &heartbeat(); - static influxdb::Sender &stats_sender(); + static observability::OtelProvider &otel_provider(); static OutputThread &output_thread(); + static logging::LogManager &log_manager(); static ZoneTable &zone_table(); static RunestoneRoster &Runestones(); diff --git a/src/engine/db/influxdb.cpp b/src/engine/db/influxdb.cpp deleted file mode 100644 index 3ca21092b..000000000 --- a/src/engine/db/influxdb.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include "influxdb.h" - -#include "utils/logger.h" - -#include - -#ifndef WIN32 -#include -#include -#include -#include - -constexpr int INVALID_SOCKET = -1; -constexpr int SOCKET_ERROR = -1; -#endif - -namespace influxdb { -class SenderImpl { - public: - SenderImpl(const std::string &host, const unsigned short port); - - bool ready() const { return !m_host.empty(); } - bool send(const std::string &data); - - private: - std::string m_host; - int m_port; - - socket_t m_socket; - struct sockaddr_in m_addr; -}; - -SenderImpl::SenderImpl(const std::string &host, const unsigned short port) : - m_host(host), - m_port(port), - m_socket(INVALID_SOCKET) { - memset(&m_addr, 0, sizeof(m_addr)); - - if (m_host.empty()) { - return; - } - - m_addr.sin_family = AF_INET; - m_addr.sin_port = htons(port); - - struct hostent *hp = gethostbyname(m_host.c_str()); - if (hp) { - in_addr *server_address = reinterpret_cast(hp->h_addr_list[0]); - log("Statistics server has been resolved to '%s'.\n", - inet_ntoa(*server_address)); - memcpy(&m_addr.sin_addr, server_address, hp->h_length); - } else { - log("SYSERR: failed to resolve server name '%s'. Turning sending statistics off.\n", m_host.c_str()); - m_host.clear(); - } - - m_socket = socket(AF_INET, SOCK_DGRAM, 0); - if (INVALID_SOCKET == m_socket) { - log("SYSERR: Couldn't create UDP socket. 
Turning sending statistics off.\n"); - m_host.clear(); - } -} - -bool SenderImpl::send(const std::string &data) { - if (INVALID_SOCKET != m_socket) { - const int result = sendto(m_socket, data.c_str(), static_cast(data.size()), - 0, reinterpret_cast(&m_addr), sizeof(m_addr)); - - return SOCKET_ERROR != result; - } - - return false; -} - -Sender::Sender(const std::string &host, const unsigned short port) : - m_implementation(new SenderImpl(host, port)) { -} - -Sender::~Sender() { - delete m_implementation; -} - -bool Sender::ready() const { - return m_implementation->ready(); -} - -bool Sender::send(const Record &record) const { - std::string data; - if (!record.get_data(data)) { - return false; - } - - return m_implementation->send(data); -} - -bool Record::get_data(std::string &data) const { - if (m_fields.empty()) { - log("SYSERR: Attempt to send statistics record without any field.\n"); - return false; - } - - std::stringstream ss; - ss << m_measurement; - for (const auto &tag : m_tags) { - ss << "," << tag; - } - ss << " "; - - bool first = true; - for (const auto &field : m_fields) { - ss << (first ? 
"" : ",") << field; - first = false; - } - - using namespace std::chrono; - nanoseconds timestamp = duration_cast(system_clock::now().time_since_epoch()); - ss << " " << timestamp.count(); - - data = ss.str(); - return true; -} -} - -// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/db/influxdb.h b/src/engine/db/influxdb.h deleted file mode 100644 index ae45411eb..000000000 --- a/src/engine/db/influxdb.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef __INFLUX_HPP__ -#define __INFLUX_HPP__ - -#include -#include -#include - -namespace influxdb { -class Record { - public: - Record(const std::string &measurement) : m_measurement(measurement) {} - - template - Record &add_tag(const std::string &name, const T &value) { return add_to_list(m_tags, name, value); } - - template - Record &add_field(const std::string &name, const T &value) { return add_to_list(m_fields, name, value); } - - bool get_data(std::string &data) const; - - private: - using strings_list_t = std::list; - - template - Record &add_to_list(strings_list_t &list, const std::string &name, const T &value); - - std::string m_measurement; - strings_list_t m_tags; - strings_list_t m_fields; -}; - -template -Record &Record::add_to_list(strings_list_t &list, const std::string &name, const T &value) { - std::stringstream ss; - ss << name << "=" << value; - list.push_back(ss.str()); - - return *this; -} - -class Sender { - public: - Sender(const std::string &host, const unsigned short port); - ~Sender(); - - bool ready() const; - bool send(const Record &record) const; - - private: - class SenderImpl *m_implementation; -}; -} - -#endif // __INFLUX_HPP__ - -// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/db/obj_save.cpp b/src/engine/db/obj_save.cpp index 8043ae1b6..bc4c28217 100644 --- a/src/engine/db/obj_save.cpp +++ b/src/engine/db/obj_save.cpp @@ -27,6 +27,9 @@ #include "player_index.h" #include +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" 
+#include "utils/tracing/trace_manager.h" #include const int LOC_INVENTORY = 0; @@ -2435,6 +2438,12 @@ int receptionist(CharData *ch, void *me, int cmd, char *argument) { void Crash_frac_save_all(int frac_part) { DescriptorData *d; + // OpenTelemetry: Track fractional save + auto save_span = tracing::TraceManager::Instance().StartSpan("Player Save (Fractional)"); + save_span->SetAttribute("save_type", "frac"); + save_span->SetAttribute("frac_part", static_cast(frac_part)); + + int saved_count = 0; for (d = descriptor_list; d; d = d->next) { if ((d->state == EConState::kPlaying) && !d->character->IsNpc() && GET_ACTIVITY(d->character) == frac_part) { @@ -2449,12 +2458,24 @@ void Crash_frac_save_all(int frac_part) { if (timer1.delta().count() > 0.1) log("Crash_frac_save_all: save_char, timer %f, save player: %s", timer1.delta().count(), d->character->get_name().c_str()); d->character->UnsetFlag(EPlrFlag::kCrashSave); + saved_count++; + + // OpenTelemetry: Record save metrics + std::map attrs; + attrs["save_type"] = "frac"; + attrs["character"] = d->character->get_name(); + + observability::OtelMetrics::RecordHistogram("player.save.duration", timer.delta().count(), attrs); + observability::OtelMetrics::RecordCounter("player.save.total", 1, attrs); } } } void Crash_save_all(void) { DescriptorData *d; + auto save_span = tracing::TraceManager::Instance().StartSpan("Player Save (Full)"); + save_span->SetAttribute("save_type", "full"); + for (d = descriptor_list; d; d = d->next) { if ((d->state == EConState::kPlaying) && d->character->IsFlagged(EPlrFlag::kCrashSave)) { Crash_crashsave(d->character.get()); diff --git a/src/engine/network/admin_api/admin_api_constants.h b/src/engine/network/admin_api/admin_api_constants.h index 49bfa6fa5..50b0dd2c4 100644 --- a/src/engine/network/admin_api/admin_api_constants.h +++ b/src/engine/network/admin_api/admin_api_constants.h @@ -27,7 +27,7 @@ constexpr size_t kMaxLargeBufferSize = 1048576; constexpr int kMaxChunks = 4; // Note: 
Command enum and string conversion functions were removed as unused. -// CommandRegistry uses direct stringБ├▓handler mapping via std::unordered_map. +// CommandRegistry uses direct string->handler mapping via std::unordered_map. } // namespace admin_api diff --git a/src/engine/network/admin_api/command_registry.h b/src/engine/network/admin_api/command_registry.h index d1040ff31..0684ca4aa 100644 --- a/src/engine/network/admin_api/command_registry.h +++ b/src/engine/network/admin_api/command_registry.h @@ -92,7 +92,7 @@ class CommandRegistry private: CommandRegistry() = default; - // Command name Б├▓ handler function + // Command name -> handler function std::unordered_map handlers_; }; diff --git a/src/engine/network/admin_api/json_helpers.h b/src/engine/network/admin_api/json_helpers.h index 062c301e8..c64bcb0c0 100644 --- a/src/engine/network/admin_api/json_helpers.h +++ b/src/engine/network/admin_api/json_helpers.h @@ -182,7 +182,7 @@ inline std::optional ParseNested(const json& j, const char* key) } // ============================================================================ -// String Conversion Helpers (KOI8-R Б├■ UTF-8) +// String Conversion Helpers (KOI8-R <->
UTF-8) // ============================================================================ /** diff --git a/src/engine/observability/otel_helpers.cpp b/src/engine/observability/otel_helpers.cpp new file mode 100644 index 000000000..d8f6857a6 --- /dev/null +++ b/src/engine/observability/otel_helpers.cpp @@ -0,0 +1,80 @@ +#include "otel_helpers.h" +#include + +namespace observability { + +// +// ScopedMetric +// + +ScopedMetric::ScopedMetric(const std::string& name, const std::map& attrs) + : m_name(name) + , m_attrs(attrs) + , m_timer() {} + +ScopedMetric::~ScopedMetric() { + auto duration = m_timer.delta().count(); + OtelMetrics::RecordHistogram(m_name, duration, m_attrs); +} + +double ScopedMetric::elapsed_seconds() const { + return m_timer.delta().count(); +} + +std::string koi8r_to_utf8(const std::string& input) { + if (input.empty()) { + return input; + } + // Fast path: ASCII-only strings need no conversion + bool has_high = false; + for (unsigned char c : input) { + if (c >= 128) { + has_high = true; + break; + } + } + if (!has_high) { + return input; + } + // Cache iconv descriptor globally - game loop is single-threaded + struct IconvHandle { + iconv_t cd; + IconvHandle() : cd(iconv_open("UTF-8", "KOI8-R")) {} + ~IconvHandle() { if (cd != (iconv_t)-1) { iconv_close(cd); } } + }; + static IconvHandle handle; + + if (handle.cd == (iconv_t)-1) { + // iconv unavailable: replace non-ASCII bytes with '?' + std::string safe; + safe.reserve(input.size()); + for (unsigned char c : input) { + safe += (c < 128) ? 
static_cast(c) : '?'; + } + return safe; + } + // Reset shift state from any previous call + iconv(handle.cd, nullptr, nullptr, nullptr, nullptr); + + const size_t out_size = input.size() * 4; + std::string output(out_size, '\0'); + char* in_ptr = const_cast(input.data()); + char* out_ptr = &output[0]; + size_t in_left = input.size(); + size_t out_left = out_size; + if (iconv(handle.cd, &in_ptr, &in_left, &out_ptr, &out_left) == (size_t)-1) { + // Replace non-ASCII bytes with '?' to guarantee valid UTF-8 output + std::string safe; + safe.reserve(input.size()); + for (unsigned char c : input) { + safe += (c < 128) ? static_cast(c) : '?'; + } + return safe; + } + output.resize(out_size - out_left); + return output; +} + +} // namespace observability + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_helpers.h b/src/engine/observability/otel_helpers.h new file mode 100644 index 000000000..dc107bf12 --- /dev/null +++ b/src/engine/observability/otel_helpers.h @@ -0,0 +1,50 @@ +#ifndef BYLINS_OTEL_HELPERS_H +#define BYLINS_OTEL_HELPERS_H + +#include "utils/utils_time.h" +#include "utils/timestamp.h" +#include "otel_metrics.h" +#include +#include +#include + +namespace observability { + +/** + * RAII wrapper for automatic metric timing. + * Records histogram metric with duration on destruction. + * + * Example: + * { + * ScopedMetric metric("operation.duration", {{"type", "combat"}}); + * // ... operation ... + * } // automatically records metric + */ +class ScopedMetric { +public: + ScopedMetric(const std::string& name, const std::map& attrs = {}); + ~ScopedMetric(); + + // Get elapsed time so far (without ending the metric) + double elapsed_seconds() const; + +private: + std::string m_name; + std::map m_attrs; + utils::CExecutionTimer m_timer; +}; + +/** + * Convert string from KOI8-R to UTF-8. + * Safe to call on ASCII strings (pass through unchanged). + * Used to sanitize all strings before sending to OTEL (protobuf requires UTF-8). 
+ */ +std::string koi8r_to_utf8(const std::string& input); + +using utils::NowTs; + +} // namespace observability + +#endif // BYLINS_OTEL_HELPERS_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_log_sender.cpp b/src/engine/observability/otel_log_sender.cpp new file mode 100644 index 000000000..2d22b474a --- /dev/null +++ b/src/engine/observability/otel_log_sender.cpp @@ -0,0 +1,154 @@ +#include "otel_log_sender.h" + +#ifdef WITH_OTEL + +#include + +#include "otel_helpers.h" +#include "otel_provider.h" +#include "opentelemetry/logs/provider.h" +#include "opentelemetry/logs/logger.h" +#include "opentelemetry/trace/provider.h" +#include "opentelemetry/trace/span.h" +#include "opentelemetry/trace/span_context.h" +#include "opentelemetry/trace/trace_id.h" +#include "opentelemetry/trace/span_id.h" +#include "opentelemetry/nostd/span.h" +#include "opentelemetry/nostd/variant.h" +#include "opentelemetry/context/runtime_context.h" +#include "opentelemetry/baggage/baggage.h" +#include "opentelemetry/baggage/baggage_context.h" + +namespace observability { + +// Helper: get SpanContext from current active span +static opentelemetry::trace::SpanContext GetCurrentSpanContext() { + auto context_value = opentelemetry::context::RuntimeContext::GetValue(opentelemetry::trace::kSpanKey); + auto span_ptr = opentelemetry::nostd::get_if>(&context_value); + if (!span_ptr || !(*span_ptr)) { + return opentelemetry::trace::SpanContext::GetInvalid(); + } + return (*span_ptr)->GetContext(); +} + +// Helper: add trace context and user attributes to log record +static void AddAttributesToLogRecord( + opentelemetry::nostd::unique_ptr& log_record, + const std::map& user_attributes) +{ + if (!log_record) { + return; + } + + // Set native OTEL trace context on the log record. + // This populates the standard OTLP LogRecord fields (trace_id, span_id, flags) + // which Loki stores as structured metadata and Grafana can filter by. 
+ auto span_context = GetCurrentSpanContext(); + if (span_context.IsValid()) { + log_record->SetTraceId(span_context.trace_id()); + log_record->SetSpanId(span_context.span_id()); + log_record->SetTraceFlags(span_context.trace_flags()); + + // Also store as string attribute for Loki derivedFields (Loki->Tempo link) + char trace_id_hex[33] = {}; + span_context.trace_id().ToLowerBase16(opentelemetry::nostd::span(trace_id_hex, 32)); + log_record->SetAttribute("trace_id", std::string(trace_id_hex, 32)); + } + + // Add baggage values (combat_trace_id, quest_trace_id, etc.) + auto current_ctx = opentelemetry::context::RuntimeContext::GetCurrent(); + auto baggage = opentelemetry::baggage::GetBaggage(current_ctx); + if (baggage) { + baggage->GetAllEntries([&log_record](opentelemetry::nostd::string_view key, + opentelemetry::nostd::string_view value) { + std::string key_str(key.data(), key.size()); + std::string value_str(koi8r_to_utf8(std::string(value.data(), value.size()))); + log_record->SetAttribute(key_str, value_str); + return true; // continue iteration + }); + } + + // Add user attributes + for (const auto& [key, value] : user_attributes) { + log_record->SetAttribute(key, koi8r_to_utf8(value)); + } +} + +static opentelemetry::logs::Severity to_otel_level(logging::LogLevel level) { + switch (level) { + case logging::LogLevel::kDebug: return opentelemetry::logs::Severity::kDebug; + case logging::LogLevel::kInfo: return opentelemetry::logs::Severity::kInfo; + case logging::LogLevel::kWarn: return opentelemetry::logs::Severity::kWarn; + case logging::LogLevel::kError: return opentelemetry::logs::Severity::kError; + default: return opentelemetry::logs::Severity::kInfo; + } +} + +// Helper: log with any level +static void LogWithLevel(logging::LogLevel level, + const std::string& message, + const std::map& attributes) { + if (OtelProvider::Instance().IsEnabled()) { + auto logger = logs_api::Provider::GetLoggerProvider()->GetLogger("bylins-logger", "", "", ""); + if 
(logger) { + auto log_record = logger->CreateLogRecord(); + if (log_record) { + log_record->SetSeverity(to_otel_level(level)); + + // Strip timestamp prefix "YYYY-MM-DD HH:MM:SS.mmm :: " added by format_log_message. + // OTEL log records carry their own timestamp metadata. + const auto sep = message.find(" :: "); + const std::string body = (sep != std::string::npos) ? message.substr(sep + 4) : message; + log_record->SetBody(koi8r_to_utf8(body)); + + // Automatically add trace context + user attributes + AddAttributesToLogRecord(log_record, attributes); + + logger->EmitLogRecord(std::move(log_record)); + } + } + } +} + +// All methods now delegate to LogWithLevel +void OtelLogSender::Debug(const std::string& message) { + LogWithLevel(logging::LogLevel::kDebug, message, {}); +} + +void OtelLogSender::Debug(const std::string& message, + const std::map& attributes) { + LogWithLevel(logging::LogLevel::kDebug, message, attributes); +} + +void OtelLogSender::Info(const std::string& message) { + LogWithLevel(logging::LogLevel::kInfo, message, {}); +} + +void OtelLogSender::Info(const std::string& message, + const std::map& attributes) { + LogWithLevel(logging::LogLevel::kInfo, message, attributes); +} + +void OtelLogSender::Warn(const std::string& message) { + LogWithLevel(logging::LogLevel::kWarn, message, {}); +} + +void OtelLogSender::Warn(const std::string& message, + const std::map& attributes) { + LogWithLevel(logging::LogLevel::kWarn, message, attributes); +} + +void OtelLogSender::Error(const std::string& message) { + LogWithLevel(logging::LogLevel::kError, message, {}); +} + +void OtelLogSender::Error(const std::string& message, + const std::map& attributes) { + LogWithLevel(logging::LogLevel::kError, message, attributes); +} + +} // namespace observability + +#endif // WITH_OTEL + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_log_sender.h b/src/engine/observability/otel_log_sender.h new file mode 100644 index 
000000000..b18e18d88 --- /dev/null +++ b/src/engine/observability/otel_log_sender.h @@ -0,0 +1,33 @@ +#ifndef BYLINS_OTEL_LOG_SENDER_H +#define BYLINS_OTEL_LOG_SENDER_H + +#include "utils/logging/log_sender.h" + +namespace observability { + +// OTEL implementation of log sender +class OtelLogSender : public logging::ILogSender { +public: + OtelLogSender() = default; + ~OtelLogSender() override = default; + + void Debug(const std::string& message) override; + void Debug(const std::string& message, + const std::map& attributes) override; + + void Info(const std::string& message) override; + void Info(const std::string& message, + const std::map& attributes) override; + + void Warn(const std::string& message) override; + void Warn(const std::string& message, + const std::map& attributes) override; + + void Error(const std::string& message) override; + void Error(const std::string& message, + const std::map& attributes) override; +}; + +} // namespace observability + +#endif // BYLINS_OTEL_LOG_SENDER_H diff --git a/src/engine/observability/otel_metrics.cpp b/src/engine/observability/otel_metrics.cpp new file mode 100644 index 000000000..0ed3b5403 --- /dev/null +++ b/src/engine/observability/otel_metrics.cpp @@ -0,0 +1,142 @@ +#include "otel_metrics.h" +#include "otel_provider.h" +#include +#include + +#ifdef WITH_OTEL +#include "otel_helpers.h" +#include "opentelemetry/metrics/provider.h" +#include "opentelemetry/context/context.h" +#endif + +namespace observability { + +#ifdef WITH_OTEL +static std::unordered_map>> histogram_cache; + +// Convert all string attribute values from KOI8-R to UTF-8 at the wrapper boundary. +// This ensures callers never need to manually convert - the OtelMetrics API handles it. 
+static std::map ToUtf8Attrs(const std::map& attrs) { + std::map result; + for (const auto& [k, v] : attrs) { + result[k] = koi8r_to_utf8(v); + } + return result; +} +#endif + +void OtelMetrics::RecordCounter(const std::string& name, int64_t value) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + if (value >= 0) { + auto counter = meter->CreateUInt64Counter(name); + counter->Add(static_cast(value)); + } + } + } +#else + (void)name; + (void)value; +#endif +} + +void OtelMetrics::RecordCounter(const std::string& name, int64_t value, + const std::map& attributes) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + if (value >= 0) { + auto counter = meter->CreateUInt64Counter(name); + counter->Add(static_cast(value), ToUtf8Attrs(attributes)); + } + } + } +#else + (void)name; + (void)value; + (void)attributes; +#endif +} + +void OtelMetrics::RecordHistogram(const std::string& name, double value) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + auto histogram = meter->CreateDoubleHistogram(name); + auto context = opentelemetry::context::Context{}; + histogram->Record(value, context); + } + } +#else + (void)name; + (void)value; +#endif +} + +void OtelMetrics::RecordHistogram(const std::string& name, double value, + const std::map& attributes) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + // Cache histogram instruments to avoid recreation + auto it = histogram_cache.find(name); + opentelemetry::metrics::Histogram* histogram = nullptr; + if (it == histogram_cache.end()) { + 
auto h = meter->CreateDoubleHistogram(name); + histogram = h.get(); + histogram_cache[name] = std::move(h); + } else { + histogram = it->second.get(); + } + auto context = opentelemetry::context::Context{}; + histogram->Record(value, ToUtf8Attrs(attributes), context); + } + } +#else + (void)name; + (void)value; + (void)attributes; +#endif +} + +void OtelMetrics::RecordGauge(const std::string& name, double value) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + // Gauges in OTEL require callbacks, so we'll use histogram instead + auto histogram = meter->CreateDoubleHistogram(name + ".gauge"); + auto context = opentelemetry::context::Context{}; + histogram->Record(value, context); + } + } +#else + (void)name; + (void)value; +#endif +} + +void OtelMetrics::RecordGauge(const std::string& name, double value, + const std::map& attributes) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto meter = metrics_api::Provider::GetMeterProvider()->GetMeter("bylins-meter", "1.0.0"); + if (meter) { + auto histogram = meter->CreateDoubleHistogram(name + ".gauge"); + auto context = opentelemetry::context::Context{}; + histogram->Record(value, ToUtf8Attrs(attributes), context); + } + } +#else + (void)name; + (void)value; + (void)attributes; +#endif +} + +} // namespace observability diff --git a/src/engine/observability/otel_metrics.h b/src/engine/observability/otel_metrics.h new file mode 100644 index 000000000..9d123ac15 --- /dev/null +++ b/src/engine/observability/otel_metrics.h @@ -0,0 +1,30 @@ +#ifndef BYLINS_OTEL_METRICS_H +#define BYLINS_OTEL_METRICS_H + +#include +#include +#include + +namespace observability { + +class OtelMetrics { +public: + // Счётчик (Counter) - монотонно растущее значение + static void RecordCounter(const std::string& name, int64_t value); + static void RecordCounter(const std::string& name, int64_t value, + 
const std::map& attributes); + + // Гистограмма (Histogram) - распределение значений + static void RecordHistogram(const std::string& name, double value); + static void RecordHistogram(const std::string& name, double value, + const std::map& attributes); + + // Измеритель (Gauge) - текущее значение + static void RecordGauge(const std::string& name, double value); + static void RecordGauge(const std::string& name, double value, + const std::map& attributes); +}; + +} // namespace observability + +#endif // BYLINS_OTEL_METRICS_H \ No newline at end of file diff --git a/src/engine/observability/otel_provider.cpp b/src/engine/observability/otel_provider.cpp new file mode 100644 index 000000000..2c011112c --- /dev/null +++ b/src/engine/observability/otel_provider.cpp @@ -0,0 +1,228 @@ +#include "otel_provider.h" +#include "otel_helpers.h" +#include "utils/timestamp.h" +#include "engine/core/config.h" +#include "utils/logging/log_manager.h" +#include "otel_log_sender.h" +#include "otel_trace_sender.h" +#include "utils/tracing/trace_manager.h" + +#ifdef WITH_OTEL +#include "absl/log/initialize.h" +#include "opentelemetry/sdk/trace/tracer_provider_factory.h" +#include "opentelemetry/sdk/trace/batch_span_processor_factory.h" +#include "opentelemetry/sdk/metrics/meter_provider_factory.h" +#include "opentelemetry/sdk/metrics/meter_context_factory.h" +#include "opentelemetry/sdk/logs/logger_provider_factory.h" +#include "opentelemetry/sdk/logs/batch_log_record_processor_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_exporter_options.h" +#include "opentelemetry/exporters/otlp/otlp_http_metric_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_metric_exporter_options.h" +#include "opentelemetry/exporters/otlp/otlp_http_log_record_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_log_record_exporter_options.h" +#include 
"opentelemetry/sdk/metrics/export/periodic_exporting_metric_reader_factory.h" +#include "opentelemetry/sdk/metrics/export/periodic_exporting_metric_reader_options.h" +#include "opentelemetry/sdk/resource/resource.h" +#include "opentelemetry/trace/provider.h" +#include "opentelemetry/metrics/provider.h" +#include "opentelemetry/logs/provider.h" + +#include +#include +#include +#include +#endif + +#ifndef WITH_OTEL +#include +#include +#include +#include +#endif + +namespace observability { + + +OtelProvider& OtelProvider::Instance() { + static OtelProvider instance; + return instance; +} + +OtelProvider::OtelProvider() { + // Constructor - log senders are managed by LogManager + // See Initialize() for OTEL log sender registration +} + +void OtelProvider::Initialize(const std::string& metrics_endpoint, + const std::string& traces_endpoint, + const std::string& logs_endpoint, + const std::string& service_name, + const std::string& service_version) { +#ifdef WITH_OTEL + if (m_enabled) { + return; // Already initialized + } + + try { + absl::InitializeLog(); + + // Create resource attributes + auto resource = otel::sdk::resource::Resource::Create({ + {"service.name", service_name}, + {"service.version", service_version} + }); + + // Initialize TracerProvider with OTLP HTTP exporter + // Based on examples/otlp/http_log_main.cc + { + otel::exporter::otlp::OtlpHttpExporterOptions trace_options; + trace_options.url = traces_endpoint; + + auto exporter = otel::exporter::otlp::OtlpHttpExporterFactory::Create(trace_options); + auto processor = otel::sdk::trace::BatchSpanProcessorFactory::Create(std::move(exporter), {}); + m_tracer_provider = otel::sdk::trace::TracerProviderFactory::Create(std::move(processor), resource); + + // Set as global provider + std::shared_ptr api_provider = m_tracer_provider; + otel::trace::Provider::SetTracerProvider(api_provider); + } + + // Initialize MeterProvider with OTLP HTTP exporter + // Based on examples/otlp/http_metric_main.cc + { + 
otel::exporter::otlp::OtlpHttpMetricExporterOptions metric_options; + metric_options.url = metrics_endpoint; + + auto exporter = otel::exporter::otlp::OtlpHttpMetricExporterFactory::Create(metric_options); + + otel::sdk::metrics::PeriodicExportingMetricReaderOptions reader_options; + reader_options.export_interval_millis = std::chrono::milliseconds(5000); + reader_options.export_timeout_millis = std::chrono::milliseconds(3000); + + auto reader = otel::sdk::metrics::PeriodicExportingMetricReaderFactory::Create( + std::move(exporter), reader_options + ); + + // Create context and add reader + auto meter_context = otel::sdk::metrics::MeterContextFactory::Create(); + meter_context->AddMetricReader(std::move(reader)); + + // Create provider from context + auto u_provider = otel::sdk::metrics::MeterProviderFactory::Create(std::move(meter_context)); + m_meter_provider = std::move(u_provider); + + // Set as global provider + std::shared_ptr api_provider = m_meter_provider; + otel::metrics::Provider::SetMeterProvider(api_provider); + } + + // Initialize LoggerProvider with OTLP HTTP exporter + // Based on examples/otlp/http_log_main.cc + { + otel::exporter::otlp::OtlpHttpLogRecordExporterOptions log_options; + log_options.url = logs_endpoint; + + auto exporter = otel::exporter::otlp::OtlpHttpLogRecordExporterFactory::Create(log_options); + + otel::sdk::logs::BatchLogRecordProcessorOptions processor_options; + processor_options.max_queue_size = 2048; + processor_options.schedule_delay_millis = std::chrono::milliseconds(5000); + processor_options.max_export_batch_size = 512; + + auto processor = otel::sdk::logs::BatchLogRecordProcessorFactory::Create( + std::move(exporter), processor_options + ); + + m_logger_provider = otel::sdk::logs::LoggerProviderFactory::Create(std::move(processor), resource); + + // Set as global provider + std::shared_ptr api_provider = m_logger_provider; + otel::logs::Provider::SetLoggerProvider(api_provider); + } + + // Register OTEL log sender with 
LogManager based on config mode + const auto mode = ::runtime_config.telemetry_log_mode(); + + if (mode == RuntimeConfiguration::ETelemetryLogMode::kDuplicate) { + // File sender already registered by LogManager constructor, add OTEL + logging::LogManager::Instance().AddSender(std::make_unique()); + std::cout << "[" << utils::NowTs() << "] Log mode: duplicate (file + OTEL)" << std::endl; + } else if (mode == RuntimeConfiguration::ETelemetryLogMode::kOtelOnly) { + // Replace file sender with OTEL only + logging::LogManager::Instance().ClearSenders(); + logging::LogManager::Instance().AddSender(std::make_unique()); + std::cout << "[" << utils::NowTs() << "] Log mode: otel-only" << std::endl; + } else { + // kFileOnly - no OTEL senders + // File sender already registered by LogManager constructor, do nothing + std::cout << "[" << utils::NowTs() << "] Log mode: file-only (OTEL initialized but not used for logs)" << std::endl; + } + + // Initialize TraceManager with appropriate sender + tracing::TraceManager::Instance().SetSender( + std::make_unique() + ); + std::cout << "[" << utils::NowTs() << "] TraceManager initialized with OtelTraceSender" << std::endl; + + m_enabled = true; + std::cout << "[" << utils::NowTs() << "] OpenTelemetry initialized successfully:" << std::endl; + std::cout << " Metrics: " << metrics_endpoint << std::endl; + std::cout << " Traces: " << traces_endpoint << std::endl; + std::cout << " Logs: " << logs_endpoint << std::endl; + } catch (const std::exception& e) { + std::cerr << "[" << utils::NowTs() << "] Failed to initialize OpenTelemetry: " << e.what() << std::endl; + m_enabled = false; + // Initialize TraceManager with NoOp sender on error + tracing::TraceManager::Instance().SetSender( + std::make_unique() + ); + std::cout << "[" << utils::NowTs() << "] TraceManager initialized with NoOpTraceSender (OTEL init failed)" << std::endl; + } +#else + (void)metrics_endpoint; + (void)traces_endpoint; + (void)logs_endpoint; + (void)service_name; + 
(void)service_version; + // Initialize TraceManager with NoOp sender (no OTEL) + tracing::TraceManager::Instance().SetSender( + std::make_unique() + ); + std::cout << "[" << utils::NowTs() << "] TraceManager initialized with NoOpTraceSender (no OTEL)" << std::endl; +#endif +} + +void OtelProvider::Shutdown() { +#ifdef WITH_OTEL + if (!m_enabled) { + return; + } + + try { + // Shutdown providers to flush remaining telemetry + if (m_tracer_provider) { + m_tracer_provider->ForceFlush(); + m_tracer_provider->Shutdown(); + } + if (m_meter_provider) { + m_meter_provider->ForceFlush(); + m_meter_provider->Shutdown(); + } + if (m_logger_provider) { + m_logger_provider->ForceFlush(); + m_logger_provider->Shutdown(); + } + + m_enabled = false; + std::cout << "[" << utils::NowTs() << "] OpenTelemetry shutdown successfully" << std::endl; + } catch (const std::exception& e) { + std::cerr << "[" << utils::NowTs() << "] Error during OpenTelemetry shutdown: " << e.what() << std::endl; + } +#endif +} + + +} // namespace observability + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_provider.h b/src/engine/observability/otel_provider.h new file mode 100644 index 000000000..dc66dee1c --- /dev/null +++ b/src/engine/observability/otel_provider.h @@ -0,0 +1,55 @@ +#ifndef BYLINS_OTEL_PROVIDER_H +#define BYLINS_OTEL_PROVIDER_H + +#include +#include + +#ifdef WITH_OTEL +#include "opentelemetry/sdk/trace/tracer_provider.h" +#include "opentelemetry/sdk/metrics/meter_provider.h" +#include "opentelemetry/sdk/logs/logger_provider.h" +#include "opentelemetry/nostd/shared_ptr.h" + +namespace otel = opentelemetry; +namespace trace_api = otel::trace; +namespace metrics_api = otel::metrics; +namespace logs_api = otel::logs; +namespace nostd = otel::nostd; +#endif + +namespace observability { + +// Forward declaration +class ILogSender; + +class OtelProvider { +public: + static OtelProvider& Instance(); + + void Initialize(const std::string& metrics_endpoint, + 
const std::string& traces_endpoint, + const std::string& logs_endpoint, + const std::string& service_name, + const std::string& service_version); + void Shutdown(); + + bool IsEnabled() const { return m_enabled; } + +private: + OtelProvider(); + ~OtelProvider() = default; + OtelProvider(const OtelProvider&) = delete; + OtelProvider& operator=(const OtelProvider&) = delete; + + bool m_enabled = false; + +#ifdef WITH_OTEL + std::shared_ptr m_tracer_provider; + std::shared_ptr m_meter_provider; + std::shared_ptr m_logger_provider; +#endif +}; + +} // namespace observability + +#endif // BYLINS_OTEL_PROVIDER_H diff --git a/src/engine/observability/otel_trace_sender.cpp b/src/engine/observability/otel_trace_sender.cpp new file mode 100644 index 000000000..b71068a55 --- /dev/null +++ b/src/engine/observability/otel_trace_sender.cpp @@ -0,0 +1,98 @@ +#include "otel_trace_sender.h" + +#ifdef WITH_OTEL +#include "otel_helpers.h" +#include "otel_provider.h" +#include "opentelemetry/trace/provider.h" + +namespace tracing { +OtelSpan::OtelSpan(opentelemetry::nostd::shared_ptr span) + : m_span(span) + , m_scope(span ? opentelemetry::nostd::unique_ptr( + new opentelemetry::trace::Scope(span)) : nullptr) {} + +OtelSpan::OtelSpan(opentelemetry::nostd::shared_ptr span, bool create_scope) + : m_span(span) + , m_scope(create_scope && span ? 
opentelemetry::nostd::unique_ptr( + new opentelemetry::trace::Scope(span)) : nullptr) {} + +void OtelSpan::End() { + if (m_span) { + m_span->End(); + } +} + +void OtelSpan::AddEvent(const std::string& name) { + if (m_span) { + m_span->AddEvent(observability::koi8r_to_utf8(name)); + } +} + +void OtelSpan::SetAttribute(const std::string& key, const std::string& value) { + if (m_span) { + m_span->SetAttribute(key, observability::koi8r_to_utf8(value)); + } +} + +void OtelSpan::SetAttribute(const std::string& key, int64_t value) { + if (m_span) { + m_span->SetAttribute(key, value); + } +} + +void OtelSpan::SetAttribute(const std::string& key, double value) { + if (m_span) { + m_span->SetAttribute(key, value); + } +} + +bool OtelSpan::IsValid() const { + return m_span != nullptr; +} + +opentelemetry::trace::SpanContext OtelSpan::GetContext() const { + if (m_span) { + return m_span->GetContext(); + } + return opentelemetry::trace::SpanContext::GetInvalid(); +} + +std::unique_ptr OtelTraceSender::StartSpan(const std::string& name) { + if (observability::OtelProvider::Instance().IsEnabled()) { + auto tracer = trace_api::Provider::GetTracerProvider()->GetTracer("bylins-tracer", "1.0.0"); + if (tracer) { + auto span = tracer->StartSpan(observability::koi8r_to_utf8(name)); + return std::make_unique(span); + } + } + return std::make_unique(); +} + +std::unique_ptr OtelTraceSender::StartChildSpan( + const std::string& name, + const ISpan& parent) +{ + // Downcast to OtelSpan to get context + const OtelSpan* otel_parent = dynamic_cast(&parent); + if (!otel_parent || !otel_parent->IsValid()) { + return std::make_unique(); + } + + if (observability::OtelProvider::Instance().IsEnabled()) { + auto tracer = trace_api::Provider::GetTracerProvider()->GetTracer("bylins-tracer", "1.0.0"); + if (tracer) { + opentelemetry::trace::StartSpanOptions options; + options.parent = otel_parent->GetContext(); + + auto span = tracer->StartSpan(observability::koi8r_to_utf8(name), {}, options); + 
return std::make_unique(span); + } + } + return std::make_unique(); +} + +} // namespace tracing + +#endif // WITH_OTEL + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_trace_sender.h b/src/engine/observability/otel_trace_sender.h new file mode 100644 index 000000000..525b2ebf2 --- /dev/null +++ b/src/engine/observability/otel_trace_sender.h @@ -0,0 +1,40 @@ +#ifndef BYLINS_OTEL_TRACE_SENDER_H +#define BYLINS_OTEL_TRACE_SENDER_H + +#include "utils/tracing/trace_sender.h" +#include "opentelemetry/trace/span.h" +#include "opentelemetry/trace/scope.h" +#include "opentelemetry/nostd/shared_ptr.h" +#include "opentelemetry/nostd/unique_ptr.h" + +namespace tracing { + +class OtelSpan : public ISpan { +public: + explicit OtelSpan(opentelemetry::nostd::shared_ptr span); + OtelSpan(opentelemetry::nostd::shared_ptr span, bool create_scope); + + void End() override; + void AddEvent(const std::string& name) override; + void SetAttribute(const std::string& key, const std::string& value) override; + void SetAttribute(const std::string& key, int64_t value) override; + void SetAttribute(const std::string& key, double value) override; + bool IsValid() const override; + + opentelemetry::trace::SpanContext GetContext() const; + +private: + opentelemetry::nostd::shared_ptr m_span; + opentelemetry::nostd::unique_ptr m_scope; +}; + +class OtelTraceSender : public ITraceSender { +public: + std::unique_ptr StartSpan(const std::string& name) override; + std::unique_ptr StartChildSpan(const std::string& name, const ISpan& parent) override; +}; + +} // namespace tracing +#endif // BYLINS_OTEL_TRACE_SENDER_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/engine/observability/otel_traces.cpp b/src/engine/observability/otel_traces.cpp new file mode 100644 index 000000000..376250f44 --- /dev/null +++ b/src/engine/observability/otel_traces.cpp @@ -0,0 +1,74 @@ +#include "otel_traces.h" +#include "otel_provider.h" +#include "otel_helpers.h" + 
+#ifdef WITH_OTEL +#include "opentelemetry/trace/provider.h" +#endif + +namespace observability { + +#ifdef WITH_OTEL +Span::Span(opentelemetry::nostd::shared_ptr span) : m_span(span) {} + +void Span::End() { + if (m_span) { + m_span->End(); + } +} + +void Span::AddEvent(const std::string& name) { + if (m_span) { + m_span->AddEvent(koi8r_to_utf8(name)); + } +} + +void Span::SetAttribute(const std::string& key, const std::string& value) { + if (m_span) { + m_span->SetAttribute(key, koi8r_to_utf8(value)); + } +} + +void Span::SetAttribute(const std::string& key, int64_t value) { + if (m_span) { + m_span->SetAttribute(key, value); + } +} + +void Span::SetAttribute(const std::string& key, double value) { + if (m_span) { + m_span->SetAttribute(key, value); + } +} +#endif + +Span OtelTraces::StartSpan(const std::string& name) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto tracer = trace_api::Provider::GetTracerProvider()->GetTracer("bylins-tracer", "1.0.0"); + if (tracer) { + return Span(tracer->StartSpan(koi8r_to_utf8(name))); + } + } +#endif + return Span(); +} + +Span OtelTraces::StartSpan(const std::string& name, + const std::map& attributes) { +#ifdef WITH_OTEL + if (OtelProvider::Instance().IsEnabled()) { + auto tracer = trace_api::Provider::GetTracerProvider()->GetTracer("bylins-tracer", "1.0.0"); + if (tracer) { + auto span = tracer->StartSpan(koi8r_to_utf8(name)); + for (const auto& attr : attributes) { + span->SetAttribute(attr.first, koi8r_to_utf8(attr.second)); + } + return Span(span); + } + } +#endif + return Span(); +} + +} // namespace observability \ No newline at end of file diff --git a/src/engine/observability/otel_traces.h b/src/engine/observability/otel_traces.h new file mode 100644 index 000000000..dbc4084b0 --- /dev/null +++ b/src/engine/observability/otel_traces.h @@ -0,0 +1,61 @@ +#ifndef BYLINS_OTEL_TRACES_H +#define BYLINS_OTEL_TRACES_H + +#include +#include +#include + +#ifdef WITH_OTEL +#include 
"opentelemetry/trace/span.h" +namespace trace_api = opentelemetry::trace; +#endif + +namespace observability { + +class Span { +public: + Span() = default; + +#ifdef WITH_OTEL + explicit Span(opentelemetry::nostd::shared_ptr span); + void End(); + void AddEvent(const std::string& name); + void SetAttribute(const std::string& key, const std::string& value); + void SetAttribute(const std::string& key, int64_t value); + void SetAttribute(const std::string& key, double value); +#else + void End() {} + void AddEvent(const std::string&) {} + void SetAttribute(const std::string&, const std::string&) {} + void SetAttribute(const std::string&, int64_t) {} + void SetAttribute(const std::string&, double) {} +#endif + +private: +#ifdef WITH_OTEL + opentelemetry::nostd::shared_ptr m_span; +#endif +}; + +class OtelTraces { +public: + static Span StartSpan(const std::string& name); + static Span StartSpan(const std::string& name, + const std::map& attributes); +}; + +// RAII span guard: ends the wrapped span automatically on destruction +class SpanGuard { +public: + explicit SpanGuard(Span span) : m_span(std::move(span)) {} + ~SpanGuard() { m_span.End(); } + + Span& GetSpan() { return m_span; } + +private: + Span m_span; +}; + +} // namespace observability + +#endif // BYLINS_OTEL_TRACES_H \ No newline at end of file diff --git a/src/engine/scripting/dg_scripts.cpp b/src/engine/scripting/dg_scripts.cpp index 44598a033..de2db33f2 100644 --- a/src/engine/scripting/dg_scripts.cpp +++ b/src/engine/scripting/dg_scripts.cpp @@ -39,6 +39,9 @@ #include "utils/backtrace.h" #include "gameplay/mechanics/armor.h" #include "gameplay/classes/recalc_mob_params_by_vnum.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" extern int max_exp_gain_pc(CharData *ch); extern long GetExpUntilNextLvl(CharData *ch, int level); @@ -662,10 +665,26 @@ ObjData *get_obj_by_char(CharData *ch, char *name) { return nullptr; } +static const char 
*trigger_type_name(int mode) { + switch (mode) { + case MOB_TRIGGER: return "MOB"; + case OBJ_TRIGGER: return "OBJ"; + case WLD_TRIGGER: return "WLD"; + default: return "UNKNOWN"; + } +} + // checks every PLUSE_SCRIPT for random triggers void script_trigger_check(int mode) { utils::CExecutionTimer timer; + auto trigger_span = tracing::TraceManager::Instance().StartSpan("Script Trigger Check"); + observability::ScopedMetric trigger_metric("script.trigger.duration"); + + trigger_span->SetAttribute("trigger_type", trigger_type_name(mode)); + trigger_span->SetAttribute("mode", static_cast(mode)); + + switch (mode) { case MOB_TRIGGER: for (auto ch : character_list) { @@ -709,6 +728,7 @@ void script_trigger_check(int mode) { default: break; } + log("script_trigger_check() mode %d всего: %f ms.", mode, timer.delta().count()); } diff --git a/src/engine/ui/cmd_god/do_loadstat.cpp b/src/engine/ui/cmd_god/do_loadstat.cpp index 4879bd61e..c8d1d8e68 100644 --- a/src/engine/ui/cmd_god/do_loadstat.cpp +++ b/src/engine/ui/cmd_god/do_loadstat.cpp @@ -7,17 +7,19 @@ */ #include "engine/entities/char_data.h" +#include "engine/core/config.h" #include "utils/utils_time.h" #include void DoLoadstat(CharData *ch, char * /*argument*/, int/* cmd*/, int/* subcmd*/) { - std::ifstream istream(LOAD_LOG_FOLDER LOAD_LOG_FILE, std::ifstream::in); + const std::string profiler_path = runtime_config.log_dir() + "/" + LOAD_LOG_FILE; + std::ifstream istream(profiler_path, std::ifstream::in); int length; if (!istream.is_open()) { SendMsgToChar("Can't open file", ch); - log("ERROR: Can't open file %s", LOAD_LOG_FOLDER LOAD_LOG_FILE); + log("ERROR: Can't open file %s", profiler_path.c_str()); return; } diff --git a/src/gameplay/ai/mobact.cpp b/src/gameplay/ai/mobact.cpp index 9a94d2248..cb053e544 100644 --- a/src/gameplay/ai/mobact.cpp +++ b/src/gameplay/ai/mobact.cpp @@ -27,6 +27,9 @@ #include "gameplay/abilities/abilities_rollsystem.h" #include "engine/core/action_targeting.h" +#include 
"engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" #include "engine/core/char_movement.h" #include "engine/db/world_characters.h" #include "engine/db/world_objects.h" @@ -878,6 +881,11 @@ bool allow_enter(RoomData *room, CharData *ch) { void mobile_activity(int activity_level, int missed_pulses) { + auto activity_span = tracing::TraceManager::Instance().StartSpan("mob.activity"); + observability::ScopedMetric activity_metric("mob.activity.duration", { + {"activity_level", std::to_string(activity_level)} + }); + // int door, max, was_in = -1, activity_lev, i, ch_activity; // int std_lev = activity_level % kPulseMobile; diff --git a/src/gameplay/core/game_limits.cpp b/src/gameplay/core/game_limits.cpp index dd8b2a621..2d26c061b 100644 --- a/src/gameplay/core/game_limits.cpp +++ b/src/gameplay/core/game_limits.cpp @@ -27,6 +27,9 @@ #include "gameplay/economics/ext_money.h" #include "gameplay/statistics/mob_stat.h" #include "gameplay/mechanics/liquid.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" #include "engine/db/global_objects.h" #include "gameplay/mechanics/sight.h" #include "gameplay/ai/mob_memory.h" @@ -619,7 +622,25 @@ void beat_punish(const CharData::shared_ptr &i) { } } +static void record_level_remort_distribution(const std::map& distribution) { + for (const auto& [key, count] : distribution) { + size_t level_end = key.find("_remort_"); + std::string level_str = key.substr(6, level_end - 6); + std::string remort_str = key.substr(level_end + 8); + observability::OtelMetrics::RecordGauge("players.by_level_remort.count", count, + {{"level", level_str}, {"remort", remort_str}}); + } +} + void beat_points_update(int pulse) { + // OpenTelemetry: Track beat points update + auto beat_span = tracing::TraceManager::Instance().StartSpan("Beat Points Update"); + observability::ScopedMetric 
beat_metric("player.beat_update.duration"); + + // Player statistics + int online_count = 0; + int in_combat_count = 0; + std::map level_remort_distribution; // "level_X_remort_Y" -> count int restore; if (!UPDATE_PC_ON_BEAT) @@ -637,6 +658,18 @@ void beat_points_update(int pulse) { log("SYSERR: Pulse character in kNowhere."); continue; } + + // OpenTelemetry: Collect player statistics + online_count++; + if (d->character->GetEnemy()) { + in_combat_count++; + } + + // Level/remort distribution + int level = d->character->GetLevel(); + int remort = d->character->get_remort(); + std::string key = "level_" + std::to_string(level) + "_remort_" + std::to_string(remort); + level_remort_distribution[key]++; if (NORENTABLE(d->character.get()) <= time(nullptr)) { d->character->player_specials->may_rent = 0; @@ -743,6 +776,11 @@ void beat_points_update(int pulse) { } //-MZ.overflow_fix } + + beat_span->SetAttribute("player_count", static_cast(online_count)); + observability::OtelMetrics::RecordGauge("players.online.count", online_count); + observability::OtelMetrics::RecordGauge("players.in_combat.count", in_combat_count); + record_level_remort_distribution(level_remort_distribution); } void update_clan_exp(CharData *ch, int gain) { diff --git a/src/gameplay/crafting/item_creation.cpp b/src/gameplay/crafting/item_creation.cpp index 93ffa7d4d..054f2b952 100644 --- a/src/gameplay/crafting/item_creation.cpp +++ b/src/gameplay/crafting/item_creation.cpp @@ -17,6 +17,7 @@ #include "engine/db/global_objects.h" #include "gameplay/core/base_stats.h" #include "gameplay/core/constants.h" +#include "engine/observability/otel_metrics.h" #include @@ -1892,6 +1893,13 @@ int MakeRecept::make(CharData *ch) { ExtractObjFromWorld(ingrs[i]); } } + + // OpenTelemetry: Record craft failure + std::map attrs; + attrs["recipe_id"] = std::to_string(obj_proto); + attrs["skill"] = NAME_BY_ITEM(skill); + attrs["failure_reason"] = "craft_failed"; + 
observability::OtelMetrics::RecordCounter("craft.failures.total", 1, attrs); return (false); } // Лоадим предмет игроку @@ -2055,6 +2063,12 @@ int MakeRecept::make(CharData *ch) { } else { PlaceObjToInventory(obj.get(), ch); } + + // OpenTelemetry: Record craft success + std::map attrs; + attrs["recipe_id"] = std::to_string(obj_proto); + attrs["skill"] = NAME_BY_ITEM(skill); + observability::OtelMetrics::RecordCounter("craft.completed.total", 1, attrs); return (true); } // вытащить рецепт из строки. diff --git a/src/gameplay/economics/auction.cpp b/src/gameplay/economics/auction.cpp index 5743b5c16..761e8e47d 100644 --- a/src/gameplay/economics/auction.cpp +++ b/src/gameplay/economics/auction.cpp @@ -16,6 +16,9 @@ #include "gameplay/mechanics/named_stuff.h" #include "gameplay/fight/pk.h" #include "gameplay/ai/spec_procs.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" const int kMaxAuctionLot = 3; const int kMaxAuctionTactBuy = 5; @@ -695,6 +698,17 @@ void sell_auction(int lot) { if (!check_sell(lot)) return; + // OpenTelemetry: Track auction sale + auto sale_span = tracing::TraceManager::Instance().StartSpan("Auction Sale"); + double duration_seconds = (GET_LOT(lot)->tact * kAuctionPulses) / 10.0; + + sale_span->SetAttribute("lot", static_cast(lot)); + sale_span->SetAttribute("seller_id", static_cast(GET_LOT(lot)->seller_unique)); + sale_span->SetAttribute("buyer_id", static_cast(GET_LOT(lot)->buyer_unique)); + sale_span->SetAttribute("cost", static_cast(GET_LOT(lot)->cost)); + sale_span->SetAttribute("item_id", static_cast(GET_LOT(lot)->item_id)); + sale_span->SetAttribute("duration_seconds", duration_seconds); + if (ch->in_room != tch->in_room || !ROOM_FLAGGED(ch->in_room, ERoomFlag::kPeaceful)) { if (GET_LOT(lot)->tact >= kMaxAuctionTact) { @@ -737,6 +751,14 @@ void sell_auction(int lot) { ch->add_bank(GET_LOT(lot)->cost); tch->remove_both_gold(GET_LOT(lot)->cost); + + 
// OpenTelemetry: Record auction sale metrics + std::map attrs; + attrs["seller_id"] = std::to_string(GET_LOT(lot)->seller_unique); + + observability::OtelMetrics::RecordCounter("auction.sale.total", 1, attrs); + observability::OtelMetrics::RecordCounter("auction.revenue.total", GET_LOT(lot)->cost, attrs); + observability::OtelMetrics::RecordHistogram("auction.duration.seconds", duration_seconds, attrs); clear_auction(lot); return; } @@ -821,6 +843,15 @@ void tact_auction(void) { } else sell_auction(i); } + + // OpenTelemetry: Track active auction lots + int active_lots = 0; + for (int j = 0; j < kMaxAuctionLot; j++) { + if (GET_LOT(j)->seller && GET_LOT(j)->item) { + active_lots++; + } + } + observability::OtelMetrics::RecordGauge("auction.lots.active", active_lots); } AuctionItem *free_auction(int *lotnum) { diff --git a/src/gameplay/fight/fight.cpp b/src/gameplay/fight/fight.cpp index 95b424001..e6b789c71 100644 --- a/src/gameplay/fight/fight.cpp +++ b/src/gameplay/fight/fight.cpp @@ -44,6 +44,7 @@ #include "common.h" #include +#include "engine/observability/otel_metrics.h" // Structures std::list combat_list; @@ -2030,6 +2031,7 @@ void perform_violence() { round_profiler.next_step("Calc initiative"); // почистим удаленных между раундами боя std::erase_if(combat_list, [](auto flag) {return flag.deleted;}); + observability::OtelMetrics::RecordGauge("combat.active.count", static_cast(combat_list.size())); for (auto &it : combat_list) { if (it.deleted) continue; diff --git a/src/gameplay/fight/fight_hit.cpp b/src/gameplay/fight/fight_hit.cpp index eaefa2cb6..48faca8b4 100644 --- a/src/gameplay/fight/fight_hit.cpp +++ b/src/gameplay/fight/fight_hit.cpp @@ -27,6 +27,8 @@ #include "gameplay/skills/shield_block.h" #include "gameplay/skills/backstab.h" #include "gameplay/skills/ironwind.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" #include "gameplay/mechanics/armor.h" #include "gameplay/skills/addshot.h" @@ -863,6 
+865,9 @@ void hit(CharData *ch, CharData *victim, ESkill type, fight::AttackType weapon) return; } + + // OpenTelemetry: Measure hit duration + observability::ScopedMetric hit_metric("combat.hit.duration"); // Do some sanity checking, in case someone flees, etc. if (ch->in_room != victim->in_room || ch->in_room == kNowhere) { if (ch->GetEnemy() && ch->GetEnemy() == victim) { diff --git a/src/gameplay/fight/fight_stuff.cpp b/src/gameplay/fight/fight_stuff.cpp index df64e7221..48260d1ea 100644 --- a/src/gameplay/fight/fight_stuff.cpp +++ b/src/gameplay/fight/fight_stuff.cpp @@ -28,6 +28,7 @@ #include "gameplay/mechanics/sight.h" #include "gameplay/ai/mob_memory.h" #include "engine/entities/zone.h" +#include "engine/observability/otel_metrics.h" #include "gameplay/core/game_limits.h" #include "gameplay/mechanics/illumination.h" #include "utils/utils_time.h" @@ -564,6 +565,19 @@ void raw_kill(CharData *ch, CharData *killer) { NRM, kLvlGod, ERRLOG, true); return; } + // OpenTelemetry: count player deaths + if (!ch->IsNpc()) { + std::string death_type = "pve"; + if (killer && !killer->IsNpc() && !IS_CHARMICE(killer)) { + death_type = "pvp"; + } else if (!killer) { + death_type = "other"; + } + std::map death_attrs; + death_attrs["death_type"] = death_type; + observability::OtelMetrics::RecordCounter("player.deaths.total", 1, death_attrs); + } + if (!ROOM_FLAGGED(ch->in_room, ERoomFlag::kDominationArena)) { reset_affects(ch); } diff --git a/src/gameplay/magic/magic_utils.cpp b/src/gameplay/magic/magic_utils.cpp index cef747a70..80ec3c1b0 100644 --- a/src/gameplay/magic/magic_utils.cpp +++ b/src/gameplay/magic/magic_utils.cpp @@ -27,6 +27,9 @@ #include "gameplay/statistics/spell_usage.h" #include "utils/backtrace.h" +#include "engine/observability/otel_helpers.h" +#include "engine/observability/otel_metrics.h" +#include "utils/tracing/trace_manager.h" #include char cast_argument[kMaxStringLength]; @@ -340,6 +343,27 @@ bool MayCastHere(CharData *caster, CharData *victim, 
ESpell spell_id) { * Spellnum 0 is legal but silently ignored here, to make callers simpler. */ int CallMagic(CharData *caster, CharData *cvict, ObjData *ovict, RoomData *rvict, ESpell spell_id, int level) { + // OpenTelemetry: Track spell casting + auto spell_span = tracing::TraceManager::Instance().StartSpan("Spell Cast"); + std::map spell_dur_attrs; + spell_dur_attrs["spell_id"] = std::to_string(to_underlying(spell_id)); + spell_dur_attrs["caster_class"] = NAME_BY_ITEM(caster->GetClass()); + observability::ScopedMetric spell_metric("spell.cast.duration", spell_dur_attrs); + + // Set spell attributes (strings are passed unconverted: OtelSpan::SetAttribute applies koi8r_to_utf8 itself) + std::string spell_name = MUD::Spell(spell_id).GetCName(); + spell_span->SetAttribute("spell_id", static_cast(to_underlying(spell_id))); + spell_span->SetAttribute("spell_name", spell_name); + spell_span->SetAttribute("caster_class", std::string(NAME_BY_ITEM(caster->GetClass()))); + spell_span->SetAttribute("spell_level", static_cast(level)); + + // Determine target type + std::string target_type = "none"; + if (cvict) target_type = "char"; + else if (ovict) target_type = "obj"; + else if (rvict) target_type = "room"; + spell_span->SetAttribute("target_type", target_type); + if (spell_id < ESpell::kFirst || spell_id > ESpell::kLast) return 0; @@ -367,6 +391,12 @@ int CallMagic(CharData *caster, CharData *cvict, ObjData *ovict, RoomData *rvict if (SpellUsage::is_active) { SpellUsage::AddSpellStat(caster->GetClass(), spell_id); } + + // OpenTelemetry: Record spell cast attempt + std::map attrs; + attrs["spell_id"] = std::to_string(to_underlying(spell_id)); + attrs["caster_class"] = NAME_BY_ITEM(caster->GetClass()); + observability::OtelMetrics::RecordCounter("spell.cast.total", 1, attrs); if (MUD::Spell(spell_id).IsFlagged(kMagAreas) || MUD::Spell(spell_id).IsFlagged(kMagMasses)) { return CallMagicToArea(caster, cvict, rvict, spell_id, abs(level)); diff --git a/src/utils/logger.cpp b/src/utils/logger.cpp 
index 6c3013a54..88f0dd577 100644 --- a/src/utils/logger.cpp +++ b/src/utils/logger.cpp @@ -58,66 +58,90 @@ void pers_log(CharData *ch, const char *format, ...) { // Файл для вывода FILE *logfile = nullptr; -std::size_t vlog_buffer(char *buffer, const std::size_t buffer_size, const char *format, va_list args) { - std::size_t result = ~0u; - int timestamp_length = -1; - +static std::string make_timestamp() { #if HAS_TIME_ZONE - // Реализация с использованием std::chrono::time_zone const std::chrono::time_zone* time_zone; try { time_zone = std::chrono::current_zone(); } catch(const std::runtime_error&) { puts("SYSERR: failed to get local timezone."); - return result; + return {}; } - const auto utc_now = std::chrono::time_point_cast(std::chrono::system_clock::now()); const auto now = std::chrono::zoned_time{time_zone, utc_now}; - const auto str = std::format("{:%Y-%m-%d %T}", now); - timestamp_length = snprintf(buffer, buffer_size, "%s :: ", str.c_str()); + return std::format("{:%Y-%m-%d %T}", now); #else - // Реализация без std::chrono::time_zone, используем std::chrono::local_time const auto now = std::chrono::time_point_cast(std::chrono::system_clock::now()); - const auto time_t_now = std::chrono::system_clock::to_time_t(now); - auto* local_tm = std::localtime(&time_t_now); - - if (!local_tm) { - puts("SYSERR: failed to get local time."); - return result; - } - - const auto ms = std::chrono::duration_cast(now.time_since_epoch()) % 1000; - char time_str[64]; - std::strftime(time_str, sizeof(time_str), "%Y-%m-%d %H:%M:%S", local_tm); - std::snprintf(time_str + std::strlen(time_str), sizeof(time_str) - std::strlen(time_str), ".%03ld", ms.count()); - const std::string str = time_str; - timestamp_length = snprintf(buffer, buffer_size, "%s :: ", str.c_str()); - #endif - - if (0 > timestamp_length) { - puts("SYSERR: failed to print timestamp inside log() function."); - return result; + const auto time_t_now = std::chrono::system_clock::to_time_t(now); + auto* local_tm = 
std::localtime(&time_t_now); + if (!local_tm) { + puts("SYSERR: failed to get local time."); + return {}; + } + const auto ms = std::chrono::duration_cast(now.time_since_epoch()) % 1000; + char time_str[64]; + std::strftime(time_str, sizeof(time_str), "%Y-%m-%d %H:%M:%S", local_tm); + std::snprintf(time_str + std::strlen(time_str), sizeof(time_str) - std::strlen(time_str), ".%03ld", ms.count()); + return time_str; +#endif +} + +static std::string format_log_message(const char *format, va_list args) { + const std::string ts = make_timestamp(); + if (ts.empty()) { + return {}; } va_list args_copy; va_copy(args_copy, args); - const int length = vsnprintf(buffer + timestamp_length, buffer_size - timestamp_length, format, args_copy); + const int len = vsnprintf(nullptr, 0, format, args_copy); va_end(args_copy); - if (0 > length) { - puts("SYSERR: failed to print message contents inside log() function."); - return result; + if (len < 0) { + puts("SYSERR: failed to format log message."); + return {}; } - result = timestamp_length + length; - if (buffer_size <= result) { - const char truncated_suffix[] = "[TRUNCATED]"; - snprintf(buffer, buffer_size - sizeof(truncated_suffix), "%s", truncated_suffix); - } + std::string msg(len, '\0'); + va_copy(args_copy, args); + vsnprintf(&msg[0], len + 1, format, args_copy); + va_end(args_copy); + + return ts + " :: " + msg; +} + +static const char* stream_name_for_file(FILE* file) { + for (int i = 0; i <= LAST_LOG; i++) { + const auto stream = static_cast(i); + if (runtime_config.logs(stream).handle() == file) { + switch (stream) { + case SYSLOG: return "syslog"; + case ERRLOG: return "errlog"; + case IMLOG: return "imlog"; + case MSDP_LOG: return "msdp"; + case MONEY_LOG: return "money"; + default: return "syslog"; + } + } + } + return "syslog"; +} - return result; +void write_log_message(const std::string& message, FILE* file) { + if (!file) { + return; + } + if (!runtime_config.output_thread() && runtime_config.log_stderr().empty()) 
{ + fputs(message.c_str(), file); + fputs("\n", file); + } else { + const std::size_t len = message.size(); + std::shared_ptr buffer(new char[len + 1], [](char *p) { delete[] p; }); + memcpy(buffer.get(), message.c_str(), len); + buffer.get()[len] = '\0'; + GlobalObjects::output_thread().output(OutputThread::message_t{buffer, len, file}); + } } void vlog(const char *format, va_list args, FILE *logfile) { @@ -135,20 +159,13 @@ void vlog(const char *format, va_list args, FILE *logfile) { format = "SYSERR: log() received a NULL format."; } - if (!runtime_config.output_thread() - && runtime_config.log_stderr().empty()) { - const time_t ct = time(0); - const char *time_s = asctime(localtime(&ct)); - - fprintf(logfile, "%-15.15s :: ", time_s + 4); - vfprintf(logfile, format, args); - fprintf(logfile, "\n"); - } else { - constexpr std::size_t BUFFER_SIZE = 4096; - std::shared_ptr buffer(new char[BUFFER_SIZE], [](char *p) { delete[] p; }); - const std::size_t length = vlog_buffer(buffer.get(), BUFFER_SIZE, format, args); - GlobalObjects::output_thread().output(OutputThread::message_t{buffer, length, logfile}); + const std::string message = format_log_message(format, args); + if (message.empty()) { + return; } + + const char* stream_name = stream_name_for_file(logfile); + logging::LogManager::Info(message, {{"log_type", stream_name}}); } void vlog(const char *format, va_list args) { @@ -156,13 +173,32 @@ void vlog(const char *format, va_list args) { } void vlog(const EOutputStream steam, const char *format, va_list rargs) { - va_list args; - va_copy(args, rargs); + if (!runtime_config.logging_enabled()) { + return; + } - const auto log = runtime_config.logs(steam).handle(); - vlog(format, args, log); + if (format == nullptr) { + format = "SYSERR: log() received a NULL format."; + } + va_list args; + va_copy(args, rargs); + const std::string message = format_log_message(format, args); va_end(args); + if (message.empty()) { + return; + } + + const char* stream_name; + switch 
(steam) { + case SYSLOG: stream_name = "syslog"; break; + case ERRLOG: stream_name = "errlog"; break; + case IMLOG: stream_name = "imlog"; break; + case MSDP_LOG: stream_name = "msdp"; break; + case MONEY_LOG: stream_name = "money"; break; + default: stream_name = "syslog"; break; + } + logging::LogManager::Info(message, {{"log_type", stream_name}}); } void log(std::string format) { diff --git a/src/utils/logger.h b/src/utils/logger.h index 88b2ac639..447fe65b5 100644 --- a/src/utils/logger.h +++ b/src/utils/logger.h @@ -3,11 +3,14 @@ #include "engine/core/config.h" #include "engine/core/sysdep.h" +#include "logging/log_manager.h" +#include "engine/observability/otel_provider.h" #include #include #include #include +#include extern FILE *logfile; extern std::list opened_files; @@ -24,6 +27,7 @@ void imm_log(const char *format, ...) __attribute__((format(printf, 1, 2))); void err_log(const char *format, ...) __attribute__((format(printf, 1, 2))); void ip_log(const char *ip); + // defines for mudlog() // enum LogMode : int { OFF = 0, @@ -45,6 +49,7 @@ inline void hexdump(const EOutputStream stream, const char *ptr, size_t buflen, } void write_time(FILE *file); +void write_log_message(const std::string& message, FILE* file); class AbstractLogger { public: diff --git a/src/utils/logging/file_log_sender.cpp b/src/utils/logging/file_log_sender.cpp new file mode 100644 index 000000000..0796c7d3f --- /dev/null +++ b/src/utils/logging/file_log_sender.cpp @@ -0,0 +1,74 @@ +#include "file_log_sender.h" +#include "utils/logger.h" +#include "engine/core/config.h" + +namespace logging { + +FileLogSender::FileLogSender() { + // Constructor - file handles are managed by runtime_config +} + +void FileLogSender::Debug(const std::string& message) { + Debug(message, {}); +} + +void FileLogSender::Debug(const std::string& message, + const std::map& attributes) { + write_to_file(message, attributes); +} + +void FileLogSender::Info(const std::string& message) { + Info(message, {}); +} + 
+void FileLogSender::Info(const std::string& message, + const std::map& attributes) { + write_to_file(message, attributes); +} + +void FileLogSender::Warn(const std::string& message) { + Warn(message, {}); +} + +void FileLogSender::Warn(const std::string& message, + const std::map& attributes) { + write_to_file(message, attributes); +} + +void FileLogSender::Error(const std::string& message) { + Error(message, {}); +} + +void FileLogSender::Error(const std::string& message, + const std::map& attributes) { + write_to_file(message, attributes); +} + +void FileLogSender::write_to_file(const std::string& message, + const std::map& attributes) { + FILE* file = get_log_file(attributes); + if (!file) { + return; + } + + // The message is written as-is; vlog() passes text already timestamped by format_log_message() + write_log_message(message, file); +} + +FILE* FileLogSender::get_log_file(const std::map& attributes) { + auto it = attributes.find("log_type"); + const std::string& log_type = (it != attributes.end()) ? it->second : "syslog"; + + if (log_type == "syslog") return runtime_config.logs(SYSLOG).handle(); + if (log_type == "errlog") return runtime_config.logs(ERRLOG).handle(); + if (log_type == "imlog") return runtime_config.logs(IMLOG).handle(); + if (log_type == "msdp") return runtime_config.logs(MSDP_LOG).handle(); + if (log_type == "money") return runtime_config.logs(MONEY_LOG).handle(); + + // Unknown log_type - fall back to syslog + return runtime_config.logs(SYSLOG).handle(); +} + +} // namespace logging + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/logging/file_log_sender.h b/src/utils/logging/file_log_sender.h new file mode 100644 index 000000000..3f0fdac29 --- /dev/null +++ b/src/utils/logging/file_log_sender.h @@ -0,0 +1,40 @@ +#ifndef BYLINS_FILE_LOG_SENDER_H +#define BYLINS_FILE_LOG_SENDER_H + +#include "log_sender.h" +#include + +namespace logging { + +// File-based log sender (writes to actual log files on disk) +class FileLogSender : public ILogSender { 
+public: + FileLogSender(); + ~FileLogSender() override = default; + + void Debug(const std::string& message) override; + void Debug(const std::string& message, + const std::map& attributes) override; + + void Info(const std::string& message) override; + void Info(const std::string& message, + const std::map& attributes) override; + + void Warn(const std::string& message) override; + void Warn(const std::string& message, + const std::map& attributes) override; + + void Error(const std::string& message) override; + void Error(const std::string& message, + const std::map& attributes) override; + +private: + void write_to_file(const std::string& message, + const std::map& attributes); + + FILE* get_log_file(const std::map& attributes); +}; + +} // namespace logging + +#endif // BYLINS_FILE_LOG_SENDER_H diff --git a/src/utils/logging/log_manager.cpp b/src/utils/logging/log_manager.cpp new file mode 100644 index 000000000..e648c245e --- /dev/null +++ b/src/utils/logging/log_manager.cpp @@ -0,0 +1,84 @@ +#include "log_manager.h" +#include "file_log_sender.h" +#include "engine/db/global_objects.h" + +namespace logging { + +LogManager& LogManager::Instance() { + return GlobalObjects::log_manager(); +} + +LogManager::LogManager() { +#ifdef TEST_BUILD + // In test mode, use NoOp sender by default + m_senders.push_back(std::make_unique()); +#else + // By default, use file logging + m_senders.push_back(std::make_unique()); +#endif +} + +void LogManager::AddSender(std::unique_ptr sender) { + m_senders.push_back(std::move(sender)); +} + +void LogManager::ClearSenders() { + m_senders.clear(); +} + +// Static interface implementations - iterate over all senders +void LogManager::Debug(const std::string& message) { + for (const auto& sender : Instance().m_senders) { + sender->Debug(message); + } +} + +void LogManager::Debug(const std::string& message, + const std::map& attributes) { + for (const auto& sender : Instance().m_senders) { + sender->Debug(message, attributes); + } +} + 
+void LogManager::Info(const std::string& message) { + for (const auto& sender : Instance().m_senders) { + sender->Info(message); + } +} + +void LogManager::Info(const std::string& message, + const std::map& attributes) { + for (const auto& sender : Instance().m_senders) { + sender->Info(message, attributes); + } +} + +void LogManager::Warn(const std::string& message) { + for (const auto& sender : Instance().m_senders) { + sender->Warn(message); + } +} + +void LogManager::Warn(const std::string& message, + const std::map& attributes) { + for (const auto& sender : Instance().m_senders) { + sender->Warn(message, attributes); + } +} + +void LogManager::Error(const std::string& message) { + for (const auto& sender : Instance().m_senders) { + sender->Error(message); + } +} + +void LogManager::Error(const std::string& message, + const std::map& attributes) { + for (const auto& sender : Instance().m_senders) { + sender->Error(message, attributes); + } +} + +} // namespace logging + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/logging/log_manager.h b/src/utils/logging/log_manager.h new file mode 100644 index 000000000..443d187a6 --- /dev/null +++ b/src/utils/logging/log_manager.h @@ -0,0 +1,54 @@ +#ifndef BYLINS_LOG_MANAGER_H +#define BYLINS_LOG_MANAGER_H + +#include "log_sender.h" +#include +#include + +namespace logging { + +// Central logging manager - coordinates all log senders +class LogManager { +public: + static LogManager& Instance(); + + // Add a log sender to the list + void AddSender(std::unique_ptr sender); + + // Clear all senders + void ClearSenders(); + + // Get current senders (for inspection) + const std::vector>& GetSenders() const { return m_senders; } + + // Static logging interface (delegates to all registered senders) + static void Debug(const std::string& message); + static void Debug(const std::string& message, + const std::map& attributes); + + static void Info(const std::string& message); + static void Info(const std::string& 
message, + const std::map& attributes); + + static void Warn(const std::string& message); + static void Warn(const std::string& message, + const std::map& attributes); + + static void Error(const std::string& message); + static void Error(const std::string& message, + const std::map& attributes); + + LogManager(const LogManager&) = delete; + LogManager& operator=(const LogManager&) = delete; + +// Made public to allow GlobalObjects to manage lifetime +public: + LogManager(); + ~LogManager() = default; + + std::vector> m_senders; +}; + +} // namespace logging + +#endif // BYLINS_LOG_MANAGER_H diff --git a/src/utils/logging/log_sender.h b/src/utils/logging/log_sender.h new file mode 100644 index 000000000..8173dccab --- /dev/null +++ b/src/utils/logging/log_sender.h @@ -0,0 +1,56 @@ +#ifndef BYLINS_LOG_SENDER_H +#define BYLINS_LOG_SENDER_H + +#include +#include + +namespace logging { + +enum class LogLevel { + kDebug, + kInfo, + kWarn, + kError +}; + +// Interface for log sending (Null Object Pattern) +class ILogSender { +public: + virtual ~ILogSender() = default; + + virtual void Debug(const std::string& message) = 0; + virtual void Debug(const std::string& message, + const std::map& attributes) = 0; + + virtual void Info(const std::string& message) = 0; + virtual void Info(const std::string& message, + const std::map& attributes) = 0; + + virtual void Warn(const std::string& message) = 0; + virtual void Warn(const std::string& message, + const std::map& attributes) = 0; + + virtual void Error(const std::string& message) = 0; + virtual void Error(const std::string& message, + const std::map& attributes) = 0; +}; + +// No-op implementation (for TEST_BUILD or when no senders configured) +class NoOpLogSender : public ILogSender { +public: + void Debug(const std::string&) override {} + void Debug(const std::string&, const std::map&) override {} + + void Info(const std::string&) override {} + void Info(const std::string&, const std::map&) override {} + + void Warn(const 
std::string&) override {} + void Warn(const std::string&, const std::map&) override {} + + void Error(const std::string&) override {} + void Error(const std::string&, const std::map&) override {} +}; + +} // namespace logging + +#endif // BYLINS_LOG_SENDER_H diff --git a/src/utils/timestamp.h b/src/utils/timestamp.h new file mode 100644 index 000000000..8f3a569fd --- /dev/null +++ b/src/utils/timestamp.h @@ -0,0 +1,32 @@ +#ifndef BYLINS_UTILS_TIMESTAMP_H +#define BYLINS_UTILS_TIMESTAMP_H + +#include +#include +#include +#include + +namespace utils { + +/** + * Current timestamp as "YYYY-MM-DD HH:MM:SS.mmm". + * Used to prefix startup messages printed to stdout. + */ +inline std::string NowTs() { + auto now = std::chrono::system_clock::now(); + auto t = std::chrono::system_clock::to_time_t(now); + auto ms = std::chrono::duration_cast(now.time_since_epoch()) % 1000; + struct tm tm_buf; + localtime_r(&t, &tm_buf); + char result[32]; + std::snprintf(result, sizeof(result), "%04d-%02d-%02d %02d:%02d:%02d.%03lld", + tm_buf.tm_year + 1900, tm_buf.tm_mon + 1, tm_buf.tm_mday, + tm_buf.tm_hour, tm_buf.tm_min, tm_buf.tm_sec, (long long)ms.count()); + return result; +} + +} // namespace utils + +#endif // BYLINS_UTILS_TIMESTAMP_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/tracing/noop_trace_sender.h b/src/utils/tracing/noop_trace_sender.h new file mode 100644 index 000000000..fb861f704 --- /dev/null +++ b/src/utils/tracing/noop_trace_sender.h @@ -0,0 +1,33 @@ +#ifndef BYLINS_NOOP_TRACE_SENDER_H +#define BYLINS_NOOP_TRACE_SENDER_H + +#include "trace_sender.h" + +namespace tracing { + +class NoOpSpan : public ISpan { +public: + void End() override {} + void AddEvent(const std::string&) override {} + void SetAttribute(const std::string&, const std::string&) override {} + void SetAttribute(const std::string&, int64_t) override {} + void SetAttribute(const std::string&, double) override {} + bool IsValid() const override { return false; } +}; + +class 
NoOpTraceSender : public ITraceSender { +public: + std::unique_ptr StartSpan(const std::string&) override { + return std::make_unique(); + } + + std::unique_ptr StartChildSpan(const std::string&, const ISpan&) override { + return std::make_unique(); + } +}; + +} // namespace tracing + +#endif // BYLINS_NOOP_TRACE_SENDER_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/tracing/trace_manager.cpp b/src/utils/tracing/trace_manager.cpp new file mode 100644 index 000000000..77919f399 --- /dev/null +++ b/src/utils/tracing/trace_manager.cpp @@ -0,0 +1,36 @@ +#include "trace_manager.h" +#include "noop_trace_sender.h" + +namespace tracing { + +TraceManager& TraceManager::Instance() { + static TraceManager instance; + return instance; +} + +TraceManager::TraceManager() { + // By default - NoOp sender + m_sender = std::make_unique(); +} + +void TraceManager::SetSender(std::unique_ptr sender) { + if (sender) { + m_sender = std::move(sender); + } +} + +ITraceSender& TraceManager::GetSender() { + return *m_sender; +} + +std::unique_ptr TraceManager::StartSpan(const std::string& name) { + return m_sender->StartSpan(name); +} + +std::unique_ptr TraceManager::StartChildSpan(const std::string& name, const ISpan& parent) { + return m_sender->StartChildSpan(name, parent); +} + +} // namespace tracing + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/tracing/trace_manager.h b/src/utils/tracing/trace_manager.h new file mode 100644 index 000000000..458f25d44 --- /dev/null +++ b/src/utils/tracing/trace_manager.h @@ -0,0 +1,33 @@ +#ifndef BYLINS_TRACE_MANAGER_H +#define BYLINS_TRACE_MANAGER_H + +#include "trace_sender.h" +#include + +namespace tracing { + +class TraceManager { +public: + static TraceManager& Instance(); + + void SetSender(std::unique_ptr sender); + ITraceSender& GetSender(); + + // Convenience methods (delegate to sender) + std::unique_ptr StartSpan(const std::string& name); + std::unique_ptr StartChildSpan(const std::string& name, const 
ISpan& parent); + +private: + TraceManager(); + ~TraceManager() = default; + TraceManager(const TraceManager&) = delete; + TraceManager& operator=(const TraceManager&) = delete; + + std::unique_ptr m_sender; +}; + +} // namespace tracing + +#endif // BYLINS_TRACE_MANAGER_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/tracing/trace_sender.h b/src/utils/tracing/trace_sender.h new file mode 100644 index 000000000..a5e3734a9 --- /dev/null +++ b/src/utils/tracing/trace_sender.h @@ -0,0 +1,44 @@ +#ifndef BYLINS_TRACE_SENDER_H +#define BYLINS_TRACE_SENDER_H + +#include +#include + +namespace tracing { + +// Forward declaration +class ISpan; + +// Interface for sending traces +class ITraceSender { +public: + virtual ~ITraceSender() = default; + + // Create parent span + virtual std::unique_ptr StartSpan(const std::string& name) = 0; + + // Create child span with parent context + virtual std::unique_ptr StartChildSpan( + const std::string& name, + const ISpan& parent) = 0; +}; + +// Interface for span (analog of OTEL Span, but through vtable) +class ISpan { +public: + virtual ~ISpan() = default; + + virtual void End() = 0; + virtual void AddEvent(const std::string& name) = 0; + virtual void SetAttribute(const std::string& key, const std::string& value) = 0; + virtual void SetAttribute(const std::string& key, int64_t value) = 0; + virtual void SetAttribute(const std::string& key, double value) = 0; + + virtual bool IsValid() const = 0; +}; + +} // namespace tracing + +#endif // BYLINS_TRACE_SENDER_H + +// vim: ts=4 sw=4 tw=0 noet syntax=cpp : diff --git a/src/utils/utils_time.cpp b/src/utils/utils_time.cpp index bdd853c01..751c7b7a7 100644 --- a/src/utils/utils_time.cpp +++ b/src/utils/utils_time.cpp @@ -1,26 +1,78 @@ #include "utils_time.h" #include "logger.h" +#include "tracing/trace_manager.h" +#include "engine/core/config.h" #include #include + namespace utils { +CSteppedProfiler::CSteppedProfiler(const std::string &scope_name, const double 
time_probe) + : m_scope_name(scope_name), m_time_probe(time_probe) +{ + // Create parent span + m_parent_span = tracing::TraceManager::Instance().StartSpan(m_scope_name); + if (m_parent_span->IsValid()) { + if (m_time_probe > 0) { + m_parent_span->SetAttribute("time_probe_seconds", m_time_probe); + } + } +} + CSteppedProfiler::~CSteppedProfiler() { if (0 < m_steps.size()) { m_steps.back()->stop(); + + // Close last child span + if (m_current_child_span) { + m_current_child_span->End(); + } + } + + // Add event if threshold exceeded + if (m_parent_span && m_parent_span->IsValid()) { + double total_duration = m_timer.delta().count(); + if (m_time_probe > 0 && total_duration > m_time_probe) { + m_parent_span->AddEvent("threshold_exceeded"); + } + + // Close parent span + m_parent_span->End(); } + report(); } - void CSteppedProfiler::next_step(const std::string &step_name) { + if (0 < m_steps.size()) { m_steps.back()->stop(); + + // Close previous child span + if (m_current_child_span) { + m_current_child_span->End(); + } } + m_steps.push_back(step_t(new CExecutionStepProfiler(step_name))); + + // Create new child span + if (m_parent_span && m_parent_span->IsValid()) { + m_current_child_span = tracing::TraceManager::Instance().StartChildSpan( + step_name, + *m_parent_span + ); + if (m_current_child_span->IsValid()) { + m_current_child_span->SetAttribute("step_index", + static_cast(m_steps.size() - 1)); + } + } } + + void CSteppedProfiler::report() const { FILE *flog; std::stringstream ss; @@ -54,9 +106,10 @@ void CSteppedProfiler::report() const { // спам сислога, кому надо уберите // log("INFO: %s\n", ss.str().c_str()); - flog = fopen(LOAD_LOG_FOLDER LOAD_LOG_FILE, "a"); + const std::string profiler_path = runtime_config.log_dir() + "/" + LOAD_LOG_FILE; + flog = fopen(profiler_path.c_str(), "a"); if (!flog) { - log("ERROR: Can't open file %s", LOAD_LOG_FOLDER LOAD_LOG_FILE); + log("ERROR: Can't open file %s", profiler_path.c_str()); return; } diff --git 
a/src/utils/utils_time.h b/src/utils/utils_time.h index 45c0a10c0..f6bfcbd92 100644 --- a/src/utils/utils_time.h +++ b/src/utils/utils_time.h @@ -6,8 +6,8 @@ #include #include #include +#include "tracing/trace_sender.h" -#define LOAD_LOG_FOLDER "log/" #define LOAD_LOG_FILE "profiler.log" namespace utils { @@ -51,7 +51,7 @@ class CSteppedProfiler { using step_t = std::shared_ptr; - CSteppedProfiler(const std::string &scope_name, const double time_probe = 0) : m_scope_name(scope_name), m_time_probe(time_probe) {} + CSteppedProfiler(const std::string &scope_name, const double time_probe = 0); ~CSteppedProfiler(); void next_step(const std::string &step_name); @@ -63,6 +63,8 @@ class CSteppedProfiler { const std::string m_scope_name; const double m_time_probe; std::list m_steps; + std::unique_ptr m_parent_span; + std::unique_ptr m_current_child_span; CExecutionTimer m_timer; }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index aa07ada7f..e693962c5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -71,6 +71,7 @@ set(TESTS act.makefood.cpp utils.editor.cpp utils.string.cpp + utils.encoding.cpp fight.penalties.cpp bonus.command.parser.cpp quested.cpp diff --git a/tools/observability/METRICS.md b/tools/observability/METRICS.md new file mode 100644 index 000000000..18b76f680 --- /dev/null +++ b/tools/observability/METRICS.md @@ -0,0 +1,164 @@ +# Bylins MUD — Метрики OpenTelemetry + +Список всех метрик, реально отправляемых в Prometheus. 
+ +## Соглашения + +**Gauges** хранятся как histograms с суффиксом `_gauge`: + +```promql +# Текущее значение gauge: +rate(metric_name_gauge_sum[5m]) / rate(metric_name_gauge_count[5m]) +``` + +**Histograms** — перцентили: + +```promql +histogram_quantile(0.99, rate(metric_name_bucket[5m])) +``` + +**Counters** — скорость событий: + +```promql +rate(metric_name_total[5m]) +``` + +--- + +## Игроки + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `players_online_count` | gauge | — | Игроков онлайн | +| `players_in_combat_count` | gauge | — | Игроков в бою | +| `players_by_level_remort_count` | gauge | `level`, `remort` | Распределение по уровню/реморту | +| `player_beat_update_duration` | histogram | — | Длительность обновления HP/Mana/Move за тик | +| `player_load_duration` | histogram | — | Длительность загрузки персонажа | +| `player_save_duration` | histogram | `save_type` (`frac`/`full`) | Длительность сохранения | +| `player_save_total` | counter | `save_type` | Количество сохранений | +| `player_deaths_total` | counter | `death_type` (`pvp`/`pve`/`other`) | Смерти игроков по типу | + +Prometheus-имена gauges: `players_online_count_gauge_{bucket,sum,count}` и т.д. 
+ +--- + +## Хартбит + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `heartbeat_step_duration` | histogram | `step` | Длительность каждого шага хартбита | +| `heartbeat_total_duration` | histogram | — | Длительность полного тика | +| `heartbeat_missed_pulses_total` | counter | — | Пропущенные пульсы (лаг сервера) | + +--- + +## Мобы + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `mob_active_count` | gauge | — | Активные (агрессивные) мобы | +| `mob_ai_duration` | histogram | — | Длительность цикла AI всех мобов | + +Prometheus: `mob_active_count_gauge_{bucket,sum,count}` + +--- + +## Зоны + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `zone_update_duration` | histogram | — | Длительность цикла обновления зон | +| `zone_reset_duration` | histogram | — | Длительность сброса одной зоны | +| `zone_reset_total` | counter | `zone_vnum`, `reset_mode` | Количество сбросов зон | +| `zone_command_Q_duration` | histogram | — | Длительность обработки очереди команд зоны | + +--- + +## Бой + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `combat_active_count` | gauge | — | Количество активных комбатантов (обновляется каждые 2 сек) | +| `combat_hit_duration` | histogram | — | Длительность расчёта одного удара | + +Prometheus: `combat_active_count_gauge_{bucket,sum,count}` + +> `combat_round_duration`, `combat_rounds_total` — в Prometheus **отсутствуют**. 
+ +--- + +## Магия + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `spell_cast_duration` | histogram | `spell_id`, `caster_class` | Длительность выполнения заклинания | +| `spell_cast_total` | counter | `spell_id`, `caster_class` | Количество кастов | + +--- + +## Скрипты + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `script_trigger_duration` | histogram | `trigger_type` (`MOB`/`OBJ`/`WLD`) | Длительность проверки триггеров за цикл | + +--- + +## Аукцион + +| Метрика | Тип | Атрибуты | Описание | +|---------|-----|----------|----------| +| `auction_lots_active` | gauge | — | Активные лоты | +| `auction_sale_total` | counter | — | Количество продаж | +| `auction_revenue_total` | counter | — | Выручка с продаж (в единицах валюты) | +| `auction_duration_seconds` | histogram | — | Длительность аукциона (от лота до продажи) | + +Prometheus: `auction_lots_active_gauge_{bucket,sum,count}` + +> Метрики `auction_sale_total`, `auction_revenue_total`, `auction_duration_seconds` появятся в Prometheus после первой продажи на аукционе. 
+ +--- + +## Span Metrics (генерируются Tempo) + +| Метрика | Описание | +|---------|----------| +| `traces_spanmetrics_calls_total` | Количество вызовов по имени спана | +| `traces_spanmetrics_latency_{bucket,sum,count}` | Латентность вызовов | +| `traces_spanmetrics_size_total` | Размер спанов | +| `traces_target_info` | Информация о сервисе | + +Атрибуты: `service`, `span_name`, `status_code` + +--- + +## Примеры запросов + +```promql +# Игроки онлайн (текущее) +rate(players_online_count_gauge_sum[5m]) / rate(players_online_count_gauge_count[5m]) + +# p99 тика хартбита +histogram_quantile(0.99, rate(heartbeat_total_duration_bucket[5m])) + +# Длительность шагов хартбита p95 (по шагам) +histogram_quantile(0.95, sum by (step, le) (rate(heartbeat_step_duration_bucket[5m]))) + +# p95 длительности зоны +histogram_quantile(0.95, rate(zone_update_duration_bucket[5m])) + +# Топ заклинаний по частоте +topk(10, sum by (spell_id) (rate(spell_cast_total[5m]))) + +# Пропуски пульсов за последний час +increase(heartbeat_missed_pulses_total[1h]) + +# Активные мобы (текущее) +rate(mob_active_count_gauge_sum[5m]) / rate(mob_active_count_gauge_count[5m]) + +# Активные комбатанты +rate(combat_active_count_gauge_sum[5m]) / rate(combat_active_count_gauge_count[5m]) + +# Смерти игроков по типу за последний час +sum by (death_type) (increase(player_deaths_total[1h])) +``` diff --git a/tools/observability/PERFORMANCE_IMPACT.md b/tools/observability/PERFORMANCE_IMPACT.md new file mode 100644 index 000000000..94350443d --- /dev/null +++ b/tools/observability/PERFORMANCE_IMPACT.md @@ -0,0 +1,186 @@ +# OpenTelemetry Instrumentation — Влияние на производительность + +## Резюме + +**Общий overhead**: ~0.025–0.08% CPU, ~100–250 KB памяти +**Вывод**: Безопасно для production + +--- + +## Типичные значения операций (OpenTelemetry C++ SDK) + +| Операция | Время | +|----------|-------| +| Создать span | 500–1000 ns | +| Установить атрибут (string) | 100–200 ns | +| Установить атрибут (int64) | 50–100 
ns | +| Закрыть span | 200–500 ns | +| RecordCounter / RecordGauge | 100–200 ns | +| RecordHistogram / ScopedMetric | 200–500 ns | + +--- + +## Анализ по системам + +### Хартбит (heartbeat.cpp) + +**Частота**: каждые 40ms (25 Hz) + +- `heartbeat.step.duration` — один RecordHistogram на шаг: ~300 ns +- `heartbeat.total.duration` — один RecordHistogram на тик: ~300 ns +- `heartbeat.missed_pulses_total` — RecordCounter только при лаге: ~150 ns + +**Overhead**: ~1–2 µs на тик = **0.003–0.005% CPU** + +--- + +### Обновление игроков (game_limits.cpp) + +**Частота**: каждые 40ms (каждый тик) + +- ScopedMetric `player.beat_update.duration`: ~300 ns +- RecordGauge `players.online.count`: ~150 ns +- RecordGauge `players.in_combat.count`: ~150 ns +- RecordGauge `players.by_level_remort.count` × N уникальных комбинаций: ~150 ns × N + +**При 20 уникальных level/remort**: ~4 µs на тик = **0.01% CPU** + +--- + +### Бой — violence (fight.cpp) + +**Частота**: каждые 2 секунды (kBattleRound = 50 тиков) + +- RecordGauge `combat.active.count`: ~150 ns — один раз на вызов `perform_violence()` + +**Бой — удар (fight_hit.cpp)** + +**Частота**: N ударов за раунд (1–4 на персонажа) + +- ScopedMetric `combat.hit.duration`: ~300 ns на удар + +**При 10 боях, 2 ударах на раунд**: ~1 µs на вызов `perform_violence()` = **пренебрежимо** + +--- + +### Смерть игрока (fight_stuff.cpp) + +**Частота**: редко (события смерти) + +- RecordCounter `player.deaths.total`: ~150 ns на смерть — **пренебрежимо** + +--- + +### AI мобов (mobact.cpp) + +**Частота**: каждые 400ms (каждые 10 тиков) + +- ScopedMetric `mob.ai.duration`: ~300 ns +- RecordGauge `mob.active.count`: ~150 ns + +**Overhead**: ~0.5 µs на цикл = **пренебрежимо** + +--- + +### Загрузка/сохранение игрока (db.cpp, obj_save.cpp) + +**Частота**: загрузка — при входе; сохранение — каждые 5–30 минут + +- ScopedMetric `player.load.duration` / `player.save.duration`: ~300 ns +- RecordCounter `player.save.total`: ~150 ns + +Overhead <<< времени I/O 
операции. **Пренебрежимо.** + +--- + +### Обновление зон (db.cpp) + +**Частота**: раз в секунду + +- ScopedMetric `zone.update.duration`: ~300 ns +- RecordCounter `zone.reset.total` × количество сбросов: ~150 ns × N +- RecordHistogram `zone.reset.duration`: ~300 ns +- RecordHistogram `zone.command.Q.duration`: ~300 ns + +**Overhead**: ~1–2 µs на цикл = **пренебрежимо** + +--- + +### Заклинания (magic_utils.cpp) + +**Частота**: 5–50 заклинаний в секунду + +- ScopedMetric `spell.cast.duration` с атрибутами spell_id/caster_class: ~400 ns +- RecordCounter `spell.cast.total`: ~150 ns + +**При 20 заклинаниях/сек**: ~11 µs/сек = **0.001% CPU** + +--- + +### DG Scripts (dg_scripts.cpp) + +**Частота**: каждые 13 секунд + +- ScopedMetric `script.trigger.duration`: ~300 ns — **пренебрежимо** + +--- + +### Аукцион (auction.cpp) + +**Частота**: обновление тактов — раз в 1.2 сек; продажа — редко + +- RecordGauge `auction.lots.active`: ~150 ns на такт +- RecordCounter `auction.sale.total`: ~150 ns на продажу +- RecordCounter `auction.revenue.total`: ~150 ns на продажу +- RecordHistogram `auction.duration.seconds`: ~300 ns на продажу + +**Overhead**: **пренебрежимо** (редкие события) + +> Метрики существуют в коде. Появятся в Prometheus после первой продажи на аукционе. + +--- + +### Крафтинг (item_creation.cpp) + +**Частота**: редко (ручной крафт игроками) + +- ScopedMetric `craft.duration`: ~300 ns +- RecordCounter `craft.completed.total` / `craft.failures.total`: ~150 ns + +**Overhead**: **пренебрежимо** + +> Метрики существуют в коде. Появятся в Prometheus после первого крафта. 
+ +--- + +## Итоговая оценка (worst-case, 50 игроков онлайн) + +| Система | Overhead/сек | +|---------|-------------| +| Хартбит | ~50 µs | +| Обновление игроков | ~100 µs | +| Бой (10 активных) | ~10 µs | +| AI мобов | ~1.25 µs | +| Заклинания (20/сек) | ~11 µs | +| Зоны | ~2 µs | +| Прочее | ~5 µs | +| **Итого** | **~180 µs/сек = 0.018% CPU** | + +### Память + +| Сценарий | Overhead | +|----------|----------| +| Нормальная нагрузка | ~100 KB | +| Пиковая нагрузка | ~250 KB | + +--- + +## Рекомендации + +- **Sampling traces**: 10–20% при >50 игроках (настраивается в OTEL Collector) +- **Метрики**: всегда 100%, overhead минимален +- Batch export каждые 5 сек — оптимально для текущей нагрузки + +--- + +*Методология: OpenTelemetry C++ SDK benchmarks + практические оценки* diff --git a/tools/observability/README.md b/tools/observability/README.md new file mode 100644 index 000000000..6a5430dd0 --- /dev/null +++ b/tools/observability/README.md @@ -0,0 +1,78 @@ +# Bylins MUD — OpenTelemetry Observability + +Стек мониторинга: **OTEL Collector → Prometheus / Loki / Tempo → Grafana** + +## Файлы + +``` +tools/observability/ +├── docker-compose.observability.yml # Docker Compose стек +├── otel-collector-config.yaml # OTEL Collector +├── prometheus.yml # Prometheus scrape config +├── loki-config.yaml # Loki (логи) +├── tempo-config.yaml # Tempo (трейсы) +├── METRICS.md # Список всех метрик +├── grafana/ +│ └── provisioning/ +│ ├── datasources/datasources.yml +│ └── dashboards/dashboards.yml +└── dashboards/ + ├── operational-dashboard.json # Игроки, активность + ├── performance-dashboard.json # Хартбит, мобы, I/O + └── business-logic-dashboard.json # Заклинания, скрипты, аукцион +``` + +## Быстрый старт + +```bash +cd tools/observability +docker-compose -f docker-compose.observability.yml up -d +``` + +Grafana: http://localhost:12000 (admin / admin123) + +## Инструментированные системы + +- **Хартбит** — длительность тика и каждого шага, пропущенные пульсы +- **Игроки** — 
онлайн, в бою, распределение по уровню/реморту, save/load +- **Мобы** — количество активных, длительность AI +- **Зоны** — обновление, сброс +- **Бой** — длительность расчёта удара +- **Магия** — кол-во и длительность кастов по заклинанию/классу +- **DG Scripts** — длительность проверки триггеров по типу (MOB/OBJ/WLD) +- **Аукцион** — количество активных лотов + +> Полный список метрик с Prometheus-именами → [METRICS.md](METRICS.md) + +## Сборка сервера с OTEL + +```bash +cmake -S . -B build_otel \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_OTEL=ON \ + -DCMAKE_TOOLCHAIN_FILE=~/repos/vcpkg/scripts/buildsystems/vcpkg.cmake \ + -DCMAKE_PREFIX_PATH=~/repos/vcpkg/installed/x64-linux +make -C build_otel -j$(($(nproc)/2)) +``` + +## Архитектура + +``` +Bylins MUD (OTEL C++ SDK) + │ OTLP/gRPC :4317 + ▼ +OTEL Collector + ├── Prometheus (метрики) :9090 + ├── Tempo (трейсы) :3200 + └── Loki (логи) :3100 + ▼ + Grafana :12000 +``` + +## Prometheus: запросы для gauge метрик + +Gauges хранятся как histograms с суффиксом `_gauge`. 
Для текущего значения: + +```promql +rate(players_online_count_gauge_sum[5m]) / rate(players_online_count_gauge_count[5m]) +``` diff --git "a/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/business-logic-dashboard.json" "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/business-logic-dashboard.json" new file mode 100644 index 000000000..0ce7bd5fd --- /dev/null +++ "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/business-logic-dashboard.json" @@ -0,0 +1,434 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "id": 999, + "type": "text", + "title": "", + "gridPos": {"h": 2, "w": 24, "x": 0, "y": 0}, + "options": { + "mode": "markdown", + "content": "> **Дашборд только для чтения.** Управляется из файла в репозитории. Чтобы внести изменения — используйте **Save As** (создайте копию).\n>\n> Панели аукциона и крафта показывают данные только после соответствующих игровых событий." 
+ }, + "transparent": true + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 2}, + "id": 1, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "topk(10, sum by (spell_id) (rate(spell_cast_total[5m])))", + "legendFormat": "{{spell_id}}", + "refId": "A" + } + ], + "title": "Топ 10 самых используемых заклинаний (частота)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 2}, + "id": 2, + "options": { + "legend": {"calcs": 
["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (spell_id, le) (rate(spell_cast_duration_bucket[5m])))", + "legendFormat": "p95 {{spell_id}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum by (le) (rate(spell_cast_duration_bucket[5m])))", + "legendFormat": "p99 (все)", + "refId": "B" + } + ], + "title": "Длительность выполнения заклинания", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10}, + "id": 3, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "sum by (caster_class) (rate(spell_cast_total[5m]))", + "legendFormat": "{{caster_class}}", + "refId": "A" + } + ], + "title": "Заклинания по классу (частота)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + 
"lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10}, + "id": 4, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (trigger_type, le) (rate(script_trigger_duration_bucket[5m])))", + "legendFormat": "p95 {{trigger_type}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum by (trigger_type, le) (rate(script_trigger_duration_bucket[5m])))", + "legendFormat": "p99 {{trigger_type}}", + "refId": "B" + } + ], + "title": "Длительность триггеров DG Scripts (по типу)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 18}, + "id": 5, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + 
"targets": [ + { + "expr": "rate(player_deaths_total[5m])", + "legendFormat": "Смертей/сек ({{death_type}})", + "refId": "A" + } + ], + "title": "Смерти игроков (PvP / PvE)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 18}, + "id": 6, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(craft_completed_total[5m])", + "legendFormat": "Успешно ({{recipe_id}})", + "refId": "A" + }, + { + "expr": "rate(craft_failures_total[5m])", + "legendFormat": "Неудача ({{failure_reason}})", + "refId": "B" + } + ], + "title": "Крафт: успехи и неудачи (появится при первом крафте)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 2}, + {"color": "red", "value": 5} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 6, "x": 0, "y": 26}, + "id": 7, + "options": { + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + 
"showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(auction_lots_active_gauge_sum[5m]) / rate(auction_lots_active_gauge_count[5m])", + "refId": "A" + } + ], + "title": "Активные лоты аукциона", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 18, "x": 6, "y": 26}, + "id": 8, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(auction_lots_active_gauge_sum[5m]) / rate(auction_lots_active_gauge_count[5m])", + "legendFormat": "Активные лоты", + "refId": "A" + }, + { + "expr": "rate(auction_sale_total[5m])", + "legendFormat": "Продаж/сек", + "refId": "B" + } + ], + "title": "Аукцион (продажи появятся после первой сделки)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + 
"scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 34}, + "id": 9, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(auction_duration_seconds_bucket[5m]))", + "legendFormat": "p95 Длительность (от выставления до продажи)", + "refId": "A" + } + ], + "title": "Длительность аукционов (появится после первой продажи)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, + "scaleDistribution": {"type": "linear"}, "showPoints": "never", + "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 34}, + "id": 10, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(zone_reset_total[5m])", + "legendFormat": "Сбросов/сек ({{reset_mode}})", + "refId": "A" + } + ], + "title": "Сбросы зон (по режиму)", + "type": "timeseries" + } + ], + "schemaVersion": 27, + "style": 
"dark", + "tags": ["bylins", "business-logic", "mud"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Bylins MUD - Дашборд игровой логики", + "description": "Только для чтения. Чтобы внести изменения — клонируйте дашборд в Grafana (кнопка Save As).", + "uid": "bylins-business-logic", + "version": 0 +} diff --git "a/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/operational-dashboard.json" "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/operational-dashboard.json" new file mode 100644 index 000000000..905854c13 --- /dev/null +++ "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/operational-dashboard.json" @@ -0,0 +1,526 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "id": 999, + "type": "text", + "title": "", + "gridPos": {"h": 2, "w": 24, "x": 0, "y": 0}, + "options": { + "mode": "markdown", + "content": "> **Дашборд только для чтения.** Управляется из файла в репозитории. Чтобы внести изменения — используйте **Save As** (создайте копию)." 
+ }, + "transparent": true + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "yellow", "value": 1}, + {"color": "green", "value": 10} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 6, "x": 0, "y": 2}, + "id": 1, + "options": { + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_online_count_gauge_sum[5m]) / rate(players_online_count_gauge_count[5m])", + "refId": "A" + } + ], + "title": "Игроки онлайн", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 5}, + {"color": "red", "value": 10} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 6, "x": 6, "y": 2}, + "id": 2, + "options": { + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_in_combat_count_gauge_sum[5m]) / rate(players_in_combat_count_gauge_count[5m])", + "refId": "A" + } + ], + "title": "Игроки в бою", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 2}, + "id": 3, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_online_count_gauge_sum[5m]) / rate(players_online_count_gauge_count[5m])", + "legendFormat": "Онлайн", + "refId": "A" + }, + { + "expr": "rate(players_in_combat_count_gauge_sum[5m]) / rate(players_in_combat_count_gauge_count[5m])", + "legendFormat": "В бою", + "refId": "B" + } + ], + "title": "Активность игроков", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"hideFrom": {"tooltip": false, "viz": false, "legend": false}}, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 12, "w": 12, "x": 0, "y": 10}, + "id": 4, + "options": { + "legend": {"displayMode": "table", "placement": "right", "values": ["value"]}, + "pieType": "pie", + "tooltip": {"mode": "single"}, + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "displayLabels": ["percent"] + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "topk(10, rate(players_by_level_remort_count_gauge_sum[5m]) / rate(players_by_level_remort_count_gauge_count[5m]))", + "legendFormat": "Уровень {{level}} Реморт {{remort}}", + "refId": "A" + } + ], + "title": "Распределение игроков по уровню и реморту (Топ 10)", + "type": "piechart" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + 
"color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 12, "w": 12, "x": 12, "y": 10}, + "id": 5, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_by_level_remort_count_gauge_sum[5m]) / rate(players_by_level_remort_count_gauge_count[5m])", + "legendFormat": "Ур{{level}}Р{{remort}}", + "refId": "A" + } + ], + "title": "Все игроки по уровню/реморту (текущие)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 22}, + "id": 6, + "options": { + "legend": {"calcs": ["mean", "max"], 
"displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(player_beat_update_duration_bucket[5m]))", + "legendFormat": "p95", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(player_beat_update_duration_bucket[5m]))", + "legendFormat": "p99", + "refId": "B" + } + ], + "title": "Обновление HP/Мана/Движение (длительность)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 22}, + "id": 7, + "options": { + "legend": {"calcs": ["mean", "last"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_in_combat_count_gauge_sum[5m]) / rate(players_in_combat_count_gauge_count[5m])", + "legendFormat": "Игроки в бою", + "refId": "A" + }, + { + "expr": "rate(mob_active_count_gauge_sum[5m]) / rate(mob_active_count_gauge_count[5m])", + "legendFormat": "Активные мобы", + "refId": "B" + }, + { + "expr": "rate(auction_lots_active_gauge_sum[5m]) / rate(auction_lots_active_gauge_count[5m])", + "legendFormat": "Лоты аукциона", + "refId": "C" + } + ], + "title": "Счётчики активности системы", + "type": "timeseries" + }, + { + 
"datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 30}, + "id": 8, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(zone_update_duration_bucket[5m]))", + "legendFormat": "p95 Обновление зоны", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, rate(spell_cast_duration_bucket[5m]))", + "legendFormat": "p95 Заклинание", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.95, rate(mob_ai_duration_bucket[5m]))", + "legendFormat": "p95 AI мобов", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.95, rate(script_trigger_duration_bucket[5m]))", + "legendFormat": "p95 Триггеры скриптов", + "refId": "D" + } + ], + "title": "Задержка ключевых операций (p95)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "custom": {"align": "auto", "displayMode": "auto"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 0.1}, + {"color": "red", "value": 0.5} + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 
12, "x": 12, "y": 30}, + "id": 9, + "options": {"showHeader": true}, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.99, rate(heartbeat_total_duration_bucket[5m]))", + "format": "table", + "instant": true, + "legendFormat": "Тик хартбита", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(zone_update_duration_bucket[5m]))", + "format": "table", + "instant": true, + "legendFormat": "Обновление зоны", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, rate(spell_cast_duration_bucket[5m]))", + "format": "table", + "instant": true, + "legendFormat": "Заклинание", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.99, rate(mob_ai_duration_bucket[5m]))", + "format": "table", + "instant": true, + "legendFormat": "AI мобов", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.99, rate(combat_hit_duration_bucket[5m]))", + "format": "table", + "instant": true, + "legendFormat": "Расчёт удара", + "refId": "E" + } + ], + "title": "Задержки p99 (текущие)", + "type": "table", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {"Time": true, "job": true, "instance": true}, + "renameByName": {"Value": "Длительность (с)"} + } + } + ] + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 38}, 
+ "id": 10, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (step, le) (rate(heartbeat_step_duration_bucket[5m])))", + "legendFormat": "p95 {{step}}", + "refId": "A" + } + ], + "title": "Длительность шагов хартбита (p95 по шагам)", + "type": "timeseries" + } + ], + "schemaVersion": 27, + "style": "dark", + "tags": ["bylins", "operational", "players", "mud"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Bylins MUD - Операционный дашборд", + "description": "Только для чтения. Чтобы внести изменения — клонируйте дашборд в Grafana (кнопка Save As).", + "uid": "bylins-operational", + "version": 0 +} diff --git "a/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/performance-dashboard.json" "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/performance-dashboard.json" new file mode 100644 index 000000000..8646b7cd5 --- /dev/null +++ "b/tools/observability/dashboards/\320\237\321\200\320\270\320\274\320\265\321\200\321\213/performance-dashboard.json" @@ -0,0 +1,565 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "id": 999, + "type": "text", + "title": "", + "gridPos": {"h": 2, "w": 24, "x": 0, "y": 0}, + "options": { + "mode": "markdown", + "content": "> **Дашборд только для чтения.** Управляется из файла в репозитории. Чтобы внести изменения — используйте **Save As** (создайте копию)."
+ }, + "transparent": true + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 0.03}, + {"color": "red", "value": 0.04} + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 2}, + "id": 1, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.50, rate(heartbeat_total_duration_bucket[5m]))", + "legendFormat": "p50 Тик хартбита", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, rate(heartbeat_total_duration_bucket[5m]))", + "legendFormat": "p95 Тик хартбита", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, rate(heartbeat_total_duration_bucket[5m]))", + "legendFormat": "p99 Тик хартбита", + "refId": "C" + } + ], + "title": "Длительность тика хартбита (перцентили, target: <40ms)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 5}, + {"color": "red", "value": 10} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 2}, + "id": 2, + 
"options": { + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(players_in_combat_count_gauge_sum[5m]) / rate(players_in_combat_count_gauge_count[5m])", + "refId": "A" + } + ], + "title": "Игроки в бою", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 500}, + {"color": "red", "value": 1000} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 6, "x": 18, "y": 2}, + "id": 3, + "options": { + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(mob_active_count_gauge_sum[5m]) / rate(mob_active_count_gauge_count[5m])", + "refId": "A" + } + ], + "title": "Активные мобы", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10}, 
+ "id": 4, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (step, le) (rate(heartbeat_step_duration_bucket[5m])))", + "legendFormat": "p95 {{step}}", + "refId": "A" + } + ], + "title": "Длительность шагов хартбита (p95)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10}, + "id": 5, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(mob_ai_duration_bucket[5m]))", + "legendFormat": "p95 AI мобов", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(mob_ai_duration_bucket[5m]))", + "legendFormat": "p99 AI мобов", + "refId": "B" + } + ], + "title": "Длительность обработки AI мобов", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom":
{"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 18}, + "id": 6, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(player_load_duration_bucket[5m]))", + "legendFormat": "p95 Загрузка", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, rate(player_save_duration_bucket{save_type=\"frac\"}[5m]))", + "legendFormat": "p95 Сохранение (частичное)", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.95, rate(player_save_duration_bucket{save_type=\"full\"}[5m]))", + "legendFormat": "p95 Сохранение (полное)", + "refId": "C" + } + ], + "title": "Длительность сохранения/загрузки игрока (p95)", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 
12, "y": 18}, + "id": 7, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(player_save_total[5m])", + "legendFormat": "Сохранений/сек ({{save_type}})", + "refId": "A" + } + ], + "title": "Частота сохранения игроков", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 26}, + "id": 8, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(zone_update_duration_bucket[5m]))", + "legendFormat": "p95 Обновление зоны", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(zone_update_duration_bucket[5m]))", + "legendFormat": "p99 Обновление зоны", + "refId": "B" + } + ], + "title": "Длительность обновления зоны", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": 
false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 26}, + "id": 9, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate(zone_reset_total[5m])", + "legendFormat": "Сбросов зоны/сек", + "refId": "A" + } + ], + "title": "Частота сброса зон", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 34}, + "id": 10, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "increase(heartbeat_missed_pulses_total[5m])", + "legendFormat": "Пропущенных пульсов за 5 мин", + "refId": "A" + } + ], + "title": "Пропущенные пульсы хартбита (лаг сервера)", + 
"type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 34}, + "id": 11, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "right"}, + "tooltip": {"mode": "multi"} + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(combat_hit_duration_bucket[5m]))", + "legendFormat": "p95 Расчёт удара", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(combat_hit_duration_bucket[5m]))", + "legendFormat": "p99 Расчёт удара", + "refId": "B" + } + ], + "title": "Длительность расчёта удара в бою", + "type": "timeseries" + } + ], + "schemaVersion": 27, + "style": "dark", + "tags": ["bylins", "performance", "mud"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Bylins MUD - Дашборд производительности", + "description": "Только для чтения. 
Чтобы внести изменения — клонируйте дашборд в Grafana (кнопка Save As).", + "uid": "bylins-performance", + "version": 0 +} diff --git a/tools/observability/docker-compose.data-dir.yml b/tools/observability/docker-compose.data-dir.yml new file mode 100644 index 000000000..6b1432449 --- /dev/null +++ b/tools/observability/docker-compose.data-dir.yml @@ -0,0 +1,41 @@ +version: '3.8' + +# Override file: replaces named volumes with bind mounts to ${DATA_DIR}. +# Containers run as the current host user (UID:GID) so that written files +# are owned by the user who started the stack. +# +# Usage: +# export DATA_DIR=/var/lib/mud-observability +# mkdir -p $DATA_DIR/{prometheus,tempo,loki,grafana} +# docker-compose \ +# -f docker-compose.observability.yml \ +# -f docker-compose.data-dir.yml \ +# up -d +# +# Or put DATA_DIR in a .env file next to the compose files. + +services: + prometheus: + user: "${UID}:${GID}" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ${DATA_DIR}/prometheus:/prometheus + + tempo: + user: "${UID}:${GID}" + volumes: + - ./tempo-config.yaml:/etc/tempo.yaml:ro + - ${DATA_DIR}/tempo:/var/tempo # same mount point as the tempo-data volume it overrides; tempo-config.yaml writes under /var/tempo + + loki: + user: "${UID}:${GID}" + volumes: + - ./loki-config.yaml:/etc/loki/config.yaml:ro + - ${DATA_DIR}/loki:/loki + + grafana: + user: "${UID}:${GID}" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./dashboards:/var/lib/grafana/dashboards:ro + - ${DATA_DIR}/grafana:/var/lib/grafana diff --git a/tools/observability/docker-compose.observability.yml b/tools/observability/docker-compose.observability.yml new file mode 100644 index 000000000..5158880a0 --- /dev/null +++ b/tools/observability/docker-compose.observability.yml @@ -0,0 +1,98 @@ +version: '3.8' + +services: + # OpenTelemetry Collector + otel-collector: + image: otel/opentelemetry-collector-contrib:0.146.0 + container_name: mud-otel-collector + command: ["--config=/etc/otel-collector-config.yaml"] + volumes: + - 
./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + ports: + - "127.0.0.1:4317:4317" # OTLP gRPC receiver + - "127.0.0.1:4318:4318" # OTLP HTTP receiver + - "127.0.0.1:8888:8888" # Prometheus metrics (collector's own metrics) + - "127.0.0.1:13133:13133" # Health check + networks: + - observability + restart: unless-stopped + + # Prometheus (metrics) + prometheus: + image: prom/prometheus:v3.8.0 + container_name: mud-prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=30d' + - '--web.enable-remote-write-receiver' + - '--enable-feature=exemplar-storage' + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + ports: + - "127.0.0.1:9090:9090" + networks: + - observability + restart: unless-stopped + + # Tempo (traces) + tempo: + image: grafana/tempo:2.9.0 + container_name: mud-tempo + command: ["-config.file=/etc/tempo.yaml"] + volumes: + - ./tempo-config.yaml:/etc/tempo.yaml:ro + - tempo-data:/var/tempo + ports: + - "127.0.0.1:3200:3200" # Tempo HTTP + networks: + - observability + restart: unless-stopped + + # Loki (logs) + loki: + image: grafana/loki:3.6.7 + container_name: mud-loki + command: -config.file=/etc/loki/config.yaml + volumes: + - ./loki-config.yaml:/etc/loki/config.yaml:ro + - loki-data:/loki + ports: + - "127.0.0.1:3100:3100" + networks: + - observability + restart: unless-stopped + + # Grafana (visualization); admin credentials can be overridden via GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD + grafana: + image: grafana/grafana:12.3.2 + container_name: mud-grafana + environment: + - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123} + - GF_USERS_ALLOW_SIGN_UP=false + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./dashboards:/var/lib/grafana/dashboards:ro + - grafana-data:/var/lib/grafana + ports: + - "127.0.0.1:12000:3000" + networks: + - observability + depends_on: + - prometheus + - tempo + - loki + 
restart: unless-stopped + +volumes: + prometheus-data: + tempo-data: + loki-data: + grafana-data: + +networks: + observability: + driver: bridge diff --git a/tools/observability/grafana/provisioning/dashboards/dashboards.yml b/tools/observability/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 000000000..98702a6cd --- /dev/null +++ b/tools/observability/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'Bylins MUD Dashboards' + orgId: 1 + folder: 'Bylins' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/tools/observability/grafana/provisioning/datasources/datasources.yml b/tools/observability/grafana/provisioning/datasources/datasources.yml new file mode 100644 index 000000000..9e0151bb7 --- /dev/null +++ b/tools/observability/grafana/provisioning/datasources/datasources.yml @@ -0,0 +1,55 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + jsonData: + httpMethod: POST + timeInterval: 15s + exemplarTraceIdDestinations: + - name: trace_id + datasourceUid: tempo + + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + uid: tempo + editable: false + jsonData: + httpMethod: GET + tracesToLogs: + datasourceUid: loki + filterByTraceID: true + filterBySpanID: false + spanStartTimeShift: '-5m' + spanEndTimeShift: '5m' + customQuery: false + tracesToMetrics: + datasourceUid: prometheus + tags: [{key: 'service.name', value: 'service'}] + serviceMap: + datasourceUid: prometheus + nodeGraph: + enabled: true + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + uid: loki + editable: false + jsonData: + maxLines: 1000 + derivedFields: + - datasourceUid: tempo + matcherType: label + matcherRegex: trace_id + name: 
TraceID + url: '$${__value.raw}' + urlDisplayLabel: Open in Tempo diff --git a/tools/observability/install-otel-sdk.sh b/tools/observability/install-otel-sdk.sh new file mode 100755 index 000000000..65149a9f5 --- /dev/null +++ b/tools/observability/install-otel-sdk.sh @@ -0,0 +1,102 @@ +#!/bin/sh +# Install opentelemetry-cpp SDK required for building Bylins MUD with WITH_OTEL=ON. +# +# Usage: +# ./tools/observability/install-otel-sdk.sh # vcpkg (default) +# ./tools/observability/install-otel-sdk.sh --source # build from source +# +# After installation, build the server with: +# cmake -S . -B build_otel \ +# -DCMAKE_BUILD_TYPE=Release \ +# -DWITH_OTEL=ON \ +# -DCMAKE_TOOLCHAIN_FILE=~/vcpkg/scripts/buildsystems/vcpkg.cmake \ +# -DCMAKE_PREFIX_PATH=~/vcpkg/installed/x64-linux +# make -C build_otel -j$(($(nproc)/2)) +# +# Note: if you only deploy a pre-built binary, this script is not needed — +# opentelemetry-cpp is a build-time dependency only. + +set -e + +OTEL_VERSION="1.24.0" +VCPKG_DIR="${VCPKG_DIR:-$HOME/vcpkg}" +OTEL_INSTALL_PREFIX="${OTEL_INSTALL_PREFIX:-/usr/local}" + +METHOD="vcpkg" +if [ "$1" = "--source" ]; then + METHOD="source" +fi + +# ── vcpkg (primary) ────────────────────────────────────────────────────────── +install_via_vcpkg() { + for dep in curl zip unzip tar git cmake pkg-config; do + if ! command -v "$dep" >/dev/null 2>&1; then + echo "Error: '$dep' is not installed. Run:" + echo " sudo apt-get install curl zip unzip tar git build-essential cmake pkg-config" + exit 1 + fi + done + + if [ -f "$VCPKG_DIR/vcpkg" ]; then + echo "vcpkg found at $VCPKG_DIR" + elif [ -d "$VCPKG_DIR/.git" ]; then + echo "vcpkg repo found at $VCPKG_DIR, bootstrapping ..." + "$VCPKG_DIR/bootstrap-vcpkg.sh" -disableMetrics + else + echo "Installing vcpkg to $VCPKG_DIR ..." + git clone https://github.com/microsoft/vcpkg "$VCPKG_DIR" + "$VCPKG_DIR/bootstrap-vcpkg.sh" -disableMetrics + fi + + echo "Installing opentelemetry-cpp $OTEL_VERSION via vcpkg ..." 
+ "$VCPKG_DIR/vcpkg" install "opentelemetry-cpp[otlp-http]:x64-linux" --recurse + + echo "" + echo "Done. Build the server with:" + echo " cmake -S . -B build_otel \\" + echo " -DCMAKE_BUILD_TYPE=Release \\" + echo " -DWITH_OTEL=ON \\" + echo " -DCMAKE_TOOLCHAIN_FILE=$VCPKG_DIR/scripts/buildsystems/vcpkg.cmake \\" + echo " -DCMAKE_PREFIX_PATH=$VCPKG_DIR/installed/x64-linux" +} + +# ── From source (alternative) ──────────────────────────────────────────────── +install_from_source() { + echo "Installing build dependencies ..." # wget/ca-certificates: tarball download below; protobuf: OTLP exporter dependency (also installed by the CI job) + sudo apt-get install -y \ + build-essential cmake \ + libssl-dev libcurl4-gnutls-dev \ + zlib1g-dev libprotobuf-dev protobuf-compiler wget ca-certificates + + WORKDIR=$(mktemp -d) + trap 'rm -rf "$WORKDIR"' EXIT + + echo "Downloading opentelemetry-cpp $OTEL_VERSION ..." + wget -q -P "$WORKDIR" \ + "https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v${OTEL_VERSION}.tar.gz" + tar xzf "$WORKDIR/v${OTEL_VERSION}.tar.gz" -C "$WORKDIR" + + echo "Building (this takes ~15 minutes) ..." + cmake -S "$WORKDIR/opentelemetry-cpp-${OTEL_VERSION}" -B "$WORKDIR/build" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX="$OTEL_INSTALL_PREFIX" \ + -DWITH_OTLP_HTTP=ON \ + -DWITH_OTLP_GRPC=OFF \ + -DBUILD_TESTING=OFF \ + -DWITH_BENCHMARK=OFF \ + -DWITH_EXAMPLES=OFF + cmake --build "$WORKDIR/build" -j"$(nproc)" + sudo cmake --install "$WORKDIR/build" + + echo "" + echo "Done. Build the server with:" + echo " cmake -S . 
-B build_otel \\" + echo " -DCMAKE_BUILD_TYPE=Release \\" + echo " -DWITH_OTEL=ON" + echo " (opentelemetry-cpp installed to $OTEL_INSTALL_PREFIX, found automatically)" +} + +case "$METHOD" in + vcpkg) install_via_vcpkg ;; + source) install_from_source ;; +esac diff --git a/tools/observability/loki-config.yaml b/tools/observability/loki-config.yaml new file mode 100644 index 000000000..f09fdafc5 --- /dev/null +++ b/tools/observability/loki-config.yaml @@ -0,0 +1,64 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/tsdb-shipper-active + cache_location: /loki/tsdb-shipper-cache + cache_ttl: 24h + filesystem: + directory: /loki/chunks + +limits_config: + reject_old_samples: true + reject_old_samples_max_age: 168h + ingestion_rate_mb: 10 + ingestion_burst_size_mb: 20 + allow_structured_metadata: true + volume_enabled: true + retention_period: 8760h + otlp_config: + resource_attributes: + attributes_config: + - action: index_label + attributes: + - service.name + - service.namespace + - action: structured_metadata + regex: .* + log_attributes: + - action: structured_metadata + regex: .* + scope_attributes: + - action: structured_metadata + regex: .* + +# Retention for the tsdb/filesystem store is enforced by the compactor (period set in limits_config). +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem diff --git a/tools/observability/otel-collector-config.yaml b/tools/observability/otel-collector-config.yaml new file mode 100644 index 000000000..526967002 --- /dev/null +++ b/tools/observability/otel-collector-config.yaml @@ -0,0 +1,62 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + 
batch: + timeout: 5s + send_batch_size: 512 + send_batch_max_size: 1024 + + memory_limiter: + check_interval: 1s + limit_mib: 512 + spike_limit_mib: 128 + +exporters: + prometheusremotewrite: + endpoint: http://prometheus:9090/api/v1/write + tls: + insecure: true + + otlp_grpc/tempo: + endpoint: tempo:4317 + tls: + insecure: true + + otlp_http/loki: + endpoint: http://loki:3100/otlp + tls: + insecure: true + +service: + pipelines: + metrics: + receivers: [otlp] + processors: [memory_limiter, batch] + exporters: [prometheusremotewrite] + + traces: + receivers: [otlp] + processors: [memory_limiter, batch] + exporters: [otlp_grpc/tempo] + + logs: + receivers: [otlp] + processors: [memory_limiter, batch] + exporters: [otlp_http/loki] + + telemetry: + logs: + level: info + metrics: + readers: + - pull: + exporter: + prometheus: + host: "0.0.0.0" + port: 8888 diff --git a/tools/observability/prometheus.yml b/tools/observability/prometheus.yml new file mode 100644 index 000000000..cf0ffb1a9 --- /dev/null +++ b/tools/observability/prometheus.yml @@ -0,0 +1,15 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + cluster: 'bylins-production' + environment: 'production' + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'otel-collector' + static_configs: + - targets: ['otel-collector:8888'] diff --git a/tools/observability/start.sh b/tools/observability/start.sh new file mode 100755 index 000000000..d5c67da70 --- /dev/null +++ b/tools/observability/start.sh @@ -0,0 +1,32 @@ +#!/bin/sh +set -e + +cd "$(dirname "$0")" + +# Fix config file permissions (containers run as non-root) +chmod 644 ./*.yml ./*.yaml 2>/dev/null || true +chmod 644 grafana/provisioning/datasources/*.yml 2>/dev/null || true +chmod 644 grafana/provisioning/dashboards/*.yml 2>/dev/null || true + +if [ $# -eq 0 ]; then + set -- up -d +fi + +if [ -n "$DATA_DIR" ]; then + echo "Using bind mounts in: $DATA_DIR" + mkdir -p 
"$DATA_DIR/prometheus" "$DATA_DIR/tempo" "$DATA_DIR/loki" "$DATA_DIR/grafana" + + # bash makes UID readonly, so "export UID=..." aborts under set -e; + # pass both IDs to docker-compose through env(1) instead. + + exec env "UID=$(id -u)" "GID=$(id -g)" docker-compose \ + -f docker-compose.observability.yml \ + -f docker-compose.data-dir.yml \ + "$@" +else + echo "Using Docker named volumes (set DATA_DIR to use a host directory)" + + exec docker-compose \ + -f docker-compose.observability.yml \ + "$@" +fi diff --git a/tools/observability/tempo-config.yaml b/tools/observability/tempo-config.yaml new file mode 100644 index 000000000..6d8c61ae2 --- /dev/null +++ b/tools/observability/tempo-config.yaml @@ -0,0 +1,52 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +ingester: + max_block_duration: 5m + +compactor: + compaction: + block_retention: 720h # 30 days + +metrics_generator: + registry: + external_labels: + source: tempo + storage: + path: /var/tempo/generator/wal + remote_write: + - url: http://prometheus:9090/api/v1/write + send_exemplars: true + processor: + service_graphs: + wait: 10s + max_items: 10000 + span_metrics: + histogram_buckets: [0.001, 0.002, 0.005, 0.010, 0.025, 0.050, 0.100, 0.250, 0.500, 1.0, 2.5] + dimensions: + - step + - spell_name + - trigger_type + enable_target_info: true + +overrides: + defaults: + metrics_generator: + processors: [service-graphs, span-metrics] + +storage: + trace: + backend: local + local: + path: /var/tempo/blocks + wal: + path: /var/tempo/wal + search: + chunk_size_bytes: 1_000_000