diff --git a/.gitignore b/.gitignore index 7e53678..086cdb0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,9 @@ .vscode/ build/ +out/ bin/ + +.cache/ +compile_commands.json + diff --git a/CMakeLists.txt b/CMakeLists.txt index 6393e5b..9fc5327 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) find_package(protobuf CONFIG REQUIRED) find_package(gRPC CONFIG REQUIRED) find_package(fmt CONFIG REQUIRED) +find_package(tomlplusplus CONFIG REQUIRED) set(PROTO_DIR ${CMAKE_SOURCE_DIR}/libs/proto) set(PROTO_FILE ${PROTO_DIR}/volta.proto) @@ -23,15 +24,15 @@ set(GRPC_HDR "${CMAKE_CURRENT_BINARY_DIR}/volta.grpc.pb.h") add_custom_command( OUTPUT "${PROTO_SRC}" "${PROTO_HDR}" "${GRPC_SRC}" "${GRPC_HDR}" - + COMMAND protobuf::protoc - + ARGS --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" --grpc_out="${CMAKE_CURRENT_BINARY_DIR}" --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" -I "${PROTO_DIR}" "${PROTO_FILE}" - + DEPENDS "${PROTO_FILE}" gRPC::grpc_cpp_plugin COMMENT "Generating C++ code from volta.proto..." ) @@ -40,11 +41,12 @@ add_library(volta_proto ${PROTO_SRC} ${PROTO_HDR} ${GRPC_SRC} ${GRPC_HDR}) target_include_directories(volta_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries(volta_proto PRIVATE - protobuf::libprotobuf +target_link_libraries(volta_proto PRIVATE + protobuf::libprotobuf gRPC::grpc++ gRPC::grpc++_reflection fmt::fmt + tomlplusplus::tomlplusplus ${CMAKE_DL_LIBS} ) @@ -64,7 +66,7 @@ if(CLANG_FORMAT_EXE) COMMENT "Running clang-format on source files..." VERBATIM ) - + # optional: check formatting add_custom_target(check-format COMMAND find source/agent -name "*.cc" -o -name "*.h" | xargs ${CLANG_FORMAT_EXE} --dry-run --Werror -style=Google diff --git a/agent.conf b/agent.conf new file mode 100644 index 0000000..a80c3bc --- /dev/null +++ b/agent.conf @@ -0,0 +1 @@ +core_affinity = [2, "3-10"] diff --git a/source/agent/CMakeLists.txt b/source/agent/CMakeLists.txt index 46a770a..bb23533 100644 --- a/source/agent/CMakeLists.txt +++ b/source/agent/CMakeLists.txt @@ -1,6 +1,11 @@ set(AGENT_NAME volta_agent) add_executable(${AGENT_NAME}) +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + message(STATUS "Debug build - adding DEBUG macro") + target_compile_definitions(${AGENT_NAME} PRIVATE DEBUG) +endif() + file(GLOB_RECURSE AGENT_SOURCES "src/*.cc") target_sources(${AGENT_NAME} PRIVATE ${AGENT_SOURCES}) @@ -11,9 +16,14 @@ target_link_libraries(${AGENT_NAME} PRIVATE volta_proto) find_package(Threads REQUIRED) find_package(Protobuf REQUIRED) find_package(gRPC REQUIRED) +find_package(tomlplusplus REQUIRED) -target_link_libraries(${AGENT_NAME} PRIVATE Threads::Threads) -target_link_libraries(${AGENT_NAME} PRIVATE gRPC::grpc++ protobuf::libprotobuf) +target_link_libraries(${AGENT_NAME} PRIVATE + Threads::Threads + gRPC::grpc++ + protobuf::libprotobuf + tomlplusplus::tomlplusplus +) find_path(NVML_INCLUDE_DIR nvml.h PATHS ${CMAKE_SOURCE_DIR}/libs/include/nvidia @@ -30,7 +40,6 @@ if(NVML_INCLUDE_DIR AND NVML_LIBRARY) target_include_directories(${AGENT_NAME} PRIVATE ${NVML_INCLUDE_DIR}) target_link_libraries(${AGENT_NAME} PRIVATE ${NVML_LIBRARY}) target_compile_definitions(${AGENT_NAME} PRIVATE HAVE_NVML) - else() message(STATUS "NVML library not found. Using STUB for compilation.") add_library(nvml_stub STATIC ${CMAKE_SOURCE_DIR}/libs/stubs/nvml_stub.cc) @@ -40,5 +49,11 @@ else() target_include_directories(${AGENT_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/libs/include/nvidia) endif() +configure_file( + ${CMAKE_SOURCE_DIR}/agent.conf + ${CMAKE_CURRENT_BINARY_DIR}/agent.conf + COPYONLY +) + # 5. TODO: find rocm_smi and oneapi level_zero # 6. TODO: find PMU (libpfm) diff --git a/source/agent/src/config/config.h b/source/agent/src/config/config.h index 1a83a7c..7af79da 100644 --- a/source/agent/src/config/config.h +++ b/source/agent/src/config/config.h @@ -1,10 +1,14 @@ #ifndef VOLTA_AGENT_CONFIG_CONFIG_H_ #define VOLTA_AGENT_CONFIG_CONFIG_H_ +#include + #include #include +#include #include #include +#include namespace volta { namespace agent { @@ -39,14 +43,40 @@ struct CollectorConfig { }; struct Config { + void PrintCurrentAffinity() { + cpu_set_t set; + CPU_ZERO(&set); + + if (sched_getaffinity(0, sizeof(set), &set) != 0) { + perror("sched_getaffinity"); + return; + } + + long max_cpus = sysconf(_SC_NPROCESSORS_CONF); + std::cout << "Current CPU affinity: "; + + for (int i = 0; i < max_cpus; ++i) { + if (CPU_ISSET(i, &set)) std::cout << i << " "; + } + std::cout << "\n"; + } + static constexpr int32_t kDefaultIntervalMs = 500; - static constexpr int32_t kDefaultAffinity = -1; static constexpr char const* kDefaultServerAddress = "localhost"; static constexpr uint16_t kDefaultServerPort = 50051; + static inline cpu_set_t kDefaultAffinity = [] { + cpu_set_t mask; + CPU_ZERO(&mask); + unsigned int n_cpus = std::thread::hardware_concurrency(); + for (unsigned int i = 0; i < n_cpus; ++i) { + CPU_SET(i, &mask); + } + return mask; + }(); std::chrono::milliseconds collection_interval = std::chrono::milliseconds(kDefaultIntervalMs); - int32_t core_affinity = kDefaultAffinity; + cpu_set_t core_affinity = kDefaultAffinity; std::string server_address = kDefaultServerAddress; uint16_t server_port = kDefaultServerPort; diff --git a/source/agent/src/config/config_loader.cc b/source/agent/src/config/config_loader.cc index d409213..3c846ee 100644 --- a/source/agent/src/config/config_loader.cc +++ b/source/agent/src/config/config_loader.cc @@ -1,10 +1,37 @@ #include "config/config_loader.h" +#include + +#include +#include +#include + +#include "config/config.h" + namespace volta { namespace agent { namespace config { +std::filesystem::path ConfigLoader::kConfigFile = "agent.conf"; + +std::set ConfigLoader::kValidTopLevelKeys = { + "core_affinity", "core_affinity_mask", "interval", + "server_address", "server_port", "collectors"}; + +std::map> + ConfigLoader::kValidCollectorMetrics = { + {"cpu", {"proc_stat", "cpu_freq", "rapl", "zenpower", "pmu"}}, + {"gpu", {"nvml", "dcgm", "rocm", "level_zero"}}, + {"ram", {"mem_info", "vm_stat"}}, + {"io", {"disk_stats", "net_dev"}}}; + Config ConfigLoader::LoadConfig() { + Config config = LoadDefaultConfig(); + LoadConfigFile(config); + return config; +} + +Config ConfigLoader::LoadDefaultConfig() { Config config; CollectorConfig nvml_collector; @@ -24,9 +51,111 @@ Config ConfigLoader::LoadConfig() { return config; } -Config ConfigLoader::LoadConfig(const std::filesystem::path& filepath) { - // ignore for POC - return LoadConfig(); +inline unsigned int MaxOnlineCpus() { + long n = sysconf(_SC_NPROCESSORS_ONLN); + return (n > 0) ? static_cast(n) : 0; +} + +bool AddCpu(cpu_set_t& set, unsigned int cpu, unsigned int max_cpu) { + if (cpu >= max_cpu) return false; + CPU_SET(cpu, &set); + return true; +} + +bool AddRange(cpu_set_t& set, unsigned int from, unsigned int to, + unsigned int max_cpu) { + if (from > to || to >= max_cpu) return false; + for (unsigned int i = from; i <= to; ++i) CPU_SET(i, &set); + return true; +} + +void ConfigLoader::LoadConfigFile(Config& out_config) { + if (!std::filesystem::exists(kConfigFile)) { + std::cout << "Agent config file not found, loading default settings." + << std::endl; + return; + } + + try { + toml::table tbl = toml::parse_file(kConfigFile.string()); + + if (auto val = tbl["core_affinity"]) { + unsigned int max_cpu = MaxOnlineCpus(); + cpu_set_t mask; + CPU_ZERO(&mask); + + // core_affinity = "all" + if (auto s = val.value(); s && *s == "all") { + for (unsigned int i = 0; i < max_cpu; ++i) CPU_SET(i, &mask); + + out_config.core_affinity = mask; + } + // core_affinity = [ ... ] + else if (auto arr = val.as_array()) { + for (auto& item : *arr) { + // liczba CPU + if (auto cpu = item.value()) { + if (!AddCpu(mask, *cpu, max_cpu)) { + std::cerr << "CPU index out of range: " << *cpu << "\n"; + return; + } + } + // zakres "X-Y" + else if (auto str = item.value()) { + unsigned int from, to; + if (sscanf(str->c_str(), "%u-%u", &from, &to) == 2) { + if (!AddRange(mask, from, to, max_cpu)) { + std::cerr << "Invalid CPU range: " << *str << "\n"; + return; + } + } else { + std::cerr << "Invalid core_affinity entry: " << *str << "\n"; + return; + } + } else { + std::cerr << "Invalid core_affinity element type\n"; + return; + } + } + if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) { + perror("sched_setaffinity"); + } else { + std::cout << "Successfully set CPU affinity mask." << std::endl; + } + out_config.core_affinity = mask; + } else { + std::cerr << "Invalid core_affinity value\n"; + } + } + // interval + // server_address + // server_port + + auto collectors_node = tbl["collectors"].as_table(); + if (!collectors_node) return; + + for (auto& [collector_name, collector_node] : *collectors_node) { + auto collector_table = collector_node.as_table(); + if (!collector_table) continue; + + CollectorConfig collector; + + if (auto enabled_array = (*collector_table)["enabled"].as_array()) { + for (auto& item : *enabled_array) { + if (auto str = item.value()) { + collector.metrics[*str] = true; + } + } + + collector.enabled = !collector.metrics.empty(); + } + + out_config.collectors[std::string{collector_name.str()}] = collector; + } + } catch (const toml::parse_error& err) { + std::cerr << "Parsing Agent config failed: " << err.description() << " at " + << err.source().begin << std::endl; + } } } // namespace config diff --git a/source/agent/src/config/config_loader.h b/source/agent/src/config/config_loader.h index c683c35..4e70ca5 100644 --- a/source/agent/src/config/config_loader.h +++ b/source/agent/src/config/config_loader.h @@ -2,7 +2,8 @@ #define VOLTA_AGENT_CONFIG_CONFIG_LOADER_H_ #include -#include +#include +#include #include "config/config.h" @@ -13,9 +14,16 @@ namespace config { class ConfigLoader { public: static Config LoadConfig(); - static Config LoadConfig(const std::filesystem::path& filepath); private: + ConfigLoader() = delete; + + static Config LoadDefaultConfig(); + static void LoadConfigFile(Config& out_config); + + static std::filesystem::path kConfigFile; + static std::set kValidTopLevelKeys; + static std::map> kValidCollectorMetrics; }; } // namespace config diff --git a/source/agent/src/platform/platform_detector.cc b/source/agent/src/platform/platform_detector.cc index 243f11d..f0ebb48 100644 --- a/source/agent/src/platform/platform_detector.cc +++ b/source/agent/src/platform/platform_detector.cc @@ -123,7 +123,7 @@ std::string PlatformDetector::DetectOS() { std::string line; while (std::getline(file, line)) { - if (line.starts_with("PRETTY_NAME=") == 0) { + if (line.starts_with("PRETTY_NAME=")) { std::string value = line.substr(12); if (value.size() >= 2 && value.front() == '"' && value.back() == '"') { diff --git a/vcpkg.json b/vcpkg.json index fb12fdb..f7b4c46 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -5,6 +5,7 @@ "dependencies": [ "fmt", "grpc", - "protobuf" + "protobuf", + "tomlplusplus" ] }