diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 68bbb0c..42becf3 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,6 @@ # Mass reformatting - introducing CI Agent (Issue #14) 433cd2b661a88b143fae6e215a487280a5dedf05 + +189ff3dffca02d8824919060b24ef0a084c609af +a91de65471c7ad4145a5b509b06a1d08ca1dfd6f +821770ad1742a746b373d2bd0cf081f2eaebedd9 diff --git a/source/agent/src/config/config.h b/source/agent/src/config/config.h index 7af79da..06421b8 100644 --- a/source/agent/src/config/config.h +++ b/source/agent/src/config/config.h @@ -13,7 +13,8 @@ namespace volta { namespace agent { namespace config { - +// TODO: Metrics and Collector names into something that can be read with a +// string namespace CollectorNames { // CPU static constexpr char const* kProcStat = "proc_stat"; @@ -48,6 +49,7 @@ struct Config { CPU_ZERO(&set); if (sched_getaffinity(0, sizeof(set), &set) != 0) { + // TODO: Log perror("sched_getaffinity"); return; } @@ -67,13 +69,17 @@ struct Config { static inline cpu_set_t kDefaultAffinity = [] { cpu_set_t mask; CPU_ZERO(&mask); - unsigned int n_cpus = std::thread::hardware_concurrency(); - for (unsigned int i = 0; i < n_cpus; ++i) { + long n_cpus = sysconf(_SC_NPROCESSORS_ONLN); + n_cpus = std::min(n_cpus, static_cast(CPU_SETSIZE)); + // TODO: Handle sysconf error + for (long i = 0; i < n_cpus; ++i) { CPU_SET(i, &mask); } return mask; }(); + std::string uuid = ""; + std::chrono::milliseconds collection_interval = std::chrono::milliseconds(kDefaultIntervalMs); cpu_set_t core_affinity = kDefaultAffinity; diff --git a/source/agent/src/config/config_loader.cc b/source/agent/src/config/config_loader.cc index 3c846ee..8a81107 100644 --- a/source/agent/src/config/config_loader.cc +++ b/source/agent/src/config/config_loader.cc @@ -2,24 +2,30 @@ #include +#include +#include #include +#include #include -#include +#include -#include "config/config.h" +#include "utils/utils.h" + +namespace utils = volta::agent::utils; 
namespace volta { namespace agent { namespace config { +// TODO: different path for prod build std::filesystem::path ConfigLoader::kConfigFile = "agent.conf"; +std::filesystem::path ConfigLoader::kUUIDFile = "agent.uuid"; -std::set ConfigLoader::kValidTopLevelKeys = { - "core_affinity", "core_affinity_mask", "interval", - "server_address", "server_port", "collectors"}; +std::set> ConfigLoader::kValidTopLevelKeys = { + "core_affinity", "interval", "server_address", "server_port", "collectors"}; -std::map> - ConfigLoader::kValidCollectorMetrics = { +std::map>, std::less<>> + ConfigLoader::kValidCollectors = { {"cpu", {"proc_stat", "cpu_freq", "rapl", "zenpower", "pmu"}}, {"gpu", {"nvml", "dcgm", "rocm", "level_zero"}}, {"ram", {"mem_info", "vm_stat"}}, @@ -34,6 +40,8 @@ Config ConfigLoader::LoadConfig() { Config ConfigLoader::LoadDefaultConfig() { Config config; + if (!LoadUUID(config)) CreateUUID(config); + CollectorConfig nvml_collector; nvml_collector.enabled = true; nvml_collector.metrics = { @@ -70,6 +78,7 @@ bool AddRange(cpu_set_t& set, unsigned int from, unsigned int to, } void ConfigLoader::LoadConfigFile(Config& out_config) { + // TODO: Proper logging if (!std::filesystem::exists(kConfigFile)) { std::cout << "Agent config file not found, loading default settings." 
<< std::endl; @@ -79,82 +88,190 @@ void ConfigLoader::LoadConfigFile(Config& out_config) { try { toml::table tbl = toml::parse_file(kConfigFile.string()); - if (auto val = tbl["core_affinity"]) { - unsigned int max_cpu = MaxOnlineCpus(); - cpu_set_t mask; - CPU_ZERO(&mask); + LoadCoreAffinity(tbl, out_config); + LoadInterval(tbl, out_config); + LoadServerAddress(tbl, out_config); + LoadServerPort(tbl, out_config); + LoadCollectors(tbl, out_config); + CheckKeys(tbl); + } catch (const toml::parse_error& err) { + std::cerr << "Parsing Agent config failed: " << err.description() << " at " + << err.source().begin << std::endl; + } +} + +bool ConfigLoader::LoadUUID(Config& out_config) { + if (!std::filesystem::exists(kUUIDFile)) return false; + + // An unreadable or empty file means "no UUID", so the caller creates one. + std::ifstream f(kUUIDFile); + std::string uuid; + if (!std::getline(f, uuid) || uuid.empty()) return false; + out_config.uuid = uuid; + return true; +} + +void ConfigLoader::CreateUUID(Config& out_config) { + // TODO: Handle errors + std::string uuid = utils::GenerateUUIDv4(); + + std::filesystem::path tmp = kUUIDFile; + tmp += ".tmp"; + + { + std::ofstream f(tmp, std::ios::trunc); + f << uuid; + f.flush(); + } + + std::filesystem::rename(tmp, kUUIDFile); + out_config.uuid = uuid; +} + +void ConfigLoader::LoadCoreAffinity(toml::table& tbl, Config& out_config) { + if (!tbl.contains("core_affinity")) return; - // core_affinity = "all" - if (auto s = val.value(); s && *s == "all") { - for (unsigned int i = 0; i < max_cpu; ++i) CPU_SET(i, &mask); + auto val = tbl["core_affinity"]; - out_config.core_affinity = mask; + // core_affinity = "all" + if (auto s = val.value(); s && *s == "all") { + out_config.core_affinity = Config::kDefaultAffinity; + } + // core_affinity = [ ... ] + else if (auto arr = val.as_array()) { + unsigned int max_cpu = MaxOnlineCpus(); + cpu_set_t mask; + CPU_ZERO(&mask); + // NOTE: Should the affinity be set in here?
+ for (auto& item : *arr) { + // CPU number + if (auto cpu = item.value()) { + if (!AddCpu(mask, *cpu, max_cpu)) { + std::cerr << "CPU index out of range: " << *cpu << "\n"; + return; + } } - // core_affinity = [ ... ] - else if (auto arr = val.as_array()) { - for (auto& item : *arr) { - // liczba CPU - if (auto cpu = item.value()) { - if (!AddCpu(mask, *cpu, max_cpu)) { - std::cerr << "CPU index out of range: " << *cpu << "\n"; - return; - } - } - // zakres "X-Y" - else if (auto str = item.value()) { - unsigned int from, to; - if (sscanf(str->c_str(), "%u-%u", &from, &to) == 2) { - if (!AddRange(mask, from, to, max_cpu)) { - std::cerr << "Invalid CPU range: " << *str << "\n"; - return; - } - } else { - std::cerr << "Invalid core_affinity entry: " << *str << "\n"; - return; - } - } else { - std::cerr << "Invalid core_affinity element type\n"; + // range "X-Y" + else if (auto str = item.value()) { + unsigned int from, to; + if (sscanf(str->c_str(), "%u-%u", &from, &to) == 2) { + if (!AddRange(mask, from, to, max_cpu)) { + std::cerr << "Invalid CPU range: " << *str << "\n"; return; } - } - if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) { - perror("sched_setaffinity"); } else { - std::cout << "Successfully set CPU affinity mask." << std::endl; + std::cerr << "Invalid core_affinity entry: " << *str << "\n"; + return; } - out_config.core_affinity = mask; } else { - std::cerr << "Invalid core_affinity value\n"; + std::cerr << "Invalid core_affinity element type\n"; + return; } } - // interval - // server_address - // server_port + if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) { + perror("sched_setaffinity"); + } else { + std::cout << "Successfully set CPU affinity mask."
<< std::endl; + } - auto collectors_node = tbl["collectors"].as_table(); - if (!collectors_node) return; + out_config.core_affinity = mask; + } else { + std::cerr << "Invalid core_affinity value\n"; + } +} - for (auto& [collector_name, collector_node] : *collectors_node) { - auto collector_table = collector_node.as_table(); - if (!collector_table) continue; +void ConfigLoader::LoadInterval(toml::table& tbl, Config& out_config) { + if (!tbl.contains("interval")) return; - CollectorConfig collector; + auto val = tbl["interval"]; - if (auto enabled_array = (*collector_table)["enabled"].as_array()) { - for (auto& item : *enabled_array) { - if (auto str = item.value()) { - collector.metrics[*str] = true; - } + if (auto ms = val.value()) { + out_config.collection_interval = std::chrono::milliseconds(*ms); + std::cout << "Collection Interval set to: " + << out_config.collection_interval << std::endl; + } else { + std::cerr << "Invalid interval value type, use uint32" << std::endl; + } +} + +void ConfigLoader::LoadServerAddress(toml::table& tbl, Config& out_config) { + if (!tbl.contains("server_address")) return; + + auto val = tbl["server_address"]; + + if (auto str = val.value()) { + if (utils::IsValidIP(*str) || utils::IsResolvable(*str)) { + out_config.server_address = *str; + std::cout << "Server Address set to " << *str << std::endl; + } else { + std::cerr << "Invalid server_address format" << std::endl; + } + } else { + std::cerr << "Invalid server_address value type, use string" << std::endl; + } +} + +void ConfigLoader::LoadServerPort(toml::table& tbl, Config& out_config) { + if (!tbl.contains("server_port")) return; + + auto val = tbl["server_port"]; + + if (auto port = val.value(); port && *port > 0) { + out_config.server_port = *port; + std::cout << "Server port set to " << *port << std::endl; + } else { + std::cerr << "server_port has an incorrect type or value, use number " + "from range " + "[1, 65535]" + << std::endl; + } +} + +void 
ConfigLoader::LoadCollectors(toml::table& tbl, Config& out_config) { + if (!tbl.contains("collectors")) return; + + auto collectors_node = tbl["collectors"].as_table(); + if (!collectors_node) return; // key is present but its value is not a table + for (auto&& [hardware_type, hardware_node] : *collectors_node) { + if (!kValidCollectors.contains(hardware_type.str())) { + std::cerr << "Invalid hardware type: " << hardware_type << std::endl; + continue; + } + + auto collectors = hardware_node.as_array(); + if (!collectors) { + std::cout << "Element " << hardware_type << " is not an array\n"; + continue; + } + + CollectorConfig collector_config; + + std::cout << hardware_type << std::endl; + for (auto&& collector : *collectors) { + if (auto str = collector.value()) { + const auto& collector_set = kValidCollectors[hardware_type.str()]; + if (!collector_set.contains(*str)) { + std::cout << "Invalid collector: " << *str + << ", for hardware: " << hardware_type << std::endl; + continue; } - collector.enabled = !collector.metrics.empty(); + std::cout << *str << std::endl; + // TODO: Add metrics + + collector_config.enabled = !collector_config.metrics.empty(); + } else { + std::cerr << "Invalid type in " << hardware_type << " array\n"; } + } + } +} - out_config.collectors[std::string{collector_name.str()}] = collector; +void ConfigLoader::CheckKeys(toml::table& tbl) { + for (auto&& [key, value] : tbl) { + if (!kValidTopLevelKeys.contains(key.str())) { + std::cout << "Key '" << key << "' is not a valid key\n"; } - } catch (const toml::parse_error& err) { - std::cerr << "Parsing Agent config failed: " << err.description() << " at " - << err.source().begin << std::endl; } } diff --git a/source/agent/src/config/config_loader.h b/source/agent/src/config/config_loader.h index 4e70ca5..2014423 100644 --- a/source/agent/src/config/config_loader.h +++ b/source/agent/src/config/config_loader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "config/config.h" @@ -14,16 +15,27 @@ namespace config { class ConfigLoader { public: static Config
LoadConfig(); + static Config LoadDefaultConfig(); private: ConfigLoader() = delete; - static Config LoadDefaultConfig(); static void LoadConfigFile(Config& out_config); + static bool LoadUUID(Config& out_config); + static void CreateUUID(Config& out_config); + static void LoadCoreAffinity(toml::table& tbl, Config& out_config); + static void LoadInterval(toml::table& tbl, Config& out_config); + static void LoadServerAddress(toml::table& tbl, Config& out_config); + static void LoadServerPort(toml::table& tbl, Config& out_config); + static void LoadCollectors(toml::table& tbl, Config& out_config); + static void CheckKeys(toml::table& tbl); static std::filesystem::path kConfigFile; - static std::set kValidTopLevelKeys; - static std::map> kValidCollectorMetrics; + static std::filesystem::path kUUIDFile; + static std::set> kValidTopLevelKeys; + static std::map>, + std::less<>> + kValidCollectors; }; } // namespace config diff --git a/source/agent/src/platform/platform_detector.cc b/source/agent/src/platform/platform_detector.cc index f0ebb48..2de7fbd 100644 --- a/source/agent/src/platform/platform_detector.cc +++ b/source/agent/src/platform/platform_detector.cc @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #ifdef HAVE_NVML #include diff --git a/source/agent/src/platform/platform_detector.h b/source/agent/src/platform/platform_detector.h index 458f27c..88775ce 100644 --- a/source/agent/src/platform/platform_detector.h +++ b/source/agent/src/platform/platform_detector.h @@ -1,9 +1,7 @@ #ifndef VOLTA_AGENT_PLATFORM_PLATFORM_DETECTOR_H_ #define VOLTA_AGENT_PLATFORM_PLATFORM_DETECTOR_H_ -#include #include -#include #include "platform/hardware_info.h" diff --git a/source/agent/src/scheduler.cc b/source/agent/src/scheduler.cc index 74927c7..d6c806a 100644 --- a/source/agent/src/scheduler.cc +++ b/source/agent/src/scheduler.cc @@ -13,8 +13,7 @@ Scheduler::Scheduler( : config_(config), collectors_(std::move(collectors)) {} void Scheduler::Run() { - 
std::cout << "[" << typeid(*this).name() - << "] Starting collection loop (Interval: " + std::cout << "[" << config_.uuid << "] Starting collection loop (Interval: " << config_.collection_interval.count() << "ms)..." << std::endl; std::this_thread::sleep_for(config_.collection_interval); diff --git a/source/agent/src/utils/utils.cc b/source/agent/src/utils/utils.cc new file mode 100644 index 0000000..9afc176 --- /dev/null +++ b/source/agent/src/utils/utils.cc @@ -0,0 +1,54 @@ +#include "utils.h" + +#include +#include + +#include +#include + +namespace volta { +namespace agent { +namespace utils { + +bool IsValidIP(const std::string& ip) { + sockaddr_in sa4{}; + sockaddr_in6 sa6{}; + + return inet_pton(AF_INET, ip.c_str(), &sa4.sin_addr) == 1 || + inet_pton(AF_INET6, ip.c_str(), &sa6.sin6_addr) == 1; +} + +bool IsResolvable(const std::string& host) { + addrinfo hints{}, *res = nullptr; + hints.ai_family = AF_UNSPEC; + + bool ok = getaddrinfo(host.c_str(), nullptr, &hints, &res) == 0; + if (res != nullptr) freeaddrinfo(res); // freeaddrinfo(NULL) is not portable + return ok; +} + +std::string GenerateUUIDv4() { + std::random_device rd; + // Draw from rd directly; a 32-bit engine seed allows only 2^32 UUIDs. + std::uniform_int_distribution dist; + + uint64_t a = dist(rd); + uint64_t b = dist(rd); + + // version 4 + a = (a & 0xffffffffffff0fffULL) | 0x0000000000004000ULL; + // variant 1 (RFC 4122) + b = (b & 0x3fffffffffffffffULL) | 0x8000000000000000ULL; + + std::ostringstream oss; + oss << std::hex << std::setfill('0') << std::setw(8) << (a >> 32) << "-" + << std::setw(4) << ((a >> 16) & 0xffff) << "-" << std::setw(4) + << (a & 0xffff) << "-" << std::setw(4) << (b >> 48) << "-" + << std::setw(12) << (b & 0x0000ffffffffffffULL); + + return oss.str(); +} + +} // namespace utils +} // namespace agent +} // namespace volta diff --git a/source/agent/src/utils/utils.h b/source/agent/src/utils/utils.h new file mode 100644 index 0000000..77ae5c9 --- /dev/null +++ b/source/agent/src/utils/utils.h @@ -0,0 +1,18 @@ +#ifndef VOLTA_AGENT_UTILS_UTILS_H_ +#define
VOLTA_AGENT_UTILS_UTILS_H_ + +#include + +namespace volta { +namespace agent { +namespace utils { + +bool IsValidIP(const std::string& ip); +bool IsResolvable(const std::string& host); +std::string GenerateUUIDv4(); + +} // namespace utils +} // namespace agent +} // namespace volta + +#endif // VOLTA_AGENT_UTILS_UTILS_H_