Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Mass reformatting - introducing CI Agent (Issue #14)
433cd2b661a88b143fae6e215a487280a5dedf05

189ff3dffca02d8824919060b24ef0a084c609af
a91de65471c7ad4145a5b509b06a1d08ca1dfd6f
821770ad1742a746b373d2bd0cf081f2eaebedd9
12 changes: 9 additions & 3 deletions source/agent/src/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
namespace volta {
namespace agent {
namespace config {

// TODO: Convert Metrics and Collector names into something that can be read
// with a string
namespace CollectorNames {
// CPU
static constexpr char const* kProcStat = "proc_stat";
Expand Down Expand Up @@ -48,6 +49,7 @@ struct Config {
CPU_ZERO(&set);

if (sched_getaffinity(0, sizeof(set), &set) != 0) {
// TODO: Log
perror("sched_getaffinity");
return;
}
Expand All @@ -67,13 +69,17 @@ struct Config {
static inline cpu_set_t kDefaultAffinity = [] {
cpu_set_t mask;
CPU_ZERO(&mask);
unsigned int n_cpus = std::thread::hardware_concurrency();
for (unsigned int i = 0; i < n_cpus; ++i) {
long n_cpus = sysconf(_SC_NPROCESSORS_ONLN);
n_cpus = std::min(n_cpus, static_cast<long>(CPU_SETSIZE));
// TODO: Handle sysconf error
for (long i = 0; i < n_cpus; ++i) {
CPU_SET(i, &mask);
}
return mask;
}();

std::string uuid = "";

std::chrono::milliseconds collection_interval =
std::chrono::milliseconds(kDefaultIntervalMs);
cpu_set_t core_affinity = kDefaultAffinity;
Expand Down
245 changes: 181 additions & 64 deletions source/agent/src/config/config_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,30 @@

#include <sched.h>

#include <chrono>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <toml++/toml.hpp>
#include <string>

#include "config/config.h"
#include "utils/utils.h"

namespace utils = volta::agent::utils;

namespace volta {
namespace agent {
namespace config {

// TODO: different path for prod build
std::filesystem::path ConfigLoader::kConfigFile = "agent.conf";
std::filesystem::path ConfigLoader::kUUIDFile = "agent.uuid";

std::set<std::string> ConfigLoader::kValidTopLevelKeys = {
"core_affinity", "core_affinity_mask", "interval",
"server_address", "server_port", "collectors"};
std::set<std::string_view, std::less<>> ConfigLoader::kValidTopLevelKeys = {
"core_affinity", "interval", "server_address", "server_port", "collectors"};

std::map<std::string, std::set<std::string>>
ConfigLoader::kValidCollectorMetrics = {
std::map<std::string_view, std::set<std::string_view, std::less<>>, std::less<>>
ConfigLoader::kValidCollectors = {
{"cpu", {"proc_stat", "cpu_freq", "rapl", "zenpower", "pmu"}},
{"gpu", {"nvml", "dcgm", "rocm", "level_zero"}},
{"ram", {"mem_info", "vm_stat"}},
Expand All @@ -34,6 +40,8 @@ Config ConfigLoader::LoadConfig() {
Config ConfigLoader::LoadDefaultConfig() {
Config config;

if (!LoadUUID(config)) CreateUUID(config);

CollectorConfig nvml_collector;
nvml_collector.enabled = true;
nvml_collector.metrics = {
Expand Down Expand Up @@ -70,6 +78,7 @@ bool AddRange(cpu_set_t& set, unsigned int from, unsigned int to,
}

void ConfigLoader::LoadConfigFile(Config& out_config) {
// TODO: Proper logging
if (!std::filesystem::exists(kConfigFile)) {
std::cout << "Agent config file not found, loading default settings."
<< std::endl;
Expand All @@ -79,82 +88,190 @@ void ConfigLoader::LoadConfigFile(Config& out_config) {
try {
toml::table tbl = toml::parse_file(kConfigFile.string());

if (auto val = tbl["core_affinity"]) {
unsigned int max_cpu = MaxOnlineCpus();
cpu_set_t mask;
CPU_ZERO(&mask);
LoadCoreAffinity(tbl, out_config);
LoadInterval(tbl, out_config);
LoadServerAddress(tbl, out_config);
LoadServerPort(tbl, out_config);
LoadCollectors(tbl, out_config);
CheckKeys(tbl);
} catch (const toml::parse_error& err) {
std::cerr << "Parsing Agent config failed: " << err.description() << " at "
<< err.source().begin << std::endl;
}
}

// Reads the persisted agent UUID from kUUIDFile into out_config.uuid.
//
// Returns true only when a non-empty first line was read. Returns false when
// the file is missing, cannot be opened, or holds no usable UUID, so that the
// caller can fall back to generating a fresh one instead of running with an
// empty identifier. out_config is left untouched on failure.
bool ConfigLoader::LoadUUID(Config& out_config) {
  // Open read-only: a read/write std::fstream fails on a read-only file even
  // though reading is all that is needed. A failed open also covers the
  // "file does not exist" case without a separate (throwing) exists() call.
  std::ifstream f(kUUIDFile);
  if (!f.is_open()) return false;

  std::string uuid;
  if (!std::getline(f, uuid)) return false;

  // Tolerate files edited by hand or written with CRLF line endings.
  while (!uuid.empty() &&
         (uuid.back() == '\r' || uuid.back() == ' ' || uuid.back() == '\t')) {
    uuid.pop_back();
  }
  if (uuid.empty()) return false;

  out_config.uuid = uuid;
  return true;
}

// Generates a new UUID, stores it in out_config.uuid and tries to persist it
// to kUUIDFile.
//
// The value is first written to "<kUUIDFile>.tmp" and then renamed over the
// final path, so a partially written file never replaces an existing one.
// Persistence failures are reported on stderr and do not throw: the in-memory
// UUID stays valid for this run even if it could not be saved.
void ConfigLoader::CreateUUID(Config& out_config) {
  const std::string uuid = utils::GenerateUUIDv4();
  out_config.uuid = uuid;

  std::filesystem::path tmp = kUUIDFile;
  tmp += ".tmp";

  bool written = false;
  {
    std::ofstream f(tmp, std::ios::trunc);
    f << uuid;
    // close() flushes and sets failbit on error, so one check covers open,
    // write and flush failures.
    f.close();
    written = !f.fail();
  }

  if (!written) {
    std::cerr << "Failed to write UUID file: " << tmp.string() << std::endl;
    return;
  }

  try {
    std::filesystem::rename(tmp, kUUIDFile);
  } catch (const std::filesystem::filesystem_error& err) {
    // Do not let a read-only or missing directory terminate the agent.
    std::cerr << "Failed to persist UUID file: " << err.what() << std::endl;
  }
}

void ConfigLoader::LoadCoreAffinity(toml::table& tbl, Config& out_config) {
if (!tbl.contains("core_affinity")) return;

// core_affinity = "all"
if (auto s = val.value<std::string>(); s && *s == "all") {
for (unsigned int i = 0; i < max_cpu; ++i) CPU_SET(i, &mask);
auto val = tbl["core_affinity"];

out_config.core_affinity = mask;
// core_affinity = "all"
if (auto s = val.value<std::string>(); s && *s == "all") {
out_config.core_affinity = Config::kDefaultAffinity;
}
// core_affinity = [ ... ]
else if (auto arr = val.as_array()) {
unsigned int max_cpu = MaxOnlineCpus();
cpu_set_t mask;
CPU_ZERO(&mask);
// NOTE: Should the affinity be set in here?
for (auto& item : *arr) {
// CPU number
if (auto cpu = item.value<unsigned int>()) {
if (!AddCpu(mask, *cpu, max_cpu)) {
std::cerr << "CPU index out of range: " << *cpu << "\n";
return;
}
}
// core_affinity = [ ... ]
else if (auto arr = val.as_array()) {
for (auto& item : *arr) {
// CPU number
if (auto cpu = item.value<unsigned int>()) {
if (!AddCpu(mask, *cpu, max_cpu)) {
std::cerr << "CPU index out of range: " << *cpu << "\n";
return;
}
}
// range "X-Y"
else if (auto str = item.value<std::string>()) {
unsigned int from, to;
if (sscanf(str->c_str(), "%u-%u", &from, &to) == 2) {
if (!AddRange(mask, from, to, max_cpu)) {
std::cerr << "Invalid CPU range: " << *str << "\n";
return;
}
} else {
std::cerr << "Invalid core_affinity entry: " << *str << "\n";
return;
}
} else {
std::cerr << "Invalid core_affinity element type\n";
// range "X-Y"
else if (auto str = item.value<std::string>()) {
unsigned int from, to;
if (sscanf(str->c_str(), "%u-%u", &from, &to) == 2) {
if (!AddRange(mask, from, to, max_cpu)) {
std::cerr << "Invalid CPU range: " << *str << "\n";
return;
}
}
if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) {
perror("sched_setaffinity");
} else {
std::cout << "Successfully set CPU affinity mask." << std::endl;
std::cerr << "Invalid core_affinity entry: " << *str << "\n";
return;
}
out_config.core_affinity = mask;
} else {
std::cerr << "Invalid core_affinity value\n";
std::cerr << "Invalid core_affinity element type\n";
return;
}
}
// interval
// server_address
// server_port
if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) {
perror("sched_setaffinity");
} else {
std::cout << "Successfully set CPU affinity mask." << std::endl;
}

auto collectors_node = tbl["collectors"].as_table();
if (!collectors_node) return;
out_config.core_affinity = mask;
} else {
std::cerr << "Invalid core_affinity value\n";
}
}

for (auto& [collector_name, collector_node] : *collectors_node) {
auto collector_table = collector_node.as_table();
if (!collector_table) continue;
void ConfigLoader::LoadInterval(toml::table& tbl, Config& out_config) {
if (!tbl.contains("interval")) return;

CollectorConfig collector;
auto val = tbl["interval"];

if (auto enabled_array = (*collector_table)["enabled"].as_array()) {
for (auto& item : *enabled_array) {
if (auto str = item.value<std::string>()) {
collector.metrics[*str] = true;
}
if (auto ms = val.value<uint32_t>()) {
out_config.collection_interval = std::chrono::milliseconds(*ms);
std::cout << "Collection Interval set to: "
<< out_config.collection_interval << std::endl;
} else {
std::cerr << "Invalid interval value type, use uint32" << std::endl;
}
}

// Applies the optional "server_address" key to out_config.
//
// The value must be a string that is accepted by utils::IsValidIP or, failing
// that, utils::IsResolvable. Anything else is reported on stderr and leaves
// out_config.server_address unchanged.
void ConfigLoader::LoadServerAddress(toml::table& tbl, Config& out_config) {
  if (!tbl.contains("server_address")) return;

  auto address = tbl["server_address"].value<std::string>();

  // Wrong TOML type (or not convertible to a string).
  if (!address) {
    std::cerr << "Invalid server_address value type, use string" << std::endl;
    return;
  }

  // IsResolvable is only consulted when the literal-IP check fails.
  if (!utils::IsValidIP(*address) && !utils::IsResolvable(*address)) {
    std::cerr << "Invalid server_address format" << std::endl;
    return;
  }

  out_config.server_address = *address;
  std::cout << "Server Address set to " << *address << std::endl;
}

// Applies the optional "server_port" key to out_config.
//
// The value has to be retrievable as a uint16_t and be non-zero; otherwise an
// error is printed to stderr and out_config.server_port is left unchanged.
void ConfigLoader::LoadServerPort(toml::table& tbl, Config& out_config) {
  if (!tbl.contains("server_port")) return;

  auto parsed_port = tbl["server_port"].value<uint16_t>();

  const bool usable = parsed_port.has_value() && *parsed_port != 0;
  if (!usable) {
    std::cerr << "server_port has an incorrect type or value, use number "
                 "from range [1, 65535]"
              << std::endl;
    return;
  }

  out_config.server_port = *parsed_port;
  std::cout << "Server port set to " << *parsed_port << std::endl;
}

// Validates the optional "collectors" table.
//
// Each key is expected to be a hardware type listed in kValidCollectors and
// each value an array of collector-name strings valid for that hardware
// type. Invalid entries are reported and skipped; valid names are printed.
//
// NOTE(review): collector_config is built per hardware type but is not yet
// stored in out_config — presumably pending the "Add metrics" TODO below.
void ConfigLoader::LoadCollectors(toml::table& tbl, Config& out_config) {
  if (!tbl.contains("collectors")) return;

  auto collectors_node = tbl["collectors"].as_table();
  // as_table() yields nullptr when "collectors" is present but is not a
  // table (e.g. `collectors = 1`); dereferencing it would crash.
  if (!collectors_node) {
    std::cerr << "Invalid collectors value type, use table" << std::endl;
    return;
  }

  for (auto&& [hardware_type, hardware_node] : *collectors_node) {
    // Single lookup gives both the validity check and the allowed set.
    const auto valid_it = kValidCollectors.find(hardware_type.str());
    if (valid_it == kValidCollectors.end()) {
      std::cerr << "Invalid hardware type: " << hardware_type << std::endl;
      continue;
    }
    const auto& collector_set = valid_it->second;

    auto collectors = hardware_node.as_array();
    if (!collectors) {
      std::cout << "Element " << hardware_type << " is not an array\n";
      continue;
    }

    CollectorConfig collector_config;

    std::cout << hardware_type << std::endl;
    for (auto&& collector : *collectors) {
      if (auto str = collector.value<std::string>()) {
        if (!collector_set.contains(*str)) {
          std::cout << "Invalid collector: " << *str
                    << ", for hardware: " << hardware_type << std::endl;
          continue;
        }

        std::cout << *str << std::endl;
        // TODO: Add metrics

        collector_config.enabled = !collector_config.metrics.empty();
      } else {
        std::cerr << "Invalid type in " << hardware_type << " array\n";
      }
    }
  }
}

out_config.collectors[std::string{collector_name.str()}] = collector;
void ConfigLoader::CheckKeys(toml::table& tbl) {
for (auto&& [key, value] : tbl) {
if (!kValidTopLevelKeys.contains(key.str())) {
std::cout << "Key '" << key << "' is not a valid key\n";
}
} catch (const toml::parse_error& err) {
std::cerr << "Parsing Agent config failed: " << err.description() << " at "
<< err.source().begin << std::endl;
}
}

Expand Down
18 changes: 15 additions & 3 deletions source/agent/src/config/config_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <filesystem>
#include <map>
#include <set>
#include <toml++/toml.hpp>

#include "config/config.h"

Expand All @@ -14,16 +15,27 @@ namespace config {
class ConfigLoader {
public:
static Config LoadConfig();
static Config LoadDefaultConfig();

private:
ConfigLoader() = delete;

static Config LoadDefaultConfig();
static void LoadConfigFile(Config& out_config);
static bool LoadUUID(Config& out_config);
static void CreateUUID(Config& out_config);
static void LoadCoreAffinity(toml::table& tbl, Config& out_config);
static void LoadInterval(toml::table& tbl, Config& out_config);
static void LoadServerAddress(toml::table& tbl, Config& out_config);
static void LoadServerPort(toml::table& tbl, Config& out_config);
static void LoadCollectors(toml::table& tbl, Config& out_config);
static void CheckKeys(toml::table& tbl);

static std::filesystem::path kConfigFile;
static std::set<std::string> kValidTopLevelKeys;
static std::map<std::string, std::set<std::string>> kValidCollectorMetrics;
static std::filesystem::path kUUIDFile;
static std::set<std::string_view, std::less<>> kValidTopLevelKeys;
static std::map<std::string_view, std::set<std::string_view, std::less<>>,
std::less<>>
kValidCollectors;
};

} // namespace config
Expand Down
2 changes: 0 additions & 2 deletions source/agent/src/platform/platform_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
#include <cstring>
#include <fstream>
#include <iostream>
#include <regex>
#include <string>
#include <string_view>

#ifdef HAVE_NVML
#include <nvml.h>
Expand Down
Loading