Skip to content
Merged
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
.vscode/

build/
out/
bin/

.cache/
compile_commands.json

14 changes: 8 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Third-party dependencies. CONFIG mode makes CMake use the package
# configuration files shipped by each library instead of a Find module, and
# REQUIRED aborts configuration when a package is missing.
find_package(protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
find_package(fmt CONFIG REQUIRED)
find_package(tomlplusplus CONFIG REQUIRED)

# Directory holding the .proto sources and the single service definition file
# that the code-generation rule for volta.proto takes as its input.
set(PROTO_DIR ${CMAKE_SOURCE_DIR}/libs/proto)
set(PROTO_FILE ${PROTO_DIR}/volta.proto)
Expand All @@ -23,15 +24,15 @@ set(GRPC_HDR "${CMAKE_CURRENT_BINARY_DIR}/volta.grpc.pb.h")

# Generates the protobuf and gRPC C++ sources/headers for volta.proto into the
# current binary directory.
#
# Each protoc option is passed as ONE fully quoted CMake argument. Writing
# `--cpp_out="${dir}"` instead would embed literal quote characters in the
# argument and only works when a shell happens to strip them. VERBATIM makes
# CMake escape every argument correctly for the build tool on all platforms.
add_custom_command(
  OUTPUT "${PROTO_SRC}" "${PROTO_HDR}" "${GRPC_SRC}" "${GRPC_HDR}"
  COMMAND protobuf::protoc
          "--cpp_out=${CMAKE_CURRENT_BINARY_DIR}"
          "--grpc_out=${CMAKE_CURRENT_BINARY_DIR}"
          "--plugin=protoc-gen-grpc=${GRPC_CPP_PLUGIN}"
          -I "${PROTO_DIR}"
          "${PROTO_FILE}"
  # Re-run when the schema changes or the gRPC plugin is rebuilt.
  DEPENDS "${PROTO_FILE}" gRPC::grpc_cpp_plugin
  COMMENT "Generating C++ code from volta.proto..."
  VERBATIM
)
Expand All @@ -40,11 +41,12 @@ add_library(volta_proto ${PROTO_SRC} ${PROTO_HDR} ${GRPC_SRC} ${GRPC_HDR})

target_include_directories(volta_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})

# Link dependencies of the generated-code library.
#
# The generated volta.pb.h / volta.grpc.pb.h headers are exported to consumers
# through the PUBLIC include directory of volta_proto, and those headers pull
# in protobuf and gRPC headers themselves. protobuf and grpc++ are therefore
# usage requirements (PUBLIC) so that every consumer automatically receives
# their include paths and compile definitions. The remaining libraries are
# implementation details and stay PRIVATE.
target_link_libraries(volta_proto
  PUBLIC
    protobuf::libprotobuf
    gRPC::grpc++
  PRIVATE
    gRPC::grpc++_reflection
    fmt::fmt
    tomlplusplus::tomlplusplus
    ${CMAKE_DL_LIBS}
)

Expand All @@ -64,7 +66,7 @@ if(CLANG_FORMAT_EXE)
COMMENT "Running clang-format on source files..."
VERBATIM
)

# optional: check formatting
add_custom_target(check-format
COMMAND find source/agent -name "*.cc" -o -name "*.h" | xargs ${CLANG_FORMAT_EXE} --dry-run --Werror -style=Google
Expand Down
1 change: 1 addition & 0 deletions agent.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# CPUs the agent is restricted to.
# Accepted forms:
#   core_affinity = "all"            -> every online CPU
#   core_affinity = [2, "3-10"]      -> array mixing single CPU indices
#                                       (integers) and inclusive ranges
#                                       written as strings "FROM-TO"
# Every index must be lower than the number of online CPUs; an out-of-range
# or malformed element makes the agent stop reading this file.
core_affinity = [2, "3-10"]
21 changes: 18 additions & 3 deletions source/agent/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Agent executable; sources are attached below via target_sources().
set(AGENT_NAME volta_agent)
add_executable(${AGENT_NAME})

# Define DEBUG only for the Debug configuration. A generator expression is used
# instead of testing CMAKE_BUILD_TYPE because that variable is empty with
# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config), where the
# configuration is chosen at build time.
target_compile_definitions(${AGENT_NAME} PRIVATE $<$<CONFIG:Debug>:DEBUG>)

# CONFIGURE_DEPENDS makes the build re-check the glob so that newly added or
# removed .cc files trigger a re-configure instead of being silently missed.
file(GLOB_RECURSE AGENT_SOURCES CONFIGURE_DEPENDS "src/*.cc")
target_sources(${AGENT_NAME} PRIVATE ${AGENT_SOURCES})

Expand All @@ -11,9 +16,14 @@ target_link_libraries(${AGENT_NAME} PRIVATE volta_proto)
# External dependencies of the agent.
#
# protobuf, gRPC and tomlplusplus are looked up in CONFIG mode with the same
# package names as in the top-level CMakeLists.txt, so both directories resolve
# the same package configuration files and imported targets rather than mixing
# them with CMake's built-in FindProtobuf module.
find_package(Threads REQUIRED)
find_package(protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
find_package(tomlplusplus CONFIG REQUIRED)

target_link_libraries(${AGENT_NAME} PRIVATE
  Threads::Threads
  gRPC::grpc++
  protobuf::libprotobuf
  tomlplusplus::tomlplusplus
)

find_path(NVML_INCLUDE_DIR nvml.h
PATHS ${CMAKE_SOURCE_DIR}/libs/include/nvidia
Expand All @@ -30,7 +40,6 @@ if(NVML_INCLUDE_DIR AND NVML_LIBRARY)
target_include_directories(${AGENT_NAME} PRIVATE ${NVML_INCLUDE_DIR})
target_link_libraries(${AGENT_NAME} PRIVATE ${NVML_LIBRARY})
target_compile_definitions(${AGENT_NAME} PRIVATE HAVE_NVML)

else()
message(STATUS "NVML library not found. Using STUB for compilation.")
add_library(nvml_stub STATIC ${CMAKE_SOURCE_DIR}/libs/stubs/nvml_stub.cc)
Expand All @@ -40,5 +49,11 @@ else()
target_include_directories(${AGENT_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/libs/include/nvidia)
endif()

# Place a copy of the repository's agent.conf in this directory's build tree.
# COPYONLY copies the file byte-for-byte, i.e. without substituting any
# ${VAR} or @VAR@ references it might contain.
configure_file("${CMAKE_SOURCE_DIR}/agent.conf" "${CMAKE_CURRENT_BINARY_DIR}/agent.conf" COPYONLY)

# 5. TODO: find rocm_smi and oneapi level_zero
# 6. TODO: find PMU (libpfm)
34 changes: 32 additions & 2 deletions source/agent/src/config/config.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#ifndef VOLTA_AGENT_CONFIG_CONFIG_H_
#define VOLTA_AGENT_CONFIG_CONFIG_H_

#include <sched.h>
#include <unistd.h>

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <map>
#include <string>
#include <thread>

namespace volta {
namespace agent {
Expand Down Expand Up @@ -39,14 +43,40 @@ struct CollectorConfig {
};

struct Config {
// Writes the CPU indices contained in the affinity mask returned by
// sched_getaffinity(0, ...) to stdout on a single line, each index followed by
// a space. Only indices below sysconf(_SC_NPROCESSORS_CONF) are examined.
// If the mask cannot be queried, the error is reported with perror() and
// nothing is printed.
void PrintCurrentAffinity() {
  cpu_set_t allowed;
  CPU_ZERO(&allowed);

  const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
  if (rc != 0) {
    perror("sched_getaffinity");
    return;
  }

  const long configured_cpus = sysconf(_SC_NPROCESSORS_CONF);
  std::cout << "Current CPU affinity: ";

  int cpu = 0;
  while (cpu < configured_cpus) {
    if (CPU_ISSET(cpu, &allowed)) {
      std::cout << cpu << " ";
    }
    ++cpu;
  }
  std::cout << "\n";
}

static constexpr int32_t kDefaultIntervalMs = 500;
static constexpr int32_t kDefaultAffinity = -1;
static constexpr char const* kDefaultServerAddress = "localhost";
static constexpr uint16_t kDefaultServerPort = 50051;
// Default affinity mask: CPUs [0, hardware_concurrency()).
//
// std::thread::hardware_concurrency() is allowed to return 0 when the count
// cannot be determined, which would leave the default mask empty. In that case
// the mask reported by sched_getaffinity() is used instead, and as a last
// resort CPU 0 alone. The count is also clamped to CPU_SETSIZE, the number of
// bits a cpu_set_t can hold.
static inline cpu_set_t kDefaultAffinity = [] {
  cpu_set_t mask;
  CPU_ZERO(&mask);

  unsigned int n_cpus = std::thread::hardware_concurrency();
  if (n_cpus == 0) {
    if (sched_getaffinity(0, sizeof(mask), &mask) == 0 &&
        CPU_COUNT(&mask) > 0) {
      return mask;
    }
    // Query failed as well: start over and fall back to a single CPU.
    CPU_ZERO(&mask);
    n_cpus = 1;
  }

  constexpr unsigned int kMaskBits = CPU_SETSIZE;
  if (n_cpus > kMaskBits) n_cpus = kMaskBits;

  for (unsigned int i = 0; i < n_cpus; ++i) {
    CPU_SET(i, &mask);
  }
  return mask;
}();

std::chrono::milliseconds collection_interval =
std::chrono::milliseconds(kDefaultIntervalMs);
int32_t core_affinity = kDefaultAffinity;
cpu_set_t core_affinity = kDefaultAffinity;

std::string server_address = kDefaultServerAddress;
uint16_t server_port = kDefaultServerPort;
Expand Down
135 changes: 132 additions & 3 deletions source/agent/src/config/config_loader.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,37 @@
#include "config/config_loader.h"

#include <sched.h>

#include <filesystem>
#include <iostream>
#include <toml++/toml.hpp>

#include "config/config.h"

namespace volta {
namespace agent {
namespace config {

std::filesystem::path ConfigLoader::kConfigFile = "agent.conf";

std::set<std::string> ConfigLoader::kValidTopLevelKeys = {
"core_affinity", "core_affinity_mask", "interval",
"server_address", "server_port", "collectors"};

std::map<std::string, std::set<std::string>>
ConfigLoader::kValidCollectorMetrics = {
{"cpu", {"proc_stat", "cpu_freq", "rapl", "zenpower", "pmu"}},
{"gpu", {"nvml", "dcgm", "rocm", "level_zero"}},
{"ram", {"mem_info", "vm_stat"}},
{"io", {"disk_stats", "net_dev"}}};

// Produces the effective agent configuration: starts from the built-in
// defaults and then lets LoadConfigFile() modify that object in place.
Config ConfigLoader::LoadConfig() {
  Config result = LoadDefaultConfig();
  LoadConfigFile(result);
  return result;
}

Config ConfigLoader::LoadDefaultConfig() {
Config config;

CollectorConfig nvml_collector;
Expand All @@ -24,9 +51,111 @@ Config ConfigLoader::LoadConfig() {
return config;
}

Config ConfigLoader::LoadConfig(const std::filesystem::path& filepath) {
// ignore for POC
return LoadConfig();
// Returns the value of sysconf(_SC_NPROCESSORS_ONLN) as an unsigned count, or
// 0 when sysconf() reports an error or a non-positive value.
inline unsigned int MaxOnlineCpus() {
  const long online = sysconf(_SC_NPROCESSORS_ONLN);
  if (online <= 0) {
    return 0;
  }
  return static_cast<unsigned int>(online);
}

// Adds `cpu` to `set`.
//
// Returns false and leaves `set` untouched when `cpu` is not below `max_cpu`
// or does not fit into a cpu_set_t. The second check matters because CPU_SET()
// silently ignores indices >= CPU_SETSIZE, so without it the function would
// report success although no bit was set.
bool AddCpu(cpu_set_t& set, unsigned int cpu, unsigned int max_cpu) {
  if (cpu >= max_cpu) return false;
  if (cpu >= static_cast<unsigned int>(CPU_SETSIZE)) return false;
  CPU_SET(cpu, &set);
  return true;
}

// Adds every CPU in the inclusive range [from, to] to `set`.
//
// Returns false and leaves `set` untouched when the range is reversed, when
// `to` is not below `max_cpu`, or when `to` does not fit into a cpu_set_t
// (CPU_SET() silently ignores indices >= CPU_SETSIZE).
bool AddRange(cpu_set_t& set, unsigned int from, unsigned int to,
              unsigned int max_cpu) {
  if (from > to || to >= max_cpu) return false;
  if (to >= static_cast<unsigned int>(CPU_SETSIZE)) return false;
  for (unsigned int i = from; i <= to; ++i) CPU_SET(i, &set);
  return true;
}

// Overlays settings read from the TOML file `kConfigFile` onto `out_config`.
//
//  * File missing: a notice is printed and `out_config` is left unchanged.
//  * `core_affinity` is either the string "all" (every online CPU) or an
//    array whose elements are CPU indices and/or inclusive range strings
//    "FROM-TO". A non-empty array is passed to sched_setaffinity(0, ...) and
//    stored in `out_config.core_affinity`. A malformed or out-of-range element
//    stops processing of the file (the function returns immediately).
//  * `collectors`: every sub-table replaces the collector entry with the same
//    name; each string in its `enabled` array is recorded as an enabled
//    metric.
//  * TOML syntax errors are reported on stderr; no exception leaves this
//    function for them.
void ConfigLoader::LoadConfigFile(Config& out_config) {
  if (!std::filesystem::exists(kConfigFile)) {
    std::cout << "Agent config file not found, loading default settings."
              << std::endl;
    return;
  }

  try {
    toml::table tbl = toml::parse_file(kConfigFile.string());

    if (auto val = tbl["core_affinity"]) {
      unsigned int max_cpu = MaxOnlineCpus();
      cpu_set_t mask;
      CPU_ZERO(&mask);

      if (auto s = val.value<std::string>(); s && *s == "all") {
        // core_affinity = "all"
        // NOTE(review): unlike the array form, this branch only records the
        // mask and does not call sched_setaffinity() - confirm this is
        // intended.
        for (unsigned int i = 0; i < max_cpu; ++i) CPU_SET(i, &mask);

        out_config.core_affinity = mask;
      } else if (auto arr = val.as_array()) {
        // core_affinity = [ ... ]
        for (auto& item : *arr) {
          if (auto cpu = item.value<unsigned int>()) {
            // Single CPU index.
            if (!AddCpu(mask, *cpu, max_cpu)) {
              std::cerr << "CPU index out of range: " << *cpu << "\n";
              return;
            }
          } else if (auto str = item.value<std::string>()) {
            // Inclusive range "X-Y". %n stores how many characters were
            // consumed, which lets us reject trailing garbage such as
            // "3-10abc" that a plain "%u-%u" would accept.
            unsigned int from = 0;
            unsigned int to = 0;
            int consumed = 0;
            const bool parsed =
                sscanf(str->c_str(), "%u-%u%n", &from, &to, &consumed) == 2 &&
                consumed >= 0 &&
                static_cast<std::string::size_type>(consumed) == str->size();
            if (!parsed) {
              std::cerr << "Invalid core_affinity entry: " << *str << "\n";
              return;
            }
            if (!AddRange(mask, from, to, max_cpu)) {
              std::cerr << "Invalid CPU range: " << *str << "\n";
              return;
            }
          } else {
            std::cerr << "Invalid core_affinity element type\n";
            return;
          }
        }

        if (CPU_COUNT(&mask) == 0) {
          // An empty list would make sched_setaffinity() fail and would store
          // an empty mask in the configuration; keep the defaults instead.
          std::cerr << "core_affinity list is empty, keeping default affinity\n";
        } else {
          if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) {
            perror("sched_setaffinity");
          } else {
            std::cout << "Successfully set CPU affinity mask." << std::endl;
          }
          // The requested mask is recorded even when applying it failed.
          out_config.core_affinity = mask;
        }
      } else {
        std::cerr << "Invalid core_affinity value\n";
      }
    }
    // Not parsed yet: interval, server_address, server_port.

    auto collectors_node = tbl["collectors"].as_table();
    if (!collectors_node) return;

    for (auto& [collector_name, collector_node] : *collectors_node) {
      auto collector_table = collector_node.as_table();
      if (!collector_table) continue;

      CollectorConfig collector;

      if (auto enabled_array = (*collector_table)["enabled"].as_array()) {
        for (auto& item : *enabled_array) {
          if (auto str = item.value<std::string>()) {
            collector.metrics[*str] = true;
          }
        }

        // A collector counts as enabled once at least one metric is listed.
        collector.enabled = !collector.metrics.empty();
      }

      out_config.collectors[std::string{collector_name.str()}] = collector;
    }
  } catch (const toml::parse_error& err) {
    std::cerr << "Parsing Agent config failed: " << err.description() << " at "
              << err.source().begin << std::endl;
  }
}

} // namespace config
Expand Down
12 changes: 10 additions & 2 deletions source/agent/src/config/config_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#define VOLTA_AGENT_CONFIG_CONFIG_LOADER_H_

#include <filesystem>
#include <string>
#include <map>
#include <set>

#include "config/config.h"

Expand All @@ -13,9 +14,16 @@ namespace config {
// Static-only helper that assembles the agent's Config. It cannot be
// instantiated (the constructor is deleted).
class ConfigLoader {
 public:
  // Returns the built-in defaults with the settings from the config file
  // applied on top.
  static Config LoadConfig();
  // NOTE(review): path-taking overload; this review view also shows it being
  // dropped in favour of the fixed kConfigFile location - confirm whether the
  // declaration is still needed.
  static Config LoadConfig(const std::filesystem::path& filepath);

 private:
  ConfigLoader() = delete;

  // Builds the configuration used when no file overrides anything.
  static Config LoadDefaultConfig();
  // Reads kConfigFile (if it exists) and updates out_config in place.
  static void LoadConfigFile(Config& out_config);

  // Path of the config file; defined as "agent.conf" in config_loader.cc.
  // NOTE(review): the three members below use the k-prefix of constants but
  // are mutable statics - consider making them const.
  static std::filesystem::path kConfigFile;
  // Names of the recognised top-level keys of the config file.
  static std::set<std::string> kValidTopLevelKeys;
  // Collector name -> metric names listed for that collector.
  static std::map<std::string, std::set<std::string>> kValidCollectorMetrics;
};

} // namespace config
Expand Down
2 changes: 1 addition & 1 deletion source/agent/src/platform/platform_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ std::string PlatformDetector::DetectOS() {

std::string line;
while (std::getline(file, line)) {
if (line.starts_with("PRETTY_NAME=") == 0) {
if (line.starts_with("PRETTY_NAME=")) {
std::string value = line.substr(12);

if (value.size() >= 2 && value.front() == '"' && value.back() == '"') {
Expand Down
3 changes: 2 additions & 1 deletion vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"dependencies": [
"fmt",
"grpc",
"protobuf"
"protobuf",
"tomlplusplus"
]
}