Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5d07c43
cleanup
tdavidcl Mar 15, 2026
de797a6
cleanup
tdavidcl Mar 15, 2026
b161128
better ?
tdavidcl Mar 15, 2026
58b4695
better reporting ?
tdavidcl Mar 15, 2026
ff5a902
better reporting ?
tdavidcl Mar 15, 2026
e0cd91d
streamline that shit
tdavidcl Mar 15, 2026
6bc2845
whoopsi
tdavidcl Mar 15, 2026
ee3632a
correct max mem usage
tdavidcl Mar 16, 2026
f815d5f
try to permute time mesure and power mesure
tdavidcl Mar 16, 2026
cdca66a
attempt
tdavidcl Mar 16, 2026
d811e5f
attempt
tdavidcl Mar 16, 2026
775d26a
add duration
tdavidcl Mar 16, 2026
3944ef1
Merge branch 'main' into aurora-test4
tdavidcl Mar 16, 2026
1d53302
add support for linked geopm
tdavidcl Mar 17, 2026
9011b2a
enable geopm on aurora
tdavidcl Mar 17, 2026
9dc904d
shut up warnings
tdavidcl Mar 17, 2026
bd98637
Merge branch 'main' into aurora-test4
tdavidcl Mar 17, 2026
9534a22
Merge branch 'main' into aurora-test4
tdavidcl Mar 19, 2026
dcd76b0
better ?
tdavidcl Mar 19, 2026
2ebb27a
more debug infos
tdavidcl Mar 19, 2026
8f7087b
dammit
tdavidcl Mar 19, 2026
36b2516
dammit
tdavidcl Mar 19, 2026
5e2d837
more steps
tdavidcl Mar 20, 2026
8d69537
Merge branch 'main' into aurora-test4
tdavidcl Mar 21, 2026
0f0d14c
Merge branch 'main' into aurora-test4
tdavidcl Mar 21, 2026
c4217cd
Merge branch 'main' into aurora-test4
tdavidcl Mar 21, 2026
e698051
better ?
tdavidcl Mar 22, 2026
9079339
more omp
tdavidcl Mar 22, 2026
05bf4f7
print
tdavidcl Mar 24, 2026
b6228ea
print
tdavidcl Mar 24, 2026
e0a117d
print
tdavidcl Mar 24, 2026
6ec6ec7
print
tdavidcl Mar 24, 2026
2b28ecd
lb weight
tdavidcl Mar 24, 2026
2ab4df4
fix buildbot if invoked outside of repo
tdavidcl Mar 25, 2026
1bed2de
add way of fetching MPI timers
tdavidcl Mar 25, 2026
35941e4
add mpi timers
tdavidcl Mar 25, 2026
5687d48
add timestep callback
tdavidcl Mar 25, 2026
06eee3e
add callback to script
tdavidcl Mar 25, 2026
98b1e99
cleaner
tdavidcl Mar 25, 2026
d2ddf42
faster
tdavidcl Mar 26, 2026
9774603
faster
tdavidcl Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions buildbot/lib/buildbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,36 @@ def print_buildbot_info(utility_name):

print()

str_git = os.popen("git log -n 1 --decorate=full").read()
try:
r_log = subprocess.run(
["git", "log", "-n", "1", "--decorate=full"],
capture_output=True,
text=True,
)
if r_log.returncode != 0:
raise RuntimeError("git log failed")

git_hash = str_git.split()[1]
git_head = str_git[str_git.find("HEAD -> ") + 8 : str_git.find(")")]
str_git = r_log.stdout
git_hash = str_git.split()[1]
git_head = str_git[str_git.find("HEAD -> ") + 8 : str_git.find(")")]

git_head = git_head.split(",")
git_head = git_head.split(",")

if len(git_head) == 1:
git_head = "\033[1;92m" + git_head[0] + "\033[0;0m"
else:
git_head = "\033[1;92m" + git_head[0] + "\033[0;0m , \033[1;91m" + git_head[0] + "\033[0;0m"
if len(git_head) == 1:
git_head = "\033[1;92m" + git_head[0] + "\033[0;0m"
else:
# Several refs decorate HEAD: show the first one in green and the second one
# in red (the previous code printed the first ref twice).
git_head = (
    "\033[1;92m" + git_head[0] + "\033[0;0m , \033[1;91m" + git_head[1] + "\033[0;0m"
)

print("\033[1;34mGit status \033[0;0m: ")
print(" \033[1;93mcommit \033[0;0m: ", git_hash)
print(" \033[1;36mHEAD \033[0;0m: ", git_head)
print(" \033[1;31mmodified files\033[0;0m (since last commit):")
print(os.popen('git diff-index --name-only HEAD -- | sed "s/^/ /g"').read())
print("\033[1;90m" + "-" * col_cnt + "\033[0;0m\n")
print("\033[1;34mGit status \033[0;0m: ")
print(" \033[1;93mcommit \033[0;0m: ", git_hash)
print(" \033[1;36mHEAD \033[0;0m: ", git_head)
print(" \033[1;31mmodified files\033[0;0m (since last commit):")
print(os.popen('git diff-index --name-only HEAD -- | sed "s/^/ /g"').read())
print("\033[1;90m" + "-" * col_cnt + "\033[0;0m\n")
except Exception: # noqa: BLE001
print("Warn : couldn't get git status")


def run_cmd(str):
Expand Down
51 changes: 44 additions & 7 deletions examples/benchmarks/sph_weak_scale_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
)
cfg.set_boundary_periodic()
cfg.set_eos_adiabatic(gamma)
cfg.set_max_neigh_cache_size(int(100e9))
cfg.print_status()
model.set_solver_config(cfg)
model.init_scheduler(scheduler_split_val, scheduler_merge_val)
Expand Down Expand Up @@ -102,7 +101,7 @@

model.set_value_in_a_box("uint", "f64", 0, bmin, bmax)

rinj = 8 * dr
rinj = 16 * dr
u_inj = 1
model.add_kernel_value("uint", "f64", u_inj, (0, 0, 0), rinj)

Expand All @@ -116,9 +115,6 @@
model.set_cfl_cour(0.1)
model.set_cfl_force(0.1)

model.set_cfl_multipler(1e-4)
model.set_cfl_mult_stiffness(1e6)

shamrock.backends.reset_mem_info_max()

# converge smoothing length and compute initial dt
Expand All @@ -128,11 +124,40 @@
res_rates = []
res_cnts = []
res_system_metrics = []
res_mpi_timers = []

"""
Here we insert callbacks to measure the solver's MPI usage by fetching the timers at the beginning and at the end of each step
"""
before_mpi_timers, after_mpi_timers = None, None

# Step-begin callback: stores the current MPI timers (shamrock.comm.get_timers())
# in the module-level `before_mpi_timers` so they can be compared with the
# step-end snapshot.
def callback_before_mpi_timer():
global before_mpi_timers
# print(shamrock.sys.world_rank(), "register before_mpi_timers")
before_mpi_timers = shamrock.comm.get_timers()

# Step-end callback: stores the current MPI timers (shamrock.comm.get_timers())
# in the module-level `after_mpi_timers`.
def callback_after_mpi_timer():
global after_mpi_timers
# print(shamrock.sys.world_rank(), "register after_mpi_timers")
after_mpi_timers = shamrock.comm.get_timers()

model.add_timestep_callback(
step_begin=callback_before_mpi_timer, step_end=callback_after_mpi_timer
)

for i in range(10):
if shamrock.sys.world_rank() == 0:
print("running step ", i + 1, "/", 10, " ...")

for i in range(5):
shamrock.sys.mpi_barrier()

# To replay the same step
model.set_next_dt(0.0)
model.timestep()

if shamrock.sys.world_rank() == 0:
print("collecting results ...")

tmp_res_rate, tmp_res_cnt, tmp_system_metrics = (
model.solver_logs_last_rate(),
model.solver_logs_last_obj_count(),
Expand All @@ -141,14 +166,25 @@
res_rates.append(tmp_res_rate)
res_cnts.append(tmp_res_cnt)
res_system_metrics.append(tmp_system_metrics)
res_mpi_timers.append(shamrock.comm.mpi_timers_delta(before_mpi_timers, after_mpi_timers))

if shamrock.sys.world_rank() == 0:
print("sleeping 1 second ...")

import time

time.sleep(1)

if shamrock.sys.world_rank() == 0:
print("done sleeping 1 second ...")

# result is the best rate of the 10 steps
res_rate, res_cnt = max(res_rates), res_cnts[0]

# index of the max rate
max_rate_index = res_rates.index(max(res_rates))
max_rate_system_metrics = res_system_metrics[max_rate_index]

max_mpi_timers = res_mpi_timers[max_rate_index]
step_time = res_cnt / res_rate

if shamrock.sys.world_rank() == 0:
Expand All @@ -168,6 +204,7 @@
"rate": res_rate,
"cnt": res_cnt,
"step_time": step_time,
"mpi_timers": max_mpi_timers,
}

# print the system metrics
Expand Down
3 changes: 3 additions & 0 deletions src/shamalgs/src/collective/sparse_exchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ namespace shamalgs::collective {
/// fetch u64_2 from global message data
std::vector<u64_2> fetch_global_message_data(
const std::vector<CommMessageInfo> &messages_send) {
__shamrock_stack_entry();

std::vector<u64_2> local_data = std::vector<u64_2>(messages_send.size());

Expand Down Expand Up @@ -84,6 +85,7 @@ namespace shamalgs::collective {

/// decode message to get message
std::vector<CommMessageInfo> decode_all_message(const std::vector<u64_2> &global_data) {
__shamrock_stack_entry();
std::vector<CommMessageInfo> message_all(global_data.size());
for (u64 i = 0; i < global_data.size(); i++) {
message_all[i] = unpack(global_data[i]);
Expand All @@ -94,6 +96,7 @@ namespace shamalgs::collective {

/// compute message tags
void compute_tags(std::vector<CommMessageInfo> &message_all) {
__shamrock_stack_entry();

std::vector<i32> tag_map(shamcomm::world_size(), 0);

Expand Down
18 changes: 17 additions & 1 deletion src/shamcomm/include/shamcomm/collectives.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ namespace shamcomm {
void gather_basic_str(
const std::basic_string<byte> &send_vec, std::basic_string<byte> &recv_vec);

/**
* @brief Allgathers a string from all nodes and concatenates it in a std::string
*
* This function gathers the string `send_vec` from all nodes and concatenates the
* result in `recv_vec` on every rank. The result is ordered by the order of the
* nodes in the communicator, i.e. the string is ordered by rank.
*/
void allgather_str(const std::string &send_vec, std::string &recv_vec);

/// same as allgather_str but with std::basic_string
void allgather_basic_str(
const std::basic_string<byte> &send_vec, std::basic_string<byte> &recv_vec);

/**
* @brief Constructs a histogram from a vector of strings, counting occurrences
* of each unique string.
Expand All @@ -56,8 +69,11 @@ namespace shamcomm {
* @return An unordered map where keys are unique strings from the input and
* values are the counts of their occurrences. (valid only on rank 0)
*/

std::unordered_map<std::string, int> string_histogram(
const std::vector<std::string> &inputs, std::string delimiter = "\n");

/**
 * @brief Same as string_histogram, but the resulting histogram is returned on
 * every rank instead of only on rank 0.
 *
 * @param inputs Strings contributed by the calling rank.
 * @param delimiter Separator appended after each input before the strings are
 * gathered, and used to split the gathered result (defaults to "\n").
 * @return An unordered map where keys are the unique strings and values are
 * the counts of their occurrences.
 */
std::unordered_map<std::string, int> all_string_histogram(
const std::vector<std::string> &inputs, std::string delimiter = "\n");

} // namespace shamcomm
8 changes: 8 additions & 0 deletions src/shamcomm/include/shamcomm/wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include "shambase/aliases_float.hpp"
#include "shambase/aliases_int.hpp"
#include "shamcomm/mpi.hpp"
#include <unordered_map>
#include <string>
#include <vector>

namespace shamcomm::mpi {

Expand All @@ -29,6 +31,12 @@ namespace shamcomm::mpi {
/// get a timer value
f64 get_timer(std::string timername);

/// return all internal timers
const std::unordered_map<std::string, f64> &get_timers();

/// return all possible keys for the internal timers
const std::vector<std::string> &get_possible_keys();

/// MPI wrapper for MPI_Allreduce
void Allreduce(
const void *sendbuf,
Expand Down
82 changes: 82 additions & 0 deletions src/shamcomm/src/collectives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,56 @@ namespace {
recv_vec = result;
}

/**
* @brief Allgather a vector of characters from all MPI ranks into a single string
*
* The resulting string is concatenated in rank order and is returned on every rank.
*/
template<class Tchar>
inline void _internal_allgather_str(
const std::basic_string<Tchar> &send_vec, std::basic_string<Tchar> &recv_vec) {
StackEntry stack_loc{};

if (shamcomm::world_size() == 1) {
recv_vec = send_vec;
return;
}

i32 wsize = shamcomm::world_size();
size_t wsize_sz = static_cast<size_t>(wsize);

// counts/displacements are expressed in number of characters.
std::vector<int> counts(wsize_sz);
std::vector<int> disps(wsize_sz);

// MPI counts/displacements use `int`.
int local_count = static_cast<int>(send_vec.size());

shamcomm::mpi::Allgather(
&local_count, 1, MPI_INT, counts.data(), 1, MPI_INT, MPI_COMM_WORLD);

for (size_t i = 0; i < wsize_sz; i++) {
disps[i] = (i > 0) ? (disps[i - 1] + counts[i - 1]) : 0;
}

int global_len = disps[wsize_sz - 1] + counts[wsize_sz - 1];

std::basic_string<Tchar> result;
result.resize(static_cast<size_t>(global_len));

shamcomm::mpi::Allgatherv(
send_vec.data(),
local_count,
MPI_CHAR,
result.data(),
counts.data(),
disps.data(),
MPI_CHAR,
MPI_COMM_WORLD);

recv_vec = result;
}

} // namespace

void shamcomm::gather_str(const std::string &send_vec, std::string &recv_vec) {
Expand All @@ -94,6 +144,17 @@ void shamcomm::gather_basic_str(
_internal_gather_str(send_vec, recv_vec);
}

// std::string entry point: forwards to the character-type-generic implementation.
void shamcomm::allgather_str(const std::string &send_vec, std::string &recv_vec) {
    StackEntry stack_entry{};
    _internal_allgather_str(send_vec, recv_vec);
}

// std::basic_string<byte> entry point: forwards to the character-type-generic
// implementation.
void shamcomm::allgather_basic_str(
    const std::basic_string<byte> &send_vec, std::basic_string<byte> &recv_vec) {
    StackEntry stack_entry{};
    _internal_allgather_str(send_vec, recv_vec);
}

std::unordered_map<std::string, int> shamcomm::string_histogram(
const std::vector<std::string> &inputs, std::string delimiter) {
std::string accum_loc = "";
Expand All @@ -119,3 +180,24 @@ std::unordered_map<std::string, int> shamcomm::string_histogram(

return {};
}

// Counts how often each string occurs over the inputs of all ranks; unlike
// string_histogram, the histogram is built and returned on every rank.
// NOTE(review): every input is followed by `delimiter`, so whether an empty
// trailing token gets counted depends on shambase::split_str — confirm.
std::unordered_map<std::string, int> shamcomm::all_string_histogram(
    const std::vector<std::string> &inputs, std::string delimiter) {

    // Serialize the local inputs as "<input><delimiter><input><delimiter>...".
    std::string local_blob;
    for (const std::string &entry : inputs) {
        local_blob += entry;
        local_blob += delimiter;
    }

    // Collect the serialized inputs of all ranks, in rank order.
    std::string gathered;
    allgather_str(local_blob, gathered);

    // Split the result back into individual strings and count them.
    const std::vector<std::string> tokens = shambase::split_str(gathered, delimiter);

    std::unordered_map<std::string, int> counts;
    for (const std::string &token : tokens) {
        counts[token] += 1;
    }

    return counts;
}
16 changes: 16 additions & 0 deletions src/shamcomm/src/wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,22 @@ namespace shamcomm::mpi {

/// Return the value of the timer named `timername`, or 0 if no such timer has
/// been recorded. The lookup does not insert anything, so querying an unknown
/// name leaves the map returned by get_timers() unchanged.
f64 get_timer(std::string timername) {
    const auto it = mpi_timers.find(timername);
    return (it != mpi_timers.end()) ? it->second : f64{};
}

/// Read-only access to the internal timer map (`mpi_timers`), returned by
/// reference, not as a copy.
const std::unordered_map<std::string, f64> &get_timers() { return mpi_timers; }

/// Names of the timers that may appear in the internal timer map: "total" plus
/// one entry per MPI call listed here. Declared const because the list is only
/// ever handed out read-only through get_possible_keys().
const std::vector<std::string> possible_keys{
    "total",         "MPI_Isend",          "MPI_Irecv",
    "MPI_Allreduce", "MPI_Allgather",      "MPI_Allgatherv",
    "MPI_Exscan",    "MPI_Wait",           "MPI_Waitall",
    "MPI_Barrier",   "MPI_Probe",          "MPI_Recv",
    "MPI_Get_count", "MPI_Send",           "MPI_File_set_view",
    "MPI_Type_size", "MPI_File_write_all", "MPI_File_write",
    "MPI_File_read", "MPI_File_write_at",  "MPI_File_read_at",
    "MPI_File_close", "MPI_File_open",     "MPI_Test",
    "MPI_Gather",    "MPI_Gatherv",
};

/// Read-only access to the `possible_keys` list of timer names defined in
/// this file.
const std::vector<std::string> &get_possible_keys() { return possible_keys; }

} // namespace shamcomm::mpi

namespace {
Expand Down
4 changes: 2 additions & 2 deletions src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,13 +344,13 @@ auto GSPHGhostHandler<vec>::gen_id_table_interfaces(GeneratorMap &&gen)

for (auto &[k, v] : send_count_stats) {
if (v > 0.2) {
warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v);
// warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v);
has_warn = true;
}
}

if (has_warn && shamcomm::world_rank() == 0) {
warn_log = "\n This can lead to high mpi "
warn_log = "\n High interf/patch volume. This can lead to high mpi "
"overhead, try to increase the patch split crit"
+ warn_log;
}
Expand Down
7 changes: 7 additions & 0 deletions src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,14 @@ namespace shammodels::sph {
PatchField<flt> interactR_patch = sched.map_owned_to_patch_field_simple<flt>(
[&](const Patch p, PatchDataLayer &pdat) -> flt {
if (!pdat.is_empty()) {
#if false
auto tmp = pdat.get_field<flt>(ihpart).compute_max() * h_evol_max * Rkern;
shamcomm::logs::raw_ln(
shambase::format("patch {}, Rghost = {}", p.id_patch, tmp));
return tmp;
#else
return pdat.get_field<flt>(ihpart).compute_max() * h_evol_max * Rkern;
#endif
} else {
return shambase::VectorProperties<flt>::get_min();
}
Expand Down
Loading
Loading