diff --git a/paper_files/New_for_hybrid/CMakeLists.txt b/paper_files/New_for_hybrid/CMakeLists.txt index 30c6ef5..5c4fe1d 100644 --- a/paper_files/New_for_hybrid/CMakeLists.txt +++ b/paper_files/New_for_hybrid/CMakeLists.txt @@ -17,5 +17,8 @@ target_include_directories(Benchmark_new_for_hybrid ) add_executable(benchmark benchmark.cxx) +add_executable(benchmark_elems benchmark_elems.cxx) target_link_libraries(benchmark PRIVATE T8CODE::T8) +target_link_libraries(benchmark_elems PRIVATE T8CODE::T8) + diff --git a/paper_files/New_for_hybrid/README.md b/paper_files/New_for_hybrid/README.md index 4ea9096..f4e3d9a 100644 --- a/paper_files/New_for_hybrid/README.md +++ b/paper_files/New_for_hybrid/README.md @@ -1,13 +1,11 @@ # README -## Comparison of the Partitioning Algorithm +## How to run the benchmarks +Install the benchmarks via CMake and provide paths to t8code (and SC & P4est, which can be installed with t8code altogether) +Run the benchmark_elems example to evaluate the element-wise performance. With -n you can define the number of repeated runs, with -e you define the type of element used and with -l the initial uniform refinement level of the forest. -To compare the timings between the introduction of the New-for-hybrid algorithm link with -- t8code v4.0.0 for the "old" performance -- t8code add tag for the new performance +Run the benchmark example to evaluate the performance on a large hybrid mesh. Provide a mesh file via -f, and describe the dimension of the mesh via -d. The initial uniform refinement level is given via -l and -r defines the number of refinement levels. -g toggles on the usage of ghost-cells, -b enables the computation of a 2:1 balancing. -x is the minimum coordinate of the mesh, -t describes the thickness of the wall and -D the distance of the wall to travel. -s defines the step the wall should make. -n is the number of repeated runs to test. +We used "-d3 -l5 -r2 -x-0.5 -t0.2 -D2 -s5 -n2 -g" for our tests. -## What is benchmarked? 
+To run the examples on a cluster you can use the scripts provided in the jobs directory. -In benchmark.cxx we create a cmesh, either from a file or from our examples. We use a hybrid mesh from the example, consisting of a hexahedron, a tetrahedron, a prism and a pyramid. For the published run-times we used a cmesh created from xy.msh. - -The mesh is then adaptively refined, coarsened and repartitioned for multiple timesteps. In each timestep elements inside a wall are refined. If an element is outside the wall it is coarsened up until a minimum level. In each timestep the wall is repositioned, enforcing a repartitioning of the forest in each timestep. diff --git a/paper_files/New_for_hybrid/benchmark.cxx b/paper_files/New_for_hybrid/benchmark.cxx index e153ed1..6681844 100644 --- a/paper_files/New_for_hybrid/benchmark.cxx +++ b/paper_files/New_for_hybrid/benchmark.cxx @@ -27,12 +27,9 @@ #include #include -#include - #include #include #include -#include #include #include @@ -62,22 +59,15 @@ typedef struct * \return t8_cmesh_t */ t8_cmesh_t -t8_benchmark_forest_create_cmesh (const char *msh_file, const int mesh_dim, sc_MPI_Comm comm, const int init_level) +t8_benchmark_forest_create_cmesh (const char *msh_file, const int mesh_dim, sc_MPI_Comm comm, const int init_level ) { - t8_cmesh_t cmesh; - if (msh_file != NULL){ - cmesh = t8_cmesh_from_msh_file ((char *) msh_file, 1, comm, mesh_dim, 0, false); - } - else { - cmesh = t8_cmesh_new_full_hybrid(comm); - } + t8_cmesh_t cmesh = t8_cmesh_from_msh_file ((char *) msh_file, true, comm, mesh_dim, 0, false); t8_cmesh_t cmesh_partition; t8_cmesh_init (&cmesh_partition); t8_cmesh_set_derive (cmesh_partition, cmesh); t8_cmesh_set_partition_uniform (cmesh_partition, init_level, t8_scheme_new_default ()); t8_cmesh_set_profiling (cmesh_partition, 1); t8_cmesh_commit (cmesh_partition, comm); - t8_cmesh_destroy (&cmesh); return cmesh_partition; } @@ -135,19 +125,28 @@ t8_band_adapt (t8_forest_t forest, t8_forest_t forest_from, 
t8_locidx_t which_tr } static void -benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, const int init_level, const int max_level, - const bool no_vtk, const std::array &x_min_max, const double delta_t, const double max_time) +benchmark_band_adapt(t8_cmesh_t cmesh, sc_MPI_Comm comm, const int init_level, const int max_level, + const double x_min, const bool do_ghost, const int num_steps, const double length, + const double thickness, const bool do_balance) { double adapt_time = 0; double partition_time = 0; double new_time = 0; double total_time = 0; - const int num_stats = 4; + double ghost_time = 0; + t8_locidx_t ghost_sent = 0; + double balance_time = 0; + int balance_rounds = 0; + const int num_stats = 8; std::array times; sc_stats_init (×[0], "new"); sc_stats_init (×[1], "adapt"); sc_stats_init (×[2], "partition"); - sc_stats_init (×[3], "total"); + sc_stats_init (×[3], "ghost"); + sc_stats_init (×[4], "ghost_sent"); + sc_stats_init (×[5], "balance"); + sc_stats_init (×[6], "balance_rounds"); + sc_stats_init (×[7], "total"); t8_forest_t forest; t8_forest_init (&forest); @@ -163,49 +162,53 @@ benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, sc_stats_set1 (×[0], new_time, "new"); - t8_3D_vec normal({0.8, 0.3, 0.0}); - adapt_data_t adapt_data = {x_min_max[0], x_min_max[1], normal, init_level, max_level}; + const double step = length / num_steps; + + t8_3D_vec normal({1, 0.0, 0.0}); + adapt_data_t adapt_data = {x_min-thickness/2, x_min+thickness/2, normal, init_level, max_level}; t8_normalize (adapt_data.normal); - int num_steps = 0; + t8_gloidx_t max_num_global_elements = -1; t8_forest_t forest_adapt, forest_partition; - for (double time = 0; time < max_time; time += delta_t, ++num_steps) { + for (int istep = 0; istep < num_steps; ++istep) { t8_forest_init (&forest_adapt); t8_forest_set_adapt (forest_adapt, forest, t8_band_adapt, 1); t8_forest_set_profiling (forest_adapt, 1); - adapt_data.c_min = 
x_min_max[0] + time ; - adapt_data.c_max = x_min_max[1] + time ; + adapt_data.c_min = adapt_data.c_min + step ; + adapt_data.c_max = adapt_data.c_max + step ; t8_forest_set_user_data (forest_adapt, (void *)&adapt_data); - adapt_time -= sc_MPI_Wtime (); t8_forest_commit (forest_adapt); - adapt_time += sc_MPI_Wtime (); + adapt_time += t8_forest_profile_get_adapt_time(forest_adapt); - t8_forest_compute_profile (forest_adapt); t8_forest_ref (forest_adapt); t8_forest_init (&forest_partition); t8_forest_set_partition(forest_partition, forest_adapt, 0); + if( do_balance ) + { + t8_forest_set_balance (forest_partition, NULL, 0); + } t8_forest_set_profiling (forest_partition, 1); + if (do_ghost) { + t8_forest_set_ghost (forest_partition, 1, T8_GHOST_FACES); + } - partition_time -= sc_MPI_Wtime (); t8_forest_commit (forest_partition); - partition_time += sc_MPI_Wtime (); - t8_forest_compute_profile (forest_partition); - t8_cmesh_print_profile (t8_forest_get_cmesh (forest_partition)); + const t8_gloidx_t num_global_elements = t8_forest_get_global_num_leaf_elements (forest_partition); + if (num_global_elements > max_num_global_elements) + max_num_global_elements = num_global_elements; forest = forest_partition; + int ghost_sent_iter = 0; + int procs_sent = 0; + int balance_rounds_iter = 0; + partition_time += t8_forest_profile_get_partition_time (forest_partition, &procs_sent); + ghost_time += t8_forest_profile_get_ghost_time (forest_partition, &ghost_sent_iter); - if (!no_vtk) { - char forest_vtu[BUFSIZ]; - char cmesh_vtu[BUFSIZ]; - snprintf (forest_vtu, BUFSIZ, "%s_forest_partition_%03d", vtu_prefix, num_steps); - snprintf (cmesh_vtu, BUFSIZ, "%s_cmesh_partition_%03d", vtu_prefix, num_steps); - t8_forest_write_vtk (forest_partition, forest_vtu); - t8_cmesh_vtk_write_file (t8_forest_get_cmesh (forest_partition), cmesh_vtu); - t8_debugf ("Wrote partitioned forest and cmesh\n"); - } - t8_cmesh_print_profile (t8_forest_get_cmesh (forest_partition)); - t8_forest_print_profile 
(forest_partition); + balance_time += t8_forest_profile_get_balance_time (forest_partition, &balance_rounds_iter); + + ghost_sent += ghost_sent_iter; + balance_rounds += balance_rounds_iter; t8_forest_unref (&forest_adapt); } @@ -213,10 +216,16 @@ benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, t8_global_productionf ("Num steps: %d\n", num_steps); + t8_global_essentialf ("Max num elements after adapt: %llu\n", (unsigned long long)max_num_global_elements); + sc_stats_accumulate (×[0], new_time); sc_stats_accumulate (×[1], adapt_time); sc_stats_accumulate (×[2], partition_time); - sc_stats_accumulate (×[3], total_time); + sc_stats_accumulate (×[3], ghost_time); + sc_stats_accumulate (×[4], ghost_sent); + sc_stats_accumulate (×[5], balance_time); + sc_stats_accumulate (×[6], balance_rounds); + sc_stats_accumulate (×[7], total_time); sc_stats_compute (comm, num_stats, times.data ()); sc_stats_print (t8_get_package_id (), SC_LP_ESSENTIAL, num_stats, times.data (), 1, 1); t8_forest_unref (&forest_partition); @@ -229,59 +238,66 @@ main (int argc, char **argv) /* Initialize MPI. This has to happen before we initialize sc or t8code. */ int mpiret = sc_MPI_Init (&argc, &argv); int help = 0; - int no_vtk; const char *mshfileprefix = NULL; int dim; int initial_level; int level_diff; - std::array x_min_max; - double T; - double cfl = 0; + double x_min; + int num_runs; + int do_ghost; + int do_balance; + double distance = 1.0; + int num_steps = 1; + double thickness = 0.1; /* Error check the MPI return value. */ SC_CHECK_MPI (mpiret); /* Initialize the sc library, has to happen before we initialize t8code. */ sc_init (sc_MPI_COMM_WORLD, 1, 1, NULL, SC_LP_ESSENTIAL); - /* Initialize t8code with log level SC_LP_PRODUCTION. See sc.h for more info on the log levels. */ - t8_init (SC_LP_PRODUCTION); + /* Initialize t8code with log level SC_LP_ESSENTIAL. See sc.h for more info on the log levels. 
*/ + t8_init (SC_LP_ESSENTIAL); sc_options_t *options = sc_options_new (argv[0]); sc_options_add_switch (options, 'h', "help", &help, "Print this help message and exit"); - sc_options_add_switch (options, 'o', "no-vtk", &no_vtk, "Do not write vtk output."); sc_options_add_string (options, 'f', "mshfile", &mshfileprefix, NULL, "If specified, the cmesh is constructed from a .msh file with the given prefix. " "The files must end in .msh and be created with gmsh."); - sc_options_add_int (options, 'd', "dim", &dim, 2, "Together with -f: The dimension of the coarse mesh. 2 or 3."); + sc_options_add_int (options, 'd', "dim", &dim, 3, "Together with -f: The dimension of the coarse mesh. 2 or 3."); sc_options_add_int (options, 'l', "level", &initial_level, 0, "The initial uniform refinement level of the forest."); sc_options_add_int (options, 'r', "rlevel", &level_diff, 1, "The number of levels that the forest is refined from the initial level."); - sc_options_add_double (options, 'x', "xmin", &x_min_max[0], 0, "The minimum x coordinate in the mesh."); - sc_options_add_double (options, 'X', "xmax", &x_min_max[1], 1, "The maximum x coordinate in the mesh."); - sc_options_add_double (options, 'T', "time", &T, 1, - "The simulated time span. We simulate the time from 0 to T. T has to be > 0."); - /* CFL number. delta_t = CFL * 0.64 / 2^level */ - sc_options_add_double (options, 'C', "cfl", &cfl, 0, - "The CFL number. If specified, then delta_t is set to CFL * 0.64 / 2^level. 
"); + sc_options_add_switch (options, 'g', "ghost", &do_ghost, "If specified, the forest is created with ghost cells."); + sc_options_add_switch (options, 'b', "balance", &do_balance, "If specified, the forest is balanced after each refinement step."); + sc_options_add_double (options, 'x', "xmin", &x_min, 0, "The minimum x coordinate in the mesh."); + sc_options_add_double (options, 't', "thickness", &thickness, 0.1, + "The thickness of the refinement region."); + sc_options_add_double (options, 'D', "distance", &distance, 1.0, + "The distance the plane should move in total."); + sc_options_add_int (options, 's', "steps", &num_steps, 1, + "The number of steps to take in the refinement region. The distance is divided by this number."); + sc_options_add_int (options, 'n', "num-runs", &num_runs, 1, + "The number of runs to perform. If specified, the program will run num_runs times with the same parameters. "); const int options_argc = sc_options_parse (t8_get_package_id (), SC_LP_DEFAULT, options, argc, argv); - if( options_argc <= 0 || options_argc != argc || help || initial_level < 0 || level_diff <= 0 || cfl == 0) + if( options_argc <= 0 || options_argc != argc || help || initial_level < 0 || level_diff <= 0 ) { sc_options_print_usage (t8_get_package_id (), SC_LP_ERROR, options, NULL); return 1; } - const double delta_t = cfl * 0.64 / (1 << initial_level); - t8_global_productionf ("Using CFL %f, delta_t = %f\n", cfl, delta_t); + T8_ASSERT (mshfileprefix != NULL); t8_global_productionf ("Using mshfileprefix %s with dim %d\n", mshfileprefix, dim); - t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (mshfileprefix, dim, sc_MPI_COMM_WORLD, initial_level); - const int max_level = initial_level + level_diff; + for (int irun = 0; irun < num_runs; ++irun) { + t8_global_essentialf ("#################### Run %d of %d ####################\n", irun + 1, num_runs); + t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (mshfileprefix, dim, sc_MPI_COMM_WORLD, initial_level); + 
- benchmark_band_adapt (cmesh, "benchmark", sc_MPI_COMM_WORLD, initial_level, max_level, no_vtk, - x_min_max, delta_t, T); + benchmark_band_adapt (cmesh, sc_MPI_COMM_WORLD, initial_level, max_level, x_min, do_ghost, num_steps, distance, thickness, do_balance); + } sc_options_destroy (options); sc_finalize (); diff --git a/paper_files/New_for_hybrid/benchmark_elems.cxx b/paper_files/New_for_hybrid/benchmark_elems.cxx new file mode 100644 index 0000000..15a6333 --- /dev/null +++ b/paper_files/New_for_hybrid/benchmark_elems.cxx @@ -0,0 +1,287 @@ +/* + This file is part of t8code. + t8code is a C library to manage a collection (a forest) of multiple + connected adaptive space-trees of general element types in parallel. + + Copyright (C) 2025 the developers + + t8code is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + t8code is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with t8code; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+*/ +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + + +t8_cmesh_t two_tets (sc_MPI_Comm comm) +{ + t8_cmesh_t cmesh; + t8_cmesh_init (&cmesh); + + double vertices [15] = { + 0.0, 0.0, 0.0, //v0 + 0.0, 1.0, 0.0, //v1 + 1.0, 0.0, 0.0, //v2 + 0.0, 0.0, 1.0, //v3 + 1.0, 1.0, 1.0, //v4 + }; + + + t8_cmesh_set_tree_class (cmesh, 0, T8_ECLASS_TET); + t8_cmesh_set_tree_class (cmesh, 1, T8_ECLASS_TET); + + t8_cmesh_set_join (cmesh, 0, 1, 0, 3, 0); + + t8_cmesh_set_tree_vertices (cmesh, 0, vertices, 4); + t8_cmesh_set_tree_vertices (cmesh, 1, vertices + 3, 4); + + t8_cmesh_register_geometry (cmesh); + t8_cmesh_commit (cmesh, comm); + return cmesh; +} + +/** + * Create a partitioned cmesh. If no msh_file is given, a new hybrid cmesh is created. + * + * \param[in] msh_file + * \param[in] mesh_dim + * \param[in] comm + * \param[in] init_level + * \return t8_cmesh_t + */ +t8_cmesh_t +t8_benchmark_forest_create_cmesh ( sc_MPI_Comm comm, const int init_level, const t8_eclass_t eclass) +{ + T8_ASSERT (eclass != T8_ECLASS_INVALID); + t8_cmesh_t cmesh; + if (eclass == T8_ECLASS_TET) { + cmesh = two_tets (comm); + } + else if (eclass == T8_ECLASS_PYRAMID) { + cmesh = t8_cmesh_new_pyramid_cake (comm, 8); + } + else { + cmesh = t8_cmesh_new_hypercube (eclass, comm, false, false, false); + } + t8_cmesh_t cmesh_partition; + t8_cmesh_init (&cmesh_partition); + t8_cmesh_set_derive (cmesh_partition, cmesh); + t8_cmesh_set_partition_uniform (cmesh_partition, init_level, t8_scheme_new_default ()); + t8_cmesh_set_profiling (cmesh_partition, 1); + t8_cmesh_commit (cmesh_partition, comm); + return cmesh_partition; +} + + +/* refine the forest in a band, given by a plane E and two constants + * c_min, c_max. 
We refine the cells in the band c_min*E, c_max*E */ +static int +t8_adapt_pyramid ([[maybe_unused]] t8_forest_t forest, [[maybe_unused]] t8_forest_t forest_from, [[maybe_unused]] t8_locidx_t which_tree, + [[maybe_unused]]t8_eclass_t tree_class, + [[maybe_unused]] t8_locidx_t lelement_id, [[maybe_unused]]const t8_scheme *scheme, [[maybe_unused]]const int is_family, + [[maybe_unused]] const int num_elements, t8_element_t *elements[]) +{ + const t8_dpyramid_t *pyra = (const t8_dpyramid_t *) elements[0]; + const int type = pyra->pyramid.type; + if (type == 6 || type == 0 || type == 2 || type == 4 ){ + return 1; + } + else { + return 0; + } +} + +static int +t8_adapt_second ([[maybe_unused]] t8_forest_t forest, [[maybe_unused]] t8_forest_t forest_from, [[maybe_unused]] t8_locidx_t which_tree, + [[maybe_unused]]t8_eclass_t tree_class, + [[maybe_unused]] t8_locidx_t lelement_id, [[maybe_unused]]const t8_scheme *scheme, [[maybe_unused]]const int is_family, + [[maybe_unused]] const int num_elements, t8_element_t *elements[]) +{ + const int child_id = scheme->element_get_child_id (tree_class, elements[0]); + return child_id % 2; +} + +static void +benchmark_band_adapt(t8_cmesh_t cmesh, sc_MPI_Comm comm, const int init_level, const t8_eclass_t eclass) +{ + double adapt_time = 0; + double partition_time = 0; + double new_time = 0; + double total_time = 0; + double ghost_time = 0; + t8_locidx_t ghost_sent = 0; + double balance_time = 0; + const int num_stats = 7; + std::array times; + sc_stats_init (×[0], "new"); + sc_stats_init (×[1], "adapt"); + sc_stats_init (×[2], "partition"); + sc_stats_init (×[3], "ghost"); + sc_stats_init (×[4], "ghost_sent"); + sc_stats_init (×[5], "balance"); + sc_stats_init (×[6], "total"); + + t8_forest_t forest; + t8_forest_init (&forest); + t8_forest_set_cmesh(forest, cmesh, comm); + t8_forest_set_scheme (forest, t8_scheme_new_default ()); + t8_forest_set_level (forest, init_level); + + total_time -= sc_MPI_Wtime (); + + new_time -= sc_MPI_Wtime 
(); + t8_forest_commit (forest); + new_time += sc_MPI_Wtime (); + + sc_stats_set1 (×[0], new_time, "new"); + + + t8_forest_t forest_adapt, forest_partition; + t8_forest_init (&forest_adapt); + if (eclass == T8_ECLASS_PYRAMID) { + t8_forest_set_adapt (forest_adapt, forest, t8_adapt_pyramid, 0); + } + else { + t8_forest_set_adapt (forest_adapt, forest, t8_adapt_second, 0); + } + t8_forest_set_profiling (forest_adapt, 1); + + + t8_forest_commit (forest_adapt); + adapt_time += t8_forest_profile_get_adapt_time(forest_adapt); + + t8_forest_ref (forest_adapt); + + t8_forest_init (&forest_partition); + t8_forest_set_partition(forest_partition, forest_adapt, 0); + t8_forest_set_profiling (forest_partition, 1); + t8_forest_set_ghost (forest_partition, 1, T8_GHOST_FACES); + t8_forest_set_balance (forest_partition, NULL, 0); + + t8_forest_commit (forest_partition); + forest = forest_partition; + int procs_sent = 0; + int balance_rounds = 0; + partition_time += t8_forest_profile_get_partition_time (forest_partition, &procs_sent); + ghost_time += t8_forest_profile_get_ghost_time (forest_partition, &ghost_sent); + balance_time += t8_forest_profile_get_balance_time (forest_partition, &balance_rounds); + + + t8_forest_unref (&forest_adapt); + + total_time += sc_MPI_Wtime (); + + sc_stats_accumulate (×[0], new_time); + sc_stats_accumulate (×[1], adapt_time); + sc_stats_accumulate (×[2], partition_time); + sc_stats_accumulate (×[3], ghost_time); + sc_stats_accumulate (×[4], ghost_sent); + sc_stats_accumulate (×[5], balance_time); + sc_stats_accumulate (×[6], total_time); + sc_stats_compute (comm, num_stats, times.data ()); + sc_stats_print (t8_get_package_id (), SC_LP_PRODUCTION, num_stats, times.data (), 1, 1); + t8_forest_unref (&forest_partition); +} + +int +main (int argc, char **argv) +{ + + /* Initialize MPI. This has to happen before we initialize sc or t8code. 
*/ + int mpiret = sc_MPI_Init (&argc, &argv); + int help = 0; + int initial_level; + int eclass_int; + int num_runs; + + /* Error check the MPI return value. */ + SC_CHECK_MPI (mpiret); + + /* Initialize the sc library, has to happen before we initialize t8code. */ + sc_init (sc_MPI_COMM_WORLD, 1, 1, NULL, SC_LP_ESSENTIAL); + /* Initialize t8code with log level SC_LP_PRODUCTION. See sc.h for more info on the log levels. */ + t8_init (SC_LP_PRODUCTION); + + sc_options_t *options = sc_options_new (argv[0]); + + sc_options_add_switch (options, 'h', "help", &help, "Print this help message and exit"); + sc_options_add_int (options, 'e', "eclass", &eclass_int, 0, + "0: Tetrahedron, 1: Hexahedron, 2: Prism, 3: Pyramid"); + sc_options_add_int (options, 'l', "level", &initial_level, 0, "The initial uniform refinement level of the forest."); + sc_options_add_int (options, 'n', "num-runs", &num_runs, 1, + "The number of runs to perform. If specified, the program will run num_runs times with the same parameters. 
"); + const int options_argc = sc_options_parse (t8_get_package_id (), SC_LP_DEFAULT, options, argc, argv); + + if( options_argc <= 0 || options_argc != argc || help ) + { + sc_options_print_usage (t8_get_package_id (), SC_LP_ERROR, options, NULL); + return 1; + } + t8_eclass_t eclass = T8_ECLASS_INVALID; + + switch (eclass_int) + { + case 0: + eclass = T8_ECLASS_TET; + break; + case 1: + eclass = T8_ECLASS_HEX; + break; + case 2: + eclass = T8_ECLASS_PRISM; + break; + case 3: + eclass = T8_ECLASS_PYRAMID; + break; + default: + break; + } + + for (int irun = 0; irun < num_runs; ++irun) { + t8_global_essentialf ("#################### Run %d of %d ####################\n", irun + 1, num_runs); + t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (sc_MPI_COMM_WORLD, initial_level, eclass); + + + benchmark_band_adapt (cmesh, sc_MPI_COMM_WORLD, initial_level, eclass); + } + + sc_options_destroy (options); + sc_finalize (); + + mpiret = sc_MPI_Finalize (); + SC_CHECK_MPI (mpiret); + + return 0; +} diff --git a/paper_files/New_for_hybrid/evaluate/create_graphic.py b/paper_files/New_for_hybrid/evaluate/create_graphic.py new file mode 100755 index 0000000..a92e95c --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/create_graphic.py @@ -0,0 +1,337 @@ +import sys +import matplotlib.pyplot as plt + +def extract_data(file_path, columns): +# The file consists of multiple exectuings with differen number of procs. +# Each execution starts with a line containing: +# ------------- Running: with procs ------------- +# In args we find the argument -n which states how often the command is executed. +# Each executin of the command starts with a line containing: +# [t8] #################### Run i of n #################### +# At the end of each run there is a line containing: +# [t8] Summary = [ time time time .... time ]; +# To extract the data we need to find the lines containing: +# ------------- Running: with procs ------------- +# until the end of the file. 
+ data = [] + with open(file_path, 'r') as file: + lines = file.readlines() # Read all lines into a list + for line in lines: + if "------------- Running:" in line and "with" in line and "procs" in line: + parts = line.split("------------- Running:")[1].strip().split("with") + args = parts[0].strip() + procs = parts[1].strip().split()[0] + # Ensure the last element_type is associated with the current procs + element_type = data[-1]["element_type"] if data and "element_type" in data[-1] else "Unknown" + data.append({"element_type": element_type, "procs": int(procs)}) + # Extract the number of runs from the args + # Assuming args is in the form of '-n ' + n_value = None + if any(arg.startswith('-n') and arg[2:].isdigit() for arg in args.split()): + n_value = next(arg[2:] for arg in args.split() if arg.startswith('-n') and arg[2:].isdigit()) + args_list = args.split() # Split args into a list of arguments + else: + print("No -n argument found in args.") + # for each run we need to find the line containing: + # [t8] Summary = [ time time time .... 
time ]; + current_run_summaries = [] + for run_line in lines[lines.index(line) + 1:]: + if "[t8] #################### Run" in run_line: + # find the number of the current run + run_number = run_line.split("[t8] #################### Run")[1].split("of")[0].strip() + continue # Skip the run header lines + if "[t8] Summary = [" in run_line: + # Extract the summary data + summary_data = run_line.split("[t8] Summary = [")[1].split("]")[0].strip().split() + print(f"Extracted summary data: {summary_data}") + print("\n") + # Filter the times based on the specified columns + summary_data = [summary_data[col] for col in columns if col < len(summary_data)] + # Convert the summary data to floats and filter based on columns + current_run_summaries.append([float(value) for value in summary_data]) + if "------------- Running:" in run_line: + break # Exit the inner loop to reprocess the line in the outer loop + # Compute the average of the times, given by the columns + if current_run_summaries: + avg_summary = [sum(x) / len(x) for x in zip(*current_run_summaries)] + # Append the average summary to the data + data[-1]["summaries"] = avg_summary + else: + print("No summaries found for this run.") + return data + +def extract_data_elems(file_path, columns): + print(f"Extracting data from {file_path} for columns {columns}") +# The file consists of multiple exectuings with differen number of procs. +# Each execution starts with a line containing: +# ------------- Running: with procs ------------- +# In args we find the argument -n which states how often the command is executed. +# Each executin of the command starts with a line containing: +# [t8] #################### Run i of n #################### +# At the end of each run there is a line containing: +# [t8] Summary = [ time time time .... time ]; +# To extract the data we need to find the lines containing: +# ------------- Running: with procs ------------- +# until the end of the file. 
+ data = [] + with open(file_path, 'r') as file: + lines = file.readlines() # Read all lines into a list + element_type = None + num_elements = 0 + for line in lines: + if "TEST " in line: + element_type = line.split("TEST")[1].strip() + num_elements = 0 # Reset num_elements for each new element type + elif "------------- Running:" in line and "with" in line and "procs" in line: + parts = line.split("------------- Running:")[1].strip().split("with") + args = parts[0].strip() + procs = parts[1].strip().split()[0] + data.append({"element_type": element_type, "procs": int(procs)}) + # Extract the number of runs from the args + # Assuming args is in the form of '-n ' + n_value = None + if any(arg.startswith('-n') and arg[2:].isdigit() for arg in args.split()): + n_value = next(arg[2:] for arg in args.split() if arg.startswith('-n') and arg[2:].isdigit()) + args_list = args.split() # Split args into a list of arguments + else: + print("No -n argument found in args.") + # for each run we need to find the line containing: + # [t8] Summary = [ time time time .... 
time ]; + current_run_summaries = [] + for run_line in lines[lines.index(line) + 1:]: + if "[t8] #################### Run" in run_line: + # find the number of the current run + run_number = run_line.split("[t8] #################### Run")[1].split("of")[0].strip() + continue # Skip the run header lines + if "Done t8_forest_balance with" in run_line and num_elements == 0: + # Extract the number of elements from the line + num_elements = int(run_line.split("with")[1].split("global elements")[0].strip()) + data[-1]["num_elements"] = num_elements + if "[t8] Summary = [" in run_line: + # Extract the summary data + summary_data = run_line.split("[t8] Summary = [")[1].split("]")[0].strip().split() + # Filter the times based on the specified columns + summary_data = [summary_data[col] for col in columns if col < len(summary_data)] + print(f"Extracted summary data for type {element_type}: {summary_data}") + # Convert the summary data to floats and filter based on columns + current_run_summaries.append([float(value) for value in summary_data]) + print(f"Current run summaries: {current_run_summaries}") + if "------------- Running:" in run_line: + break # Exit the inner loop to reprocess the line in the outer loop + # Compute the average of the times, given by the columns + avg_summary = [sum(x) / len(x) for x in zip(*current_run_summaries)] + # Append the average summary to the data + data[-1]["summaries"] = avg_summary + return data + +def create_graphics(num_files, names, graph_name_base, data_base): + plt.figure(figsize=(10, 6)) + plt.xlabel('Number of Processes', fontsize=16) + plt.ylabel('Average Time (s)', fontsize=16) + plt.xscale('log', base=2) + plt.yscale('log', base=10) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14) + + plt.title('Performance Comparison', fontsize=16) + print(data_base) + for i in range(int(num_files)): + entry = data_base[i][0] + procs = entry["procs"] + summaries = entry["summaries"] + total = sum(summaries) + color = plt.cm.tab10(i % 10) + # 
Use a line plot (single-point plotted as a marker; use sequences to connect points if desired) + if i == 0: + procs_list = [] + totals_list = [] + procs_list.append(procs) + totals_list.append(total) + # draw/update a line that connects the accumulated points + plt.plot(procs_list, totals_list, marker='o', linestyle='-', color='black', label=graph_name_base if i == 0 else None) + #bottom = 0 + #for j, value in enumerate(summaries): + # color = plt.cm.tab10(j % 10) # Use a consistent color for each section + # plt.bar(procs, value, bottom=bottom, label=f"{names[j]}" if i == 0 else "", width=procs / 10, color=color) + # bottom += value + + + plt_name = f"graph_{graph_name_base}.png" + plt.legend(fontsize=14) + plt.savefig(plt_name) + print(f"Graph saved as {plt_name}") + +def create_graphics_elem(names, num_files, data): + compute_ghosts = False + for i in range(len(names)): + if "Ghost" in names[i]: + compute_ghosts = True + if "Ghost_sent" in names[i]: + continue + + plt.figure(figsize=(10, 6)) + plt.xlabel('Number of Processes', fontsize=16) + plt.ylabel('Average Time (s)', fontsize=16) + plt.xscale('log', base=2) + plt.yscale('log', base=10) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14) + plt.title(f'Performance Comparison for {names[i]}', fontsize=16) + color_map = { + "TETRAHEDRON": "green", + "HEXAHEDRON": "blue", + "PRISM": "red", + "PYRAMID": "orange" + } + name_map = { + "TETRAHEDRON": "Tetrahedron", + "HEXAHEDRON": "Hexahedron", + "PRISM": "Prism", + "PYRAMID": "Pyramid" + } + for ifile in range(int(num_files)): + element_types = set(entry["element_type"] for entry in data[ifile]) + for element_type in element_types: + element_data = [entry for entry in data[ifile] if entry["element_type"] == element_type] + procs = [entry["procs"] for entry in element_data] + times = [entry["summaries"][i] for entry in element_data] + if compute_ghosts: + ghost_sent = [entry["summaries"][i+1] for entry in element_data] + parallel_efficiency = [times[0] * 
ghost_sent[j] / (times[j] * ghost_sent[0]) for j in range(len(times))] + print(f"Parallel Efficiency for {element_type}: {parallel_efficiency}") + if element_type == "PYRAMID": + print(f"Times for PYRAMID: {times}") + print(f"Ghost Sent for PYRAMID: {ghost_sent}") + color = color_map.get(element_type, "gray") + marker_map = { + "TETRAHEDRON": "^", # triangle_up + "HEXAHEDRON": "s", # square + "PRISM": "8", # octagon + "PYRAMID": "D" # diamond + } + marker = marker_map.get(element_type, "o") + plt.plot(procs, times, label=name_map.get(element_type, element_type) if ifile == 0 else None, + marker=marker, color=color, linestyle='-') + + if element_type == "PYRAMID": + if compute_ghosts: + ghost_sent = [entry["summaries"][i+1] for entry in element_data] + ideal_scaling = [times[0]] * len(procs) + for j in range(1, len(procs)): + ideal_scaling[j] = ideal_scaling[j-1] * (ghost_sent[j] / ghost_sent[j-1]) + #print(f"Times for PYRAMID: {times}") + #print(f"Ghost Sent for PYRAMID: {ghost_sent}") + #print(f"Ideal Scaling for PYRAMID: {ideal_scaling}") + plt.plot(procs, ideal_scaling, label="Ideal Strong Scaling" if ifile == 0 else None, color='black', linestyle='dashed') + else: + ideal_scaling = [times[0] / 2**iproc for iproc in range(len(procs))] + plt.plot(procs, ideal_scaling, label="Ideal Strong Scaling" if ifile == 0 else None, color='black', linestyle='dashed') + if ifile == 0: + #if compute_ghosts: + # elem_data = [[entry for entry in data[j] if entry["element_type"] == "PYRAMID"] for j in range(int(num_files))] + # ghost_sent = [[entry["summaries"][i+1] for entry in elem_data[j]] for j in range(int(num_files))] + # print(f"Ghost Sent for PYRAMID: {ghost_sent}") + # for iproc, proc in enumerate(procs): + # val = times[iproc] + # ideal_weak_scaling = [val ] * int(num_files) + # for j in range(1, int(num_files)): + # ideal_weak_scaling[j] = ideal_weak_scaling[j-1] * (ghost_sent[j][iproc] / ghost_sent[j-1][iproc]) + # shifted_procs = [proc * 8**i for i in 
range(int(num_files))] + # plt.plot(shifted_procs, ideal_weak_scaling, color='black', linestyle='dotted', label="Ideal Weak Scaling" if iproc == 0 and ifile == 0 else None) + #else: + num_elements = element_data[ifile].get("num_elements", 1) + num_elements_next_file = next( + (entry.get("num_elements", 1) for entry in data[ifile + 1] if entry.get("element_type") == "PYRAMID"), + 1 + ) + element_scaling = num_elements_next_file / num_elements + for iproc, proc in enumerate(procs): + val = times[iproc] + ideal_weak_scaling = [val * (element_scaling**i) / (8**i) for i in range(int(num_files))] + shifted_procs = [proc * 8**i for i in range(int(num_files))] + plt.plot(shifted_procs, ideal_weak_scaling, color='black', linestyle='dotted', label="Ideal Weak Scaling" if iproc == 0 and ifile == 0 else None) + + plt.grid() + plt.legend() + plt.legend(loc='lower left', fontsize=14) + handles, labels = plt.gca().get_legend_handles_labels() + order = [] + label_order = ["Pyramid", "Prism", "Tetrahedron", "Hexahedron", "Ideal Strong Scaling", "Ideal Weak Scaling"] + for lbl in label_order: + if lbl in labels: + order.append(labels.index(lbl)) + plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc='lower left') + plt_name = f"graph_{names[i]}.png" + plt.savefig(plt_name) + print(f"Graph saved as {plt_name}") + if "Ghost" in names[i]: + compute_ghosts = False + +def compare_mesh(): + num_files = sys.argv[2] + + base = sys.argv[3:3 + int(num_files)] + + additional_args_index = 3 + int(num_files) + names = [name.strip() for name in sys.argv[additional_args_index].split(',')] + + indices = list(map(int, sys.argv[additional_args_index + 1].split(','))) + + if len(names) != len(indices): + print("Error: The number of names and indices must be the same.") + sys.exit(1) + + graph_name_base = sys.argv[additional_args_index + 2] + + data_base = [] + + for file in range(int(num_files)): + data_base.append(extract_data(base[file], indices)) + + 
create_graphics(num_files, names, graph_name_base, data_base) + +def compare_elements(): + if (len(sys.argv) < 6): + print("Usage: python create_graphic.py elements <num_files> <file_1> ... <file_n> <names> <indices> <graph_name>") + sys.exit(1) + + num_files = sys.argv[2] + print (num_files) + base = sys.argv[3:3 + int(num_files)] + print(base) + additional_args_index = 3 + int(num_files) + + names = [name.strip() for name in sys.argv[additional_args_index].split(',')] + print (names) + indices = list(map(int, sys.argv[additional_args_index + 1].split(','))) + + graph_name_base = sys.argv[additional_args_index + 2] + + data_base = [] + for file in range(int(num_files)): + data_base.append(extract_data_elems(base[file], indices)) + + print(f"Data extracted from {base}: {data_base}") + + create_graphics_elem(names, num_files, data_base) + + +def main(): + if len(sys.argv) < 2: + print("Usage: python create_graphic.py <mode> [args]") + sys.exit(1) + + mode = sys.argv[1] + + if mode == "mesh": + compare_mesh() + elif mode == "elements": + compare_elements() + else: + print("Error: Invalid mode. 
Use 'mesh' or 'elements'.") + sys.exit(1) + + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/paper_files/New_for_hybrid/evaluate/graph_Ghost.png b/paper_files/New_for_hybrid/evaluate/graph_Ghost.png new file mode 100644 index 0000000..f1469fd --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_Ghost.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736b0c4c48666d8ddd5944b209db5368e25e8864f913b9a75b286a5d3a14b5b1 +size 122928 diff --git a/paper_files/New_for_hybrid/evaluate/graph_adapt.png b/paper_files/New_for_hybrid/evaluate/graph_adapt.png new file mode 100644 index 0000000..ab550b2 --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_adapt.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96450c8819686d3d17a2dedd6fc51a41ed3c428a88efd9f6f29f907cdfb2b8d7 +size 108083 diff --git a/paper_files/New_for_hybrid/evaluate/graph_new.png b/paper_files/New_for_hybrid/evaluate/graph_new.png new file mode 100644 index 0000000..e7d3ffe --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_new.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399fe221a0be513f83e09c7e06ad33bf0f6bfddbfe798ba497bae2a3ab2054c4 +size 110444 diff --git a/paper_files/New_for_hybrid/evaluate/graph_parition.png b/paper_files/New_for_hybrid/evaluate/graph_parition.png new file mode 100644 index 0000000..868d205 --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_parition.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e98003e6743ed5c02e27380b409c281f269704b3a3bae253e8d0e5c8cfdf6e8 +size 120657 diff --git a/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh b/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh new file mode 100644 index 0000000..840da63 --- /dev/null +++ b/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# This is a batch-file used on CARA. 
CARA has 2,168 nodes, each with 2x AMD EPYC (32 cores) + + +#SBATCH --time=00:10:00 +#SBATCH --ntasks=64 +#SBATCH --output=New_for_hybrid_ELEM_%j +#SBATCH --error=New_for_hybrid_ELEM_err_%j +NUM_PROCS="8 16 32 64" +ELEMENT_NAMES=("TETRAHEDRON" "HEXAHEDRON" "PRISM" "PYRAMID") + +# e = Element type +# l = initial level +# n = number of reruns + +PART_ARGS="-l4 -n3" + + +JOBFILE="/scratch/ws/4/knap_da-t8code_timings/benchmark/benchmark" + +for i in {0..3}; do + ELEMENT=${ELEMENT_NAMES[$i]} + echo "TEST $ELEMENT" + if [ "$i" -eq 3 ]; then + LVAL=$(echo $PART_ARGS | sed -n 's/.*-l\([0-9]\+\).*/\1/p') + NEW_LVAL=$((LVAL - 1)) + PART_ARGS="-l$NEW_LVAL -n3" + echo "Adjusting level to $NEW_LVAL for PYRAMID element" + fi + ARGS="$PART_ARGS -e$i" + JOB_CMD="$JOBFILE $ARGS" + for PROCS in $NUM_PROCS ; do + echo "------------- Running: $JOB_CMD with $PROCS procs ------------" + srun -n $PROCS $JOB_CMD & + done +done + diff --git a/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh b/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh new file mode 100644 index 0000000..5e18310 --- /dev/null +++ b/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# This is a batch-file used on CARA. 
CARA has 2,168 nodes, each with 2x AMD EPYC (32 cores) + + +#SBATCH --time=00:10:00 +#SBATCH --ntasks=128 +#SBATCH --output=Mesh_%j +#SBATCH --error=Mesh_err_%j +NUM_PROCS="128" + +# d = Dimension +# l = initial level +# r = number of refinements +# x = left wall of the mesh +# t = thickness of the refinement region +# D = distance of the refinement region to travel +# s = number of steps to travel +# n = number of reruns +# b = balance after each refinement step +# g = ghost cells + +PART_ARGS="-d3 -l5 -r2 -x-0.5 -t0.2 -D2 -s5 -n2 -g" + + +JOBFILE="/scratch_fast/ws/0/knap_da-t8code_benchmarks/benchmark_build/benchmark" + +MSH_FILE="/scratch_fast/ws/0/knap_da-t8code_benchmarks/t8data/paper_files/New_for_hybrid/tonne_100k" + +JOB_CMD="$JOBFILE -f $MSH_FILE $PART_ARGS" +for PROCS in $NUM_PROCS ; do + echo "------------- Running: $JOB_CMD with $PROCS procs ------------" + srun -n $PROCS $JOB_CMD & +done + diff --git a/paper_files/New_for_hybrid/tonne_100k.msh b/paper_files/New_for_hybrid/tonne_100k.msh new file mode 100755 index 0000000..cc12e91 --- /dev/null +++ b/paper_files/New_for_hybrid/tonne_100k.msh @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde35ae4fd98b6678c4d706abf925392febee5de1d8a042e4b81ce41ccd2245e +size 6071157