diff --git a/paper_files/New_for_hybrid/CMakeLists.txt b/paper_files/New_for_hybrid/CMakeLists.txt index 30c6ef5..5c4fe1d 100644 --- a/paper_files/New_for_hybrid/CMakeLists.txt +++ b/paper_files/New_for_hybrid/CMakeLists.txt @@ -17,5 +17,8 @@ target_include_directories(Benchmark_new_for_hybrid ) add_executable(benchmark benchmark.cxx) +add_executable(benchmark_elems benchmark_elems.cxx) target_link_libraries(benchmark PRIVATE T8CODE::T8) +target_link_libraries(benchmark_elems PRIVATE T8CODE::T8) + diff --git a/paper_files/New_for_hybrid/README.md b/paper_files/New_for_hybrid/README.md index 4ea9096..f4e3d9a 100644 --- a/paper_files/New_for_hybrid/README.md +++ b/paper_files/New_for_hybrid/README.md @@ -1,13 +1,11 @@ # README -## Comparison of the Partitioning Algorithm +## How to run the benchmarks +Install the benchmarks via CMake and provide paths to t8code (and SC & P4est, which can be installed with t8code altogether) +Run the benchmark_elems example to evaluate the element-wise performance. With -n you can define the number of repeated runs, with -e you define the type of element used and with -l the initial uniform refinement level of the forest. -To compare the timings between the introduction of the New-for-hybrid algorithm link with -- t8code v4.0.0 for the "old" performance -- t8code add tag for the new performance +Run the benchmark example to evaluate the performance on a large hybrid mesh. Provide a mesh file via -f, and describe the dimension of the mesh via -d. The initial uniform refinement level is given via -l and -r defines the number of refinement levels. -g toggles on the usage of ghost-cells, -b enables the computation of a 2:1 balancing. -x is the minimum coordinate of the mesh, -t describes the thickness of the wall and -D the distance of the wall to travel. -s defines the step the wall should make. -n is the number of repeated runs to test. +We used "-d3 -l5 -r2 -x-0.5 -t0.2 -D2 -s5 -n2 -g" for our tests. -## What is benchmarked? 
+To run the examples on a cluster you can use the scripts provided in the jobs directory. -In benchmark.cxx we create a cmesh, either from a file or from our examples. We use a hybrid mesh from the example, consisting of a hexahedron, a tetrahedron, a prism and a pyramid. For the published run-times we used a cmesh created from xy.msh. - -The mesh is then adaptively refined, coarsened and repartitioned for multiple timesteps. In each timestep elements inside a wall are refined. If an element is outside the wall it is coarsened up until a minimum level. In each timestep the wall is repositioned, enforcing a repartitioning of the forest in each timestep. diff --git a/paper_files/New_for_hybrid/benchmark.cxx b/paper_files/New_for_hybrid/benchmark.cxx index e153ed1..6681844 100644 --- a/paper_files/New_for_hybrid/benchmark.cxx +++ b/paper_files/New_for_hybrid/benchmark.cxx @@ -27,12 +27,9 @@ #include #include -#include - #include #include #include -#include #include #include @@ -62,22 +59,15 @@ typedef struct * \return t8_cmesh_t */ t8_cmesh_t -t8_benchmark_forest_create_cmesh (const char *msh_file, const int mesh_dim, sc_MPI_Comm comm, const int init_level) +t8_benchmark_forest_create_cmesh (const char *msh_file, const int mesh_dim, sc_MPI_Comm comm, const int init_level ) { - t8_cmesh_t cmesh; - if (msh_file != NULL){ - cmesh = t8_cmesh_from_msh_file ((char *) msh_file, 1, comm, mesh_dim, 0, false); - } - else { - cmesh = t8_cmesh_new_full_hybrid(comm); - } + t8_cmesh_t cmesh = t8_cmesh_from_msh_file ((char *) msh_file, true, comm, mesh_dim, 0, false); t8_cmesh_t cmesh_partition; t8_cmesh_init (&cmesh_partition); t8_cmesh_set_derive (cmesh_partition, cmesh); t8_cmesh_set_partition_uniform (cmesh_partition, init_level, t8_scheme_new_default ()); t8_cmesh_set_profiling (cmesh_partition, 1); t8_cmesh_commit (cmesh_partition, comm); - t8_cmesh_destroy (&cmesh); return cmesh_partition; } @@ -135,19 +125,28 @@ t8_band_adapt (t8_forest_t forest, t8_forest_t forest_from, 
t8_locidx_t which_tr } static void -benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, const int init_level, const int max_level, - const bool no_vtk, const std::array &x_min_max, const double delta_t, const double max_time) +benchmark_band_adapt(t8_cmesh_t cmesh, sc_MPI_Comm comm, const int init_level, const int max_level, + const double x_min, const bool do_ghost, const int num_steps, const double length, + const double thickness, const bool do_balance) { double adapt_time = 0; double partition_time = 0; double new_time = 0; double total_time = 0; - const int num_stats = 4; + double ghost_time = 0; + t8_locidx_t ghost_sent = 0; + double balance_time = 0; + int balance_rounds = 0; + const int num_stats = 8; std::array times; sc_stats_init (×[0], "new"); sc_stats_init (×[1], "adapt"); sc_stats_init (×[2], "partition"); - sc_stats_init (×[3], "total"); + sc_stats_init (×[3], "ghost"); + sc_stats_init (×[4], "ghost_sent"); + sc_stats_init (×[5], "balance"); + sc_stats_init (×[6], "balance_rounds"); + sc_stats_init (×[7], "total"); t8_forest_t forest; t8_forest_init (&forest); @@ -163,49 +162,53 @@ benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, sc_stats_set1 (×[0], new_time, "new"); - t8_3D_vec normal({0.8, 0.3, 0.0}); - adapt_data_t adapt_data = {x_min_max[0], x_min_max[1], normal, init_level, max_level}; + const double step = length / num_steps; + + t8_3D_vec normal({1, 0.0, 0.0}); + adapt_data_t adapt_data = {x_min-thickness/2, x_min+thickness/2, normal, init_level, max_level}; t8_normalize (adapt_data.normal); - int num_steps = 0; + t8_gloidx_t max_num_global_elements = -1; t8_forest_t forest_adapt, forest_partition; - for (double time = 0; time < max_time; time += delta_t, ++num_steps) { + for (int istep = 0; istep < num_steps; ++istep) { t8_forest_init (&forest_adapt); t8_forest_set_adapt (forest_adapt, forest, t8_band_adapt, 1); t8_forest_set_profiling (forest_adapt, 1); - adapt_data.c_min = 
x_min_max[0] + time ; - adapt_data.c_max = x_min_max[1] + time ; + adapt_data.c_min = adapt_data.c_min + step ; + adapt_data.c_max = adapt_data.c_max + step ; t8_forest_set_user_data (forest_adapt, (void *)&adapt_data); - adapt_time -= sc_MPI_Wtime (); t8_forest_commit (forest_adapt); - adapt_time += sc_MPI_Wtime (); + adapt_time += t8_forest_profile_get_adapt_time(forest_adapt); - t8_forest_compute_profile (forest_adapt); t8_forest_ref (forest_adapt); t8_forest_init (&forest_partition); t8_forest_set_partition(forest_partition, forest_adapt, 0); + if( do_balance ) + { + t8_forest_set_balance (forest_partition, NULL, 0); + } t8_forest_set_profiling (forest_partition, 1); + if (do_ghost) { + t8_forest_set_ghost (forest_partition, 1, T8_GHOST_FACES); + } - partition_time -= sc_MPI_Wtime (); t8_forest_commit (forest_partition); - partition_time += sc_MPI_Wtime (); - t8_forest_compute_profile (forest_partition); - t8_cmesh_print_profile (t8_forest_get_cmesh (forest_partition)); + const t8_gloidx_t num_global_elements = t8_forest_get_global_num_leaf_elements (forest_partition); + if (num_global_elements > max_num_global_elements) + max_num_global_elements = num_global_elements; forest = forest_partition; + int ghost_sent_iter = 0; + int procs_sent = 0; + int balance_rounds_iter = 0; + partition_time += t8_forest_profile_get_partition_time (forest_partition, &procs_sent); + ghost_time += t8_forest_profile_get_ghost_time (forest_partition, &ghost_sent_iter); - if (!no_vtk) { - char forest_vtu[BUFSIZ]; - char cmesh_vtu[BUFSIZ]; - snprintf (forest_vtu, BUFSIZ, "%s_forest_partition_%03d", vtu_prefix, num_steps); - snprintf (cmesh_vtu, BUFSIZ, "%s_cmesh_partition_%03d", vtu_prefix, num_steps); - t8_forest_write_vtk (forest_partition, forest_vtu); - t8_cmesh_vtk_write_file (t8_forest_get_cmesh (forest_partition), cmesh_vtu); - t8_debugf ("Wrote partitioned forest and cmesh\n"); - } - t8_cmesh_print_profile (t8_forest_get_cmesh (forest_partition)); - t8_forest_print_profile 
(forest_partition); + balance_time += t8_forest_profile_get_balance_time (forest_partition, &balance_rounds_iter); + + ghost_sent += ghost_sent_iter; + balance_rounds += balance_rounds_iter; t8_forest_unref (&forest_adapt); } @@ -213,10 +216,16 @@ benchmark_band_adapt(t8_cmesh_t cmesh, const char *vtu_prefix, sc_MPI_Comm comm, t8_global_productionf ("Num steps: %d\n", num_steps); + t8_global_essentialf ("Max num elements after adapt: %llu\n", (unsigned long long)max_num_global_elements); + sc_stats_accumulate (×[0], new_time); sc_stats_accumulate (×[1], adapt_time); sc_stats_accumulate (×[2], partition_time); - sc_stats_accumulate (×[3], total_time); + sc_stats_accumulate (×[3], ghost_time); + sc_stats_accumulate (×[4], ghost_sent); + sc_stats_accumulate (×[5], balance_time); + sc_stats_accumulate (×[6], balance_rounds); + sc_stats_accumulate (×[7], total_time); sc_stats_compute (comm, num_stats, times.data ()); sc_stats_print (t8_get_package_id (), SC_LP_ESSENTIAL, num_stats, times.data (), 1, 1); t8_forest_unref (&forest_partition); @@ -229,59 +238,66 @@ main (int argc, char **argv) /* Initialize MPI. This has to happen before we initialize sc or t8code. */ int mpiret = sc_MPI_Init (&argc, &argv); int help = 0; - int no_vtk; const char *mshfileprefix = NULL; int dim; int initial_level; int level_diff; - std::array x_min_max; - double T; - double cfl = 0; + double x_min; + int num_runs; + int do_ghost; + int do_balance; + double distance = 1.0; + int num_steps = 1; + double thickness = 0.1; /* Error check the MPI return value. */ SC_CHECK_MPI (mpiret); /* Initialize the sc library, has to happen before we initialize t8code. */ sc_init (sc_MPI_COMM_WORLD, 1, 1, NULL, SC_LP_ESSENTIAL); - /* Initialize t8code with log level SC_LP_PRODUCTION. See sc.h for more info on the log levels. */ - t8_init (SC_LP_PRODUCTION); + /* Initialize t8code with log level SC_LP_ESSENTIAL. See sc.h for more info on the log levels. 
*/ + t8_init (SC_LP_ESSENTIAL); sc_options_t *options = sc_options_new (argv[0]); sc_options_add_switch (options, 'h', "help", &help, "Print this help message and exit"); - sc_options_add_switch (options, 'o', "no-vtk", &no_vtk, "Do not write vtk output."); sc_options_add_string (options, 'f', "mshfile", &mshfileprefix, NULL, "If specified, the cmesh is constructed from a .msh file with the given prefix. " "The files must end in .msh and be created with gmsh."); - sc_options_add_int (options, 'd', "dim", &dim, 2, "Together with -f: The dimension of the coarse mesh. 2 or 3."); + sc_options_add_int (options, 'd', "dim", &dim, 3, "Together with -f: The dimension of the coarse mesh. 2 or 3."); sc_options_add_int (options, 'l', "level", &initial_level, 0, "The initial uniform refinement level of the forest."); sc_options_add_int (options, 'r', "rlevel", &level_diff, 1, "The number of levels that the forest is refined from the initial level."); - sc_options_add_double (options, 'x', "xmin", &x_min_max[0], 0, "The minimum x coordinate in the mesh."); - sc_options_add_double (options, 'X', "xmax", &x_min_max[1], 1, "The maximum x coordinate in the mesh."); - sc_options_add_double (options, 'T', "time", &T, 1, - "The simulated time span. We simulate the time from 0 to T. T has to be > 0."); - /* CFL number. delta_t = CFL * 0.64 / 2^level */ - sc_options_add_double (options, 'C', "cfl", &cfl, 0, - "The CFL number. If specified, then delta_t is set to CFL * 0.64 / 2^level. 
"); + sc_options_add_switch (options, 'g', "ghost", &do_ghost, "If specified, the forest is created with ghost cells."); + sc_options_add_switch (options, 'b', "balance", &do_balance, "If specified, the forest is balanced after each refinement step."); + sc_options_add_double (options, 'x', "xmin", &x_min, 0, "The minimum x coordinate in the mesh."); + sc_options_add_double (options, 't', "thickness", &thickness, 0.1, + "The thickness of the refinement region."); + sc_options_add_double (options, 'D', "distance", &distance, 1.0, + "The distance the plane should move in total."); + sc_options_add_int (options, 's', "steps", &num_steps, 1, + "The number of steps to take in the refinement region. The distance is divided by this number."); + sc_options_add_int (options, 'n', "num-runs", &num_runs, 1, + "The number of runs to perform. If specified, the program will run num_runs times with the same parameters. "); const int options_argc = sc_options_parse (t8_get_package_id (), SC_LP_DEFAULT, options, argc, argv); - if( options_argc <= 0 || options_argc != argc || help || initial_level < 0 || level_diff <= 0 || cfl == 0) + if( options_argc <= 0 || options_argc != argc || help || initial_level < 0 || level_diff <= 0 ) { sc_options_print_usage (t8_get_package_id (), SC_LP_ERROR, options, NULL); return 1; } - const double delta_t = cfl * 0.64 / (1 << initial_level); - t8_global_productionf ("Using CFL %f, delta_t = %f\n", cfl, delta_t); + T8_ASSERT (mshfileprefix != NULL); t8_global_productionf ("Using mshfileprefix %s with dim %d\n", mshfileprefix, dim); - t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (mshfileprefix, dim, sc_MPI_COMM_WORLD, initial_level); - const int max_level = initial_level + level_diff; + for (int irun = 0; irun < num_runs; ++irun) { + t8_global_essentialf ("#################### Run %d of %d ####################\n", irun + 1, num_runs); + t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (mshfileprefix, dim, sc_MPI_COMM_WORLD, initial_level); + 
- benchmark_band_adapt (cmesh, "benchmark", sc_MPI_COMM_WORLD, initial_level, max_level, no_vtk, - x_min_max, delta_t, T); + benchmark_band_adapt (cmesh, sc_MPI_COMM_WORLD, initial_level, max_level, x_min, do_ghost, num_steps, distance, thickness, do_balance); + } sc_options_destroy (options); sc_finalize (); diff --git a/paper_files/New_for_hybrid/benchmark_elems.cxx b/paper_files/New_for_hybrid/benchmark_elems.cxx new file mode 100644 index 0000000..15a6333 --- /dev/null +++ b/paper_files/New_for_hybrid/benchmark_elems.cxx @@ -0,0 +1,287 @@ +/* + This file is part of t8code. + t8code is a C library to manage a collection (a forest) of multiple + connected adaptive space-trees of general element types in parallel. + + Copyright (C) 2025 the developers + + t8code is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + t8code is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with t8code; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+*/ +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + + +t8_cmesh_t two_tets (sc_MPI_Comm comm) +{ + t8_cmesh_t cmesh; + t8_cmesh_init (&cmesh); + + double vertices [15] = { + 0.0, 0.0, 0.0, //v0 + 0.0, 1.0, 0.0, //v1 + 1.0, 0.0, 0.0, //v2 + 0.0, 0.0, 1.0, //v3 + 1.0, 1.0, 1.0, //v4 + }; + + + t8_cmesh_set_tree_class (cmesh, 0, T8_ECLASS_TET); + t8_cmesh_set_tree_class (cmesh, 1, T8_ECLASS_TET); + + t8_cmesh_set_join (cmesh, 0, 1, 0, 3, 0); + + t8_cmesh_set_tree_vertices (cmesh, 0, vertices, 4); + t8_cmesh_set_tree_vertices (cmesh, 1, vertices + 3, 4); + + t8_cmesh_register_geometry (cmesh); + t8_cmesh_commit (cmesh, comm); + return cmesh; +} + +/** + * Create a partitioned cmesh. If no msh_file is given, a new hybrid cmesh is created. + * + * \param[in] msh_file + * \param[in] mesh_dim + * \param[in] comm + * \param[in] init_level + * \return t8_cmesh_t + */ +t8_cmesh_t +t8_benchmark_forest_create_cmesh ( sc_MPI_Comm comm, const int init_level, const t8_eclass_t eclass) +{ + T8_ASSERT (eclass != T8_ECLASS_INVALID); + t8_cmesh_t cmesh; + if (eclass == T8_ECLASS_TET) { + cmesh = two_tets (comm); + } + else if (eclass == T8_ECLASS_PYRAMID) { + cmesh = t8_cmesh_new_pyramid_cake (comm, 8); + } + else { + cmesh = t8_cmesh_new_hypercube (eclass, comm, false, false, false); + } + t8_cmesh_t cmesh_partition; + t8_cmesh_init (&cmesh_partition); + t8_cmesh_set_derive (cmesh_partition, cmesh); + t8_cmesh_set_partition_uniform (cmesh_partition, init_level, t8_scheme_new_default ()); + t8_cmesh_set_profiling (cmesh_partition, 1); + t8_cmesh_commit (cmesh_partition, comm); + return cmesh_partition; +} + + +/* refine the forest in a band, given by a plane E and two constants + * c_min, c_max. 
We refine the cells in the band c_min*E, c_max*E */ +static int +t8_adapt_pyramid ([[maybe_unused]] t8_forest_t forest, [[maybe_unused]] t8_forest_t forest_from, [[maybe_unused]] t8_locidx_t which_tree, + [[maybe_unused]]t8_eclass_t tree_class, + [[maybe_unused]] t8_locidx_t lelement_id, [[maybe_unused]]const t8_scheme *scheme, [[maybe_unused]]const int is_family, + [[maybe_unused]] const int num_elements, t8_element_t *elements[]) +{ + const t8_dpyramid_t *pyra = (const t8_dpyramid_t *) elements[0]; + const int type = pyra->pyramid.type; + if (type == 6 || type == 0 || type == 2 || type == 4 ){ + return 1; + } + else { + return 0; + } +} + +static int +t8_adapt_second ([[maybe_unused]] t8_forest_t forest, [[maybe_unused]] t8_forest_t forest_from, [[maybe_unused]] t8_locidx_t which_tree, + [[maybe_unused]]t8_eclass_t tree_class, + [[maybe_unused]] t8_locidx_t lelement_id, [[maybe_unused]]const t8_scheme *scheme, [[maybe_unused]]const int is_family, + [[maybe_unused]] const int num_elements, t8_element_t *elements[]) +{ + const int child_id = scheme->element_get_child_id (tree_class, elements[0]); + return child_id % 2; +} + +static void +benchmark_band_adapt(t8_cmesh_t cmesh, sc_MPI_Comm comm, const int init_level, const t8_eclass_t eclass) +{ + double adapt_time = 0; + double partition_time = 0; + double new_time = 0; + double total_time = 0; + double ghost_time = 0; + t8_locidx_t ghost_sent = 0; + double balance_time = 0; + const int num_stats = 7; + std::array times; + sc_stats_init (×[0], "new"); + sc_stats_init (×[1], "adapt"); + sc_stats_init (×[2], "partition"); + sc_stats_init (×[3], "ghost"); + sc_stats_init (×[4], "ghost_sent"); + sc_stats_init (×[5], "balance"); + sc_stats_init (×[6], "total"); + + t8_forest_t forest; + t8_forest_init (&forest); + t8_forest_set_cmesh(forest, cmesh, comm); + t8_forest_set_scheme (forest, t8_scheme_new_default ()); + t8_forest_set_level (forest, init_level); + + total_time -= sc_MPI_Wtime (); + + new_time -= sc_MPI_Wtime 
(); + t8_forest_commit (forest); + new_time += sc_MPI_Wtime (); + + sc_stats_set1 (×[0], new_time, "new"); + + + t8_forest_t forest_adapt, forest_partition; + t8_forest_init (&forest_adapt); + if (eclass == T8_ECLASS_PYRAMID) { + t8_forest_set_adapt (forest_adapt, forest, t8_adapt_pyramid, 0); + } + else { + t8_forest_set_adapt (forest_adapt, forest, t8_adapt_second, 0); + } + t8_forest_set_profiling (forest_adapt, 1); + + + t8_forest_commit (forest_adapt); + adapt_time += t8_forest_profile_get_adapt_time(forest_adapt); + + t8_forest_ref (forest_adapt); + + t8_forest_init (&forest_partition); + t8_forest_set_partition(forest_partition, forest_adapt, 0); + t8_forest_set_profiling (forest_partition, 1); + t8_forest_set_ghost (forest_partition, 1, T8_GHOST_FACES); + t8_forest_set_balance (forest_partition, NULL, 0); + + t8_forest_commit (forest_partition); + forest = forest_partition; + int procs_sent = 0; + int balance_rounds = 0; + partition_time += t8_forest_profile_get_partition_time (forest_partition, &procs_sent); + ghost_time += t8_forest_profile_get_ghost_time (forest_partition, &ghost_sent); + balance_time += t8_forest_profile_get_balance_time (forest_partition, &balance_rounds); + + + t8_forest_unref (&forest_adapt); + + total_time += sc_MPI_Wtime (); + + sc_stats_accumulate (×[0], new_time); + sc_stats_accumulate (×[1], adapt_time); + sc_stats_accumulate (×[2], partition_time); + sc_stats_accumulate (×[3], ghost_time); + sc_stats_accumulate (×[4], ghost_sent); + sc_stats_accumulate (×[5], balance_time); + sc_stats_accumulate (×[6], total_time); + sc_stats_compute (comm, num_stats, times.data ()); + sc_stats_print (t8_get_package_id (), SC_LP_PRODUCTION, num_stats, times.data (), 1, 1); + t8_forest_unref (&forest_partition); +} + +int +main (int argc, char **argv) +{ + + /* Initialize MPI. This has to happen before we initialize sc or t8code. 
*/ + int mpiret = sc_MPI_Init (&argc, &argv); + int help = 0; + int initial_level; + int eclass_int; + int num_runs; + + /* Error check the MPI return value. */ + SC_CHECK_MPI (mpiret); + + /* Initialize the sc library, has to happen before we initialize t8code. */ + sc_init (sc_MPI_COMM_WORLD, 1, 1, NULL, SC_LP_ESSENTIAL); + /* Initialize t8code with log level SC_LP_PRODUCTION. See sc.h for more info on the log levels. */ + t8_init (SC_LP_PRODUCTION); + + sc_options_t *options = sc_options_new (argv[0]); + + sc_options_add_switch (options, 'h', "help", &help, "Print this help message and exit"); + sc_options_add_int (options, 'e', "eclass", &eclass_int, 0, + "0: Tetrahedron, 1: Hexahedron, 2: Prism, 3: Pyramid"); + sc_options_add_int (options, 'l', "level", &initial_level, 0, "The initial uniform refinement level of the forest."); + sc_options_add_int (options, 'n', "num-runs", &num_runs, 1, + "The number of runs to perform. If specified, the program will run num_runs times with the same parameters. 
"); + const int options_argc = sc_options_parse (t8_get_package_id (), SC_LP_DEFAULT, options, argc, argv); + + if( options_argc <= 0 || options_argc != argc || help ) + { + sc_options_print_usage (t8_get_package_id (), SC_LP_ERROR, options, NULL); + return 1; + } + t8_eclass_t eclass = T8_ECLASS_INVALID; + + switch (eclass_int) + { + case 0: + eclass = T8_ECLASS_TET; + break; + case 1: + eclass = T8_ECLASS_HEX; + break; + case 2: + eclass = T8_ECLASS_PRISM; + break; + case 3: + eclass = T8_ECLASS_PYRAMID; + break; + default: + break; + } + + for (int irun = 0; irun < num_runs; ++irun) { + t8_global_essentialf ("#################### Run %d of %d ####################\n", irun + 1, num_runs); + t8_cmesh_t cmesh = t8_benchmark_forest_create_cmesh (sc_MPI_COMM_WORLD, initial_level, eclass); + + + benchmark_band_adapt (cmesh, sc_MPI_COMM_WORLD, initial_level, eclass); + } + + sc_options_destroy (options); + sc_finalize (); + + mpiret = sc_MPI_Finalize (); + SC_CHECK_MPI (mpiret); + + return 0; +} diff --git a/paper_files/New_for_hybrid/evaluate/create_graphic.py b/paper_files/New_for_hybrid/evaluate/create_graphic.py new file mode 100755 index 0000000..a92e95c --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/create_graphic.py @@ -0,0 +1,337 @@ +import sys +import matplotlib.pyplot as plt + +def extract_data(file_path, columns): +# The file consists of multiple exectuings with differen number of procs. +# Each execution starts with a line containing: +# ------------- Running: with procs ------------- +# In args we find the argument -n which states how often the command is executed. +# Each executin of the command starts with a line containing: +# [t8] #################### Run i of n #################### +# At the end of each run there is a line containing: +# [t8] Summary = [ time time time .... time ]; +# To extract the data we need to find the lines containing: +# ------------- Running: with procs ------------- +# until the end of the file. 
+ data = [] + with open(file_path, 'r') as file: + lines = file.readlines() # Read all lines into a list + for line in lines: + if "------------- Running:" in line and "with" in line and "procs" in line: + parts = line.split("------------- Running:")[1].strip().split("with") + args = parts[0].strip() + procs = parts[1].strip().split()[0] + # Ensure the last element_type is associated with the current procs + element_type = data[-1]["element_type"] if data and "element_type" in data[-1] else "Unknown" + data.append({"element_type": element_type, "procs": int(procs)}) + # Extract the number of runs from the args + # Assuming args is in the form of '-n ' + n_value = None + if any(arg.startswith('-n') and arg[2:].isdigit() for arg in args.split()): + n_value = next(arg[2:] for arg in args.split() if arg.startswith('-n') and arg[2:].isdigit()) + args_list = args.split() # Split args into a list of arguments + else: + print("No -n argument found in args.") + # for each run we need to find the line containing: + # [t8] Summary = [ time time time .... 
time ]; + current_run_summaries = [] + for run_line in lines[lines.index(line) + 1:]: + if "[t8] #################### Run" in run_line: + # find the number of the current run + run_number = run_line.split("[t8] #################### Run")[1].split("of")[0].strip() + continue # Skip the run header lines + if "[t8] Summary = [" in run_line: + # Extract the summary data + summary_data = run_line.split("[t8] Summary = [")[1].split("]")[0].strip().split() + print(f"Extracted summary data: {summary_data}") + print("\n") + # Filter the times based on the specified columns + summary_data = [summary_data[col] for col in columns if col < len(summary_data)] + # Convert the summary data to floats and filter based on columns + current_run_summaries.append([float(value) for value in summary_data]) + if "------------- Running:" in run_line: + break # Exit the inner loop to reprocess the line in the outer loop + # Compute the average of the times, given by the columns + if current_run_summaries: + avg_summary = [sum(x) / len(x) for x in zip(*current_run_summaries)] + # Append the average summary to the data + data[-1]["summaries"] = avg_summary + else: + print("No summaries found for this run.") + return data + +def extract_data_elems(file_path, columns): + print(f"Extracting data from {file_path} for columns {columns}") +# The file consists of multiple exectuings with differen number of procs. +# Each execution starts with a line containing: +# ------------- Running: with procs ------------- +# In args we find the argument -n which states how often the command is executed. +# Each executin of the command starts with a line containing: +# [t8] #################### Run i of n #################### +# At the end of each run there is a line containing: +# [t8] Summary = [ time time time .... time ]; +# To extract the data we need to find the lines containing: +# ------------- Running: with procs ------------- +# until the end of the file. 
+ data = [] + with open(file_path, 'r') as file: + lines = file.readlines() # Read all lines into a list + element_type = None + num_elements = 0 + for line in lines: + if "TEST " in line: + element_type = line.split("TEST")[1].strip() + num_elements = 0 # Reset num_elements for each new element type + elif "------------- Running:" in line and "with" in line and "procs" in line: + parts = line.split("------------- Running:")[1].strip().split("with") + args = parts[0].strip() + procs = parts[1].strip().split()[0] + data.append({"element_type": element_type, "procs": int(procs)}) + # Extract the number of runs from the args + # Assuming args is in the form of '-n ' + n_value = None + if any(arg.startswith('-n') and arg[2:].isdigit() for arg in args.split()): + n_value = next(arg[2:] for arg in args.split() if arg.startswith('-n') and arg[2:].isdigit()) + args_list = args.split() # Split args into a list of arguments + else: + print("No -n argument found in args.") + # for each run we need to find the line containing: + # [t8] Summary = [ time time time .... 
time ]; + current_run_summaries = [] + for run_line in lines[lines.index(line) + 1:]: + if "[t8] #################### Run" in run_line: + # find the number of the current run + run_number = run_line.split("[t8] #################### Run")[1].split("of")[0].strip() + continue # Skip the run header lines + if "Done t8_forest_balance with" in run_line and num_elements == 0: + # Extract the number of elements from the line + num_elements = int(run_line.split("with")[1].split("global elements")[0].strip()) + data[-1]["num_elements"] = num_elements + if "[t8] Summary = [" in run_line: + # Extract the summary data + summary_data = run_line.split("[t8] Summary = [")[1].split("]")[0].strip().split() + # Filter the times based on the specified columns + summary_data = [summary_data[col] for col in columns if col < len(summary_data)] + print(f"Extracted summary data for type {element_type}: {summary_data}") + # Convert the summary data to floats and filter based on columns + current_run_summaries.append([float(value) for value in summary_data]) + print(f"Current run summaries: {current_run_summaries}") + if "------------- Running:" in run_line: + break # Exit the inner loop to reprocess the line in the outer loop + # Compute the average of the times, given by the columns + avg_summary = [sum(x) / len(x) for x in zip(*current_run_summaries)] + # Append the average summary to the data + data[-1]["summaries"] = avg_summary + return data + +def create_graphics(num_files, names, graph_name_base, data_base): + plt.figure(figsize=(10, 6)) + plt.xlabel('Number of Processes', fontsize=16) + plt.ylabel('Average Time (s)', fontsize=16) + plt.xscale('log', base=2) + plt.yscale('log', base=10) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14) + + plt.title('Performance Comparison', fontsize=16) + print(data_base) + for i in range(int(num_files)): + entry = data_base[i][0] + procs = entry["procs"] + summaries = entry["summaries"] + total = sum(summaries) + color = plt.cm.tab10(i % 10) + # 
Use a line plot (single-point plotted as a marker; use sequences to connect points if desired) + if i == 0: + procs_list = [] + totals_list = [] + procs_list.append(procs) + totals_list.append(total) + # draw/update a line that connects the accumulated points + plt.plot(procs_list, totals_list, marker='o', linestyle='-', color='black', label=graph_name_base if i == 0 else None) + #bottom = 0 + #for j, value in enumerate(summaries): + # color = plt.cm.tab10(j % 10) # Use a consistent color for each section + # plt.bar(procs, value, bottom=bottom, label=f"{names[j]}" if i == 0 else "", width=procs / 10, color=color) + # bottom += value + + + plt_name = f"graph_{graph_name_base}.png" + plt.legend(fontsize=14) + plt.savefig(plt_name) + print(f"Graph saved as {plt_name}") + +def create_graphics_elem(names, num_files, data): + compute_ghosts = False + for i in range(len(names)): + if "Ghost" in names[i]: + compute_ghosts = True + if "Ghost_sent" in names[i]: + continue + + plt.figure(figsize=(10, 6)) + plt.xlabel('Number of Processes', fontsize=16) + plt.ylabel('Average Time (s)', fontsize=16) + plt.xscale('log', base=2) + plt.yscale('log', base=10) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14) + plt.title(f'Performance Comparison for {names[i]}', fontsize=16) + color_map = { + "TETRAHEDRON": "green", + "HEXAHEDRON": "blue", + "PRISM": "red", + "PYRAMID": "orange" + } + name_map = { + "TETRAHEDRON": "Tetrahedron", + "HEXAHEDRON": "Hexahedron", + "PRISM": "Prism", + "PYRAMID": "Pyramid" + } + for ifile in range(int(num_files)): + element_types = set(entry["element_type"] for entry in data[ifile]) + for element_type in element_types: + element_data = [entry for entry in data[ifile] if entry["element_type"] == element_type] + procs = [entry["procs"] for entry in element_data] + times = [entry["summaries"][i] for entry in element_data] + if compute_ghosts: + ghost_sent = [entry["summaries"][i+1] for entry in element_data] + parallel_efficiency = [times[0] * 
ghost_sent[j] / (times[j] * ghost_sent[0]) for j in range(len(times))] + print(f"Parallel Efficiency for {element_type}: {parallel_efficiency}") + if element_type == "PYRAMID": + print(f"Times for PYRAMID: {times}") + print(f"Ghost Sent for PYRAMID: {ghost_sent}") + color = color_map.get(element_type, "gray") + marker_map = { + "TETRAHEDRON": "^", # triangle_up + "HEXAHEDRON": "s", # square + "PRISM": "8", # octagon + "PYRAMID": "D" # diamond + } + marker = marker_map.get(element_type, "o") + plt.plot(procs, times, label=name_map.get(element_type, element_type) if ifile == 0 else None, + marker=marker, color=color, linestyle='-') + + if element_type == "PYRAMID": + if compute_ghosts: + ghost_sent = [entry["summaries"][i+1] for entry in element_data] + ideal_scaling = [times[0]] * len(procs) + for j in range(1, len(procs)): + ideal_scaling[j] = ideal_scaling[j-1] * (ghost_sent[j] / ghost_sent[j-1]) + #print(f"Times for PYRAMID: {times}") + #print(f"Ghost Sent for PYRAMID: {ghost_sent}") + #print(f"Ideal Scaling for PYRAMID: {ideal_scaling}") + plt.plot(procs, ideal_scaling, label="Ideal Strong Scaling" if ifile == 0 else None, color='black', linestyle='dashed') + else: + ideal_scaling = [times[0] / 2**iproc for iproc in range(len(procs))] + plt.plot(procs, ideal_scaling, label="Ideal Strong Scaling" if ifile == 0 else None, color='black', linestyle='dashed') + if ifile == 0: + #if compute_ghosts: + # elem_data = [[entry for entry in data[j] if entry["element_type"] == "PYRAMID"] for j in range(int(num_files))] + # ghost_sent = [[entry["summaries"][i+1] for entry in elem_data[j]] for j in range(int(num_files))] + # print(f"Ghost Sent for PYRAMID: {ghost_sent}") + # for iproc, proc in enumerate(procs): + # val = times[iproc] + # ideal_weak_scaling = [val ] * int(num_files) + # for j in range(1, int(num_files)): + # ideal_weak_scaling[j] = ideal_weak_scaling[j-1] * (ghost_sent[j][iproc] / ghost_sent[j-1][iproc]) + # shifted_procs = [proc * 8**i for i in 
range(int(num_files))] + # plt.plot(shifted_procs, ideal_weak_scaling, color='black', linestyle='dotted', label="Ideal Weak Scaling" if iproc == 0 and ifile == 0 else None) + #else: + num_elements = element_data[ifile].get("num_elements", 1) + num_elements_next_file = next( + (entry.get("num_elements", 1) for entry in data[ifile + 1] if entry.get("element_type") == "PYRAMID"), + 1 + ) + element_scaling = num_elements_next_file / num_elements + for iproc, proc in enumerate(procs): + val = times[iproc] + ideal_weak_scaling = [val * (element_scaling**i) / (8**i) for i in range(int(num_files))] + shifted_procs = [proc * 8**i for i in range(int(num_files))] + plt.plot(shifted_procs, ideal_weak_scaling, color='black', linestyle='dotted', label="Ideal Weak Scaling" if iproc == 0 and ifile == 0 else None) + + plt.grid() + plt.legend() + plt.legend(loc='lower left', fontsize=14) + handles, labels = plt.gca().get_legend_handles_labels() + order = [] + label_order = ["Pyramid", "Prism", "Tetrahedron", "Hexahedron", "Ideal Strong Scaling", "Ideal Weak Scaling"] + for lbl in label_order: + if lbl in labels: + order.append(labels.index(lbl)) + plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], loc='lower left') + plt_name = f"graph_{names[i]}.png" + plt.savefig(plt_name) + print(f"Graph saved as {plt_name}") + if "Ghost" in names[i]: + compute_ghosts = False + +def compare_mesh(): + num_files = sys.argv[2] + + base = sys.argv[3:3 + int(num_files)] + + additional_args_index = 3 + int(num_files) + names = [name.strip() for name in sys.argv[additional_args_index].split(',')] + + indices = list(map(int, sys.argv[additional_args_index + 1].split(','))) + + if len(names) != len(indices): + print("Error: The number of names and indices must be the same.") + sys.exit(1) + + graph_name_base = sys.argv[additional_args_index + 2] + + data_base = [] + + for file in range(int(num_files)): + data_base.append(extract_data(base[file], indices)) + + 
create_graphics(num_files, names, graph_name_base, data_base) + +def compare_elements(): + if (len(sys.argv) < 6): + print("Usage: python create_graphic.py elements <num_files> <file_1> ... <file_n> <names> <indices> <graph_name>") + sys.exit(1) + + num_files = sys.argv[2] + print (num_files) + base = sys.argv[3:3 + int(num_files)] + print(base) + additional_args_index = 3 + int(num_files) + + names = [name.strip() for name in sys.argv[additional_args_index].split(',')] + print (names) + indices = list(map(int, sys.argv[additional_args_index + 1].split(','))) + + graph_name_base = sys.argv[additional_args_index + 2] + + data_base = [] + for file in range(int(num_files)): + data_base.append(extract_data_elems(base[file], indices)) + + print(f"Data extracted from {base}: {data_base}") + + create_graphics_elem(names, num_files, data_base) + + +def main(): + if len(sys.argv) < 2: + print("Usage: python create_graphic.py <mode> [args]") + sys.exit(1) + + mode = sys.argv[1] + + if mode == "mesh": + compare_mesh() + elif mode == "elements": + compare_elements() + else: + print("Error: Invalid mode. 
Use 'mesh' or 'elements'.") + sys.exit(1) + + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/paper_files/New_for_hybrid/evaluate/graph_Ghost.png b/paper_files/New_for_hybrid/evaluate/graph_Ghost.png new file mode 100644 index 0000000..f1469fd --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_Ghost.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736b0c4c48666d8ddd5944b209db5368e25e8864f913b9a75b286a5d3a14b5b1 +size 122928 diff --git a/paper_files/New_for_hybrid/evaluate/graph_adapt.png b/paper_files/New_for_hybrid/evaluate/graph_adapt.png new file mode 100644 index 0000000..ab550b2 --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_adapt.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96450c8819686d3d17a2dedd6fc51a41ed3c428a88efd9f6f29f907cdfb2b8d7 +size 108083 diff --git a/paper_files/New_for_hybrid/evaluate/graph_new.png b/paper_files/New_for_hybrid/evaluate/graph_new.png new file mode 100644 index 0000000..e7d3ffe --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_new.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399fe221a0be513f83e09c7e06ad33bf0f6bfddbfe798ba497bae2a3ab2054c4 +size 110444 diff --git a/paper_files/New_for_hybrid/evaluate/graph_parition.png b/paper_files/New_for_hybrid/evaluate/graph_parition.png new file mode 100644 index 0000000..868d205 --- /dev/null +++ b/paper_files/New_for_hybrid/evaluate/graph_parition.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e98003e6743ed5c02e27380b409c281f269704b3a3bae253e8d0e5c8cfdf6e8 +size 120657 diff --git a/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh b/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh new file mode 100644 index 0000000..840da63 --- /dev/null +++ b/paper_files/New_for_hybrid/jobs/benchmark_eclass.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# This is a batch-file used on CARA. 
CARA has 2,168 nodes, each with 2x AMD EPYC (32 cores) + + +#SBATCH --time=00:10:00 +#SBATCH --ntasks=64 +#SBATCH --output=New_for_hybrid_ELEM_%j +#SBATCH --error=New_for_hybrid_ELEM_err_%j +NUM_PROCS="8 16 32 64" +ELEMENT_NAMES=("TETRAHEDRON" "HEXAHEDRON" "PRISM" "PYRAMID") + +# e = Element type +# l = initial level +# n = number of reruns + +PART_ARGS="-l4 -n3" + + +JOBFILE="/scratch/ws/4/knap_da-t8code_timings/benchmark/benchmark" + +for i in {0..3}; do + ELEMENT=${ELEMENT_NAMES[$i]} + echo "TEST $ELEMENT" + if [ "$i" -eq 3 ]; then + LVAL=$(echo $PART_ARGS | sed -n 's/.*-l\([0-9]\+\).*/\1/p') + NEW_LVAL=$((LVAL - 1)) + PART_ARGS="-l$NEW_LVAL -n3" + echo "Adjusting level to $NEW_LVAL for PYRAMID element" + fi + ARGS="$PART_ARGS -e$i" + JOB_CMD="$JOBFILE $ARGS" + for PROCS in $NUM_PROCS ; do + echo "------------- Running: $JOB_CMD with $PROCS procs ------------" + srun -n $PROCS $JOB_CMD & + done +done + diff --git a/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh b/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh new file mode 100644 index 0000000..5e18310 --- /dev/null +++ b/paper_files/New_for_hybrid/jobs/benchmark_mesh.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# This is a batch-file used on CARA. 
CARA has 2,168 nodes, each with 2x AMD EPYC (32 cores) + + +#SBATCH --time=00:10:00 +#SBATCH --ntasks=128 +#SBATCH --output=Mesh_%j +#SBATCH --error=Mesh_err_%j +NUM_PROCS="128" + +# d = Dimension +# l = initial level +# r = number of refinements +# x = left wall of the mesh +# t = thickness of the refinement region +# D = distance of the refinement region to travel +# s = number of steps to travel +# n = number of reruns +# b = balance after each refinement step +# g = ghost cells + +PART_ARGS="-d3 -l5 -r2 -x-0.5 -t0.2 -D2 -s5 -n2 -g" + + +JOBFILE="/scratch_fast/ws/0/knap_da-t8code_benchmarks/benchmark_build/benchmark" + +MSH_FILE="/scratch_fast/ws/0/knap_da-t8code_benchmarks/t8data/paper_files/New_for_hybrid/tonne_100k" + +JOB_CMD="$JOBFILE -f $MSH_FILE $PART_ARGS" +for PROCS in $NUM_PROCS ; do + echo "------------- Running: $JOB_CMD with $PROCS procs ------------" + srun -n $PROCS $JOB_CMD & +done + diff --git a/paper_files/New_for_hybrid/tonne_100k.msh b/paper_files/New_for_hybrid/tonne_100k.msh new file mode 100755 index 0000000..cc12e91 --- /dev/null +++ b/paper_files/New_for_hybrid/tonne_100k.msh @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde35ae4fd98b6678c4d706abf925392febee5de1d8a042e4b81ce41ccd2245e +size 6071157