SciCompMod · julianlitz · May 16, 2026 · May 16, 2026 · May 18, 2026 · May 20, 2026
diff --git a/include/GMGPolar/gmgpolar.h b/include/GMGPolar/gmgpolar.h
@@ -128,6 +128,7 @@ class GMGPolar : public IGMGPolar
     double mean_residual_reduction_factor_;
     bool converged(double current_residual_norm, double first_residual_norm);
 
+public: // Public due to cuda restrictions
     /* ---------------------------------------------------- */
     /* Compute exact error if an exact solution is provided */
     // The results are stored as a pair: (weighted L2 error, infinity error).
@@ -138,15 +139,19 @@ class GMGPolar : public IGMGPolar
 
     /* --------------- */
     /* Setup Functions */
-    int chooseNumberOfLevels(const PolarGrid& finest_grid);
-    // Public due to cuda restrictions
-public:
     template <concepts::BoundaryConditions BoundaryConditions, concepts::SourceTerm SourceTerm>
     void build_rhs_f(const Level<DomainGeometry, DensityProfileCoefficients>& level, HostVector<double> rhs_f,
                      const BoundaryConditions& boundary_conditions, const SourceTerm& source_term);
     void discretize_rhs_f(const Level<DomainGeometry, DensityProfileCoefficients>& level, HostVector<double> rhs_f);
 
+    /* --------------- */
+    /* Solve Functions */
+    void applyExtrapolation(int current_level, HostVector<double> fine_values, HostConstVector<double> coarse_values);
+
 private:
+    /* --------------- */
+    /* Setup Functions */
+    int chooseNumberOfLevels(const PolarGrid& finest_grid);
     bool checkUniformRefinement(const PolarGrid& grid, double tolerance) const;
 
     /* --------------- */
@@ -168,15 +173,13 @@ class GMGPolar : public IGMGPolar
                                   int iterations);
     void applyExtrapolatedMultigridIterations(Level<DomainGeometry, DensityProfileCoefficients>& level,
                                               MultigridCycleType cycle, int iterations);
-    // Compute the extrapolated values: u_ex = 4/3 u_fine - 1/3 u_coarse
-    void applyExtrapolation(int current_level, HostVector<double> fine_values, HostConstVector<double> coarse_values);
 
     /* ----------------- */
     /* Print information */
     void printSettings(const PolarGrid& finest_grid, const PolarGrid& coarsest_grid) const;
-    void printIterationHeader(const ExactSolution* exact_solution);
+    void printIterationHeader(const bool is_exact_solution_provided);
     void printIterationInfo(int iteration, double current_residual_norm, double current_relative_residual_norm,
-                            const ExactSolution* exact_solution);
+                            const bool is_exact_solution_provided);
 
     /* ------------------- */
     /* Multigrid Functions */
@@ -219,4 +222,5 @@ class GMGPolar : public IGMGPolar
 #include "MultigridMethods/extrapolated_multigrid_V_Cycle.h"
 #include "MultigridMethods/multigrid_F_Cycle.h"
 #include "MultigridMethods/multigrid_W_Cycle.h"
+
 } // namespace gmgpolar
diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h
@@ -96,8 +96,9 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solve(const BoundaryC
     LIKWID_STOP("Solver");
     auto start_check_exact_error = std::chrono::high_resolution_clock::now();
 
-    if (exact_solution_ != nullptr)
+    if (exact_solution_) {
         evaluateExactError(level, *exact_solution_);
+    }
 
     auto end_check_exact_error = std::chrono::high_resolution_clock::now();
     t_check_exact_error_ += std::chrono::duration<double>(end_check_exact_error - start_check_exact_error).count();
@@ -169,7 +170,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solve(const BoundaryC
 
     if (paraview_) {
         writeToVTK("output_solution", level, level.solution());
-        if (exact_solution_ != nullptr) {
+        if (exact_solution_) {
             computeExactError(level, level.solution(), level.residual(), *exact_solution_);
             writeToVTK("output_error", level, level.residual());
         }
@@ -244,8 +245,9 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solveMultigrid(double
         LIKWID_STOP("Solver");
         auto start_check_exact_error = std::chrono::high_resolution_clock::now();
 
-        if (exact_solution_ != nullptr)
+        if (exact_solution_) {
             evaluateExactError(level, *exact_solution_);
+        }
 
         auto end_check_exact_error = std::chrono::high_resolution_clock::now();
         t_check_exact_error_ += std::chrono::duration<double>(end_check_exact_error - start_check_exact_error).count();
@@ -511,46 +513,48 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::applyExtrapolation(in
     assert(std::ssize(fine_values) == fineGrid.numberOfNodes());
     assert(std::ssize(coarse_values) == coarseGrid.numberOfNodes());
 
-#pragma omp parallel
-    {
-/* Circluar Indexing Section */
-/* For loop matches circular access pattern */
-#pragma omp for nowait
-        for (int i_r = 0; i_r < fineGrid.numberSmootherCircles(); i_r++) {
-            int i_r_coarse = i_r / 2;
-            for (int i_theta = 0; i_theta < fineGrid.ntheta(); i_theta++) {
-                int i_theta_coarse = i_theta / 2;
-
-                if (i_r & 1 || i_theta & 1) {
-                    fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
-                }
-                else {
-                    int fine_idx          = fineGrid.index(i_r, i_theta);
-                    int coarse_idx        = coarseGrid.index(i_r_coarse, i_theta_coarse);
-                    fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
-                }
+    /* We split the loops into two regions to better respect the */
+    /* access patterns of the smoother and improve cache locality. */
+
+    /* Circular Indexing Section */
+    /* For loop matches circular access pattern */
+    Kokkos::parallel_for(
+        "Extrapolation: Apply Extrapolation (Circular)",
+        Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
+            {0, 0}, {fineGrid.numberSmootherCircles(), fineGrid.ntheta()}),
+        KOKKOS_LAMBDA(const int i_r, const int i_theta) {
+            const int i_r_coarse     = i_r / 2;
+            const int i_theta_coarse = i_theta / 2;
+            if (i_r & 1 || i_theta & 1) {
+                fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
             }
-        }
-
-/* Radial Indexing Section */
-/* For loop matches radial access pattern */
-#pragma omp for nowait
-        for (int i_theta = 0; i_theta < fineGrid.ntheta(); i_theta++) {
-            int i_theta_coarse = i_theta / 2;
-            for (int i_r = fineGrid.numberSmootherCircles(); i_r < fineGrid.nr(); i_r++) {
-                int i_r_coarse = i_r / 2;
-
-                if (i_r & 1 || i_theta & 1) {
-                    fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
-                }
-                else {
-                    int fine_idx          = fineGrid.index(i_r, i_theta);
-                    int coarse_idx        = coarseGrid.index(i_r_coarse, i_theta_coarse);
-                    fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
-                }
+            else {
+                const int fine_idx    = fineGrid.index(i_r, i_theta);
+                const int coarse_idx  = coarseGrid.index(i_r_coarse, i_theta_coarse);
+                fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
             }
-        }
-    }
+        });
+
+    /* Radial Indexing Section */
+    /* For loop matches radial access pattern */
+    Kokkos::parallel_for(
+        "Extrapolation: Apply Extrapolation (Radial)",
+        Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, fineGrid.numberSmootherCircles()},
+                                                                                  {fineGrid.ntheta(), fineGrid.nr()}),
+        KOKKOS_LAMBDA(const int i_theta, const int i_r) {
+            const int i_r_coarse     = i_r / 2;
+            const int i_theta_coarse = i_theta / 2;
+            if (i_r & 1 || i_theta & 1) {
+                fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
+            }
+            else {
+                const int fine_idx    = fineGrid.index(i_r, i_theta);
+                const int coarse_idx  = coarseGrid.index(i_r_coarse, i_theta_coarse);
+                fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
+            }
+        });
+
+    Kokkos::fence();
 }
 
 // =============================================================================
@@ -591,31 +595,46 @@ std::pair<double, double> GMGPolar<DomainGeometry, DensityProfileCoefficients>::
     assert(solution.size() == error.size());
     assert(std::ssize(solution) == grid.numberOfNodes());
 
-#pragma omp parallel
-    {
-#pragma omp for nowait
-        for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
-            double r = grid.radius(i_r);
-            for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
-                double theta = grid.theta(i_theta);
-                error[grid.index(i_r, i_theta)] =
-                    exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)];
-            }
-        }
-#pragma omp for nowait
-        for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
-            double theta = grid.theta(i_theta);
-            for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
-                double r = grid.radius(i_r);
-                error[grid.index(i_r, i_theta)] =
-                    exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)];
-            }
-        }
-    }
-    HostConstVector<double> c_error = error;
-    double weighted_euclidean_error = l2_norm(c_error) / std::sqrt(grid.numberOfNodes());
-    double infinity_error           = infinity_norm(c_error);
-
+    /* Fill exact solution on the host. */
+    // Note: We must compute the exact solution values on the host because the
+    //       ExactSolution object is not designed to be used on the device.
+    //       As it is planned that the solution and error vector will use device memory during GPU porting,
+    //       we need to transfer the exact solution values from host to device using the Kokkos::deep_copy operation.
+    HostVector<double> exact_solution_values_host("Exact Solution Values Host", grid.numberOfNodes());
+
+    Kokkos::parallel_for(
+        "Exact Error: Compute Exact Solution Values (Circular)",
+        Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>( // Host parallel loop
+            {0, 0}, // Starting index
+            {grid.numberSmootherCircles(), grid.ntheta()}), // Ending index
+        [&](const int i_r, const int i_theta) {
+            const double radius               = grid.radius(i_r);
+            const double theta                = grid.theta(i_theta);
+            const int index                   = grid.index(i_r, i_theta);
+            exact_solution_values_host[index] = exact_solution.exact_solution(radius, theta);
+        });
+    Kokkos::parallel_for(
+        "Exact Error: Compute Exact Solution Values (Radial)",
+        Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>( // Host parallel loop
+            {0, grid.numberSmootherCircles()}, // Starting index
+            {grid.ntheta(), grid.nr()}), // Ending index
+        [&](const int i_theta, const int i_r) {
+            const double radius               = grid.radius(i_r);
+            const double theta                = grid.theta(i_theta);
+            const int index                   = grid.index(i_r, i_theta);
+            exact_solution_values_host[index] = exact_solution.exact_solution(radius, theta);
+        });
+    Kokkos::fence();
+
+    // Transfer the exact solution values from host to device memory for error computation.
+    Kokkos::deep_copy(error, exact_solution_values_host);
+
+    // Compute the error as the difference between the exact and numerical solutions.
+    subtract(error, HostConstVector<double>(solution));
+
+    // Compute the weighted L2 norm and infinity norm of the error.
+    const double weighted_euclidean_error = l2_norm(HostConstVector<double>(error)) / std::sqrt(grid.numberOfNodes());
+    const double infinity_error           = infinity_norm(HostConstVector<double>(error));
     return std::make_pair(weighted_euclidean_error, infinity_error);
 }
 
@@ -645,7 +664,7 @@ bool GMGPolar<DomainGeometry, DensityProfileCoefficients>::converged(double resi
 // =============================================================================
 
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(const ExactSolution* exact_solution)
+void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(const bool is_exact_solution_provided)
 {
     if (verbose_ <= 0)
         return;
@@ -663,7 +682,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(
         else
             std::cout << std::setw(15 + table_spacing) << "||r_k||/||rhs||";
     }
-    if (exact_solution != nullptr) {
+    if (is_exact_solution_provided) {
         std::cout << std::setw(12 + table_spacing) << "||u-u_k||_l2";
         std::cout << std::setw(13 + table_spacing) << "||u-u_k||_inf";
     }
@@ -675,7 +694,7 @@ template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoeff
 void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationInfo(int iteration,
                                                                               double current_residual_norm,
                                                                               double current_relative_residual_norm,
-                                                                              const ExactSolution* exact_solution)
+                                                                              const bool is_exact_solution_provided)
 {
     if (verbose_ <= 0)
         return;
@@ -687,7 +706,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationInfo(in
         std::cout << std::setw(9 + table_spacing + 2) << current_residual_norm;
         std::cout << std::setw(15 + table_spacing) << current_relative_residual_norm;
     }
-    if (exact_solution != nullptr) {
+    if (is_exact_solution_provided) {
         std::cout << std::setw(12 + table_spacing) << exact_errors_.back().first;
         std::cout << std::setw(13 + table_spacing) << exact_errors_.back().second;
     }