From 8d46fd6fa25e4f3961f6a94a961c5c22d885890a Mon Sep 17 00:00:00 2001 From: KARA Abdelhadi 265943 Date: Tue, 19 May 2026 14:24:48 +0200 Subject: [PATCH 1/5] fill exact solution on host --- include/GMGPolar/solver.h | 48 ++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index 927327da..6206af38 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -591,28 +591,34 @@ std::pair GMGPolar:: assert(solution.size() == error.size()); assert(std::ssize(solution) == grid.numberOfNodes()); - -#pragma omp parallel num_threads(max_omp_threads_) - { -#pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - double r = grid.radius(i_r); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - double theta = grid.theta(i_theta); - error[grid.index(i_r, i_theta)] = - exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)]; - } - } -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + Vector solution_d("solution_d", solution.size()); + HostVector exact_sol_h("exact_sol_h", solution.size()); + Vector exact_sol_d("exact_sol", solution.size()); + Vector error_d("error_d", solution.size()); + + Kokkos::parallel_for( + "fill exact sol on host", + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + KOKKOS_LAMBDA(const int i_r, const int i_theta) { + double r = grid.radius(i_r); + double theta = grid.theta(i_theta); + exact_sol_h(grid.index(i_r, i_theta)) = exact_solution.exact_solution(r, theta); + }); + + Kokkos::deep_copy(exact_sol_d, exact_sol_h); + Kokkos::deep_copy(solution_d, solution); + + Kokkos::parallel_for( + "compute error on device", + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + KOKKOS_LAMBDA(const int i_r, const int i_theta) { + double r = grid.radius(i_r); double theta = grid.theta(i_theta); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - double r = grid.radius(i_r); - error[grid.index(i_r, i_theta)] = - exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)]; - } - } - } + error_d[grid.index(i_r, i_theta)] = + exact_sol_d(grid.index(i_r, i_theta)) - solution_d[grid.index(i_r, i_theta)]; + }); + + Kokkos::deep_copy(error, error_d); HostConstVector c_error = error; double weighted_euclidean_error = l2_norm(c_error) / std::sqrt(grid.numberOfNodes()); double infinity_error = infinity_norm(c_error); From a215fe67e1ea69f299a58eed57cc948581c1545b Mon Sep 17 00:00:00 2001 From: KARA Abdelhadi 265943 Date: Tue, 19 May 2026 17:03:35 +0200 Subject: [PATCH 2/5] compute and store exact solution once, pass it as a Vector --- include/GMGPolar/gmgpolar.h | 9 +++--- include/GMGPolar/solver.h | 56 +++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/include/GMGPolar/gmgpolar.h b/include/GMGPolar/gmgpolar.h index 1f46bfe3..875ad3bc 100644 --- a/include/GMGPolar/gmgpolar.h +++ b/include/GMGPolar/gmgpolar.h @@ -134,7 +134,7 @@ class GMGPolar : public IGMGPolar std::vector> exact_errors_; std::pair computeExactError(Level& level, HostConstVector solution, HostVector error, - const ExactSolution& exact_solution); + HostConstVector exact_solution); /* --------------- */ /* Setup Functions */ @@ -153,13 +153,14 @@ class GMGPolar : public IGMGPolar /* Solve Functions */ void fullMultigridApproximation(MultigridCycleType FMG_cycle, int FMG_iterations); void solveMultigrid(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm); - void solvePCG(double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm); + double& current_relative_residual_norm, HostConstVector exact_solution); + void solvePCG(double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm, + HostConstVector exact_solution); double residualNorm(const ResidualNormType& norm_type, const Level& level, HostConstVector residual) const; void evaluateExactError(Level& level, - const ExactSolution& exact_solution); + HostConstVector exact_solution); void updateResidualNorms(Level& level, int iteration, double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm); diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index 6206af38..d43a52cf 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -95,9 +95,20 @@ void GMGPolar::solve(const BoundaryC /* ---------------------------------------------- */ LIKWID_STOP("Solver"); auto start_check_exact_error = std::chrono::high_resolution_clock::now(); + // fill exact solution on host to avoid repeat same comptuation + HostVector exact_sol("exact_sol", level.solution().size()); + const PolarGrid& grid = level.grid(); + Kokkos::parallel_for( + "fill exact sol on host", + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + KOKKOS_LAMBDA(const int i_r, const int i_theta) { + double r = grid.radius(i_r); + double theta = grid.theta(i_theta); + exact_sol(grid.index(i_r, i_theta)) = exact_solution_->exact_solution(r, theta); + }); if (exact_solution_ != nullptr) - evaluateExactError(level, *exact_solution_); + evaluateExactError(level, exact_sol); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -121,7 +132,7 @@ void GMGPolar::solve(const BoundaryC if (!PCG_) { // Solve A*x = b directly using multigrid cycles (V/W/F-cycle) // until convergence or max_iterations_ is reached. - solveMultigrid(initial_residual_norm, current_residual_norm, current_relative_residual_norm); + solveMultigrid(initial_residual_norm, current_residual_norm, current_relative_residual_norm, exact_sol); } else { // Solve A*x = b using Preconditioned Conjugate Gradient (PCG), @@ -129,7 +140,7 @@ void GMGPolar::solve(const BoundaryC // cycle approximates the action of A^{-1} at each PCG iteration). auto start_conjugate_gradient = std::chrono::high_resolution_clock::now(); - solvePCG(initial_residual_norm, current_residual_norm, current_relative_residual_norm); + solvePCG(initial_residual_norm, current_residual_norm, current_relative_residual_norm, exact_sol); auto end_conjugate_gradient = std::chrono::high_resolution_clock::now(); t_conjugate_gradient_ += @@ -170,7 +181,7 @@ void GMGPolar::solve(const BoundaryC if (paraview_) { writeToVTK("output_solution", level, level.solution()); if (exact_solution_ != nullptr) { - computeExactError(level, level.solution(), level.residual(), *exact_solution_); + computeExactError(level, level.solution(), level.residual(), exact_sol); writeToVTK("output_error", level, level.residual()); } } @@ -215,7 +226,8 @@ void GMGPolar::fullMultigridApproxim template void GMGPolar::solveMultigrid(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm) + double& current_relative_residual_norm, + HostConstVector exact_solution) { Level& level = levels_[0]; @@ -245,7 +257,7 @@ void GMGPolar::solveMultigrid(double auto start_check_exact_error = std::chrono::high_resolution_clock::now(); if (exact_solution_ != nullptr) - evaluateExactError(level, *exact_solution_); + evaluateExactError(level, exact_solution); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -288,7 +300,8 @@ void GMGPolar::solveMultigrid(double template void GMGPolar::solvePCG(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm) + double& current_relative_residual_norm, + HostConstVector exact_solution) { Level& level = levels_[0]; @@ -355,7 +368,7 @@ void GMGPolar::solvePCG(double& init auto start_check_exact_error = std::chrono::high_resolution_clock::now(); if (exact_solution_ != nullptr) - exact_errors_.push_back(computeExactError(level, pcg_solution_, level.residual(), *exact_solution_)); + exact_errors_.push_back(computeExactError(level, pcg_solution_, level.residual(), exact_solution)); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -574,7 +587,7 @@ void GMGPolar::initRhsHierarchy(Host template void GMGPolar::evaluateExactError( - Level& level, const ExactSolution& exact_solution) + Level& level, HostConstVector exact_solution) { // Compute the weighted L2 norm and infinity norm of the error between the numerical and exact solution. // The results are stored as a pair: (weighted L2 error, infinity error). @@ -584,41 +597,24 @@ void GMGPolar::evaluateExactError( template std::pair GMGPolar::computeExactError( Level& level, HostConstVector solution, - HostVector error, const ExactSolution& exact_solution) + HostVector error, HostConstVector exact_solution) { const PolarGrid& grid = level.grid(); const LevelCache& levelCache = level.levelCache(); assert(solution.size() == error.size()); assert(std::ssize(solution) == grid.numberOfNodes()); - Vector solution_d("solution_d", solution.size()); - HostVector exact_sol_h("exact_sol_h", solution.size()); - Vector exact_sol_d("exact_sol", solution.size()); - Vector error_d("error_d", solution.size()); - - Kokkos::parallel_for( - "fill exact sol on host", - Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), - KOKKOS_LAMBDA(const int i_r, const int i_theta) { - double r = grid.radius(i_r); - double theta = grid.theta(i_theta); - exact_sol_h(grid.index(i_r, i_theta)) = exact_solution.exact_solution(r, theta); - }); - - Kokkos::deep_copy(exact_sol_d, exact_sol_h); - Kokkos::deep_copy(solution_d, solution); Kokkos::parallel_for( "compute error on device", - Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), KOKKOS_LAMBDA(const int i_r, const int i_theta) { double r = grid.radius(i_r); double theta = grid.theta(i_theta); - error_d[grid.index(i_r, i_theta)] = - exact_sol_d(grid.index(i_r, i_theta)) - solution_d[grid.index(i_r, i_theta)]; + error[grid.index(i_r, i_theta)] = + exact_solution(grid.index(i_r, i_theta)) - solution[grid.index(i_r, i_theta)]; }); - Kokkos::deep_copy(error, error_d); HostConstVector c_error = error; double weighted_euclidean_error = l2_norm(c_error) / std::sqrt(grid.numberOfNodes()); double infinity_error = infinity_norm(c_error); From e7baa1ef48ba14f532a42b9777c49f984ffbc127 Mon Sep 17 00:00:00 2001 From: KARA Abdelhadi 265943 Date: Tue, 19 May 2026 17:08:06 +0200 Subject: [PATCH 3/5] fix typo --- include/GMGPolar/solver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index d43a52cf..11f37922 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -95,7 +95,7 @@ void GMGPolar::solve(const BoundaryC /* ---------------------------------------------- */ LIKWID_STOP("Solver"); auto start_check_exact_error = std::chrono::high_resolution_clock::now(); - // fill exact solution on host to avoid repeat same comptuation + // fill exact solution on host to avoid repeat same computation HostVector exact_sol("exact_sol", level.solution().size()); const PolarGrid& grid = level.grid(); Kokkos::parallel_for( From 65a99dc75115850c059ccb96b83a7e19deaf5143 Mon Sep 17 00:00:00 2001 From: KARA Abdelhadi 265943 Date: Wed, 20 May 2026 10:34:19 +0200 Subject: [PATCH 4/5] use class_lambda for host parallel loop --- include/GMGPolar/solver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index 53bd7815..7ce407ee 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -608,7 +608,7 @@ std::pair GMGPolar:: Kokkos::parallel_for( "compute error on host", Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), - KOKKOS_LAMBDA(const int i_r, const int i_theta) { + KOKKOS_CLASS_LAMBDA(const int i_r, const int i_theta) { double r = grid.radius(i_r); double theta = grid.theta(i_theta); error[grid.index(i_r, i_theta)] = From 678e9158a3986b44732dadda6288db7f5fa3166f Mon Sep 17 00:00:00 2001 From: KARA Abdelhadi 265943 Date: Wed, 20 May 2026 10:49:14 +0200 Subject: [PATCH 5/5] pass by reference in cpu loops --- include/GMGPolar/solver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index 7ce407ee..95b5e050 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -101,7 +101,7 @@ void GMGPolar::solve(const BoundaryC Kokkos::parallel_for( "fill exact sol on host", Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), - KOKKOS_LAMBDA(const int i_r, const int i_theta) { + [&](const int i_r, const int i_theta) { double r = grid.radius(i_r); double theta = grid.theta(i_theta); exact_sol(grid.index(i_r, i_theta)) = exact_solution_->exact_solution(r, theta); @@ -608,7 +608,7 @@ std::pair GMGPolar:: Kokkos::parallel_for( "compute error on host", Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), - KOKKOS_CLASS_LAMBDA(const int i_r, const int i_theta) { + [&](const int i_r, const int i_theta) { double r = grid.radius(i_r); double theta = grid.theta(i_theta); error[grid.index(i_r, i_theta)] =