diff --git a/include/GMGPolar/gmgpolar.h b/include/GMGPolar/gmgpolar.h index 1f46bfe3..875ad3bc 100644 --- a/include/GMGPolar/gmgpolar.h +++ b/include/GMGPolar/gmgpolar.h @@ -134,7 +134,7 @@ class GMGPolar : public IGMGPolar std::vector> exact_errors_; std::pair computeExactError(Level& level, HostConstVector solution, HostVector error, - const ExactSolution& exact_solution); + HostConstVector exact_solution); /* --------------- */ /* Setup Functions */ @@ -153,13 +153,14 @@ class GMGPolar : public IGMGPolar /* Solve Functions */ void fullMultigridApproximation(MultigridCycleType FMG_cycle, int FMG_iterations); void solveMultigrid(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm); - void solvePCG(double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm); + double& current_relative_residual_norm, HostConstVector exact_solution); + void solvePCG(double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm, + HostConstVector exact_solution); double residualNorm(const ResidualNormType& norm_type, const Level& level, HostConstVector residual) const; void evaluateExactError(Level& level, - const ExactSolution& exact_solution); + HostConstVector exact_solution); void updateResidualNorms(Level& level, int iteration, double& initial_residual_norm, double& current_residual_norm, double& current_relative_residual_norm); diff --git a/include/GMGPolar/solver.h b/include/GMGPolar/solver.h index 6a38ee74..14ea4b03 100644 --- a/include/GMGPolar/solver.h +++ b/include/GMGPolar/solver.h @@ -95,9 +95,20 @@ void GMGPolar::solve(const BoundaryC /* ---------------------------------------------- */ LIKWID_STOP("Solver"); auto start_check_exact_error = std::chrono::high_resolution_clock::now(); + // fill exact solution on host to avoid repeat same computation + HostVector exact_sol("exact_sol", level.solution().size()); + const PolarGrid& grid = level.grid(); + Kokkos::parallel_for( + "fill exact sol on host", + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + [&](const int i_r, const int i_theta) { + double r = grid.radius(i_r); + double theta = grid.theta(i_theta); + exact_sol(grid.index(i_r, i_theta)) = exact_solution_->exact_solution(r, theta); + }); if (exact_solution_ != nullptr) - evaluateExactError(level, *exact_solution_); + evaluateExactError(level, exact_sol); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -121,7 +132,7 @@ void GMGPolar::solve(const BoundaryC if (!PCG_) { // Solve A*x = b directly using multigrid cycles (V/W/F-cycle) // until convergence or max_iterations_ is reached. - solveMultigrid(initial_residual_norm, current_residual_norm, current_relative_residual_norm); + solveMultigrid(initial_residual_norm, current_residual_norm, current_relative_residual_norm, exact_sol); } else { // Solve A*x = b using Preconditioned Conjugate Gradient (PCG), @@ -129,7 +140,7 @@ void GMGPolar::solve(const BoundaryC // cycle approximates the action of A^{-1} at each PCG iteration). auto start_conjugate_gradient = std::chrono::high_resolution_clock::now(); - solvePCG(initial_residual_norm, current_residual_norm, current_relative_residual_norm); + solvePCG(initial_residual_norm, current_residual_norm, current_relative_residual_norm, exact_sol); auto end_conjugate_gradient = std::chrono::high_resolution_clock::now(); t_conjugate_gradient_ += @@ -170,7 +181,7 @@ void GMGPolar::solve(const BoundaryC if (paraview_) { writeToVTK("output_solution", level, level.solution()); if (exact_solution_ != nullptr) { - computeExactError(level, level.solution(), level.residual(), *exact_solution_); + computeExactError(level, level.solution(), level.residual(), exact_sol); writeToVTK("output_error", level, level.residual()); } } @@ -215,7 +226,8 @@ void GMGPolar::fullMultigridApproxim template void GMGPolar::solveMultigrid(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm) + double& current_relative_residual_norm, + HostConstVector exact_solution) { Level& level = levels_[0]; @@ -245,7 +257,7 @@ void GMGPolar::solveMultigrid(double auto start_check_exact_error = std::chrono::high_resolution_clock::now(); if (exact_solution_ != nullptr) - evaluateExactError(level, *exact_solution_); + evaluateExactError(level, exact_solution); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -288,7 +300,8 @@ void GMGPolar::solveMultigrid(double template void GMGPolar::solvePCG(double& initial_residual_norm, double& current_residual_norm, - double& current_relative_residual_norm) + double& current_relative_residual_norm, + HostConstVector exact_solution) { Level& level = levels_[0]; @@ -355,7 +368,7 @@ void GMGPolar::solvePCG(double& init auto start_check_exact_error = std::chrono::high_resolution_clock::now(); if (exact_solution_ != nullptr) - exact_errors_.push_back(computeExactError(level, pcg_solution_, level.residual(), *exact_solution_)); + exact_errors_.push_back(computeExactError(level, pcg_solution_, level.residual(), exact_solution)); auto end_check_exact_error = std::chrono::high_resolution_clock::now(); t_check_exact_error_ += std::chrono::duration(end_check_exact_error - start_check_exact_error).count(); @@ -574,7 +587,7 @@ void GMGPolar::initRhsHierarchy(Host template void GMGPolar::evaluateExactError( - Level& level, const ExactSolution& exact_solution) + Level& level, HostConstVector exact_solution) { // Compute the weighted L2 norm and infinity norm of the error between the numerical and exact solution. // The results are stored as a pair: (weighted L2 error, infinity error). @@ -584,34 +597,23 @@ void GMGPolar::evaluateExactError( template std::pair GMGPolar::computeExactError( Level& level, HostConstVector solution, - HostVector error, const ExactSolution& exact_solution) + HostVector error, HostConstVector exact_solution) { const PolarGrid& grid = level.grid(); assert(solution.size() == error.size()); assert(std::ssize(solution) == grid.numberOfNodes()); -#pragma omp parallel - { -#pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - double r = grid.radius(i_r); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - double theta = grid.theta(i_theta); - error[grid.index(i_r, i_theta)] = - exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)]; - } - } -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + Kokkos::parallel_for( + "compute error on host", + Kokkos::MDRangePolicy>({0, 0}, {grid.nr(), grid.ntheta()}), + [&](const int i_r, const int i_theta) { + double r = grid.radius(i_r); double theta = grid.theta(i_theta); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - double r = grid.radius(i_r); - error[grid.index(i_r, i_theta)] = - exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)]; - } - } - } + error[grid.index(i_r, i_theta)] = + exact_solution(grid.index(i_r, i_theta)) - solution[grid.index(i_r, i_theta)]; + }); + HostConstVector c_error = error; double weighted_euclidean_error = l2_norm(c_error) / std::sqrt(grid.numberOfNodes()); double infinity_error = infinity_norm(c_error);