Skip to content
18 changes: 11 additions & 7 deletions include/GMGPolar/gmgpolar.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class GMGPolar : public IGMGPolar
double mean_residual_reduction_factor_;
bool converged(double current_residual_norm, double first_residual_norm);

public: // Public due to cuda restrictions
/* ---------------------------------------------------- */
/* Compute exact error if an exact solution is provided */
// The results are stored as a pair: (weighted L2 error, infinity error).
Expand All @@ -138,15 +139,19 @@ class GMGPolar : public IGMGPolar

/* --------------- */
/* Setup Functions */
int chooseNumberOfLevels(const PolarGrid& finest_grid);
// Public due to cuda restrictions
public:
template <concepts::BoundaryConditions BoundaryConditions, concepts::SourceTerm SourceTerm>
void build_rhs_f(const Level<DomainGeometry, DensityProfileCoefficients>& level, HostVector<double> rhs_f,
const BoundaryConditions& boundary_conditions, const SourceTerm& source_term);
void discretize_rhs_f(const Level<DomainGeometry, DensityProfileCoefficients>& level, HostVector<double> rhs_f);

/* --------------- */
/* Solve Functions */
void applyExtrapolation(int current_level, HostVector<double> fine_values, HostConstVector<double> coarse_values);

private:
/* --------------- */
/* Setup Functions */
int chooseNumberOfLevels(const PolarGrid& finest_grid);
bool checkUniformRefinement(const PolarGrid& grid, double tolerance) const;

/* --------------- */
Expand All @@ -168,15 +173,13 @@ class GMGPolar : public IGMGPolar
int iterations);
void applyExtrapolatedMultigridIterations(Level<DomainGeometry, DensityProfileCoefficients>& level,
MultigridCycleType cycle, int iterations);
// Compute the extrapolated values: u_ex = 4/3 u_fine - 1/3 u_coarse
void applyExtrapolation(int current_level, HostVector<double> fine_values, HostConstVector<double> coarse_values);

/* ----------------- */
/* Print information */
void printSettings(const PolarGrid& finest_grid, const PolarGrid& coarsest_grid) const;
void printIterationHeader(const ExactSolution* exact_solution);
void printIterationHeader(const bool is_exact_solution_provided);
void printIterationInfo(int iteration, double current_residual_norm, double current_relative_residual_norm,
const ExactSolution* exact_solution);
const bool is_exact_solution_provided);

/* ------------------- */
/* Multigrid Functions */
Expand Down Expand Up @@ -219,4 +222,5 @@ class GMGPolar : public IGMGPolar
#include "MultigridMethods/extrapolated_multigrid_V_Cycle.h"
#include "MultigridMethods/multigrid_F_Cycle.h"
#include "MultigridMethods/multigrid_W_Cycle.h"

} // namespace gmgpolar
159 changes: 89 additions & 70 deletions include/GMGPolar/solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solve(const BoundaryC
LIKWID_STOP("Solver");
auto start_check_exact_error = std::chrono::high_resolution_clock::now();

if (exact_solution_ != nullptr)
if (exact_solution_) {
evaluateExactError(level, *exact_solution_);
}

auto end_check_exact_error = std::chrono::high_resolution_clock::now();
t_check_exact_error_ += std::chrono::duration<double>(end_check_exact_error - start_check_exact_error).count();
Expand Down Expand Up @@ -169,7 +170,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solve(const BoundaryC

if (paraview_) {
writeToVTK("output_solution", level, level.solution());
if (exact_solution_ != nullptr) {
if (exact_solution_) {
computeExactError(level, level.solution(), level.residual(), *exact_solution_);
writeToVTK("output_error", level, level.residual());
}
Expand Down Expand Up @@ -244,8 +245,9 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::solveMultigrid(double
LIKWID_STOP("Solver");
auto start_check_exact_error = std::chrono::high_resolution_clock::now();

if (exact_solution_ != nullptr)
if (exact_solution_) {
evaluateExactError(level, *exact_solution_);
}

auto end_check_exact_error = std::chrono::high_resolution_clock::now();
t_check_exact_error_ += std::chrono::duration<double>(end_check_exact_error - start_check_exact_error).count();
Expand Down Expand Up @@ -511,46 +513,48 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::applyExtrapolation(in
assert(std::ssize(fine_values) == fineGrid.numberOfNodes());
assert(std::ssize(coarse_values) == coarseGrid.numberOfNodes());

#pragma omp parallel
{
/* Circluar Indexing Section */
/* For loop matches circular access pattern */
#pragma omp for nowait
for (int i_r = 0; i_r < fineGrid.numberSmootherCircles(); i_r++) {
int i_r_coarse = i_r / 2;
for (int i_theta = 0; i_theta < fineGrid.ntheta(); i_theta++) {
int i_theta_coarse = i_theta / 2;

if (i_r & 1 || i_theta & 1) {
fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
}
else {
int fine_idx = fineGrid.index(i_r, i_theta);
int coarse_idx = coarseGrid.index(i_r_coarse, i_theta_coarse);
fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
}
/* We split the loops into two regions to better respect the */
/* access patterns of the smoother and improve cache locality. */

/* Circular Indexing Section */
/* For loop matches circular access pattern */
Kokkos::parallel_for(
"Extrapolation: Apply Extrapolation (Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
{0, 0}, {fineGrid.numberSmootherCircles(), fineGrid.ntheta()}),
KOKKOS_LAMBDA(const int i_r, const int i_theta) {
const int i_r_coarse = i_r / 2;
const int i_theta_coarse = i_theta / 2;
if (i_r & 1 || i_theta & 1) {
fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
}
}

/* Radial Indexing Section */
/* For loop matches radial access pattern */
#pragma omp for nowait
for (int i_theta = 0; i_theta < fineGrid.ntheta(); i_theta++) {
int i_theta_coarse = i_theta / 2;
for (int i_r = fineGrid.numberSmootherCircles(); i_r < fineGrid.nr(); i_r++) {
int i_r_coarse = i_r / 2;

if (i_r & 1 || i_theta & 1) {
fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
}
else {
int fine_idx = fineGrid.index(i_r, i_theta);
int coarse_idx = coarseGrid.index(i_r_coarse, i_theta_coarse);
fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
}
else {
const int fine_idx = fineGrid.index(i_r, i_theta);
const int coarse_idx = coarseGrid.index(i_r_coarse, i_theta_coarse);
fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
}
}
}
});

/* Radial Indexing Section */
/* For loop matches radial access pattern */
Kokkos::parallel_for(
"Extrapolation: Apply Extrapolation (Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, fineGrid.numberSmootherCircles()},
{fineGrid.ntheta(), fineGrid.nr()}),
KOKKOS_LAMBDA(const int i_theta, const int i_r) {
const int i_r_coarse = i_r / 2;
const int i_theta_coarse = i_theta / 2;
if (i_r & 1 || i_theta & 1) {
fine_values[fineGrid.index(i_r, i_theta)] *= 4.0 / 3.0;
}
else {
const int fine_idx = fineGrid.index(i_r, i_theta);
const int coarse_idx = coarseGrid.index(i_r_coarse, i_theta_coarse);
fine_values[fine_idx] = (4.0 * fine_values[fine_idx] - coarse_values[coarse_idx]) / 3.0;
}
});

Kokkos::fence();
}

// =============================================================================
Expand Down Expand Up @@ -591,31 +595,46 @@ std::pair<double, double> GMGPolar<DomainGeometry, DensityProfileCoefficients>::
assert(solution.size() == error.size());
assert(std::ssize(solution) == grid.numberOfNodes());

#pragma omp parallel
{
#pragma omp for nowait
for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
double r = grid.radius(i_r);
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
double theta = grid.theta(i_theta);
error[grid.index(i_r, i_theta)] =
exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)];
}
}
#pragma omp for nowait
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
double theta = grid.theta(i_theta);
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
double r = grid.radius(i_r);
error[grid.index(i_r, i_theta)] =
exact_solution.exact_solution(r, theta) - solution[grid.index(i_r, i_theta)];
}
}
}
HostConstVector<double> c_error = error;
double weighted_euclidean_error = l2_norm(c_error) / std::sqrt(grid.numberOfNodes());
double infinity_error = infinity_norm(c_error);

/* Fill exact solution on the host. */
// Note: We must compute the exact solution values on the host because the
// ExactSolution object is not designed to be used on the device.
// As it is planned that the solution and error vector will use device memory during GPU porting,
// we need to transfer the exact solution values from host to device using the Kokkos::deep_copy operation.
HostVector<double> exact_solution_values_host("Exact Solution Values Host", grid.numberOfNodes());

Kokkos::parallel_for(
"Exact Error: Compute Exact Solution Values (Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>( // Host parallel loop
{0, 0}, // Starting index
{grid.numberSmootherCircles(), grid.ntheta()}), // Ending index
[&](const int i_r, const int i_theta) {
const double radius = grid.radius(i_r);
const double theta = grid.theta(i_theta);
const int index = grid.index(i_r, i_theta);
exact_solution_values_host[index] = exact_solution.exact_solution(radius, theta);
});
Kokkos::parallel_for(
"Exact Error: Compute Exact Solution Values (Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>( // Host parallel loop
{0, grid.numberSmootherCircles()}, // Starting index
{grid.ntheta(), grid.nr()}), // Ending index
[&](const int i_theta, const int i_r) {
const double radius = grid.radius(i_r);
const double theta = grid.theta(i_theta);
const int index = grid.index(i_r, i_theta);
exact_solution_values_host[index] = exact_solution.exact_solution(radius, theta);
});
Kokkos::fence();

// Transfer the exact solution values from host to device memory for error computation.
Kokkos::deep_copy(error, exact_solution_values_host);

// Compute the error as the difference between the exact and numerical solutions.
subtract(error, HostConstVector<double>(solution));

// Compute the weighted L2 norm and infinity norm of the error.
const double weighted_euclidean_error = l2_norm(HostConstVector<double>(error)) / std::sqrt(grid.numberOfNodes());
const double infinity_error = infinity_norm(HostConstVector<double>(error));
return std::make_pair(weighted_euclidean_error, infinity_error);
}

Expand Down Expand Up @@ -645,7 +664,7 @@ bool GMGPolar<DomainGeometry, DensityProfileCoefficients>::converged(double resi
// =============================================================================

template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(const ExactSolution* exact_solution)
void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(const bool is_exact_solution_provided)
{
if (verbose_ <= 0)
return;
Expand All @@ -663,7 +682,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationHeader(
else
std::cout << std::setw(15 + table_spacing) << "||r_k||/||rhs||";
}
if (exact_solution != nullptr) {
if (is_exact_solution_provided) {
std::cout << std::setw(12 + table_spacing) << "||u-u_k||_l2";
std::cout << std::setw(13 + table_spacing) << "||u-u_k||_inf";
}
Expand All @@ -675,7 +694,7 @@ template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoeff
void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationInfo(int iteration,
double current_residual_norm,
double current_relative_residual_norm,
const ExactSolution* exact_solution)
const bool is_exact_solution_provided)
{
if (verbose_ <= 0)
return;
Expand All @@ -687,7 +706,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::printIterationInfo(in
std::cout << std::setw(9 + table_spacing + 2) << current_residual_norm;
std::cout << std::setw(15 + table_spacing) << current_relative_residual_norm;
}
if (exact_solution != nullptr) {
if (is_exact_solution_provided) {
std::cout << std::setw(12 + table_spacing) << exact_errors_.back().first;
std::cout << std::setw(13 + table_spacing) << exact_errors_.back().second;
}
Expand Down
Loading