diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 84be8e384a..7001573f5b 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -11,6 +11,8 @@ a71cad2dd6ace5741a754e2ca7daacd4bb094e0e 2c2402ed59c91164eaff46dee0f79386b7347e9e 05b7c571544c3bcb153fce67d12b9ac48947fc2d c8f38049359170a34c915e209276238ea66b9a1e +65880e4af16cb95438437bf057c4b9fdb427b142 +d1cfb8abd6aa5c76e6c1a4d7ab20929c65f8afc2 8d5cb39e03c2644715a50684f8cd0318b4e82676 ec69e8838be2dde140a915e50506f8e1ce3cb534 f2bc0488a298f136294c523bc5ab4086d090059b diff --git a/CMakeLists.txt b/CMakeLists.txt index e8c4657fb0..11ff1df22e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -296,11 +296,12 @@ set(BOUT_SOURCES ./src/mesh/interpolation/hermite_spline_z.cxx ./src/mesh/interpolation/interpolation_z.cxx ./src/mesh/interpolation/lagrange_4pt_xz.cxx - ./src/mesh/interpolation/monotonic_hermite_spline_xz.cxx ./src/mesh/invert3x3.hxx ./src/mesh/mesh.cxx ./src/mesh/parallel/fci.cxx ./src/mesh/parallel/fci.hxx + ./src/mesh/parallel/fci_comm.cxx + ./src/mesh/parallel/fci_comm.hxx ./src/mesh/parallel/identity.cxx ./src/mesh/parallel/shiftedmetric.cxx ./src/mesh/parallel/shiftedmetricinterp.cxx diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 4dd24259fd..d14a5eeca0 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -27,17 +27,18 @@ #include #include #include +#include -#define USE_NEW_WEIGHTS 1 #if BOUT_HAS_PETSC -#define HS_USE_PETSC 1 -#endif - -#ifdef HS_USE_PETSC #include "bout/petsclib.hxx" #endif +namespace { +enum class implementation_type { new_weights, petsc, legacy }; +} + class Options; +class GlobalField3DAccess; /// Interpolate a field onto a perturbed set of points const Field3D interpolate(const Field3D& f, const Field3D& delta_x, @@ -135,7 +136,16 @@ public: } }; -class XZHermiteSpline : public XZInterpolation { +/// Monotonic Hermite spline interpolator +/// +/// Similar to XZHermiteSpline, so uses most of the same code. +/// Forces the interpolated result to be in the range of the +/// neighbouring cell values. This prevents unphysical overshoots, +/// but also degrades accuracy near maxima and minima. +/// Perhaps should only impose near boundaries, since that is where +/// problems most obviously occur. +template +class XZHermiteSplineBase : public XZInterpolation { protected: /// This is protected rather than private so that it can be /// extended and used by HermiteSplineMonotonic @@ -143,6 +153,9 @@ protected: Tensor> i_corner; // index of bottom-left grid point Tensor k_corner; // z-index of bottom-left grid point + std::unique_ptr gf3daccess; + Tensor> g3dinds; + // Basis functions for cubic Hermite spline interpolation // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline // The h00 and h01 basis functions are applied to the function itself @@ -160,23 +173,28 @@ protected: std::vector newWeights; -#if HS_USE_PETSC +#if BOUT_HAS_PETSC PetscLib* petsclib; bool isInit{false}; - Mat petscWeights; - Vec rhs, result; + Mat petscWeights{}; + Vec rhs{}, result{}; #endif + /// Factors to allow for some wiggleroom + BoutReal abs_fac_monotonic{1e-2}; + BoutReal rel_fac_monotonic{1e-1}; + public: - XZHermiteSpline(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) - : XZHermiteSpline(0, mesh) {} - XZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr); - XZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { + XZHermiteSplineBase(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) + : XZHermiteSplineBase(0, mesh, options) {} + XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr, Options* options = nullptr); + XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr, + Options* options = nullptr) + : XZHermiteSplineBase(y_offset, mesh, options) { setRegion(regionFromMask(mask, localmesh)); } - ~XZHermiteSpline() { -#if HS_USE_PETSC + ~XZHermiteSplineBase() override { +#if BOUT_HAS_PETSC if (isInit) { MatDestroy(&petscWeights); VecDestroy(&rhs); @@ -205,61 +223,21 @@ public: getWeightsForYApproximation(int i, int j, int k, int yoffset) override; }; -/// Monotonic Hermite spline interpolator -/// -/// Similar to XZHermiteSpline, so uses most of the same code. -/// Forces the interpolated result to be in the range of the -/// neighbouring cell values. This prevents unphysical overshoots, -/// but also degrades accuracy near maxima and minima. -/// Perhaps should only impose near boundaries, since that is where -/// problems most obviously occur. -/// -/// You can control how tight the clipping to the range of the neighbouring cell -/// values through ``rtol`` and ``atol``: -/// -/// diff = (max_of_neighours - min_of_neighours) * rtol + atol -/// -/// and the interpolated value is instead clipped to the range -/// ``[min_of_neighours - diff, max_of_neighours + diff]`` -class XZMonotonicHermiteSpline : public XZHermiteSpline { - /// Absolute tolerance for clipping - BoutReal atol = 0.0; - /// Relative tolerance for clipping - BoutReal rtol = 1.0; - -public: - XZMonotonicHermiteSpline(Mesh* mesh = nullptr, Options* options = nullptr) - : XZHermiteSpline(0, mesh), - atol{(*options)["atol"] - .doc("Absolute tolerance for clipping overshoot") - .withDefault(0.0)}, - rtol{(*options)["rtol"] - .doc("Relative tolerance for clipping overshoot") - .withDefault(1.0)} { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(mask, y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - - using XZHermiteSpline::interpolate; - /// Interpolate using precalculated weights. - /// This function is called by the other interpolate functions - /// in the base class XZHermiteSpline. - Field3D interpolate(const Field3D& f, - const std::string& region = "RGN_NOBNDRY") const override; -}; +using XZMonotonicHermiteSplineSerial = + XZHermiteSplineBase; +using XZHermiteSplineSerial = + XZHermiteSplineBase; +using XZMonotonicHermiteSplineLegacy = + XZHermiteSplineBase; +using XZHermiteSplineLegacy = XZHermiteSplineBase; +#if BOUT_HAS_PETSC +using XZMonotonicHermiteSpline = XZHermiteSplineBase; +using XZHermiteSpline = XZHermiteSplineBase; +#else +using XZMonotonicHermiteSpline = + XZHermiteSplineBase; +using XZHermiteSpline = XZHermiteSplineBase; +#endif /// XZLagrange4pt interpolation class /// diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index 502b0c5668..a1ed6a9011 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -360,6 +360,8 @@ public: virtual int getXProcIndex() const = 0; ///< This processor's index in X direction virtual int getYProcIndex() const = 0; ///< This processor's index in Y direction virtual int getZProcIndex() const = 0; ///< This processor's index in Z direction + /// The index of a processor with given X, Y, and Z index + virtual int getProcIndex(int X, int Y, int Z) const = 0; // X communications virtual bool firstX() diff --git a/include/bout/msg_stack.hxx b/include/bout/msg_stack.hxx index 1abb26d2c7..0fe22cd0e6 100644 --- a/include/bout/msg_stack.hxx +++ b/include/bout/msg_stack.hxx @@ -80,7 +80,7 @@ public: } void pop() {} - void pop(int [[maybe_unused]] id) {} + void pop([[maybe_unused]] int id) {} void clear() {} void dump() {} diff --git a/include/bout/region.hxx b/include/bout/region.hxx index bb1cf82bf1..17ded65987 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -139,7 +139,7 @@ class BoutMask; BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) -enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 }; +enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2, IND_GLOBAL_3D = 3 }; /// Indices base class for Fields -- Regions are dereferenced into these /// @@ -386,6 +386,7 @@ inline SpecificInd operator-(SpecificInd lhs, const SpecificInd& rhs) { using Ind3D = SpecificInd; using Ind2D = SpecificInd; using IndPerp = SpecificInd; +using IndG3D = SpecificInd; /// Get string representation of Ind3D inline std::string toString(const Ind3D& i) { diff --git a/src/mesh/impls/bout/boutmesh.cxx b/src/mesh/impls/bout/boutmesh.cxx index d0bdcf33d8..3dad85f211 100644 --- a/src/mesh/impls/bout/boutmesh.cxx +++ b/src/mesh/impls/bout/boutmesh.cxx @@ -534,6 +534,8 @@ int BoutMesh::load() { PE_XIND = MYPE % NXPE; PE_ZIND = 0; + ASSERT2(MYPE == getProcIndex(PE_XIND, PE_YIND, PE_ZIND)); + // Set the other grid sizes from nx, ny, nz setDerivedGridSizes(); @@ -1015,6 +1017,8 @@ void BoutMesh::createXBoundaries() { } } +int BoutMesh::getProcIndex(int X, int Y, int Z) const { return Y * NXPE + X; } + void BoutMesh::createYBoundaries() { if (MYG <= 0) { return; diff --git a/src/mesh/impls/bout/boutmesh.hxx b/src/mesh/impls/bout/boutmesh.hxx index 3923c34511..b42bf325b5 100644 --- a/src/mesh/impls/bout/boutmesh.hxx +++ b/src/mesh/impls/bout/boutmesh.hxx @@ -63,6 +63,7 @@ public: int getXProcIndex() const override; ///< This processor's index in X direction int getYProcIndex() const override; ///< This processor's index in Y direction int getZProcIndex() const override; ///< This processor's index in Z direction + int getProcIndex(int X, int Y, int Z) const override; ///////////////////////////////////////////// // X communications diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 27a4f1d614..48e7d2904e 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -21,11 +21,18 @@ **************************************************************************/ #include "../impls/bout/boutmesh.hxx" +#include "../parallel/fci_comm.hxx" #include "bout/bout.hxx" +#include "bout/boutexception.hxx" +#include "bout/build_defines.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" +#include "bout/mask.hxx" +#include +#include +#include #include class IndConverter { @@ -35,9 +42,9 @@ class IndConverter { xstart(mesh->xstart), ystart(mesh->ystart), zstart(0), lnx(mesh->LocalNx - 2 * xstart), lny(mesh->LocalNy - 2 * ystart), lnz(mesh->LocalNz - 2 * zstart) {} - // ix and iy are global indices + // ix and iz are global indices // iy is local - int fromMeshToGlobal(int ix, int iy, int iz) { + int fromMeshToGlobal(int ix, int iy, int iz) const { const int xstart = mesh->xstart; const int lnx = mesh->LocalNx - xstart * 2; // x-proc-id @@ -71,12 +78,12 @@ class IndConverter { return fromLocalToGlobal(ix - pex * lnx, iy - pey_offset * lny, iz - pez * lnz, pex, pey, 0); } - int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz) { + int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz) const { return fromLocalToGlobal(ilocalx, ilocaly, ilocalz, mesh->getXProcIndex(), mesh->getYProcIndex(), 0); } int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz, - const int pex, const int pey, const int pez) { + const int pex, const int pey, const int pez) const { ASSERT3(ilocalx >= 0); ASSERT3(ilocaly >= 0); ASSERT3(ilocalz >= 0); @@ -102,11 +109,21 @@ class IndConverter { } }; -XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) +template +XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin, + Options* options) : XZInterpolation(y_offset, meshin), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { + if constexpr (monotonic) { + if (options == nullptr) { + options = &Options::root()["mesh:paralleltransform:xzinterpolation"]; + } + abs_fac_monotonic = (*options)["abs_tol"].withDefault(abs_fac_monotonic); + rel_fac_monotonic = (*options)["rel_tol"].withDefault(rel_fac_monotonic); + } + // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); @@ -125,37 +142,48 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) h10_z.allocate(); h11_z.allocate(); -#if USE_NEW_WEIGHTS - newWeights.reserve(16); - for (int w = 0; w < 16; ++w) { - newWeights.emplace_back(localmesh); - newWeights[w].allocate(); + if constexpr (imp_type == implementation_type::new_weights + || imp_type == implementation_type::petsc) { + newWeights.reserve(16); + for (int w = 0; w < 16; ++w) { + newWeights.emplace_back(localmesh); + newWeights[w].allocate(); + } } -#ifdef HS_USE_PETSC - petsclib = new PetscLib( - &Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]); - const int m = localmesh->LocalNx * localmesh->LocalNy * localmesh->LocalNz; - const int M = m * localmesh->getNXPE() * localmesh->getNYPE(); - MatCreateAIJ(BoutComm::get(), m, m, M, M, 16, nullptr, 16, nullptr, &petscWeights); -#endif + if constexpr (imp_type == implementation_type::petsc) { +#if BOUT_HAS_PETSC + petsclib = new PetscLib( + &Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]); + const int m = localmesh->LocalNx * localmesh->LocalNy * localmesh->LocalNz; + const int M = m * localmesh->getNXPE() * localmesh->getNYPE() * localmesh->getNZPE(); + MatCreateAIJ(BoutComm::get(), m, m, M, M, 16, nullptr, 16, nullptr, &petscWeights); #endif -#ifndef HS_USE_PETSC - if (localmesh->getNXPE() > 1) { - throw BoutException("Require PETSc for MPI splitting in X"); } -#endif + if constexpr (monotonic) { + gf3daccess = std::make_unique(localmesh); + g3dinds.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); + } + if constexpr (imp_type == implementation_type::new_weights + || imp_type == implementation_type::legacy) { + if (localmesh->getNXPE() > 1) { + throw BoutException("Require PETSc for MPI splitting in X"); + } + } } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const std::string& region) { +template +void XZHermiteSplineBase::calcWeights( + const Field3D& delta_x, const Field3D& delta_z, + [[maybe_unused]] const std::string& region) { const int ny = localmesh->LocalNy; const int nz = localmesh->LocalNz; const int xend = (localmesh->xend - localmesh->xstart + 1) * localmesh->getNXPE() + localmesh->xstart - 1; -#ifdef HS_USE_PETSC - IndConverter conv{localmesh}; -#endif + [[maybe_unused]] const IndConverter conv{localmesh}; + + [[maybe_unused]] const int y_global_offset = + localmesh->getYProcIndex() * (localmesh->yend - localmesh->ystart + 1); BOUT_FOR(i, getRegion(region)) { const int x = i.x(); const int y = i.y(); @@ -223,96 +251,112 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z h11_x[i] = (t_x * t_x * t_x) - (t_x * t_x); h11_z[i] = (t_z * t_z * t_z) - (t_z * t_z); -#if USE_NEW_WEIGHTS - - for (int w = 0; w < 16; ++w) { - newWeights[w][i] = 0; - } - // The distribution of our weights: - // 0 4 8 12 - // 1 5 9 13 - // 2 6 10 14 - // 3 7 11 15 - // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); - - // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - newWeights[5][i] += h00_x[i] * h00_z[i]; - newWeights[9][i] += h01_x[i] * h00_z[i]; - newWeights[9][i] += h10_x[i] * h00_z[i] / 2; - newWeights[1][i] -= h10_x[i] * h00_z[i] / 2; - newWeights[13][i] += h11_x[i] * h00_z[i] / 2; - newWeights[5][i] -= h11_x[i] * h00_z[i] / 2; - - // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + - // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; - newWeights[6][i] += h00_x[i] * h01_z[i]; - newWeights[10][i] += h01_x[i] * h01_z[i]; - newWeights[10][i] += h10_x[i] * h01_z[i] / 2; - newWeights[2][i] -= h10_x[i] * h01_z[i] / 2; - newWeights[14][i] += h11_x[i] * h01_z[i] / 2; - newWeights[6][i] -= h11_x[i] * h01_z[i] / 2; - - // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + - // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; - newWeights[6][i] += h00_x[i] * h10_z[i] / 2; - newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; - newWeights[10][i] += h01_x[i] * h10_z[i] / 2; - newWeights[8][i] -= h01_x[i] * h10_z[i] / 2; - newWeights[10][i] += h10_x[i] * h10_z[i] / 4; - newWeights[8][i] -= h10_x[i] * h10_z[i] / 4; - newWeights[2][i] -= h10_x[i] * h10_z[i] / 4; - newWeights[0][i] += h10_x[i] * h10_z[i] / 4; - newWeights[14][i] += h11_x[i] * h10_z[i] / 4; - newWeights[12][i] -= h11_x[i] * h10_z[i] / 4; - newWeights[6][i] -= h11_x[i] * h10_z[i] / 4; - newWeights[4][i] += h11_x[i] * h10_z[i] / 4; - - // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + - // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - newWeights[7][i] += h00_x[i] * h11_z[i] / 2; - newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; - newWeights[11][i] += h01_x[i] * h11_z[i] / 2; - newWeights[9][i] -= h01_x[i] * h11_z[i] / 2; - newWeights[11][i] += h10_x[i] * h11_z[i] / 4; - newWeights[9][i] -= h10_x[i] * h11_z[i] / 4; - newWeights[3][i] -= h10_x[i] * h11_z[i] / 4; - newWeights[1][i] += h10_x[i] * h11_z[i] / 4; - newWeights[15][i] += h11_x[i] * h11_z[i] / 4; - newWeights[13][i] -= h11_x[i] * h11_z[i] / 4; - newWeights[7][i] -= h11_x[i] * h11_z[i] / 4; - newWeights[5][i] += h11_x[i] * h11_z[i] / 4; -#ifdef HS_USE_PETSC - PetscInt idxn[1] = {conv.fromLocalToGlobal(x, y + y_offset, z)}; - // output.write("debug: {:d} -> {:d}: {:d}:{:d} -> {:d}:{:d}\n", - // conv.fromLocalToGlobal(x, y + y_offset, z), - // conv.fromMeshToGlobal(i_corn, y + y_offset, k_corner(x, y, z)), - // x, z, i_corn, k_corner(x, y, z)); - // ixstep = mesh->LocalNx * mesh->LocalNz; - for (int j = 0; j < 4; ++j) { - PetscInt idxm[4]; - PetscScalar vals[4]; - for (int k = 0; k < 4; ++k) { - idxm[k] = conv.fromMeshToGlobal(i_corn - 1 + j, y + y_offset, - k_corner(x, y, z) - 1 + k); - vals[k] = newWeights[j * 4 + k][i]; + if constexpr (imp_type == implementation_type::new_weights + || imp_type == implementation_type::petsc) { + for (int w = 0; w < 16; ++w) { + newWeights[w][i] = 0; } - MatSetValues(petscWeights, 1, idxn, 4, idxm, vals, INSERT_VALUES); - } -#endif + // The distribution of our weights: + // 0 4 8 12 + // 1 5 9 13 + // 2 6 10 14 + // 3 7 11 15 + // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); + + // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; + newWeights[5][i] += h00_x[i] * h00_z[i]; + newWeights[9][i] += h01_x[i] * h00_z[i]; + newWeights[9][i] += h10_x[i] * h00_z[i] / 2; + newWeights[1][i] -= h10_x[i] * h00_z[i] / 2; + newWeights[13][i] += h11_x[i] * h00_z[i] / 2; + newWeights[5][i] -= h11_x[i] * h00_z[i] / 2; + + // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + + // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h01_z[i]; + newWeights[10][i] += h01_x[i] * h01_z[i]; + newWeights[10][i] += h10_x[i] * h01_z[i] / 2; + newWeights[2][i] -= h10_x[i] * h01_z[i] / 2; + newWeights[14][i] += h11_x[i] * h01_z[i] / 2; + newWeights[6][i] -= h11_x[i] * h01_z[i] / 2; + + // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + + // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h10_z[i] / 2; + newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; + newWeights[10][i] += h01_x[i] * h10_z[i] / 2; + newWeights[8][i] -= h01_x[i] * h10_z[i] / 2; + newWeights[10][i] += h10_x[i] * h10_z[i] / 4; + newWeights[8][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[2][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[0][i] += h10_x[i] * h10_z[i] / 4; + newWeights[14][i] += h11_x[i] * h10_z[i] / 4; + newWeights[12][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[6][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[4][i] += h11_x[i] * h10_z[i] / 4; + + // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + newWeights[7][i] += h00_x[i] * h11_z[i] / 2; + newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; + newWeights[11][i] += h01_x[i] * h11_z[i] / 2; + newWeights[9][i] -= h01_x[i] * h11_z[i] / 2; + newWeights[11][i] += h10_x[i] * h11_z[i] / 4; + newWeights[9][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[3][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[1][i] += h10_x[i] * h11_z[i] / 4; + newWeights[15][i] += h11_x[i] * h11_z[i] / 4; + newWeights[13][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[7][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[5][i] += h11_x[i] * h11_z[i] / 4; + if (imp_type == implementation_type::petsc) { +#if BOUT_HAS_PETSC + PetscInt idxn[1] = {conv.fromLocalToGlobal(x, y + y_offset, z)}; + // output.write("debug: {:d} -> {:d}: {:d}:{:d} -> {:d}:{:d}\n", + // conv.fromLocalToGlobal(x, y + y_offset, z), + // conv.fromMeshToGlobal(i_corn, y + y_offset, k_corner(x, y, z)), + // x, z, i_corn, k_corner(x, y, z)); + // ixstep = mesh->LocalNx * mesh->LocalNz; + for (int j = 0; j < 4; ++j) { + PetscInt idxm[4]; + PetscScalar vals[4]; + for (int k = 0; k < 4; ++k) { + idxm[k] = conv.fromMeshToGlobal(i_corn - 1 + j, y + y_offset, + k_corner(x, y, z) - 1 + k); + vals[k] = newWeights[(j * 4) + k][i]; + } + MatSetValues(petscWeights, 1, idxn, 4, idxm, vals, INSERT_VALUES); + } #endif + } + } + if constexpr (monotonic) { + const auto gind = gf3daccess->xyzglobal(i_corn, y + y_offset + y_global_offset, + k_corner(x, y, z)); + gf3daccess->request(gind); + gf3daccess->request(gind.xp(1)); + gf3daccess->request(gind.zp(1)); + gf3daccess->request(gind.xp(1).zp(1)); + g3dinds[i] = {gind.ind, gind.xp(1).ind, gind.zp(1).ind, gind.xp(1).zp(1).ind}; + } } -#ifdef HS_USE_PETSC - MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); - if (!isInit) { - MatCreateVecs(petscWeights, &rhs, &result); - } - isInit = true; + if constexpr (imp_type == implementation_type::petsc) { +#if BOUT_HAS_PETSC + MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); + if (!isInit) { + MatCreateVecs(petscWeights, &rhs, &result); + } + isInit = true; #endif + } } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { +template +void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, + const Field3D& delta_z, + const BoutMask& mask, + const std::string& region) { setMask(mask); calcWeights(delta_x, delta_z, region); } @@ -333,8 +377,14 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z * (i, j+1, k+1) h01_z + h10_z / 2 * (i, j+1, k+2) h11_z / 2 */ +template std::vector -XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { +XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, + int yoffset) { + if (localmesh->getNXPE() > 1) { + throw BoutException("It is likely that the function calling this is not handling the " + "result correctly."); + } const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); @@ -347,99 +397,157 @@ XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { {i, j + yoffset, k_mod_p2, 0.5 * h11_z(i, j, k)}}; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region) const { +template +Field3D XZHermiteSplineBase::interpolate( + const Field3D& f, [[maybe_unused]] const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; - const auto region2 = - y_offset == 0 ? "RGN_NOY" : fmt::format("RGN_YPAR_{:+d}", y_offset); - -#if USE_NEW_WEIGHTS -#ifdef HS_USE_PETSC - BoutReal* ptr; - const BoutReal* cptr; - VecGetArray(rhs, &ptr); - BOUT_FOR(i, f.getRegion("RGN_NOY")) { ptr[int(i)] = f[i]; } - VecRestoreArray(rhs, &ptr); - MatMult(petscWeights, rhs, result); - VecGetArrayRead(result, &cptr); - BOUT_FOR(i, f.getRegion(region2)) { - f_interp[i] = cptr[int(i)]; - ASSERT2(std::isfinite(cptr[int(i)])); + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; + + std::unique_ptr gf; + if constexpr (monotonic) { + gf = gf3daccess->communicate_asPtr(f); } - VecRestoreArrayRead(result, &cptr); -#else - BOUT_FOR(i, getRegion(region)) { - auto ic = i_corner[i]; - auto iyp = i.yp(y_offset); - - f_interp[iyp] = 0; - for (int w = 0; w < 4; ++w) { - f_interp[iyp] += newWeights[w * 4 + 0][i] * f[ic.zm().xp(w - 1)]; - f_interp[iyp] += newWeights[w * 4 + 1][i] * f[ic.xp(w - 1)]; - f_interp[iyp] += newWeights[w * 4 + 2][i] * f[ic.zp().xp(w - 1)]; - f_interp[iyp] += newWeights[w * 4 + 3][i] * f[ic.zp(2).xp(w - 1)]; + + if constexpr (imp_type == implementation_type::petsc) { +#if BOUT_HAS_PETSC + BoutReal* ptr = nullptr; + const BoutReal* cptr = nullptr; + VecGetArray(rhs, &ptr); + BOUT_FOR(i, f.getRegion("RGN_NOY")) { ptr[int(i)] = f[i]; } + VecRestoreArray(rhs, &ptr); + MatMult(petscWeights, rhs, result); + VecGetArrayRead(result, &cptr); + BOUT_FOR(i, f.getRegion(region2)) { + f_interp[i] = cptr[int(i)]; + if constexpr (monotonic) { + const auto iyp = i; + const auto i = iyp.ym(y_offset); + const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], + (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; + const auto minmax = std::minmax(corners); + const auto diff = + ((minmax.second - minmax.first) * rel_fac_monotonic) + abs_fac_monotonic; + f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); + f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); + } + ASSERT2(std::isfinite(cptr[int(i)])); } - } + VecRestoreArrayRead(result, &cptr); #endif -#else - // Derivatives are used for tension and need to be on dimensionless - // coordinates - - // f has been communcated, and thus we can assume that the x-boundaries are - // also valid in the y-boundary. Thus the differentiated field needs no - // extra comms. - Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT", region2); - Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); - Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); - - BOUT_FOR(i, getRegion(region)) { - const auto iyp = i.yp(y_offset); - - const auto ic = i_corner[i]; - const auto iczp = ic.zp(); - const auto icxp = ic.xp(); - const auto icxpzp = iczp.xp(); - - // Interpolate f in X at Z - const BoutReal f_z = - f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - - // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] - + fx[icxpzp] * h11_x[i]; - - // Interpolate fz in X at Z - const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] - + fxz[icxp] * h11_x[i]; - - // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] - + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - - // Interpolate in Z - f_interp[iyp] = - +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; + } - ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); + if constexpr (imp_type == implementation_type::new_weights) { + BOUT_FOR(i, getRegion(region)) { + auto ic = i_corner[i]; + auto iyp = i.yp(y_offset); + + f_interp[iyp] = 0; + for (int w = 0; w < 4; ++w) { + f_interp[iyp] += newWeights[(w * 4) + 0][i] * f[ic.zm().xp(w - 1)]; + f_interp[iyp] += newWeights[(w * 4) + 1][i] * f[ic.xp(w - 1)]; + f_interp[iyp] += newWeights[(w * 4) + 2][i] * f[ic.zp().xp(w - 1)]; + f_interp[iyp] += newWeights[(w * 4) + 3][i] * f[ic.zp(2).xp(w - 1)]; + } + if constexpr (monotonic) { + const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], + (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; + const auto minmax = std::minmax(corners); + const auto diff = + ((minmax.second - minmax.first) * rel_fac_monotonic) + abs_fac_monotonic; + f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); + f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); + } + ASSERT2(std::isfinite(f_interp[iyp])); + } + } + if constexpr (imp_type == implementation_type::legacy) { + // Legacy interpolation + // TODO(peter): Should we apply dirichlet BCs to derivatives? + // Derivatives are used for tension and need to be on dimensionless + // coordinates + + // f has been communcated, and thus we can assume that the x-boundaries are + // also valid in the y-boundary. Thus the differentiated field needs no + // extra comms. + // TODO(dave) Add assert that we do not use z-splitting or z-guards. + Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT", region2); + Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); + Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); + + BOUT_FOR(i, getRegion(region)) { + const auto iyp = i.yp(y_offset); + + const auto ic = i_corner[i]; + const auto iczp = ic.zp(); + const auto icxp = ic.xp(); + const auto icxpzp = iczp.xp(); + + // Interpolate f in X at Z + const BoutReal f_z = + f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; + + // Interpolate f in X at Z+1 + const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + + fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + + // Interpolate fz in X at Z + const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] + + fxz[icxp] * h11_x[i]; + + // Interpolate fz in X at Z+1 + const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + + // Interpolate in Z + f_interp[iyp] = + +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; + + if constexpr (monotonic) { + const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], + (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; + const auto minmax = std::minmax(corners); + const auto diff = + ((minmax.second - minmax.first) * rel_fac_monotonic) + abs_fac_monotonic; + f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); + f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); + } + ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart + || i.x() > localmesh->xend); + } } -#endif f_interp.setRegion(region2); ASSERT2(f_interp.getRegionID()); return f_interp; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { +template +Field3D XZHermiteSplineBase::interpolate(const Field3D& f, + const Field3D& delta_x, + const Field3D& delta_z, + const std::string& region) { calcWeights(delta_x, delta_z, region); return interpolate(f, region); } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { +template +Field3D XZHermiteSplineBase::interpolate(const Field3D& f, + const Field3D& delta_x, + const Field3D& delta_z, + const BoutMask& mask, + const std::string& region) { calcWeights(delta_x, delta_z, mask, region); return interpolate(f, region); } + +// ensure they are instantiated +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; +#if BOUT_HAS_PETSC +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; +#endif diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index e16b2699d1..c82c9fa36e 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -20,6 +20,7 @@ * **************************************************************************/ +#include "fmt/format.h" #include "bout/globals.hxx" #include "bout/interpolation_xz.hxx" #include "bout/mesh.hxx" @@ -133,7 +134,10 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) // Then in X f_interp(x, y_next, z) = lagrange_4pt(xvals, t_x(x, y, z)); + ASSERT2(std::isfinite(f_interp(x, y_next, z))); } + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; + f_interp.setRegion(region2); return f_interp; } diff --git a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx deleted file mode 100644 index f206ed1e0f..0000000000 --- a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************************** - * Copyright 2018 B.D.Dudson, P. Hill - * - * Contact: Ben Dudson, bd512@york.ac.uk - * - * This file is part of BOUT++. - * - * BOUT++ is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * BOUT++ is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with BOUT++. If not, see . - * - **************************************************************************/ - -#include "bout/globals.hxx" -#include "bout/index_derivs_interface.hxx" -#include "bout/interpolation_xz.hxx" -#include "bout/mesh.hxx" - -#include - -Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, - const std::string& region) const { - ASSERT1(f.getMesh() == localmesh); - Field3D f_interp(f.getMesh()); - f_interp.allocate(); - - // Derivatives are used for tension and need to be on dimensionless - // coordinates - Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT"); - Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", "RGN_ALL"); - localmesh->communicate_no_slices(fx, fz); - Field3D fxz = bout::derivatives::index::DDX(fz, CELL_DEFAULT, "DEFAULT"); - localmesh->communicate_no_slices(fxz); - - const auto curregion{getRegion(region)}; - BOUT_FOR(i, curregion) { - const auto iyp = i.yp(y_offset); - - const auto ic = i_corner[i]; - const auto iczp = ic.zp(); - const auto icxp = ic.xp(); - const auto icxpzp = iczp.xp(); - - // Interpolate f in X at Z - const BoutReal f_z = - f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - - // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] - + fx[icxpzp] * h11_x[i]; - - // Interpolate fz in X at Z - const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] - + fxz[icxp] * h11_x[i]; - - // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] - + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - - // Interpolate in Z - BoutReal result = - +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; - - ASSERT2(std::isfinite(result) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - // Monotonicity - // Force the interpolated result to be in the range of the - // neighbouring cell values. This prevents unphysical overshoots, - // but also degrades accuracy near maxima and minima. - // Perhaps should only impose near boundaries, since that is where - // problems most obviously occur. - const BoutReal localmax = BOUTMAX(f[ic], f[icxp], f[iczp], f[icxpzp]); - const BoutReal localmin = BOUTMIN(f[ic], f[icxp], f[iczp], f[icxpzp]); - - ASSERT2(std::isfinite(localmax) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - ASSERT2(std::isfinite(localmin) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - const auto diff = ((localmax - localmin) * rtol) + atol; - - result = std::min(result, localmax + diff); - result = std::max(result, localmin - diff); - - f_interp[iyp] = result; - } - return f_interp; -} diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index a58554dc82..ec8bcc0502 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -23,6 +23,7 @@ * **************************************************************************/ +#include "parallel/fci_comm.hxx" #include #include #include @@ -86,6 +87,14 @@ namespace { RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; RegisterXZInterpolation registerinterpmonotonichermitespline{ "monotonichermitespline"}; +RegisterXZInterpolation registerinterphermitesplines{ + "hermitesplineserial"}; +RegisterXZInterpolation + registerinterpmonotonichermitesplines{"monotonichermitesplineserial"}; +RegisterXZInterpolation registerinterphermitesplinel{ + "hermitesplinelegacy"}; +RegisterXZInterpolation + registerinterpmonotonichermitesplinel{"monotonichermitesplinelegacy"}; RegisterXZInterpolation registerinterplagrange4pt{"lagrange4pt"}; RegisterXZInterpolation registerinterpbilinear{"bilinear"}; } // namespace diff --git a/src/mesh/parallel/fci_comm.cxx b/src/mesh/parallel/fci_comm.cxx new file mode 100644 index 0000000000..3d213ad6e4 --- /dev/null +++ b/src/mesh/parallel/fci_comm.cxx @@ -0,0 +1,224 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#include "fci_comm.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/field3d.hxx" +#include "bout/region.hxx" + +#include +#include +#include +#include + +fci_comm::ProcLocal fci_comm::GlobalToLocal1D::convert(int id) const { + if (periodic) { + while (id < mg) { + id += global; + } + while (id >= global + mg) { + id -= global; + } + } + const int idwo = id - mg; + int proc = idwo / local; + if (not periodic) { + if (proc >= npe) { + proc = npe - 1; + } + } + const int loc = id - (local * proc); +#if CHECK > 1 + ASSERT1(loc >= 0); + ASSERT1(loc <= localwith); + ASSERT1(proc >= 0); + ASSERT1(proc < npe); + if (periodic and (loc < mg or loc >= local + mg)) { + throw BoutException( + "GlobalToLocal1D failure - expected {} < {} < {} because we are periodic\n", mg, + loc, local + mg); + } +#endif + return {proc, loc}; +} + +void GlobalField3DAccess::setup() { + // We need to send a list of data to every processor of which data + // we want We also get a list of all the data that every other + // processor wants Some of theses lists may be empty. We still need + // to send them, later we can skip that. We also compute where the + // requested data will be stored later on. This is currently + // implemented as a map, to memory efficient, as the data is + // sparse. We could also store the mapping as a dense array, if it + // turns out this lookup is not fast enough, but that may limit + // scaling at some point. + ASSERT2(is_setup == false); +#ifdef _OPENMP + for (auto& o_id : openmp_ids) { + ids.merge(o_id); + } + openmp_ids.clear(); +#endif + toGet.resize(static_cast(global2local_x.getNPE() * global2local_y.getNPE() + * global2local_z.getNPE())); + for (const auto id : ids) { + const IndG3D gind{id, global2local_y.getGlobalWith(), global2local_z.getGlobalWith()}; + const auto pix = global2local_x.convert(gind.x()); + const auto piy = global2local_y.convert(gind.y()); + const auto piz = global2local_z.convert(gind.z()); + ASSERT3(piz.proc == 0); + toGet[mesh->getProcIndex(pix.proc, piy.proc, piz.proc)].push_back( + xyzlocal.convert(pix.index, piy.index, piz.index).ind); + } + for (auto& v : toGet) { + std::sort(v.begin(), v.end()); + } + commCommLists(); + { + int offset = 0; + for (const auto& get : toGet) { + getOffsets.push_back(offset); + offset += get.size(); + } + getOffsets.push_back(offset); + } + for (const auto id : ids) { + const IndG3D gind{id, global2local_y.getGlobalWith(), global2local_z.getGlobalWith()}; + const auto pix = global2local_x.convert(gind.x()); + const auto piy = global2local_y.convert(gind.y()); + const auto piz = global2local_z.convert(gind.z()); + ASSERT3(piz.proc == 0); + const auto proc = mesh->getProcIndex(pix.proc, piy.proc, piz.proc); + const auto& vec = toGet[proc]; + const auto tofind = xyzlocal.convert(pix.index, piy.index, piz.index).ind; + auto it = std::lower_bound(vec.begin(), vec.end(), tofind); + ASSERT3(it != vec.end()); + ASSERT3(*it == tofind); + mapping[id] = std::distance(vec.begin(), it) + getOffsets[proc]; + } + is_setup = true; +} + +void GlobalField3DAccess::commCommLists() { + toSend.resize(toGet.size()); + std::vector toGetSizes(toGet.size(), -1); + std::vector toSendSizes(toSend.size(), -1); +#if CHECK > 3 + { + int thisproc; + MPI_Comm_rank(comm, &thisproc); + ASSERT0(thisproc + == mesh->getProcIndex(mesh->getXProcIndex(), mesh->getYProcIndex(), + mesh->getZProcIndex())); + } +#endif + std::vector reqs(toSend.size()); + for (size_t proc = 0; proc < toGet.size(); ++proc) { + auto ret = MPI_Irecv(&toSendSizes[proc], 1, MPI_INT, proc, 666, comm, &reqs[proc]); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + toGetSizes[proc] = toGet[proc].size(); + auto ret = MPI_Send(&toGetSizes[proc], 1, MPI_INT, proc, 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + std::vector reqs2(toSend.size()); + int cnt = 0; + for ([[maybe_unused]] auto dummy : reqs) { + int ind{0}; + auto ret = MPI_Waitany(reqs.size(), reqs.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + ASSERT2(static_cast(ind) < toSend.size()); + ASSERT3(toSendSizes[ind] >= 0); + if (toSendSizes[ind] == 0) { + continue; + } + sendBufferSize += toSendSizes[ind]; + toSend[ind].resize(toSendSizes[ind], -1); + + ret = MPI_Irecv(toSend[ind].data(), toSend[ind].size(), MPI_INT, ind, 666 * 666, comm, + reqs2.data() + cnt++); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (!toGet[proc].empty()) { + const auto ret = MPI_Send(toGet[proc].data(), toGet[proc].size(), MPI_INT, proc, + 666 * 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + } + for (int c = 0; c < cnt; c++) { + int ind{0}; + const auto ret = MPI_Waitany(cnt, reqs2.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + } +} + +std::vector GlobalField3DAccess::communicate_data(const Field3D& f) { + // Ensure setup is called, to setup communication pattern + if (not is_setup) { + setup(); + } + // We now send the previosly requested data to every processor, that wanted some. + // We also get the data we requested. + ASSERT2(f.getMesh() == mesh); + std::vector data(getOffsets.back()); + std::vector sendBuffer(sendBufferSize); + std::vector reqs(toSend.size()); + int cnt1 = 0; + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (toGet[proc].empty()) { + continue; + } + auto ret = MPI_Irecv(data.data() + getOffsets[proc], toGet[proc].size(), MPI_DOUBLE, + proc, 666, comm, reqs.data() + cnt1); + ASSERT0(ret == MPI_SUCCESS); + cnt1++; + } + int cnt = 0; + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (toSend[proc].empty()) { + continue; + } + const void* start = sendBuffer.data() + cnt; + for (auto i : toSend[proc]) { + sendBuffer[cnt++] = f[Ind3D(i)]; + } + auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + for (int j = 0; j < cnt1; ++j) { + int ind{0}; + auto ret = MPI_Waitany(cnt1, reqs.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + ASSERT3(ind >= 0); + ASSERT3(ind < cnt1); + } + return data; +} diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx new file mode 100644 index 0000000000..324cae8a22 --- /dev/null +++ b/src/mesh/parallel/fci_comm.hxx @@ -0,0 +1,187 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#pragma once + +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/boutcomm.hxx" +#include "bout/field3d.hxx" +#include "bout/mesh.hxx" +#include "bout/region.hxx" +#include +#include +#include +#include +#include +#include + +/// GlobalField3DAccess is a class to set up the communication +/// patterns, to request abitrary data form the global +/// field. GlobalField3DAccessInstance is an instance. after +/// GlobalField3DAccess has communicated the pre-requested data. Only +/// data that has been pre-requested can be requested from +/// GlobalField3DAccessInstance after communication. +/// +/// The usage looks a bit like this: +/// +/// GlobalField3DAccess gfa; +/// // Request an abitrary number of global indices ``gi``: +/// gfa.request(gi) +/// // Communicate data +/// const auto data = gfa.communicate(f3d); +/// // Now data can be accesssed for all previously requested ``gi``s: +/// data[gi] +class GlobalField3DAccess; + +namespace fci_comm { +struct ProcLocal { + int proc; + int index; +}; + +/// Class to convert global to local indices for 1D +/// given the global index, it returns the local index and the processor. +struct GlobalToLocal1D { + GlobalToLocal1D(int mg, int npe, int localwith, bool periodic) + : mg(mg), npe(npe), localwith(localwith), local(localwith - (2 * mg)), + global(local * npe), globalwith(global + (2 * mg)), periodic(periodic){}; + ProcLocal convert(int id) const; + int getLocalWith() const { return localwith; } + int getGlobalWith() const { return globalwith; } + int getNPE() const { return npe; } + +private: + int mg; + int npe; + int localwith; + int local; + int global; + int globalwith; + bool periodic; +}; + +/// Convert an x-y-z tupple to an Ind +template +struct XYZ2Ind { + XYZ2Ind(const int nx, const int ny, const int nz) : nx(nx), ny(ny), nz(nz) {} + ind convert(const int x, const int y, const int z) const { + return {z + ((y + x * ny) * nz), ny, nz}; + } + ind operator()(const int x, const int y, const int z) const { return convert(x, y, z); } + +private: + int nx; + int ny; + int nz; +}; +} // namespace fci_comm + +class GlobalField3DAccessInstance { +public: + const BoutReal& operator[](IndG3D ind) const; + GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, + std::vector&& data) + : gfa(gfa), data(std::move(data)){}; + +private: + const GlobalField3DAccess* gfa; + std::vector data; +}; + +class GlobalField3DAccess { +public: + friend class GlobalField3DAccessInstance; + GlobalField3DAccess(Mesh* mesh) + : mesh(mesh), + global2local_x(mesh->xstart, mesh->getNXPE(), mesh->LocalNx, mesh->periodicX), + global2local_y(mesh->ystart, mesh->getNYPE(), mesh->LocalNy, true), + global2local_z(mesh->zstart, mesh->getNZPE(), mesh->LocalNz, true), + xyzlocal(global2local_x.getLocalWith(), global2local_y.getLocalWith(), + global2local_z.getLocalWith()), + xyzglobal(global2local_x.getGlobalWith(), global2local_y.getGlobalWith(), + global2local_z.getGlobalWith()), + comm(BoutComm::get()) { +#ifdef _OPENMP + openmp_ids.resize(omp_get_max_threads()); +#endif +#if CHECK >= 2 + // We could also allow false, but then we would need to ensure it + // is false everywhere. + for (int x = 0; x < mesh->LocalNx; ++x) { + ASSERT2(mesh->periodicY(x) == true); + } +#endif + }; + void request(IndG3D ind) { + ASSERT2(is_setup == false); +#ifdef _OPENMP + ASSERT2(openmp_ids.size() > static_cast(omp_get_thread_num())); + openmp_ids[omp_get_thread_num()].emplace(ind.ind); +#else + ids.emplace(ind.ind); +#endif + } + + GlobalField3DAccessInstance communicate(const Field3D& f) { + return {this, communicate_data(f)}; + } + std::unique_ptr communicate_asPtr(const Field3D& f) { + return std::make_unique(this, communicate_data(f)); + } + +private: + void setup(); + void commCommLists(); + Mesh* mesh; +#ifdef _OPENMP + // openmp thread-local variable + std::vector> openmp_ids; +#endif + std::set ids; + std::map mapping; + bool is_setup{false}; + fci_comm::GlobalToLocal1D global2local_x; + fci_comm::GlobalToLocal1D global2local_y; + fci_comm::GlobalToLocal1D global2local_z; + +public: + fci_comm::XYZ2Ind xyzlocal; + fci_comm::XYZ2Ind xyzglobal; + +private: + std::vector> toGet; + std::vector> toSend; + std::vector getOffsets; + int sendBufferSize{0}; + MPI_Comm comm; + std::vector communicate_data(const Field3D& f); +}; + +inline const BoutReal& GlobalField3DAccessInstance::operator[](IndG3D ind) const { + auto it = gfa->mapping.find(ind.ind); + ASSERT2(it != gfa->mapping.end()); + return data[it->second]; +} diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 34340e53f4..c76ad27833 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -17,7 +17,6 @@ import zoidberg as zb from boutdata.collect import collect from boututils.run_wrapper import build_and_log, launch_safe from numpy import arange, array, linspace, log, polyfit -from scipy.interpolate import RectBivariateSpline as RBS # Global parameters DIRECTORY = "data" @@ -32,15 +31,16 @@ NLIST = [8, 16, 32, 64] dx = 1.0 / array(NLIST) -def myRBS(a, b, c): - """RectBivariateSpline, but automatically tune spline degree for small arrays""" - mx, _ = c.shape - kx = max(mx - 1, 1) - kx = min(kx, 3) - return RBS(a, b, c, kx=kx) +success = True +error_2 = {} +error_inf = {} +method_orders = {} -zb.poloidal_grid.RectBivariateSpline = myRBS +# Run with periodic Y? +yperiodic = True + +build_and_log("FCI MMS test") def quiet_collect(name: str) -> float: diff --git a/tests/integrated/test-fci-mpi/runtest b/tests/integrated/test-fci-mpi/runtest index c18ab0391d..9962567217 100755 --- a/tests/integrated/test-fci-mpi/runtest +++ b/tests/integrated/test-fci-mpi/runtest @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Python script to run and analyse MMS test +# Python script to run and analyse MPI test from boututils.run_wrapper import build_and_log, launch_safe from boutdata.collect import collect @@ -14,13 +14,13 @@ NLIST = [1, 2, 4] MAXCORES = 8 NSLICES = [1] -build_and_log("FCI MMS test") +build_and_log("FCI MPI test") COLLECT_KW = dict(info=False, xguards=False, yguards=False, path="data") def run_case(nxpe: int, nype: int, mthread: int): - cmd = f"./fci_mpi NXPE={nxpe} NYPE={nype}" + cmd = f"./fci_mpi NXPE={nxpe} NYPE={nype} mesh:paralleltransform:xzinterpolation:type={implementation}" print(f"Running command: {cmd}") _, out = launch_safe(cmd, nproc=nxpe * nype, mthread=mthread, pipe=True) @@ -46,28 +46,29 @@ def test_case(nxpe: int, nype: int, mthread: int, ref: dict) -> bool: failures = [] -for nslice in NSLICES: - # reference data! - run_case(1, 1, MAXCORES) - - ref = {} - for i in range(4): - for yp in range(1, nslice + 1): - for y in [-yp, yp]: - name = f"output_{i}_{y:+d}" - ref[name] = collect(name, **COLLECT_KW) - - for nxpe, nype in itertools.product(NLIST, NLIST): - if (nxpe, nype) == (1, 1): - # reference case, done above - continue - - if nxpe * nype > MAXCORES: - continue - - mthread = MAXCORES // (nxpe * nype) - failures_ = test_case(nxpe, nype, mthread, ref) - failures.extend(failures_) +for implementation in ["hermitespline", "monotonichermitespline"]: + for nslice in NSLICES: + # reference data! + run_case(1, 1, MAXCORES) + + ref = {} + for i in range(4): + for yp in range(1, nslice + 1): + for y in [-yp, yp]: + name = f"output_{i}_{y:+d}" + ref[name] = collect(name, **COLLECT_KW) + + for nxpe, nype in itertools.product(NLIST, NLIST): + if (nxpe, nype) == (1, 1): + # reference case, done above + continue + + if nxpe * nype > MAXCORES: + continue + + mthread = MAXCORES // (nxpe * nype) + failures_ = test_case(nxpe, nype, mthread, ref) + failures.extend(failures_) success = len(failures) == 0 diff --git a/tests/integrated/test-interpolate/runtest b/tests/integrated/test-interpolate/runtest index f5460aff2a..91503f5f9e 100755 --- a/tests/integrated/test-interpolate/runtest +++ b/tests/integrated/test-interpolate/runtest @@ -23,6 +23,8 @@ labels = [r"$" + var + r"$" for var in varlist] methods = { "hermitespline": 3, + "hermitesplinelegacy": 3, + "hermitesplineserial": 3, "lagrange4pt": 3, "bilinear": 2, } diff --git a/tests/unit/fake_mesh.hxx b/tests/unit/fake_mesh.hxx index 4656ee0282..cb7509ebde 100644 --- a/tests/unit/fake_mesh.hxx +++ b/tests/unit/fake_mesh.hxx @@ -113,9 +113,13 @@ public: int getNXPE() const override { return 1; } int getNYPE() const override { return 1; } int getNZPE() const override { return 1; } - int getXProcIndex() const override { return 1; } - int getYProcIndex() const override { return 1; } - int getZProcIndex() const override { return 1; } + int getXProcIndex() const override { return 0; } + int getYProcIndex() const override { return 0; } + int getZProcIndex() const override { return 0; } + int getProcIndex([[maybe_unused]] int X, [[maybe_unused]] int Y, + [[maybe_unused]] int Z) const override { + return 0; + } bool firstX() const override { return true; } bool lastX() const override { return true; } int sendXOut(BoutReal* UNUSED(buffer), int UNUSED(size), int UNUSED(tag)) override {