diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index abd11c7762..7fbaa021ce 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -11,3 +11,5 @@ a71cad2dd6ace5741a754e2ca7daacd4bb094e0e 2c2402ed59c91164eaff46dee0f79386b7347e9e 05b7c571544c3bcb153fce67d12b9ac48947fc2d c8f38049359170a34c915e209276238ea66b9a1e +65880e4af16cb95438437bf057c4b9fdb427b142 +d1cfb8abd6aa5c76e6c1a4d7ab20929c65f8afc2 diff --git a/CMakeLists.txt b/CMakeLists.txt index e8c4657fb0..11ff1df22e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -296,11 +296,12 @@ set(BOUT_SOURCES ./src/mesh/interpolation/hermite_spline_z.cxx ./src/mesh/interpolation/interpolation_z.cxx ./src/mesh/interpolation/lagrange_4pt_xz.cxx - ./src/mesh/interpolation/monotonic_hermite_spline_xz.cxx ./src/mesh/invert3x3.hxx ./src/mesh/mesh.cxx ./src/mesh/parallel/fci.cxx ./src/mesh/parallel/fci.hxx + ./src/mesh/parallel/fci_comm.cxx + ./src/mesh/parallel/fci_comm.hxx ./src/mesh/parallel/identity.cxx ./src/mesh/parallel/shiftedmetric.cxx ./src/mesh/parallel/shiftedmetricinterp.cxx diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 4dd24259fd..6e4f12b1b0 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -27,6 +27,7 @@ #include #include #include +#include #define USE_NEW_WEIGHTS 1 #if BOUT_HAS_PETSC @@ -38,6 +39,7 @@ #endif class Options; +class GlobalField3DAccess; /// Interpolate a field onto a perturbed set of points const Field3D interpolate(const Field3D& f, const Field3D& delta_x, @@ -135,7 +137,16 @@ public: } }; -class XZHermiteSpline : public XZInterpolation { +/// Monotonic Hermite spline interpolator +/// +/// Similar to XZHermiteSpline, so uses most of the same code. +/// Forces the interpolated result to be in the range of the +/// neighbouring cell values. This prevents unphysical overshoots, +/// but also degrades accuracy near maxima and minima. +/// Perhaps should only impose near boundaries, since that is where +/// problems most obviously occur. +template +class XZHermiteSplineBase : public XZInterpolation { protected: /// This is protected rather than private so that it can be /// extended and used by HermiteSplineMonotonic @@ -143,6 +154,9 @@ protected: Tensor> i_corner; // index of bottom-left grid point Tensor k_corner; // z-index of bottom-left grid point + std::unique_ptr gf3daccess; + Tensor> g3dinds; + // Basis functions for cubic Hermite spline interpolation // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline // The h00 and h01 basis functions are applied to the function itself @@ -167,15 +181,20 @@ protected: Vec rhs, result; #endif + /// Factors to allow for some wiggleroom + BoutReal abs_fac_monotonic{1e-2}; + BoutReal rel_fac_monotonic{1e-1}; + public: - XZHermiteSpline(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) - : XZHermiteSpline(0, mesh) {} - XZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr); - XZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { + XZHermiteSplineBase(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) + : XZHermiteSplineBase(0, mesh, options) {} + XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr, Options* options = nullptr); + XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr, + Options* options = nullptr) + : XZHermiteSplineBase(y_offset, mesh, options) { setRegion(regionFromMask(mask, localmesh)); } - ~XZHermiteSpline() { + ~XZHermiteSplineBase() override { #if HS_USE_PETSC if (isInit) { MatDestroy(&petscWeights); @@ -205,61 +224,8 @@ public: getWeightsForYApproximation(int i, int j, int k, int yoffset) override; }; -/// Monotonic Hermite spline interpolator -/// -/// Similar to XZHermiteSpline, so uses most of the same code. -/// Forces the interpolated result to be in the range of the -/// neighbouring cell values. This prevents unphysical overshoots, -/// but also degrades accuracy near maxima and minima. -/// Perhaps should only impose near boundaries, since that is where -/// problems most obviously occur. -/// -/// You can control how tight the clipping to the range of the neighbouring cell -/// values through ``rtol`` and ``atol``: -/// -/// diff = (max_of_neighours - min_of_neighours) * rtol + atol -/// -/// and the interpolated value is instead clipped to the range -/// ``[min_of_neighours - diff, max_of_neighours + diff]`` -class XZMonotonicHermiteSpline : public XZHermiteSpline { - /// Absolute tolerance for clipping - BoutReal atol = 0.0; - /// Relative tolerance for clipping - BoutReal rtol = 1.0; - -public: - XZMonotonicHermiteSpline(Mesh* mesh = nullptr, Options* options = nullptr) - : XZHermiteSpline(0, mesh), - atol{(*options)["atol"] - .doc("Absolute tolerance for clipping overshoot") - .withDefault(0.0)}, - rtol{(*options)["rtol"] - .doc("Relative tolerance for clipping overshoot") - .withDefault(1.0)} { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(mask, y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - - using XZHermiteSpline::interpolate; - /// Interpolate using precalculated weights. - /// This function is called by the other interpolate functions - /// in the base class XZHermiteSpline. - Field3D interpolate(const Field3D& f, - const std::string& region = "RGN_NOBNDRY") const override; -}; +using XZMonotonicHermiteSpline = XZHermiteSplineBase; +using XZHermiteSpline = XZHermiteSplineBase; /// XZLagrange4pt interpolation class /// diff --git a/include/bout/msg_stack.hxx b/include/bout/msg_stack.hxx index 1abb26d2c7..0fe22cd0e6 100644 --- a/include/bout/msg_stack.hxx +++ b/include/bout/msg_stack.hxx @@ -80,7 +80,7 @@ public: } void pop() {} - void pop(int [[maybe_unused]] id) {} + void pop([[maybe_unused]] int id) {} void clear() {} void dump() {} diff --git a/include/bout/region.hxx b/include/bout/region.hxx index bb1cf82bf1..17ded65987 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -139,7 +139,7 @@ class BoutMask; BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) -enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 }; +enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2, IND_GLOBAL_3D = 3 }; /// Indices base class for Fields -- Regions are dereferenced into these /// @@ -386,6 +386,7 @@ inline SpecificInd operator-(SpecificInd lhs, const SpecificInd& rhs) { using Ind3D = SpecificInd; using Ind2D = SpecificInd; using IndPerp = SpecificInd; +using IndG3D = SpecificInd; /// Get string representation of Ind3D inline std::string toString(const Ind3D& i) { diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 27a4f1d614..2a4f32070a 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -21,11 +21,16 @@ **************************************************************************/ #include "../impls/bout/boutmesh.hxx" +#include "../parallel/fci_comm.hxx" #include "bout/bout.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" +#include "bout/mask.hxx" +#include +#include +#include #include class IndConverter { @@ -35,7 +40,7 @@ class IndConverter { xstart(mesh->xstart), ystart(mesh->ystart), zstart(0), lnx(mesh->LocalNx - 2 * xstart), lny(mesh->LocalNy - 2 * ystart), lnz(mesh->LocalNz - 2 * zstart) {} - // ix and iy are global indices + // ix and iz are global indices // iy is local int fromMeshToGlobal(int ix, int iy, int iz) { const int xstart = mesh->xstart; @@ -102,11 +107,21 @@ class IndConverter { } }; -XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) +template +XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin, + Options* options) : XZInterpolation(y_offset, meshin), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { + if constexpr (monotonic) { + if (options == nullptr) { + options = &Options::root()["mesh:paralleltransform:xzinterpolation"]; + } + abs_fac_monotonic = (*options)["abs_tol"].withDefault(abs_fac_monotonic); + rel_fac_monotonic = (*options)["rel_tol"].withDefault(rel_fac_monotonic); + } + // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); @@ -139,6 +154,10 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) MatCreateAIJ(BoutComm::get(), m, m, M, M, 16, nullptr, 16, nullptr, &petscWeights); #endif #endif + if constexpr (monotonic) { + gf3daccess = std::make_unique(localmesh); + g3dinds.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); + } #ifndef HS_USE_PETSC if (localmesh->getNXPE() > 1) { throw BoutException("Require PETSc for MPI splitting in X"); @@ -146,8 +165,10 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) #endif } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const std::string& region) { +template +void XZHermiteSplineBase::calcWeights( + const Field3D& delta_x, const Field3D& delta_z, + [[maybe_unused]] const std::string& region) { const int ny = localmesh->LocalNy; const int nz = localmesh->LocalNz; @@ -156,6 +177,8 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z #ifdef HS_USE_PETSC IndConverter conv{localmesh}; #endif + [[maybe_unused]] const int y_global_offset = + localmesh->getYProcIndex() * (localmesh->yend - localmesh->ystart + 1); BOUT_FOR(i, getRegion(region)) { const int x = i.x(); const int y = i.y(); @@ -300,6 +323,18 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z } #endif #endif + if constexpr (monotonic) { + const auto gind = + gf3daccess->xyzg(i_corn, y + y_offset + y_global_offset, k_corner(x, y, z)); + gf3daccess->get(gind); + gf3daccess->get(gind.xp(1)); + gf3daccess->get(gind.zp(1)); + gf3daccess->get(gind.xp(1).zp(1)); + g3dinds[i] = {gind.ind, gind.xp(1).ind, gind.zp(1).ind, gind.xp(1).zp(1).ind}; + } + } + if constexpr (monotonic) { + gf3daccess->setup(); } #ifdef HS_USE_PETSC MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); @@ -311,8 +346,11 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z #endif } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { +template +void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, + const Field3D& delta_z, + const BoutMask& mask, + const std::string& region) { setMask(mask); calcWeights(delta_x, delta_z, region); } @@ -333,8 +371,14 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z * (i, j+1, k+1) h01_z + h10_z / 2 * (i, j+1, k+2) h11_z / 2 */ +template std::vector -XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { +XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, + int yoffset) { + if (localmesh->getNXPE() > 1) { + throw BoutException("It is likely that the function calling this is not handling the " + "result correctly."); + } const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); @@ -347,16 +391,21 @@ XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { {i, j + yoffset, k_mod_p2, 0.5 * h11_z(i, j, k)}}; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region) const { +template +Field3D XZHermiteSplineBase::interpolate( + const Field3D& f, [[maybe_unused]] const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; - const auto region2 = - y_offset == 0 ? "RGN_NOY" : fmt::format("RGN_YPAR_{:+d}", y_offset); + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; -#if USE_NEW_WEIGHTS -#ifdef HS_USE_PETSC + std::unique_ptr gf; + if constexpr (monotonic) { + gf = gf3daccess->communicate_asPtr(f); + } + +#if USE_NEW_WEIGHTS and defined(HS_USE_PETSC) BoutReal* ptr; const BoutReal* cptr; VecGetArray(rhs, &ptr); @@ -366,10 +415,10 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region VecGetArrayRead(result, &cptr); BOUT_FOR(i, f.getRegion(region2)) { f_interp[i] = cptr[int(i)]; - ASSERT2(std::isfinite(cptr[int(i)])); - } - VecRestoreArrayRead(result, &cptr); -#else + if constexpr (monotonic) { + const auto iyp = i; + const auto i = iyp.ym(y_offset); +#elif USE_NEW_WEIGHTS // No Petsc BOUT_FOR(i, getRegion(region)) { auto ic = i_corner[i]; auto iyp = i.yp(y_offset); @@ -381,15 +430,16 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region f_interp[iyp] += newWeights[w * 4 + 2][i] * f[ic.zp().xp(w - 1)]; f_interp[iyp] += newWeights[w * 4 + 3][i] * f[ic.zp(2).xp(w - 1)]; } - } -#endif -#else + if constexpr (monotonic) { +#else // Legacy interpolation + // TODO(peter): Should we apply dirichlet BCs to derivatives? // Derivatives are used for tension and need to be on dimensionless // coordinates // f has been communcated, and thus we can assume that the x-boundaries are // also valid in the y-boundary. Thus the differentiated field needs no // extra comms. + // TODO(dave) Add assert that we do not use z-splitting or z-guards. Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); @@ -422,6 +472,24 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region f_interp[iyp] = +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; + if constexpr (monotonic) { +#endif + const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], + (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; + const auto minmax = std::minmax(corners); + const auto diff = + ((minmax.second - minmax.first) * rel_fac_monotonic) + abs_fac_monotonic; + f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); + f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); + } +#if USE_NEW_WEIGHTS and defined(HS_USE_PETSC) + ASSERT2(std::isfinite(cptr[int(i)])); + } + VecRestoreArrayRead(result, &cptr); +#elif USE_NEW_WEIGHTS + ASSERT2(std::isfinite(f_interp[iyp])); + } +#else ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart || i.x() > localmesh->xend); } @@ -431,15 +499,24 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region return f_interp; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { +template +Field3D XZHermiteSplineBase::interpolate(const Field3D& f, + const Field3D& delta_x, + const Field3D& delta_z, + const std::string& region) { calcWeights(delta_x, delta_z, region); return interpolate(f, region); } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { +template +Field3D +XZHermiteSplineBase::interpolate(const Field3D& f, const Field3D& delta_x, + const Field3D& delta_z, const BoutMask& mask, + const std::string& region) { calcWeights(delta_x, delta_z, mask, region); return interpolate(f, region); } + +// ensure they are instantiated +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index e16b2699d1..c82c9fa36e 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -20,6 +20,7 @@ * **************************************************************************/ +#include "fmt/format.h" #include "bout/globals.hxx" #include "bout/interpolation_xz.hxx" #include "bout/mesh.hxx" @@ -133,7 +134,10 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) // Then in X f_interp(x, y_next, z) = lagrange_4pt(xvals, t_x(x, y, z)); + ASSERT2(std::isfinite(f_interp(x, y_next, z))); } + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; + f_interp.setRegion(region2); return f_interp; } diff --git a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx deleted file mode 100644 index f206ed1e0f..0000000000 --- a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************************** - * Copyright 2018 B.D.Dudson, P. Hill - * - * Contact: Ben Dudson, bd512@york.ac.uk - * - * This file is part of BOUT++. - * - * BOUT++ is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * BOUT++ is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with BOUT++. If not, see . - * - **************************************************************************/ - -#include "bout/globals.hxx" -#include "bout/index_derivs_interface.hxx" -#include "bout/interpolation_xz.hxx" -#include "bout/mesh.hxx" - -#include - -Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, - const std::string& region) const { - ASSERT1(f.getMesh() == localmesh); - Field3D f_interp(f.getMesh()); - f_interp.allocate(); - - // Derivatives are used for tension and need to be on dimensionless - // coordinates - Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT"); - Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", "RGN_ALL"); - localmesh->communicate_no_slices(fx, fz); - Field3D fxz = bout::derivatives::index::DDX(fz, CELL_DEFAULT, "DEFAULT"); - localmesh->communicate_no_slices(fxz); - - const auto curregion{getRegion(region)}; - BOUT_FOR(i, curregion) { - const auto iyp = i.yp(y_offset); - - const auto ic = i_corner[i]; - const auto iczp = ic.zp(); - const auto icxp = ic.xp(); - const auto icxpzp = iczp.xp(); - - // Interpolate f in X at Z - const BoutReal f_z = - f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - - // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] - + fx[icxpzp] * h11_x[i]; - - // Interpolate fz in X at Z - const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] - + fxz[icxp] * h11_x[i]; - - // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] - + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - - // Interpolate in Z - BoutReal result = - +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; - - ASSERT2(std::isfinite(result) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - // Monotonicity - // Force the interpolated result to be in the range of the - // neighbouring cell values. This prevents unphysical overshoots, - // but also degrades accuracy near maxima and minima. - // Perhaps should only impose near boundaries, since that is where - // problems most obviously occur. - const BoutReal localmax = BOUTMAX(f[ic], f[icxp], f[iczp], f[icxpzp]); - const BoutReal localmin = BOUTMIN(f[ic], f[icxp], f[iczp], f[icxpzp]); - - ASSERT2(std::isfinite(localmax) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - ASSERT2(std::isfinite(localmin) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - const auto diff = ((localmax - localmin) * rtol) + atol; - - result = std::min(result, localmax + diff); - result = std::max(result, localmin - diff); - - f_interp[iyp] = result; - } - return f_interp; -} diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index a58554dc82..f80a361bd1 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -23,6 +23,7 @@ * **************************************************************************/ +#include "parallel/fci_comm.hxx" #include #include #include diff --git a/src/mesh/parallel/fci_comm.cxx b/src/mesh/parallel/fci_comm.cxx new file mode 100644 index 0000000000..65a8731740 --- /dev/null +++ b/src/mesh/parallel/fci_comm.cxx @@ -0,0 +1,37 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#include "fci_comm.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/region.hxx" + +#include + +const BoutReal& GlobalField3DAccessInstance::operator[](IndG3D ind) const { + auto it = gfa->mapping.find(ind.ind); + ASSERT2(it != gfa->mapping.end()); + return data[it->second]; +} diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx new file mode 100644 index 0000000000..8c34885981 --- /dev/null +++ b/src/mesh/parallel/fci_comm.hxx @@ -0,0 +1,310 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#pragma once + +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/boutcomm.hxx" +#include "bout/field3d.hxx" +#include "bout/mesh.hxx" +#include "bout/region.hxx" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +class GlobalField3DAccess; + +namespace fci_comm { +struct ProcLocal { + int proc; + int ind; +}; +struct globalToLocal1D { + int mg; + int npe; + int localwith; + int local; + int global; + int globalwith; + bool periodic; + globalToLocal1D(int mg, int npe, int localwith, bool periodic) + : mg(mg), npe(npe), localwith(localwith), local(localwith - (2 * mg)), + global(local * npe), globalwith(global + (2 * mg)), periodic(periodic){}; + ProcLocal convert(int id) const { + if (periodic) { + while (id < mg) { + id += global; + } + while (id >= global + mg) { + id -= global; + } + } + const int idwo = id - mg; + int proc = idwo / local; + if (not periodic) { + if (proc >= npe) { + proc = npe - 1; + } + } + const int loc = id - (local * proc); +#if CHECK > 1 + if ((loc < 0 or loc > localwith or proc < 0 or proc >= npe) + or (periodic and (loc < mg or loc >= local + mg))) { + printf("globalToLocal1D failure: %d %d, %d %d, %d %d %s\n", id, idwo, globalwith, + npe, proc, loc, periodic ? "periodic" : "non-periodic"); + ASSERT0(false); + } +#endif + return {proc, loc}; + } +}; +template +struct XYZ2Ind { + const int nx; + const int ny; + const int nz; + ind convert(const int x, const int y, const int z) const { + return {z + ((y + x * ny) * nz), ny, nz}; + } + ind operator()(const int x, const int y, const int z) const { return convert(x, y, z); } + XYZ2Ind(const int nx, const int ny, const int nz) : nx(nx), ny(ny), nz(nz) {} +}; +} // namespace fci_comm + +class GlobalField3DAccessInstance { +public: + const BoutReal& operator[](IndG3D ind) const; + + GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, + std::vector&& data) + : gfa(gfa), data(std::move(data)){}; + +private: + const GlobalField3DAccess* gfa; + std::vector data; +}; + +class GlobalField3DAccess { +public: + friend class GlobalField3DAccessInstance; + GlobalField3DAccess(Mesh* mesh) + : mesh(mesh), g2lx(mesh->xstart, mesh->getNXPE(), mesh->LocalNx, false), + g2ly(mesh->ystart, mesh->getNYPE(), mesh->LocalNy, true), + g2lz(mesh->zstart, 1, mesh->LocalNz, true), + xyzl(g2lx.localwith, g2ly.localwith, g2lz.localwith), + xyzg(g2lx.globalwith, g2ly.globalwith, g2lz.globalwith), comm(BoutComm::get()) { +#ifdef _OPENMP + o_ids.resize(omp_get_max_threads()); +#endif + }; + void get(IndG3D ind) { + ASSERT2(is_setup == false); +#ifdef _OPENMP + ASSERT2(o_ids.size() > static_cast(omp_get_thread_num())); + o_ids[omp_get_thread_num()].emplace(ind.ind); +#else + ids.emplace(ind.ind); +#endif + } + + void operator[](IndG3D ind) { get(ind); } + void setup() { + ASSERT2(is_setup == false); +#ifdef _OPENMP + for (auto& o_id : o_ids) { + ids.merge(o_id); + } + o_ids.clear(); +#endif + toGet.resize(static_cast(g2lx.npe * g2ly.npe * g2lz.npe)); + for (const auto id : ids) { + const IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const auto pix = g2lx.convert(gind.x()); + const auto piy = g2ly.convert(gind.y()); + const auto piz = g2lz.convert(gind.z()); + ASSERT3(piz.proc == 0); + toGet[(piy.proc * g2lx.npe) + pix.proc].push_back( + xyzl.convert(pix.ind, piy.ind, piz.ind).ind); + } + for (auto& v : toGet) { + std::sort(v.begin(), v.end()); + } + commCommLists(); + { + int offset = 0; + for (const auto& get : toGet) { + getOffsets.push_back(offset); + offset += get.size(); + } + getOffsets.push_back(offset); + } + for (const auto id : ids) { + const IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const auto pix = g2lx.convert(gind.x()); + const auto piy = g2ly.convert(gind.y()); + const auto piz = g2lz.convert(gind.z()); + ASSERT3(piz.proc == 0); + const auto proc = (piy.proc * g2lx.npe) + pix.proc; + const auto& vec = toGet[proc]; + const auto tofind = xyzl.convert(pix.ind, piy.ind, piz.ind).ind; + auto it = std::lower_bound(vec.begin(), vec.end(), tofind); + ASSERT3(it != vec.end()); + ASSERT3(*it == tofind); + mapping[id] = std::distance(vec.begin(), it) + getOffsets[proc]; + } + is_setup = true; + } + GlobalField3DAccessInstance communicate(const Field3D& f) { + return {this, communicate_data(f)}; + } + std::unique_ptr communicate_asPtr(const Field3D& f) { + return std::make_unique(this, communicate_data(f)); + } + +private: + void commCommLists() { + toSend.resize(toGet.size()); + std::vector toGetSizes(toGet.size(), -1); + std::vector toSendSizes(toSend.size(), -1); +#if CHECK > 3 + { + int thisproc; + MPI_Comm_rank(comm, &thisproc); + ASSERT0(thisproc == mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex()); + } +#endif + std::vector reqs(toSend.size()); + for (size_t proc = 0; proc < toGet.size(); ++proc) { + auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, 666, + comm, &reqs[proc]); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + toGetSizes[proc] = toGet[proc].size(); + auto ret = + MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + std::vector reqs2(toSend.size()); + int cnt = 0; + for ([[maybe_unused]] auto dummy : reqs) { + int ind{0}; + auto ret = MPI_Waitany(reqs.size(), reqs.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + ASSERT2(static_cast(ind) < toSend.size()); + ASSERT3(toSendSizes[ind] >= 0); + if (toSendSizes[ind] == 0) { + continue; + } + sendBufferSize += toSendSizes[ind]; + toSend[ind].resize(toSendSizes[ind], -1); + + ret = MPI_Irecv(static_cast(toSend[ind].data()), toSend[ind].size(), MPI_INT, + ind, 666 * 666, comm, reqs2.data() + cnt++); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (!toGet[proc].empty()) { + const auto ret = MPI_Send(static_cast(toGet[proc].data()), + toGet[proc].size(), MPI_INT, proc, 666 * 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + } + for (int c = 0; c < cnt; c++) { + int ind{0}; + const auto ret = MPI_Waitany(cnt, reqs2.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + } + } + Mesh* mesh; +#ifdef _OPENMP + std::vector> o_ids; +#endif + std::set ids; + std::map mapping; + bool is_setup{false}; + const fci_comm::globalToLocal1D g2lx; + const fci_comm::globalToLocal1D g2ly; + const fci_comm::globalToLocal1D g2lz; + +public: + const fci_comm::XYZ2Ind xyzl; + const fci_comm::XYZ2Ind xyzg; + +private: + std::vector> toGet; + std::vector> toSend; + std::vector getOffsets; + int sendBufferSize{0}; + MPI_Comm comm; + std::vector communicate_data(const Field3D& f) { + ASSERT2(is_setup); + ASSERT2(f.getMesh() == mesh); + std::vector data(getOffsets.back()); + std::vector sendBuffer(sendBufferSize); + std::vector reqs(toSend.size()); + int cnt1 = 0; + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (toGet[proc].empty()) { + continue; + } + auto ret = + MPI_Irecv(static_cast(data.data() + getOffsets[proc]), + toGet[proc].size(), MPI_DOUBLE, proc, 666, comm, reqs.data() + cnt1); + ASSERT0(ret == MPI_SUCCESS); + cnt1++; + } + int cnt = 0; + for (size_t proc = 0; proc < toGet.size(); ++proc) { + if (toSend[proc].empty()) { + continue; + } + const void* start = static_cast(sendBuffer.data() + cnt); + for (auto i : toSend[proc]) { + sendBuffer[cnt++] = f[Ind3D(i)]; + } + auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } + for (int j = 0; j < cnt1; ++j) { + int ind{0}; + auto ret = MPI_Waitany(cnt1, reqs.data(), &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + ASSERT3(ind >= 0); + ASSERT3(ind < cnt1); + } + return data; + } +}; diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 34340e53f4..c76ad27833 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -17,7 +17,6 @@ import zoidberg as zb from boutdata.collect import collect from boututils.run_wrapper import build_and_log, launch_safe from numpy import arange, array, linspace, log, polyfit -from scipy.interpolate import RectBivariateSpline as RBS # Global parameters DIRECTORY = "data" @@ -32,15 +31,16 @@ NLIST = [8, 16, 32, 64] dx = 1.0 / array(NLIST) -def myRBS(a, b, c): - """RectBivariateSpline, but automatically tune spline degree for small arrays""" - mx, _ = c.shape - kx = max(mx - 1, 1) - kx = min(kx, 3) - return RBS(a, b, c, kx=kx) +success = True +error_2 = {} +error_inf = {} +method_orders = {} -zb.poloidal_grid.RectBivariateSpline = myRBS +# Run with periodic Y? +yperiodic = True + +build_and_log("FCI MMS test") def quiet_collect(name: str) -> float: