From 5502ce360b975ed9d47a3e49dae88b49bdc033ef Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 27 Sep 2024 11:05:58 +0200 Subject: [PATCH 01/42] Allow XZHermiteSpline also without y-offset --- src/mesh/interpolation/hermite_spline_xz.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 27a4f1d614..40dad9b7bd 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -352,8 +352,7 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; - const auto region2 = - y_offset == 0 ? "RGN_NOY" : fmt::format("RGN_YPAR_{:+d}", y_offset); + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; #if USE_NEW_WEIGHTS #ifdef HS_USE_PETSC From c74d33065293652ded5b64f130cb3c1e0891c4e3 Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 15 Jan 2025 13:20:10 +0100 Subject: [PATCH 02/42] Add communication routine for FCI operation For FCI we need to be able to access "random" data from the adjacent slices. If they are split in x-direction, this requires some tricky communication pattern. It can be used like this: ``` // Create object GlobalField3DAccess fci_comm(thismesh); // let it know what data points will be required: // where IndG3D is an index in the global field, which would be the // normal Ind3D if there would be only one proc. fci_comm.get(IndG3D(i, ny, nz)); // If all index have been added, the communication pattern will be // established. This has to be called by all processors in parallel fci_comm.setup() // Once the data for a given field is needed, it needs to be // communicated: GlobalField3DAccessInstance global_data = fci_comm.communicate(f3d); // and can be accessed like this BoutReal data = global_data[IndG3D(i, ny, nz)]; // ny and nz in the IndG3D are always optional. ``` --- CMakeLists.txt | 2 + include/bout/region.hxx | 3 +- src/mesh/parallel/fci_comm.cxx | 34 +++++ src/mesh/parallel/fci_comm.hxx | 241 +++++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 src/mesh/parallel/fci_comm.cxx create mode 100644 src/mesh/parallel/fci_comm.hxx diff --git a/CMakeLists.txt b/CMakeLists.txt index e8c4657fb0..615a31d722 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -301,6 +301,8 @@ set(BOUT_SOURCES ./src/mesh/mesh.cxx ./src/mesh/parallel/fci.cxx ./src/mesh/parallel/fci.hxx + ./src/mesh/parallel/fci_comm.cxx + ./src/mesh/parallel/fci_comm.hxx ./src/mesh/parallel/identity.cxx ./src/mesh/parallel/shiftedmetric.cxx ./src/mesh/parallel/shiftedmetricinterp.cxx diff --git a/include/bout/region.hxx b/include/bout/region.hxx index bb1cf82bf1..f441b3edd7 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -139,7 +139,7 @@ class BoutMask; BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) -enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 }; +enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2, IND_GLOBAL_3D }; /// Indices base class for Fields -- Regions are dereferenced into these /// @@ -386,6 +386,7 @@ inline SpecificInd operator-(SpecificInd lhs, const SpecificInd& rhs) { using Ind3D = SpecificInd; using Ind2D = SpecificInd; using IndPerp = SpecificInd; +using IndG3D = SpecificInd; /// Get string representation of Ind3D inline std::string toString(const Ind3D& i) { diff --git a/src/mesh/parallel/fci_comm.cxx b/src/mesh/parallel/fci_comm.cxx new file mode 100644 index 0000000000..c0d51d1eb9 --- /dev/null +++ b/src/mesh/parallel/fci_comm.cxx @@ -0,0 +1,34 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#include "fci_comm.hxx" + +#include + +const BoutReal& GlobalField3DAccessInstance::operator[](IndG3D ind) const { + auto it = gfa.mapping.find(ind.ind); + ASSERT2(it != gfa.mapping.end()); + return data[it->second]; +} diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx new file mode 100644 index 0000000000..aa3b5ecfb5 --- /dev/null +++ b/src/mesh/parallel/fci_comm.hxx @@ -0,0 +1,241 @@ +/************************************************************************** + * Communication for Flux-coordinate Independent interpolation + * + ************************************************************************** + * Copyright 2025 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#pragma once + +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/boutcomm.hxx" +#include "bout/field3d.hxx" +#include "bout/mesh.hxx" +#include "bout/region.hxx" +#include +#include +#include +#include +#include +#include +#include +class GlobalField3DAccess; + +namespace fci_comm { +struct ProcLocal { + int proc; + int ind; +}; +struct globalToLocal1D { + const int mg; + const int npe; + const int localwith; + const int local; + const int global; + const int globalwith; + globalToLocal1D(int mg, int npe, int localwith) + : mg(mg), npe(npe), localwith(localwith), local(localwith - 2 * mg), + global(local * npe), globalwith(global + 2 * mg) {}; + ProcLocal convert(int id) const { + int idwo = id - mg; + int proc = idwo / local; + if (proc >= npe) { + proc = npe - 1; + } + ASSERT2(proc >= 0); + int loc = id - local * proc; + ASSERT2(0 <= loc); + ASSERT2(loc < (local + 2 * mg)); + return {proc, loc}; + } +}; +template +struct XYZ2Ind { + const int nx; + const int ny; + const int nz; + ind convert(const int x, const int y, const int z) const { + return {z + (y + x * ny) * nz, ny, nz}; + } + ind operator()(const int x, const int y, const int z) const { return convert(x, y, z); } + XYZ2Ind(const int nx, const int ny, const int nz) : nx(nx), ny(ny), nz(nz) {} +}; +} // namespace fci_comm + +class GlobalField3DAccessInstance { +public: + const BoutReal& operator[](IndG3D ind) const; + + GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, + const std::vector&& data) + : gfa(*gfa), data(std::move(data)) {}; + +private: + const GlobalField3DAccess& gfa; + const std::vector data; +}; + +class GlobalField3DAccess { +public: + friend class GlobalField3DAccessInstance; + GlobalField3DAccess(Mesh* mesh) + : mesh(mesh), g2lx(mesh->xstart, mesh->getNXPE(), mesh->LocalNx), + g2ly(mesh->ystart, mesh->getNYPE(), mesh->LocalNy), + g2lz(mesh->zstart, 1, mesh->LocalNz), + xyzl(g2lx.localwith, g2ly.localwith, g2lz.localwith), + xyzg(g2lx.globalwith, g2ly.globalwith, g2lz.globalwith), comm(BoutComm::get()) {}; + void get(IndG3D ind) { ids.emplace(ind.ind); } + void operator[](IndG3D ind) { return get(ind); } + void setup() { + ASSERT2(is_setup == false); + toGet.resize(g2lx.npe * g2ly.npe * g2lz.npe); + for (const auto id : ids) { + IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const auto pix = g2lx.convert(gind.x()); + const auto piy = g2ly.convert(gind.y()); + const auto piz = g2lz.convert(gind.z()); + ASSERT3(piz.proc == 0); + toGet[piy.proc * g2lx.npe + pix.proc].push_back( + xyzl.convert(pix.ind, piy.ind, piz.ind).ind); + } + for (auto v : toGet) { + std::sort(v.begin(), v.end()); + } + commCommLists(); + { + int offset = 0; + for (auto get : toGet) { + offsets.push_back(offset); + offset += get.size(); + } + offsets.push_back(offset); + } + std::map mapping; + for (const auto id : ids) { + IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const auto pix = g2lx.convert(gind.x()); + const auto piy = g2ly.convert(gind.y()); + const auto piz = g2lz.convert(gind.z()); + ASSERT3(piz.proc == 0); + const auto proc = piy.proc * g2lx.npe + pix.proc; + const auto& vec = toGet[proc]; + auto it = + std::find(vec.begin(), vec.end(), xyzl.convert(pix.ind, piy.ind, piz.ind).ind); + ASSERT3(it != vec.end()); + mapping[id] = it - vec.begin() + offsets[proc]; + } + is_setup = true; + } + GlobalField3DAccessInstance communicate(const Field3D& f) { + return {this, communicate_data(f)}; + } + std::unique_ptr communicate_asPtr(const Field3D& f) { + return std::make_unique(this, communicate_data(f)); + } + +private: + void commCommLists() { + toSend.resize(toGet.size()); + std::vector toGetSizes(toGet.size()); + std::vector toSendSizes(toSend.size()); + //const int thisproc = mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex(); + std::vector reqs(toSend.size()); + for (size_t proc = 0; proc < toGet.size(); ++proc) { + auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, + 666 + proc, comm, &reqs[proc]); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + toGetSizes[proc] = toGet[proc].size(); + sendBufferSize += toGetSizes[proc]; + auto ret = MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, + 666 + proc, comm); + ASSERT0(ret == MPI_SUCCESS); + } + for ([[maybe_unused]] auto dummy : reqs) { + int ind{0}; + auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + toSend[ind].resize(toSendSizes[ind]); + ret = MPI_Irecv(static_cast(&toSend[ind]), toSend[ind].size(), MPI_INT, ind, + 666 * 666 + ind, comm, &reqs[ind]); + ASSERT0(ret == MPI_SUCCESS); + } + for (size_t proc = 0; proc < toGet.size(); ++proc) { + const auto ret = MPI_Send(static_cast(&toGet[proc]), toGet[proc].size(), + MPI_INT, proc, 666 * 666 + proc, comm); + ASSERT0(ret == MPI_SUCCESS); + } + for ([[maybe_unused]] auto dummy : reqs) { + int ind{0}; + const auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + } + } + Mesh* mesh; + std::set ids; + std::map mapping; + bool is_setup{false}; + const fci_comm::globalToLocal1D g2lx; + const fci_comm::globalToLocal1D g2ly; + const fci_comm::globalToLocal1D g2lz; + +public: + const fci_comm::XYZ2Ind xyzl; + const fci_comm::XYZ2Ind xyzg; + +private: + std::vector> toGet; + std::vector> toSend; + std::vector offsets; + int sendBufferSize{0}; + MPI_Comm comm; + std::vector communicate_data(const Field3D& f) { + ASSERT2(f.getMesh() == mesh); + std::vector data(offsets.back()); + std::vector sendBuffer(sendBufferSize); + std::vector reqs(toSend.size()); + for (size_t proc = 0; proc < toGet.size(); ++proc) { + auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), + MPI_DOUBLE, proc, 666 + proc, comm, &reqs[proc]); + ASSERT0(ret == MPI_SUCCESS); + } + int cnt = 0; + for (size_t proc = 0; proc < toGet.size(); ++proc) { + void* start = static_cast(&sendBuffer[cnt]); + for (auto i : toSend[proc]) { + sendBuffer[cnt++] = f[Ind3D(i)]; + } + auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666 + proc, comm); + ASSERT0(ret == MPI_SUCCESS); + } + for ([[maybe_unused]] auto dummy : reqs) { + int ind{0}; + auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + ASSERT0(ret == MPI_SUCCESS); + ASSERT3(ind != MPI_UNDEFINED); + } + return data; + } +}; From afc68d971bb6c0c6c93fa66873d0a2ff397a211e Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 15 Jan 2025 13:26:37 +0100 Subject: [PATCH 03/42] Unify XZMonotonicHermiteSpline and XZMonotonicHermiteSpline If they are two instances of the same template, this allows to have an if in the inner loop that can be optimised out. --- CMakeLists.txt | 1 - include/bout/interpolation_xz.hxx | 84 +++++----------- src/mesh/interpolation/hermite_spline_xz.cxx | 74 +++++++++++--- .../monotonic_hermite_spline_xz.cxx | 98 ------------------- src/mesh/interpolation_xz.cxx | 1 + 5 files changed, 85 insertions(+), 173 deletions(-) delete mode 100644 src/mesh/interpolation/monotonic_hermite_spline_xz.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 615a31d722..11ff1df22e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -296,7 +296,6 @@ set(BOUT_SOURCES ./src/mesh/interpolation/hermite_spline_z.cxx ./src/mesh/interpolation/interpolation_z.cxx ./src/mesh/interpolation/lagrange_4pt_xz.cxx - ./src/mesh/interpolation/monotonic_hermite_spline_xz.cxx ./src/mesh/invert3x3.hxx ./src/mesh/mesh.cxx ./src/mesh/parallel/fci.cxx diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 4dd24259fd..2832899e8a 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -38,6 +38,7 @@ #endif class Options; +class GlobalField3DAccess; /// Interpolate a field onto a perturbed set of points const Field3D interpolate(const Field3D& f, const Field3D& delta_x, @@ -135,7 +136,16 @@ public: } }; -class XZHermiteSpline : public XZInterpolation { +/// Monotonic Hermite spline interpolator +/// +/// Similar to XZHermiteSpline, so uses most of the same code. +/// Forces the interpolated result to be in the range of the +/// neighbouring cell values. This prevents unphysical overshoots, +/// but also degrades accuracy near maxima and minima. +/// Perhaps should only impose near boundaries, since that is where +/// problems most obviously occur. +template +class XZHermiteSplineBase : public XZInterpolation { protected: /// This is protected rather than private so that it can be /// extended and used by HermiteSplineMonotonic @@ -143,6 +153,9 @@ protected: Tensor> i_corner; // index of bottom-left grid point Tensor k_corner; // z-index of bottom-left grid point + std::unique_ptr gf3daccess; + Tensor> g3dinds; + // Basis functions for cubic Hermite spline interpolation // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline // The h00 and h01 basis functions are applied to the function itself @@ -168,14 +181,14 @@ protected: #endif public: - XZHermiteSpline(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) - : XZHermiteSpline(0, mesh) {} - XZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr); - XZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { + XZHermiteSplineBase(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) + : XZHermiteSplineBase(0, mesh) {} + XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr); + XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) + : XZHermiteSplineBase(y_offset, mesh) { setRegion(regionFromMask(mask, localmesh)); } - ~XZHermiteSpline() { + ~XZHermiteSplineBase() { #if HS_USE_PETSC if (isInit) { MatDestroy(&petscWeights); @@ -205,61 +218,8 @@ public: getWeightsForYApproximation(int i, int j, int k, int yoffset) override; }; -/// Monotonic Hermite spline interpolator -/// -/// Similar to XZHermiteSpline, so uses most of the same code. -/// Forces the interpolated result to be in the range of the -/// neighbouring cell values. This prevents unphysical overshoots, -/// but also degrades accuracy near maxima and minima. -/// Perhaps should only impose near boundaries, since that is where -/// problems most obviously occur. -/// -/// You can control how tight the clipping to the range of the neighbouring cell -/// values through ``rtol`` and ``atol``: -/// -/// diff = (max_of_neighours - min_of_neighours) * rtol + atol -/// -/// and the interpolated value is instead clipped to the range -/// ``[min_of_neighours - diff, max_of_neighours + diff]`` -class XZMonotonicHermiteSpline : public XZHermiteSpline { - /// Absolute tolerance for clipping - BoutReal atol = 0.0; - /// Relative tolerance for clipping - BoutReal rtol = 1.0; - -public: - XZMonotonicHermiteSpline(Mesh* mesh = nullptr, Options* options = nullptr) - : XZHermiteSpline(0, mesh), - atol{(*options)["atol"] - .doc("Absolute tolerance for clipping overshoot") - .withDefault(0.0)}, - rtol{(*options)["rtol"] - .doc("Relative tolerance for clipping overshoot") - .withDefault(1.0)} { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - XZMonotonicHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSpline(mask, y_offset, mesh) { - if (localmesh->getNXPE() > 1) { - throw BoutException("Do not support MPI splitting in X"); - } - } - - using XZHermiteSpline::interpolate; - /// Interpolate using precalculated weights. - /// This function is called by the other interpolate functions - /// in the base class XZHermiteSpline. - Field3D interpolate(const Field3D& f, - const std::string& region = "RGN_NOBNDRY") const override; -}; +using XZMonotonicHermiteSpline = XZHermiteSplineBase; +using XZHermiteSpline = XZHermiteSplineBase; /// XZLagrange4pt interpolation class /// diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 40dad9b7bd..59c2f4724d 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -22,6 +22,7 @@ #include "../impls/bout/boutmesh.hxx" #include "bout/bout.hxx" +#include "../parallel/fci_comm.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" @@ -102,7 +103,8 @@ class IndConverter { } }; -XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) +template +XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin) : XZInterpolation(y_offset, meshin), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { @@ -139,6 +141,10 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) MatCreateAIJ(BoutComm::get(), m, m, M, M, 16, nullptr, 16, nullptr, &petscWeights); #endif #endif + if constexpr (monotonic) { + gf3daccess = std::make_unique(localmesh); + g3dinds.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); + } #ifndef HS_USE_PETSC if (localmesh->getNXPE() > 1) { throw BoutException("Require PETSc for MPI splitting in X"); @@ -146,8 +152,10 @@ XZHermiteSpline::XZHermiteSpline(int y_offset, Mesh* meshin) #endif } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const std::string& region) { +template +void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, + const Field3D& delta_z, + const std::string& region) { const int ny = localmesh->LocalNy; const int nz = localmesh->LocalNz; @@ -300,6 +308,14 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z } #endif #endif + if constexpr (monotonic) { + const auto gind = gf3daccess->xyzg(i_corn, y + y_offset, k_corner(x, y, z)); + gf3daccess->get(gind); + gf3daccess->get(gind.xp(1)); + gf3daccess->get(gind.zp(1)); + gf3daccess->get(gind.xp(1).zp(1)); + g3dinds[i] = {gind.ind, gind.xp(1).ind, gind.zp(1).ind, gind.xp(1).zp(1).ind}; + } } #ifdef HS_USE_PETSC MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); @@ -311,8 +327,11 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z #endif } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { +template +void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, + const Field3D& delta_z, + const BoutMask& mask, + const std::string& region) { setMask(mask); calcWeights(delta_x, delta_z, region); } @@ -333,8 +352,10 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z * (i, j+1, k+1) h01_z + h10_z / 2 * (i, j+1, k+2) h11_z / 2 */ +template std::vector -XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { +XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, + int yoffset) { const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); @@ -347,7 +368,9 @@ XZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { {i, j + yoffset, k_mod_p2, 0.5 * h11_z(i, j, k)}}; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region) const { +template +Field3D XZHermiteSplineBase::interpolate(const Field3D& f, + const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; @@ -393,6 +416,11 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); + std::unique_ptr g3d; + if constexpr (monotonic) { + gf = gf3daccess.communicate_asPtr(f); + } + BOUT_FOR(i, getRegion(region)) { const auto iyp = i.yp(y_offset); @@ -421,6 +449,19 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region f_interp[iyp] = +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; + if constexpr (monotonic) { + const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], + gf[g3dinds[i][3]]}; + const auto minmax = std::minmax(corners); + if (f_interp[iyp] < minmax.first) { + f_interp[iyp] = minmax.first; + } else { + if (f_interp[iyp] > minmax.second) { + f_interp[iyp] = minmax.second; + } + } + } + ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart || i.x() > localmesh->xend); } @@ -430,15 +471,24 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region return f_interp; } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { +template +Field3D XZHermiteSplineBase::interpolate(const Field3D& f, + const Field3D& delta_x, + const Field3D& delta_z, + const std::string& region) { calcWeights(delta_x, delta_z, region); return interpolate(f, region); } -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { +template +Field3D +XZHermiteSplineBase::interpolate(const Field3D& f, const Field3D& delta_x, + const Field3D& delta_z, const BoutMask& mask, + const std::string& region) { calcWeights(delta_x, delta_z, mask, region); return interpolate(f, region); } + +// ensure they are instantiated +template class XZHermiteSplineBase; +template class XZHermiteSplineBase; diff --git a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx deleted file mode 100644 index f206ed1e0f..0000000000 --- a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************************** - * Copyright 2018 B.D.Dudson, P. Hill - * - * Contact: Ben Dudson, bd512@york.ac.uk - * - * This file is part of BOUT++. - * - * BOUT++ is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * BOUT++ is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with BOUT++. If not, see . - * - **************************************************************************/ - -#include "bout/globals.hxx" -#include "bout/index_derivs_interface.hxx" -#include "bout/interpolation_xz.hxx" -#include "bout/mesh.hxx" - -#include - -Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f, - const std::string& region) const { - ASSERT1(f.getMesh() == localmesh); - Field3D f_interp(f.getMesh()); - f_interp.allocate(); - - // Derivatives are used for tension and need to be on dimensionless - // coordinates - Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT"); - Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", "RGN_ALL"); - localmesh->communicate_no_slices(fx, fz); - Field3D fxz = bout::derivatives::index::DDX(fz, CELL_DEFAULT, "DEFAULT"); - localmesh->communicate_no_slices(fxz); - - const auto curregion{getRegion(region)}; - BOUT_FOR(i, curregion) { - const auto iyp = i.yp(y_offset); - - const auto ic = i_corner[i]; - const auto iczp = ic.zp(); - const auto icxp = ic.xp(); - const auto icxpzp = iczp.xp(); - - // Interpolate f in X at Z - const BoutReal f_z = - f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - - // Interpolate f in X at Z+1 - const BoutReal f_zp1 = f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] - + fx[icxpzp] * h11_x[i]; - - // Interpolate fz in X at Z - const BoutReal fz_z = fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i] - + fxz[icxp] * h11_x[i]; - - // Interpolate fz in X at Z+1 - const BoutReal fz_zp1 = fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] - + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - - // Interpolate in Z - BoutReal result = - +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; - - ASSERT2(std::isfinite(result) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - // Monotonicity - // Force the interpolated result to be in the range of the - // neighbouring cell values. This prevents unphysical overshoots, - // but also degrades accuracy near maxima and minima. - // Perhaps should only impose near boundaries, since that is where - // problems most obviously occur. - const BoutReal localmax = BOUTMAX(f[ic], f[icxp], f[iczp], f[icxpzp]); - const BoutReal localmin = BOUTMIN(f[ic], f[icxp], f[iczp], f[icxpzp]); - - ASSERT2(std::isfinite(localmax) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - ASSERT2(std::isfinite(localmin) || i.x() < localmesh->xstart - || i.x() > localmesh->xend); - - const auto diff = ((localmax - localmin) * rtol) + atol; - - result = std::min(result, localmax + diff); - result = std::max(result, localmin - diff); - - f_interp[iyp] = result; - } - return f_interp; -} diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index a58554dc82..f80a361bd1 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -23,6 +23,7 @@ * **************************************************************************/ +#include "parallel/fci_comm.hxx" #include #include #include From 84237c8a474ff6b8b4f2cf04dc76beb3aa4dac9d Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 15 Jan 2025 13:26:59 +0100 Subject: [PATCH 04/42] Use x-splitting for monotonichermitespline test --- tests/MMS/spatial/fci/runtest | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 34340e53f4..c76ad27833 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -17,7 +17,6 @@ import zoidberg as zb from boutdata.collect import collect from boututils.run_wrapper import build_and_log, launch_safe from numpy import arange, array, linspace, log, polyfit -from scipy.interpolate import RectBivariateSpline as RBS # Global parameters DIRECTORY = "data" @@ -32,15 +31,16 @@ NLIST = [8, 16, 32, 64] dx = 1.0 / array(NLIST) -def myRBS(a, b, c): - """RectBivariateSpline, but automatically tune spline degree for small arrays""" - mx, _ = c.shape - kx = max(mx - 1, 1) - kx = min(kx, 3) - return RBS(a, b, c, kx=kx) +success = True +error_2 = {} +error_inf = {} +method_orders = {} -zb.poloidal_grid.RectBivariateSpline = myRBS +# Run with periodic Y? +yperiodic = True + +build_and_log("FCI MMS test") def quiet_collect(name: str) -> float: From 06624497a52f1be434f34fb8b157bee68b8c96e1 Mon Sep 17 00:00:00 2001 From: David Bold Date: Thu, 16 Jan 2025 09:07:51 +0100 Subject: [PATCH 05/42] Set region for lagrange4pt --- src/mesh/interpolation/lagrange_4pt_xz.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index e16b2699d1..7fb0fb502b 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -133,7 +133,11 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) // Then in X f_interp(x, y_next, z) = lagrange_4pt(xvals, t_x(x, y, z)); + ASSERT2(std::isfinite(f_interp(x, y_next, z))); + } + const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; + f_interp.setRegion(region2); return f_interp; } From f77849eb9fcc544e89c2839c6e220867753a702f Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 10:43:58 +0100 Subject: [PATCH 06/42] Add monotonic check also to other code branches --- src/mesh/interpolation/hermite_spline_xz.cxx | 27 +++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 59c2f4724d..28455f77e8 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -36,7 +36,7 @@ class IndConverter { xstart(mesh->xstart), ystart(mesh->ystart), zstart(0), lnx(mesh->LocalNx - 2 * xstart), lny(mesh->LocalNy - 2 * ystart), lnz(mesh->LocalNz - 2 * zstart) {} - // ix and iy are global indices + // ix and iz are global indices // iy is local int fromMeshToGlobal(int ix, int iy, int iz) { const int xstart = mesh->xstart; @@ -388,6 +388,19 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, VecGetArrayRead(result, &cptr); BOUT_FOR(i, f.getRegion(region2)) { f_interp[i] = cptr[int(i)]; + if constexpr (monotonic) { + const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], + gf[g3dinds[i][3]]}; + const auto minmax = std::minmax(corners); + if (f_interp[iyp] < minmax.first) { + f_interp[iyp] = minmax.first; + } else { + if (f_interp[iyp] > minmax.second) { + f_interp[iyp] = minmax.second; + } + } + } + ASSERT2(std::isfinite(cptr[int(i)])); } VecRestoreArrayRead(result, &cptr); @@ -403,6 +416,18 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, f_interp[iyp] += newWeights[w * 4 + 2][i] * f[ic.zp().xp(w - 1)]; f_interp[iyp] += newWeights[w * 4 + 3][i] * f[ic.zp(2).xp(w - 1)]; } + if constexpr (monotonic) { + const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], + gf[g3dinds[i][3]]}; + const auto minmax = std::minmax(corners); + if (f_interp[iyp] < minmax.first) { + f_interp[iyp] = minmax.first; + } else { + if (f_interp[iyp] > minmax.second) { + f_interp[iyp] = minmax.second; + } + } + } } #endif #else From 820f0a5cf2e5dfc9134f4ac760edac4b63cbc398 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:31:14 +0100 Subject: [PATCH 07/42] use lower_bound instead of find lower_bound takes into account the data is sorted --- src/mesh/parallel/fci_comm.hxx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index aa3b5ecfb5..a847700548 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -138,10 +138,11 @@ public: ASSERT3(piz.proc == 0); const auto proc = piy.proc * g2lx.npe + pix.proc; const auto& vec = toGet[proc]; - auto it = - std::find(vec.begin(), vec.end(), xyzl.convert(pix.ind, piy.ind, piz.ind).ind); + const auto tofind = xyzl.convert(pix.ind, piy.ind, piz.ind).ind; + auto it = std::lower_bound(vec.begin(), vec.end(), tofind); ASSERT3(it != vec.end()); - mapping[id] = it - vec.begin() + offsets[proc]; + ASSERT3(*it == tofind); + mapping[id] = std::distance(vec.begin(), it) + offsets[proc]; } is_setup = true; } From 1178243fb42ba9afb595cf31c684504a83f09b63 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:31:27 +0100 Subject: [PATCH 08/42] Do not shadow mapping --- src/mesh/parallel/fci_comm.hxx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index a847700548..5250fb99d3 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -129,7 +129,6 @@ public: } offsets.push_back(offset); } - std::map mapping; for (const auto id : ids) { IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; const auto pix = g2lx.convert(gind.x()); From 3d6f587ee8fc84418f4810bc537146dec4b3c2bb Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:32:01 +0100 Subject: [PATCH 09/42] Ensure setup has been called --- src/mesh/parallel/fci_comm.hxx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 5250fb99d3..6c83d58f63 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -212,6 +212,7 @@ private: int sendBufferSize{0}; MPI_Comm comm; std::vector communicate_data(const Field3D& f) { + ASSERT2(is_setup); ASSERT2(f.getMesh() == mesh); std::vector data(offsets.back()); std::vector sendBuffer(sendBufferSize); From 265c8cd3b5abdf3dd7313a4c6ea4ed81048d1e11 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:32:12 +0100 Subject: [PATCH 10/42] Use pointer to data --- src/mesh/parallel/fci_comm.hxx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 6c83d58f63..d44eea76ef 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -176,13 +176,14 @@ private: auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); + ASSERT2(static_cast(ind) < toSend.size()); toSend[ind].resize(toSendSizes[ind]); - ret = MPI_Irecv(static_cast(&toSend[ind]), toSend[ind].size(), MPI_INT, ind, - 666 * 666 + ind, comm, &reqs[ind]); + ret = MPI_Irecv(static_cast(&toSend[ind][0]), toSend[ind].size(), MPI_INT, + ind, 666 * 666 + ind, comm, &reqs[ind]); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { - const auto ret = MPI_Send(static_cast(&toGet[proc]), toGet[proc].size(), + const auto ret = MPI_Send(static_cast(&toGet[proc][0]), toGet[proc].size(), MPI_INT, proc, 666 * 666 + proc, comm); ASSERT0(ret == MPI_SUCCESS); } From 16a162e180baf86ef183e5f92e75fd88f072cff9 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:32:36 +0100 Subject: [PATCH 11/42] Call setup before communicator is used --- src/mesh/interpolation/hermite_spline_xz.cxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 28455f77e8..46eda18a0a 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -317,6 +317,9 @@ void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, g3dinds[i] = {gind.ind, gind.xp(1).ind, gind.zp(1).ind, gind.xp(1).zp(1).ind}; } } + if constexpr (monotonic) { + gf3daccess->setup(); + } #ifdef HS_USE_PETSC MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); From e5878c19b28e46c674d6e7c28a9e85a58a3b0bc6 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 13:34:36 +0100 Subject: [PATCH 12/42] Deduplicate code Ensures we all ways check for monotonicity --- src/mesh/interpolation/hermite_spline_xz.cxx | 61 +++++++------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 46eda18a0a..80a1336c02 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -380,8 +380,12 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; -#if USE_NEW_WEIGHTS -#ifdef HS_USE_PETSC + std::unique_ptr gf; + if constexpr (monotonic) { + gf = gf3daccess->communicate_asPtr(f); + } + +#if USE_NEW_WEIGHTS and defined(HS_USE_PETSC) BoutReal* ptr; const BoutReal* cptr; VecGetArray(rhs, &ptr); @@ -392,22 +396,9 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, BOUT_FOR(i, f.getRegion(region2)) { f_interp[i] = cptr[int(i)]; if constexpr (monotonic) { - const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], - gf[g3dinds[i][3]]}; - const auto minmax = std::minmax(corners); - if (f_interp[iyp] < minmax.first) { - f_interp[iyp] = minmax.first; - } else { - if (f_interp[iyp] > minmax.second) { - f_interp[iyp] = minmax.second; - } - } - } - - ASSERT2(std::isfinite(cptr[int(i)])); - } - VecRestoreArrayRead(result, &cptr); -#else + const auto iyp = i; + const auto i = iyp.ym(y_offset); +#elif USE_NEW_WEIGHTS // No Petsc BOUT_FOR(i, getRegion(region)) { auto ic = i_corner[i]; auto iyp = i.yp(y_offset); @@ -420,20 +411,7 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, f_interp[iyp] += newWeights[w * 4 + 3][i] * f[ic.zp(2).xp(w - 1)]; } if constexpr (monotonic) { - const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], - gf[g3dinds[i][3]]}; - const auto minmax = std::minmax(corners); - if (f_interp[iyp] < minmax.first) { - f_interp[iyp] = minmax.first; - } else { - if (f_interp[iyp] > minmax.second) { - f_interp[iyp] = minmax.second; - } - } - } - } -#endif -#else +#else // Legacy interpolation // Derivatives are used for tension and need to be on dimensionless // coordinates @@ -444,11 +422,6 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); - std::unique_ptr g3d; - if constexpr (monotonic) { - gf = gf3daccess.communicate_asPtr(f); - } - BOUT_FOR(i, getRegion(region)) { const auto iyp = i.yp(y_offset); @@ -478,8 +451,9 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, +f_z * h00_z[i] + f_zp1 * h01_z[i] + fz_z * h10_z[i] + fz_zp1 * h11_z[i]; if constexpr (monotonic) { - const auto corners = {gf[g3dinds[i][0]], gf[g3dinds[i][1]], gf[g3dinds[i][2]], - gf[g3dinds[i][3]]}; +#endif + const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], + (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; const auto minmax = std::minmax(corners); if (f_interp[iyp] < minmax.first) { f_interp[iyp] = minmax.first; @@ -489,7 +463,14 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, } } } - +#if USE_NEW_WEIGHTS and defined(HS_USE_PETSC) + ASSERT2(std::isfinite(cptr[int(i)])); + } + VecRestoreArrayRead(result, &cptr); +#elif USE_NEW_WEIGHTS + ASSERT2(std::isfinite(f_interp[iyp])); + } +#else ASSERT2(std::isfinite(f_interp[iyp]) || i.x() < localmesh->xstart || i.x() > localmesh->xend); } From aee27131b93e6ab89b8111064ee8deb79e138f8b Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 14:30:05 +0100 Subject: [PATCH 13/42] Fix tags for comm Tags were different for sender and receiver --- src/mesh/parallel/fci_comm.hxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index d44eea76ef..c0227f4773 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -161,14 +161,14 @@ private: std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, - 666 + proc, comm, &reqs[proc]); + 666, comm, &reqs[proc]); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { toGetSizes[proc] = toGet[proc].size(); sendBufferSize += toGetSizes[proc]; auto ret = MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, - 666 + proc, comm); + 666, comm); ASSERT0(ret == MPI_SUCCESS); } for ([[maybe_unused]] auto dummy : reqs) { @@ -179,12 +179,12 @@ private: ASSERT2(static_cast(ind) < toSend.size()); toSend[ind].resize(toSendSizes[ind]); ret = MPI_Irecv(static_cast(&toSend[ind][0]), toSend[ind].size(), MPI_INT, - ind, 666 * 666 + ind, comm, &reqs[ind]); + ind, 666 * 666, comm, &reqs[ind]); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { const auto ret = MPI_Send(static_cast(&toGet[proc][0]), toGet[proc].size(), - MPI_INT, proc, 666 * 666 + proc, comm); + MPI_INT, proc, 666 * 666, comm); ASSERT0(ret == MPI_SUCCESS); } for ([[maybe_unused]] auto dummy : reqs) { @@ -220,7 +220,7 @@ private: std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), - MPI_DOUBLE, proc, 666 + proc, comm, &reqs[proc]); + MPI_DOUBLE, proc, 666, comm, &reqs[proc]); ASSERT0(ret == MPI_SUCCESS); } int cnt = 0; @@ -229,7 +229,7 @@ private: for (auto i : toSend[proc]) { sendBuffer[cnt++] = f[Ind3D(i)]; } - auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666 + proc, comm); + auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666, comm); ASSERT0(ret == MPI_SUCCESS); } for ([[maybe_unused]] auto dummy : reqs) { From b4f7be51d9216d7b7f3416c63e3c609e3edda274 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 14:39:43 +0100 Subject: [PATCH 14/42] Use pointer instead of std::vector --- src/mesh/parallel/fci_comm.hxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index c0227f4773..257639bb32 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -216,7 +216,8 @@ private: ASSERT2(is_setup); ASSERT2(f.getMesh() == mesh); std::vector data(offsets.back()); - std::vector sendBuffer(sendBufferSize); + //std::vector sendBuffer(sendBufferSize); + BoutReal* sendBuffer = new BoutReal[sendBufferSize]; std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), @@ -238,6 +239,7 @@ private: ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); } + delete[] sendBuffer; return data; } }; From 578ee8d28551e1614ae0c62877a528ef45ab3b67 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 21 Jan 2025 14:43:12 +0100 Subject: [PATCH 15/42] Do not reuse requests if the array is still in use Otherwise mpi might wait for the wrong request. --- src/mesh/parallel/fci_comm.hxx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 257639bb32..ec34d622a6 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -171,6 +171,7 @@ private: 666, comm); ASSERT0(ret == MPI_SUCCESS); } + std::vector reqs2(toSend.size()); for ([[maybe_unused]] auto dummy : reqs) { int ind{0}; auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); @@ -179,7 +180,7 @@ private: ASSERT2(static_cast(ind) < toSend.size()); toSend[ind].resize(toSendSizes[ind]); ret = MPI_Irecv(static_cast(&toSend[ind][0]), toSend[ind].size(), MPI_INT, - ind, 666 * 666, comm, &reqs[ind]); + ind, 666 * 666, comm, &reqs2[ind]); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { @@ -189,7 +190,7 @@ private: } for ([[maybe_unused]] auto dummy : reqs) { int ind{0}; - const auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + const auto ret = MPI_Waitany(reqs.size(), &reqs2[0], &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); } From bf9c9eb013fbb8fce3a8f4252f16dfe4dc287e8f Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:27:11 +0100 Subject: [PATCH 16/42] rename offset to getOffsets to avoid confusion whether the offsets are for sending or receiving --- src/mesh/parallel/fci_comm.hxx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index ec34d622a6..f079385dc1 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -124,10 +124,10 @@ public: { int offset = 0; for (auto get : toGet) { - offsets.push_back(offset); + getOffsets.push_back(offset); offset += get.size(); } - offsets.push_back(offset); + getOffsets.push_back(offset); } for (const auto id : ids) { IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; @@ -141,7 +141,7 @@ public: auto it = std::lower_bound(vec.begin(), vec.end(), tofind); ASSERT3(it != vec.end()); ASSERT3(*it == tofind); - mapping[id] = std::distance(vec.begin(), it) + offsets[proc]; + mapping[id] = std::distance(vec.begin(), it) + getOffsets[proc]; } is_setup = true; } @@ -155,9 +155,8 @@ public: private: void commCommLists() { toSend.resize(toGet.size()); - std::vector toGetSizes(toGet.size()); - std::vector toSendSizes(toSend.size()); - //const int thisproc = mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex(); + std::vector toGetSizes(toGet.size(), -1); + std::vector toSendSizes(toSend.size(), -1); std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, @@ -210,15 +209,15 @@ public: private: std::vector> toGet; std::vector> toSend; - std::vector offsets; + std::vector getOffsets; int sendBufferSize{0}; MPI_Comm comm; std::vector communicate_data(const Field3D& f) { ASSERT2(is_setup); ASSERT2(f.getMesh() == mesh); - std::vector data(offsets.back()); //std::vector sendBuffer(sendBufferSize); BoutReal* sendBuffer = new BoutReal[sendBufferSize]; + std::vector data(getOffsets.back()); std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), From 2155f068deda476df34d076b183b14c4255b8e92 Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:28:04 +0100 Subject: [PATCH 17/42] Fix: mixup of sending / receiving size --- src/mesh/parallel/fci_comm.hxx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index f079385dc1..0926f5c415 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -165,7 +165,6 @@ private: } for (size_t proc = 0; proc < toGet.size(); ++proc) { toGetSizes[proc] = toGet[proc].size(); - sendBufferSize += toGetSizes[proc]; auto ret = MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, 666, comm); ASSERT0(ret == MPI_SUCCESS); @@ -177,6 +176,8 @@ private: ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); ASSERT2(static_cast(ind) < toSend.size()); + ASSERT3(toSendSizes[ind] >= 0); + sendBufferSize += toSendSizes[ind]; toSend[ind].resize(toSendSizes[ind]); ret = MPI_Irecv(static_cast(&toSend[ind][0]), toSend[ind].size(), MPI_INT, ind, 666 * 666, comm, &reqs2[ind]); @@ -215,9 +216,8 @@ private: std::vector communicate_data(const Field3D& f) { ASSERT2(is_setup); ASSERT2(f.getMesh() == mesh); - //std::vector sendBuffer(sendBufferSize); - BoutReal* sendBuffer = new BoutReal[sendBufferSize]; std::vector data(getOffsets.back()); + std::vector sendBuffer(sendBufferSize); std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), @@ -239,7 +239,6 @@ private: ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); } - delete[] sendBuffer; return data; } }; From 06622473f901863250a5e927cd5a060d068dcdac Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:28:43 +0100 Subject: [PATCH 18/42] Fix receive data offset --- src/mesh/parallel/fci_comm.hxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 0926f5c415..8db9d8ef14 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -220,8 +220,8 @@ private: std::vector sendBuffer(sendBufferSize); std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { - auto ret = MPI_Irecv(static_cast(&data[proc]), toGet[proc].size(), - MPI_DOUBLE, proc, 666, comm, &reqs[proc]); + auto ret = MPI_Irecv(static_cast(&data[getOffsets[proc]]), + toGet[proc].size(), MPI_DOUBLE, proc, 666, comm, &reqs[proc]); ASSERT0(ret == MPI_SUCCESS); } int cnt = 0; From 28d4e28dc7bbbb579913d974e5d4727b57a55eee Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:30:47 +0100 Subject: [PATCH 19/42] Add check to ensure the proc layout is as expected --- src/mesh/parallel/fci_comm.hxx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 8db9d8ef14..30f7df76ee 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -157,6 +157,13 @@ private: toSend.resize(toGet.size()); std::vector toGetSizes(toGet.size(), -1); std::vector toSendSizes(toSend.size(), -1); +#if CHECK > 3 + { + int thisproc; + MPI_Comm_rank(comm, thisproc); + assert(thisproc == mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex()); + } +#endif std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, From e4507d953e51f75237202f72bba8d445586a2530 Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:31:54 +0100 Subject: [PATCH 20/42] clang-format --- src/mesh/parallel/fci_comm.hxx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 30f7df76ee..bd4b955f12 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -166,14 +166,14 @@ private: #endif std::vector reqs(toSend.size()); for (size_t proc = 0; proc < toGet.size(); ++proc) { - auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, - 666, comm, &reqs[proc]); + auto ret = MPI_Irecv(static_cast(&toSendSizes[proc]), 1, MPI_INT, proc, 666, + comm, &reqs[proc]); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { toGetSizes[proc] = toGet[proc].size(); - auto ret = MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, - 666, comm); + auto ret = + MPI_Send(static_cast(&toGetSizes[proc]), 1, MPI_INT, proc, 666, comm); ASSERT0(ret == MPI_SUCCESS); } std::vector reqs2(toSend.size()); From d7ba297dda1e35d60bc677856d21ccc472148b3e Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:40:35 +0100 Subject: [PATCH 21/42] Use BOUT++ assert --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index bd4b955f12..39348d2e10 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -161,7 +161,7 @@ private: { int thisproc; MPI_Comm_rank(comm, thisproc); - assert(thisproc == mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex()); + ASSERT0(thisproc == mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex()); } #endif std::vector reqs(toSend.size()); From f55f8e4c188806ac38191361d5d5e64356dba83f Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 22 Jan 2025 10:41:15 +0100 Subject: [PATCH 22/42] Fix check --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 39348d2e10..ec4aeaba49 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -160,7 +160,7 @@ private: #if CHECK > 3 { int thisproc; - MPI_Comm_rank(comm, thisproc); + MPI_Comm_rank(comm, &thisproc); ASSERT0(thisproc == mesh->getYProcIndex() * g2lx.npe + mesh->getXProcIndex()); } #endif From 0e9f7d69ed72469c5e0dbf85deb9a692746bef15 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Jan 2025 09:49:41 +0100 Subject: [PATCH 23/42] Take periodicity into account --- src/mesh/parallel/fci_comm.hxx | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index ec4aeaba49..a93b55244a 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -52,19 +52,35 @@ struct globalToLocal1D { const int local; const int global; const int globalwith; - globalToLocal1D(int mg, int npe, int localwith) + const bool periodic; + globalToLocal1D(int mg, int npe, int localwith, bool periodic) : mg(mg), npe(npe), localwith(localwith), local(localwith - 2 * mg), - global(local * npe), globalwith(global + 2 * mg) {}; + global(local * npe), globalwith(global + 2 * mg), periodic(periodic) {}; ProcLocal convert(int id) const { + if (periodic) { + while (id < mg) { + id += global; + } + while (id >= global + mg) { + id -= global; + } + } int idwo = id - mg; int proc = idwo / local; - if (proc >= npe) { - proc = npe - 1; + if (not periodic) { + if (proc >= npe) { + proc = npe - 1; + } } - ASSERT2(proc >= 0); int loc = id - local * proc; - ASSERT2(0 <= loc); - ASSERT2(loc < (local + 2 * mg)); +#if CHECK > 1 + if ((loc < 0 or loc > localwith or proc < 0 or proc > npe) + or (periodic and (loc < mg or loc >= local + mg))) { + printf("globalToLocal1D failure: %d %d, %d %d, %d %d %s\n", id, idwo, globalwith, + npe, proc, loc, periodic ? "periodic" : "non-periodic"); + ASSERT0(false); + } +#endif return {proc, loc}; } }; @@ -98,9 +114,9 @@ class GlobalField3DAccess { public: friend class GlobalField3DAccessInstance; GlobalField3DAccess(Mesh* mesh) - : mesh(mesh), g2lx(mesh->xstart, mesh->getNXPE(), mesh->LocalNx), - g2ly(mesh->ystart, mesh->getNYPE(), mesh->LocalNy), - g2lz(mesh->zstart, 1, mesh->LocalNz), + : mesh(mesh), g2lx(mesh->xstart, mesh->getNXPE(), mesh->LocalNx, false), + g2ly(mesh->ystart, mesh->getNYPE(), mesh->LocalNy, true), + g2lz(mesh->zstart, 1, mesh->LocalNz, true), xyzl(g2lx.localwith, g2ly.localwith, g2lz.localwith), xyzg(g2lx.globalwith, g2ly.globalwith, g2lz.globalwith), comm(BoutComm::get()) {}; void get(IndG3D ind) { ids.emplace(ind.ind); } From 944adfbf68b0bdf0440dfe0283634331ec6901a1 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Jan 2025 09:49:56 +0100 Subject: [PATCH 24/42] Sort a reference, not a copy --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index a93b55244a..9f296848b1 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -133,7 +133,7 @@ public: toGet[piy.proc * g2lx.npe + pix.proc].push_back( xyzl.convert(pix.ind, piy.ind, piz.ind).ind); } - for (auto v : toGet) { + for (auto& v : toGet) { std::sort(v.begin(), v.end()); } commCommLists(); From 6daec201722fd162574d831778babdf89f1b7742 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Jan 2025 09:52:06 +0100 Subject: [PATCH 25/42] Only communicate non-empty vectors --- src/mesh/parallel/fci_comm.hxx | 44 ++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 9f296848b1..1ca67f0ecc 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -193,6 +193,7 @@ private: ASSERT0(ret == MPI_SUCCESS); } std::vector reqs2(toSend.size()); + int cnt = 0; for ([[maybe_unused]] auto dummy : reqs) { int ind{0}; auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); @@ -200,20 +201,26 @@ private: ASSERT3(ind != MPI_UNDEFINED); ASSERT2(static_cast(ind) < toSend.size()); ASSERT3(toSendSizes[ind] >= 0); + if (toSendSizes[ind] == 0) { + continue; + } sendBufferSize += toSendSizes[ind]; - toSend[ind].resize(toSendSizes[ind]); - ret = MPI_Irecv(static_cast(&toSend[ind][0]), toSend[ind].size(), MPI_INT, - ind, 666 * 666, comm, &reqs2[ind]); + toSend[ind].resize(toSendSizes[ind], -1); + + ret = MPI_Irecv(static_cast(toSend[ind].data()), toSend[ind].size(), MPI_INT, + ind, 666 * 666, comm, reqs2.data() + cnt++); ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { - const auto ret = MPI_Send(static_cast(&toGet[proc][0]), toGet[proc].size(), - MPI_INT, proc, 666 * 666, comm); - ASSERT0(ret == MPI_SUCCESS); + if (toGet.size() != 0) { + const auto ret = MPI_Send(static_cast(toGet[proc].data()), + toGet[proc].size(), MPI_INT, proc, 666 * 666, comm); + ASSERT0(ret == MPI_SUCCESS); + } } - for ([[maybe_unused]] auto dummy : reqs) { + for (int c = 0; c < cnt; c++) { int ind{0}; - const auto ret = MPI_Waitany(reqs.size(), &reqs2[0], &ind, MPI_STATUS_IGNORE); + const auto ret = MPI_Waitany(cnt, reqs2.data(), &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); } @@ -242,25 +249,36 @@ private: std::vector data(getOffsets.back()); std::vector sendBuffer(sendBufferSize); std::vector reqs(toSend.size()); + int cnt1 = 0; for (size_t proc = 0; proc < toGet.size(); ++proc) { - auto ret = MPI_Irecv(static_cast(&data[getOffsets[proc]]), - toGet[proc].size(), MPI_DOUBLE, proc, 666, comm, &reqs[proc]); + if (toGet[proc].size() == 0) { + continue; + } + auto ret = + MPI_Irecv(static_cast(data.data() + getOffsets[proc]), + toGet[proc].size(), MPI_DOUBLE, proc, 666, comm, reqs.data() + cnt1); ASSERT0(ret == MPI_SUCCESS); + cnt1++; } int cnt = 0; for (size_t proc = 0; proc < toGet.size(); ++proc) { - void* start = static_cast(&sendBuffer[cnt]); + if (toSend[proc].size() == 0) { + continue; + } + const void* start = static_cast(sendBuffer.data() + cnt); for (auto i : toSend[proc]) { sendBuffer[cnt++] = f[Ind3D(i)]; } auto ret = MPI_Send(start, toSend[proc].size(), MPI_DOUBLE, proc, 666, comm); ASSERT0(ret == MPI_SUCCESS); } - for ([[maybe_unused]] auto dummy : reqs) { + for (int j = 0; j < cnt1; ++j) { int ind{0}; - auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + auto ret = MPI_Waitany(cnt1, reqs.data(), &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); + ASSERT3(ind >= 0); + ASSERT3(ind < cnt1); } return data; } From 3444dd0f7ee424ffa6206629f79063b301798516 Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 31 Jan 2025 11:45:08 +0100 Subject: [PATCH 26/42] Make check stricter --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 1ca67f0ecc..df08a02dda 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -74,7 +74,7 @@ struct globalToLocal1D { } int loc = id - local * proc; #if CHECK > 1 - if ((loc < 0 or loc > localwith or proc < 0 or proc > npe) + if ((loc < 0 or loc > localwith or proc < 0 or proc >= npe) or (periodic and (loc < mg or loc >= local + mg))) { printf("globalToLocal1D failure: %d %d, %d %d, %d %d %s\n", id, idwo, globalwith, npe, proc, loc, periodic ? "periodic" : "non-periodic"); From 91bee6811566a04656fb8b05ca4493effcbf7a39 Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 31 Jan 2025 11:49:47 +0100 Subject: [PATCH 27/42] Fix: include global offset in monotonic spline --- src/mesh/interpolation/hermite_spline_xz.cxx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 80a1336c02..c2a97c1c05 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -164,6 +164,8 @@ void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, #ifdef HS_USE_PETSC IndConverter conv{localmesh}; #endif + [[maybe_unused]] const int y_global_offset = + localmesh->getYProcIndex() * (localmesh->yend - localmesh->ystart + 1); BOUT_FOR(i, getRegion(region)) { const int x = i.x(); const int y = i.y(); @@ -309,7 +311,8 @@ void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, #endif #endif if constexpr (monotonic) { - const auto gind = gf3daccess->xyzg(i_corn, y + y_offset, k_corner(x, y, z)); + const auto gind = + gf3daccess->xyzg(i_corn, y + y_offset + y_global_offset, k_corner(x, y, z)); gf3daccess->get(gind); gf3daccess->get(gind.xp(1)); gf3daccess->get(gind.zp(1)); From d7aeae490d6499a17ca7287fe291d48c630bb386 Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 31 Jan 2025 14:51:11 +0100 Subject: [PATCH 28/42] make fci_comm openmp thread safe Using a local set for each thread ensures we do not need a mutex for adding data, at the cost of having to merge the different sets later. --- src/mesh/parallel/fci_comm.hxx | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index df08a02dda..40c1fe9f72 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -118,11 +118,30 @@ public: g2ly(mesh->ystart, mesh->getNYPE(), mesh->LocalNy, true), g2lz(mesh->zstart, 1, mesh->LocalNz, true), xyzl(g2lx.localwith, g2ly.localwith, g2lz.localwith), - xyzg(g2lx.globalwith, g2ly.globalwith, g2lz.globalwith), comm(BoutComm::get()) {}; - void get(IndG3D ind) { ids.emplace(ind.ind); } + xyzg(g2lx.globalwith, g2ly.globalwith, g2lz.globalwith), comm(BoutComm::get()) { +#ifdef _OPENMP + o_ids.resize(omp_get_max_threads()); +#endif + }; + void get(IndG3D ind) { + ASSERT2(is_setup == false); +#ifdef _OPENMP + ASSERT2(o_ids.size() > static_cast(omp_get_thread_num())); + o_ids[omp_get_thread_num()].emplace(ind.ind); +#else + ids.emplace(ind.ind); +#endif + } + void operator[](IndG3D ind) { return get(ind); } void setup() { ASSERT2(is_setup == false); +#ifdef _OPENMP + for (auto& o_id : o_ids) { + ids.merge(o_id); + } + o_ids.clear(); +#endif toGet.resize(g2lx.npe * g2ly.npe * g2lz.npe); for (const auto id : ids) { IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; @@ -226,6 +245,9 @@ private: } } Mesh* mesh; +#ifdef _OPENMP + std::vector> o_ids; +#endif std::set ids; std::map mapping; bool is_setup{false}; From ac5cc7086d737ed0f41a1b5274e5815566b223f0 Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 7 Feb 2025 13:50:10 +0100 Subject: [PATCH 29/42] Fix communication for fci We want to skip sending if there is no data for this process ... --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 40c1fe9f72..27dd111765 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -231,7 +231,7 @@ private: ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { - if (toGet.size() != 0) { + if (toGet[proc].size() != 0) { const auto ret = MPI_Send(static_cast(toGet[proc].data()), toGet[proc].size(), MPI_INT, proc, 666 * 666, comm); ASSERT0(ret == MPI_SUCCESS); From 885b465cbb81d3e12345569337693b2c39e768dd Mon Sep 17 00:00:00 2001 From: David Bold Date: Fri, 7 Feb 2025 15:58:07 +0100 Subject: [PATCH 30/42] Add check to reduce risk of bugs The result needs to be well understood, as the indices are a mix of local and global indices, as we need to access non-local data. --- src/mesh/interpolation/hermite_spline_xz.cxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index c2a97c1c05..036f5b2669 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -362,6 +362,9 @@ template std::vector XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, int yoffset) { + if (localmesh->getNXPE() > 1){ + throw BoutException("It is likely that the function calling this is not handling the result correctly."); + } const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); From a1c40336dc0bfbcedf7f622f2a2986e8cc9b8284 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 9 Oct 2025 17:23:45 +0100 Subject: [PATCH 31/42] tests: Increase nx for hermitespline interpolation boundary problem --- src/mesh/interpolation/hermite_spline_xz.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 036f5b2669..41ca857829 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -418,12 +418,14 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, } if constexpr (monotonic) { #else // Legacy interpolation + // TODO(peter): Should we apply dirichlet BCs to derivatives? // Derivatives are used for tension and need to be on dimensionless // coordinates // f has been communcated, and thus we can assume that the x-boundaries are // also valid in the y-boundary. Thus the differentiated field needs no // extra comms. + // TODO(dave) Add assert that we do not use z-splitting or z-guards. Field3D fx = bout::derivatives::index::DDX(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fz = bout::derivatives::index::DDZ(f, CELL_DEFAULT, "DEFAULT", region2); Field3D fxz = bout::derivatives::index::DDZ(fx, CELL_DEFAULT, "DEFAULT", region2); From 1c25b0e3afc6b7505e35842256a08f545e0d0aef Mon Sep 17 00:00:00 2001 From: David Bold Date: Mon, 2 Feb 2026 13:02:00 +0100 Subject: [PATCH 32/42] region may be not used --- src/mesh/interpolation/hermite_spline_xz.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 41ca857829..bbd8c9abc0 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -155,7 +155,7 @@ XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin) template void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const std::string& region) { + [[maybe_unused]] const std::string& region) { const int ny = localmesh->LocalNy; const int nz = localmesh->LocalNz; @@ -379,7 +379,7 @@ XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, template Field3D XZHermiteSplineBase::interpolate(const Field3D& f, - const std::string& region) const { + [[maybe_unused]] const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; From c7f8504324f1b8174c8f3317202a89b32f2fc735 Mon Sep 17 00:00:00 2001 From: David Bold Date: Mon, 2 Feb 2026 13:52:00 +0100 Subject: [PATCH 33/42] Fix [[maybe_unused]] location --- include/bout/msg_stack.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/bout/msg_stack.hxx b/include/bout/msg_stack.hxx index 1abb26d2c7..0fe22cd0e6 100644 --- a/include/bout/msg_stack.hxx +++ b/include/bout/msg_stack.hxx @@ -80,7 +80,7 @@ public: } void pop() {} - void pop(int [[maybe_unused]] id) {} + void pop([[maybe_unused]] int id) {} void clear() {} void dump() {} From 96aa2bdbae97c573580fac747de58a1eb79c167b Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 4 Feb 2026 10:47:00 +0100 Subject: [PATCH 34/42] Add some wiggle room for monotonichermitespline --- include/bout/interpolation_xz.hxx | 13 ++++++++---- src/mesh/interpolation/hermite_spline_xz.cxx | 22 +++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 2832899e8a..fb06faacc2 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -180,12 +180,17 @@ protected: Vec rhs, result; #endif + /// Factors to allow for some wiggleroom + BoutReal abs_fac_monotonic{1e-2}; + BoutReal rel_fac_monotonic{1e-1}; + public: XZHermiteSplineBase(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr) - : XZHermiteSplineBase(0, mesh) {} - XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr); - XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) - : XZHermiteSplineBase(y_offset, mesh) { + : XZHermiteSplineBase(0, mesh, options) {} + XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr, Options* options = nullptr); + XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr, + Options* options = nullptr) + : XZHermiteSplineBase(y_offset, mesh, options) { setRegion(regionFromMask(mask, localmesh)); } ~XZHermiteSplineBase() { diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index bbd8c9abc0..8eac4037b7 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -104,11 +104,20 @@ class IndConverter { }; template -XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin) +XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin, + Options* options) : XZInterpolation(y_offset, meshin), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { + if constexpr (monotonic) { + if (options == nullptr) { + options = &Options::root()["mesh:paralleltransform:xzinterpolation"]; + } + abs_fac_monotonic = (*options)["abs_tol"].withDefault(abs_fac_monotonic); + rel_fac_monotonic = (*options)["rel_tol"].withDefault(rel_fac_monotonic); + } + // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); @@ -463,13 +472,10 @@ Field3D XZHermiteSplineBase::interpolate(const Field3D& f, const auto corners = {(*gf)[IndG3D(g3dinds[i][0])], (*gf)[IndG3D(g3dinds[i][1])], (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; const auto minmax = std::minmax(corners); - if (f_interp[iyp] < minmax.first) { - f_interp[iyp] = minmax.first; - } else { - if (f_interp[iyp] > minmax.second) { - f_interp[iyp] = minmax.second; - } - } + const auto diff = + (minmax.second - minmax.first) * rel_fac_monotonic + abs_fac_monotonic; + f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); + f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); } #if USE_NEW_WEIGHTS and defined(HS_USE_PETSC) ASSERT2(std::isfinite(cptr[int(i)])); From 65880e4af16cb95438437bf057c4b9fdb427b142 Mon Sep 17 00:00:00 2001 From: dschwoerer <5637662+dschwoerer@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:09:09 +0000 Subject: [PATCH 35/42] [bot] Apply format changes --- include/bout/interpolation_xz.hxx | 2 +- src/mesh/interpolation/hermite_spline_xz.cxx | 17 +++++++++-------- src/mesh/interpolation/lagrange_4pt_xz.cxx | 1 - src/mesh/parallel/fci_comm.hxx | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index fb06faacc2..578627997f 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -189,7 +189,7 @@ public: : XZHermiteSplineBase(0, mesh, options) {} XZHermiteSplineBase(int y_offset = 0, Mesh* mesh = nullptr, Options* options = nullptr); XZHermiteSplineBase(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr, - Options* options = nullptr) + Options* options = nullptr) : XZHermiteSplineBase(y_offset, mesh, options) { setRegion(regionFromMask(mask, localmesh)); } diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 8eac4037b7..6b9441f539 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -21,8 +21,8 @@ **************************************************************************/ #include "../impls/bout/boutmesh.hxx" -#include "bout/bout.hxx" #include "../parallel/fci_comm.hxx" +#include "bout/bout.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" @@ -162,9 +162,9 @@ XZHermiteSplineBase::XZHermiteSplineBase(int y_offset, Mesh* meshin, } template -void XZHermiteSplineBase::calcWeights(const Field3D& delta_x, - const Field3D& delta_z, - [[maybe_unused]] const std::string& region) { +void XZHermiteSplineBase::calcWeights( + const Field3D& delta_x, const Field3D& delta_z, + [[maybe_unused]] const std::string& region) { const int ny = localmesh->LocalNy; const int nz = localmesh->LocalNz; @@ -371,8 +371,9 @@ template std::vector XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, int yoffset) { - if (localmesh->getNXPE() > 1){ - throw BoutException("It is likely that the function calling this is not handling the result correctly."); + if (localmesh->getNXPE() > 1) { + throw BoutException("It is likely that the function calling this is not handling the " + "result correctly."); } const int nz = localmesh->LocalNz; const int k_mod = k_corner(i, j, k); @@ -387,8 +388,8 @@ XZHermiteSplineBase::getWeightsForYApproximation(int i, int j, int k, } template -Field3D XZHermiteSplineBase::interpolate(const Field3D& f, - [[maybe_unused]] const std::string& region) const { +Field3D XZHermiteSplineBase::interpolate( + const Field3D& f, [[maybe_unused]] const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index 7fb0fb502b..d5e862fc0b 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -134,7 +134,6 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) // Then in X f_interp(x, y_next, z) = lagrange_4pt(xvals, t_x(x, y, z)); ASSERT2(std::isfinite(f_interp(x, y_next, z))); - } const auto region2 = y_offset != 0 ? fmt::format("RGN_YPAR_{:+d}", y_offset) : region; f_interp.setRegion(region2); diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 27dd111765..3514e4ba17 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -55,7 +55,7 @@ struct globalToLocal1D { const bool periodic; globalToLocal1D(int mg, int npe, int localwith, bool periodic) : mg(mg), npe(npe), localwith(localwith), local(localwith - 2 * mg), - global(local * npe), globalwith(global + 2 * mg), periodic(periodic) {}; + global(local * npe), globalwith(global + 2 * mg), periodic(periodic){}; ProcLocal convert(int id) const { if (periodic) { while (id < mg) { @@ -103,7 +103,7 @@ public: GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, const std::vector&& data) - : gfa(*gfa), data(std::move(data)) {}; + : gfa(*gfa), data(std::move(data)){}; private: const GlobalField3DAccess& gfa; From a91d3e75b31ba43951792f5ede617af602b4261b Mon Sep 17 00:00:00 2001 From: dschwoerer <5637662+dschwoerer@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:09:10 +0000 Subject: [PATCH 36/42] [bot] Add last format changes commit to ignore file --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index abd11c7762..9ec546dec7 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -11,3 +11,4 @@ a71cad2dd6ace5741a754e2ca7daacd4bb094e0e 2c2402ed59c91164eaff46dee0f79386b7347e9e 05b7c571544c3bcb153fce67d12b9ac48947fc2d c8f38049359170a34c915e209276238ea66b9a1e +65880e4af16cb95438437bf057c4b9fdb427b142 From f473a6bfd972421769c61fda4cef9edc03535ca0 Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 4 Mar 2026 10:16:51 +0100 Subject: [PATCH 37/42] Apply fixes from clang-format --- src/mesh/interpolation/hermite_spline_xz.cxx | 2 ++ src/mesh/parallel/fci_comm.cxx | 3 +++ src/mesh/parallel/fci_comm.hxx | 3 +++ 3 files changed, 8 insertions(+) diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 6b9441f539..577b4cb9db 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -27,6 +27,8 @@ #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" +#include +#include #include class IndConverter { diff --git a/src/mesh/parallel/fci_comm.cxx b/src/mesh/parallel/fci_comm.cxx index c0d51d1eb9..95bfa6958e 100644 --- a/src/mesh/parallel/fci_comm.cxx +++ b/src/mesh/parallel/fci_comm.cxx @@ -24,6 +24,9 @@ **************************************************************************/ #include "fci_comm.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/region.hxx" #include diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 3514e4ba17..cbe5af3a7f 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -32,6 +32,9 @@ #include "bout/mesh.hxx" #include "bout/region.hxx" #include +#include +#include +#include #include #include #include From f341037b5fbb26184898190112932004985b010e Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 4 Mar 2026 11:20:14 +0100 Subject: [PATCH 38/42] Apply clang-tidy changes --- include/bout/interpolation_xz.hxx | 3 +- include/bout/region.hxx | 2 +- src/mesh/interpolation/hermite_spline_xz.cxx | 4 ++- src/mesh/interpolation/lagrange_4pt_xz.cxx | 1 + src/mesh/parallel/fci_comm.hxx | 38 ++++++++++---------- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 578627997f..6e4f12b1b0 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -27,6 +27,7 @@ #include #include #include +#include #define USE_NEW_WEIGHTS 1 #if BOUT_HAS_PETSC @@ -193,7 +194,7 @@ public: : XZHermiteSplineBase(y_offset, mesh, options) { setRegion(regionFromMask(mask, localmesh)); } - ~XZHermiteSplineBase() { + ~XZHermiteSplineBase() override { #if HS_USE_PETSC if (isInit) { MatDestroy(&petscWeights); diff --git a/include/bout/region.hxx b/include/bout/region.hxx index f441b3edd7..17ded65987 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -139,7 +139,7 @@ class BoutMask; BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) -enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2, IND_GLOBAL_3D }; +enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2, IND_GLOBAL_3D = 3 }; /// Indices base class for Fields -- Regions are dereferenced into these /// diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index 577b4cb9db..2a4f32070a 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -26,8 +26,10 @@ #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" +#include "bout/mask.hxx" #include +#include #include #include @@ -476,7 +478,7 @@ Field3D XZHermiteSplineBase::interpolate( (*gf)[IndG3D(g3dinds[i][2])], (*gf)[IndG3D(g3dinds[i][3])]}; const auto minmax = std::minmax(corners); const auto diff = - (minmax.second - minmax.first) * rel_fac_monotonic + abs_fac_monotonic; + ((minmax.second - minmax.first) * rel_fac_monotonic) + abs_fac_monotonic; f_interp[iyp] = std::max(f_interp[iyp], minmax.first - diff); f_interp[iyp] = std::min(f_interp[iyp], minmax.second + diff); } diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index d5e862fc0b..c82c9fa36e 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -20,6 +20,7 @@ * **************************************************************************/ +#include "fmt/format.h" #include "bout/globals.hxx" #include "bout/interpolation_xz.hxx" #include "bout/mesh.hxx" diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index cbe5af3a7f..17b30a9c25 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -57,8 +57,8 @@ struct globalToLocal1D { const int globalwith; const bool periodic; globalToLocal1D(int mg, int npe, int localwith, bool periodic) - : mg(mg), npe(npe), localwith(localwith), local(localwith - 2 * mg), - global(local * npe), globalwith(global + 2 * mg), periodic(periodic){}; + : mg(mg), npe(npe), localwith(localwith), local(localwith - (2 * mg)), + global(local * npe), globalwith(global + (2 * mg)), periodic(periodic) {}; ProcLocal convert(int id) const { if (periodic) { while (id < mg) { @@ -68,14 +68,14 @@ struct globalToLocal1D { id -= global; } } - int idwo = id - mg; + const int idwo = id - mg; int proc = idwo / local; if (not periodic) { if (proc >= npe) { proc = npe - 1; } } - int loc = id - local * proc; + const int loc = id - (local * proc); #if CHECK > 1 if ((loc < 0 or loc > localwith or proc < 0 or proc >= npe) or (periodic and (loc < mg or loc >= local + mg))) { @@ -93,7 +93,7 @@ struct XYZ2Ind { const int ny; const int nz; ind convert(const int x, const int y, const int z) const { - return {z + (y + x * ny) * nz, ny, nz}; + return {z + ((y + x * ny) * nz), ny, nz}; } ind operator()(const int x, const int y, const int z) const { return convert(x, y, z); } XYZ2Ind(const int nx, const int ny, const int nz) : nx(nx), ny(ny), nz(nz) {} @@ -105,8 +105,8 @@ public: const BoutReal& operator[](IndG3D ind) const; GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, - const std::vector&& data) - : gfa(*gfa), data(std::move(data)){}; + std::vector&& data) + : gfa(*gfa), data(std::move(data)) {}; private: const GlobalField3DAccess& gfa; @@ -136,7 +136,7 @@ public: #endif } - void operator[](IndG3D ind) { return get(ind); } + void operator[](IndG3D ind) { get(ind); } void setup() { ASSERT2(is_setup == false); #ifdef _OPENMP @@ -145,14 +145,14 @@ public: } o_ids.clear(); #endif - toGet.resize(g2lx.npe * g2ly.npe * g2lz.npe); + toGet.resize(static_cast(g2lx.npe * g2ly.npe * g2lz.npe)); for (const auto id : ids) { - IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; const auto pix = g2lx.convert(gind.x()); const auto piy = g2ly.convert(gind.y()); const auto piz = g2lz.convert(gind.z()); ASSERT3(piz.proc == 0); - toGet[piy.proc * g2lx.npe + pix.proc].push_back( + toGet[(piy.proc * g2lx.npe) + pix.proc].push_back( xyzl.convert(pix.ind, piy.ind, piz.ind).ind); } for (auto& v : toGet) { @@ -161,19 +161,19 @@ public: commCommLists(); { int offset = 0; - for (auto get : toGet) { + for (const auto& get : toGet) { getOffsets.push_back(offset); offset += get.size(); } getOffsets.push_back(offset); } for (const auto id : ids) { - IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; + const IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; const auto pix = g2lx.convert(gind.x()); const auto piy = g2ly.convert(gind.y()); const auto piz = g2lz.convert(gind.z()); ASSERT3(piz.proc == 0); - const auto proc = piy.proc * g2lx.npe + pix.proc; + const auto proc = (piy.proc * g2lx.npe) + pix.proc; const auto& vec = toGet[proc]; const auto tofind = xyzl.convert(pix.ind, piy.ind, piz.ind).ind; auto it = std::lower_bound(vec.begin(), vec.end(), tofind); @@ -216,9 +216,9 @@ private: } std::vector reqs2(toSend.size()); int cnt = 0; - for ([[maybe_unused]] auto dummy : reqs) { + for ([[maybe_unused]] auto* dummy : reqs) { int ind{0}; - auto ret = MPI_Waitany(reqs.size(), &reqs[0], &ind, MPI_STATUS_IGNORE); + auto ret = MPI_Waitany(reqs.size(), reqs.data(), &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); ASSERT3(ind != MPI_UNDEFINED); ASSERT2(static_cast(ind) < toSend.size()); @@ -234,7 +234,7 @@ private: ASSERT0(ret == MPI_SUCCESS); } for (size_t proc = 0; proc < toGet.size(); ++proc) { - if (toGet[proc].size() != 0) { + if (!toGet[proc].empty()) { const auto ret = MPI_Send(static_cast(toGet[proc].data()), toGet[proc].size(), MPI_INT, proc, 666 * 666, comm); ASSERT0(ret == MPI_SUCCESS); @@ -276,7 +276,7 @@ private: std::vector reqs(toSend.size()); int cnt1 = 0; for (size_t proc = 0; proc < toGet.size(); ++proc) { - if (toGet[proc].size() == 0) { + if (toGet[proc].empty()) { continue; } auto ret = @@ -287,7 +287,7 @@ private: } int cnt = 0; for (size_t proc = 0; proc < toGet.size(); ++proc) { - if (toSend[proc].size() == 0) { + if (toSend[proc].empty()) { continue; } const void* start = static_cast(sendBuffer.data() + cnt); From 999d4ca69e32e47ce7b46c60c6d2818593a6dd9f Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 4 Mar 2026 11:31:37 +0100 Subject: [PATCH 39/42] Apply clang-tidy improvements --- src/mesh/parallel/fci_comm.cxx | 4 ++-- src/mesh/parallel/fci_comm.hxx | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/mesh/parallel/fci_comm.cxx b/src/mesh/parallel/fci_comm.cxx index 95bfa6958e..65a8731740 100644 --- a/src/mesh/parallel/fci_comm.cxx +++ b/src/mesh/parallel/fci_comm.cxx @@ -31,7 +31,7 @@ #include const BoutReal& GlobalField3DAccessInstance::operator[](IndG3D ind) const { - auto it = gfa.mapping.find(ind.ind); - ASSERT2(it != gfa.mapping.end()); + auto it = gfa->mapping.find(ind.ind); + ASSERT2(it != gfa->mapping.end()); return data[it->second]; } diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 17b30a9c25..6d80a2e326 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -49,13 +49,13 @@ struct ProcLocal { int ind; }; struct globalToLocal1D { - const int mg; - const int npe; - const int localwith; - const int local; - const int global; - const int globalwith; - const bool periodic; + int mg; + int npe; + int localwith; + int local; + int global; + int globalwith; + bool periodic; globalToLocal1D(int mg, int npe, int localwith, bool periodic) : mg(mg), npe(npe), localwith(localwith), local(localwith - (2 * mg)), global(local * npe), globalwith(global + (2 * mg)), periodic(periodic) {}; @@ -106,11 +106,11 @@ public: GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, std::vector&& data) - : gfa(*gfa), data(std::move(data)) {}; + : gfa(gfa), data(std::move(data)) {}; private: - const GlobalField3DAccess& gfa; - const std::vector data; + const GlobalField3DAccess* gfa; + std::vector data; }; class GlobalField3DAccess { @@ -145,7 +145,7 @@ public: } o_ids.clear(); #endif - toGet.resize(static_cast(g2lx.npe * g2ly.npe * g2lz.npe)); + toGet.resize(static_cast(g2lx.npe * g2ly.npe * g2lz.npe)); for (const auto id : ids) { const IndG3D gind{id, g2ly.globalwith, g2lz.globalwith}; const auto pix = g2lx.convert(gind.x()); From 614a727ca68ff8ca0fd3f330d4d48dcf127989a6 Mon Sep 17 00:00:00 2001 From: David Bold Date: Wed, 4 Mar 2026 11:52:57 +0100 Subject: [PATCH 40/42] MPI_Request may not be a pointer --- src/mesh/parallel/fci_comm.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index 6d80a2e326..cf0ddb01a2 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -216,7 +216,7 @@ private: } std::vector reqs2(toSend.size()); int cnt = 0; - for ([[maybe_unused]] auto* dummy : reqs) { + for ([[maybe_unused]] auto dummy : reqs) { int ind{0}; auto ret = MPI_Waitany(reqs.size(), reqs.data(), &ind, MPI_STATUS_IGNORE); ASSERT0(ret == MPI_SUCCESS); From d1cfb8abd6aa5c76e6c1a4d7ab20929c65f8afc2 Mon Sep 17 00:00:00 2001 From: dschwoerer <5637662+dschwoerer@users.noreply.github.com> Date: Wed, 4 Mar 2026 11:19:18 +0000 Subject: [PATCH 41/42] [bot] Apply format changes --- src/mesh/parallel/fci_comm.hxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesh/parallel/fci_comm.hxx b/src/mesh/parallel/fci_comm.hxx index cf0ddb01a2..8c34885981 100644 --- a/src/mesh/parallel/fci_comm.hxx +++ b/src/mesh/parallel/fci_comm.hxx @@ -58,7 +58,7 @@ struct globalToLocal1D { bool periodic; globalToLocal1D(int mg, int npe, int localwith, bool periodic) : mg(mg), npe(npe), localwith(localwith), local(localwith - (2 * mg)), - global(local * npe), globalwith(global + (2 * mg)), periodic(periodic) {}; + global(local * npe), globalwith(global + (2 * mg)), periodic(periodic){}; ProcLocal convert(int id) const { if (periodic) { while (id < mg) { @@ -106,7 +106,7 @@ public: GlobalField3DAccessInstance(const GlobalField3DAccess* gfa, std::vector&& data) - : gfa(gfa), data(std::move(data)) {}; + : gfa(gfa), data(std::move(data)){}; private: const GlobalField3DAccess* gfa; From 5c0d41d8b126a59ae261b2448f6895a2954a50b4 Mon Sep 17 00:00:00 2001 From: dschwoerer <5637662+dschwoerer@users.noreply.github.com> Date: Wed, 4 Mar 2026 11:19:19 +0000 Subject: [PATCH 42/42] [bot] Add last format changes commit to ignore file --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 9ec546dec7..7fbaa021ce 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -12,3 +12,4 @@ a71cad2dd6ace5741a754e2ca7daacd4bb094e0e 05b7c571544c3bcb153fce67d12b9ac48947fc2d c8f38049359170a34c915e209276238ea66b9a1e 65880e4af16cb95438437bf057c4b9fdb427b142 +d1cfb8abd6aa5c76e6c1a4d7ab20929c65f8afc2