diff --git a/CMakeLists.txt b/CMakeLists.txt
index 44d100e444..eabfc055ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -376,6 +376,7 @@ if (zoidberg_FOUND EQUAL 0)
 else()
   set(zoidberg_FOUND OFF)
 endif()
+message(STATUS "Found Zoidberg for FCI tests: ${zoidberg_FOUND}")
 
 option(BOUT_GENERATE_FIELDOPS "Automatically re-generate the Field arithmetic operators from the Python templates. \
 Requires Python3, clang-format, and Jinja2. Turn this OFF to skip generating them if, for example, \
diff --git a/include/bout/difops.hxx b/include/bout/difops.hxx
index 71053d454a..c415980d18 100644
--- a/include/bout/difops.hxx
+++ b/include/bout/difops.hxx
@@ -40,7 +40,9 @@
 #include "bout/field3d.hxx"
 
 #include "bout/bout_types.hxx"
-#include "bout/solver.hxx"
+#include "bout/coordinates.hxx"
+
+class Solver;
 
 /*!
  * Parallel derivative (central differencing) in Y
@@ -193,6 +195,10 @@ Field3D Div_par_K_Grad_par(const Field3D& kY, const Field2D& f,
 Field3D Div_par_K_Grad_par(const Field3D& kY, const Field3D& f,
                            CELL_LOC outloc = CELL_DEFAULT);
 
+/// Version with energy flow diagnostic
+Field3D Div_par_K_Grad_par_mod(const Field3D& k, const Field3D& f, Field3D& flow_ylow,
+                               bool bndry_flux = true);
+
 /*!
  * Perpendicular Laplacian operator
  *
diff --git a/include/bout/fv_ops.hxx b/include/bout/fv_ops.hxx
index 94007a57a2..678e7499c7 100644
--- a/include/bout/fv_ops.hxx
+++ b/include/bout/fv_ops.hxx
@@ -5,29 +5,38 @@
 #ifndef BOUT_FV_OPS_H
 #define BOUT_FV_OPS_H
 
+#include "bout/assert.hxx"
+#include "bout/bout_types.hxx"
+#include "bout/boutexception.hxx"
+#include "bout/build_defines.hxx"
+#include "bout/coordinates.hxx"
+#include "bout/field.hxx"
+#include "bout/field2d.hxx"
 #include "bout/field3d.hxx"
 #include "bout/globals.hxx"
+#include "bout/mesh.hxx"
+#include "bout/output_bout_types.hxx" // NOLINT(unused-includes, misc-include-cleaner)
+#include "bout/region.hxx"
+#include "bout/utils.hxx"
 #include "bout/vector2d.hxx"
 
-#include "bout/utils.hxx"
-#include <bout/mesh.hxx>
+#include <cmath>
 
 namespace FV {
 /*!
  * Div ( a Grad_perp(f) ) -- ∇⊥ ( a ⋅ ∇⊥ f) -- Vorticity
  */
-Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& x);
+Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f);
 
 [[deprecated("Please use Div_a_Grad_perp instead")]] inline Field3D
-Div_a_Laplace_perp(const Field3D& a, const Field3D& x) {
-  return Div_a_Grad_perp(a, x);
+Div_a_Laplace_perp(const Field3D& a, const Field3D& f) {
+  return Div_a_Grad_perp(a, f);
 }
 
 /*!
    * Divergence of a parallel diffusion Div( k * Grad_par(f) )
    */
-const Field3D Div_par_K_Grad_par(const Field3D& k, const Field3D& f,
-                                 bool bndry_flux = true);
+Field3D Div_par_K_Grad_par(const Field3D& k, const Field3D& f, bool bndry_flux = true);
 
 /*!
    * 4th-order derivative in Y, using derivatives
@@ -49,7 +58,7 @@ const Field3D Div_par_K_Grad_par(const Field3D& k, const Field3D& f,
    *
    * No fluxes through domain boundaries
    */
-const Field3D D4DY4(const Field3D& d, const Field3D& f);
+Field3D D4DY4(const Field3D& d, const Field3D& f);
 
 /*!
    * 4th-order dissipation term
@@ -67,18 +76,24 @@ const Field3D D4DY4(const Field3D& d, const Field3D& f);
    *    f_2 | f_1 | f_0 |
    *                   f_b
    */
-const Field3D D4DY4_Index(const Field3D& f, bool bndry_flux = true);
+Field3D D4DY4_Index(const Field3D& f, bool bndry_flux = true);
 
 /*!
    * Stencil used for Finite Volume calculations
    * which includes cell face values L and R
    */
 struct Stencil1D {
-  // Cell centre values
-  BoutReal c, m, p, mm, pp;
-
-  // Left and right cell face values
-  BoutReal L, R;
+  /// Cell centre value
+  BoutReal c = BoutNaN;
+  /// Neighbouring cell centre values on the minus (m) and plus (p) side
+  BoutReal m = BoutNaN;
+  BoutReal p = BoutNaN;
+  /// Second neighbours on each side; stay NaN unless explicitly set
+  BoutReal mm = BoutNaN;
+  BoutReal pp = BoutNaN;
+  /// Left and right cell face values, calculated by the CellEdges functor
+  BoutReal L = BoutNaN;
+  BoutReal R = BoutNaN;
 };
 
 /*!
@@ -93,8 +108,8 @@ struct Upwind {
    */
 struct Fromm {
   void operator()(Stencil1D& n) {
-    n.L = n.c - 0.25 * (n.p - n.m);
-    n.R = n.c + 0.25 * (n.p - n.m);
+    n.L = n.c - (0.25 * (n.p - n.m));
+    n.R = n.c + (0.25 * (n.p - n.m));
   }
 };
 
@@ -110,9 +125,9 @@ struct MinMod {
   void operator()(Stencil1D& n) {
     // Choose the gradient within the cell
     // as the minimum (smoothest) solution
-    BoutReal slope = _minmod(n.p - n.c, n.c - n.m);
-    n.L = n.c - 0.5 * slope;
-    n.R = n.c + 0.5 * slope;
+    const BoutReal slope = _minmod(n.p - n.c, n.c - n.m);
+    n.L = n.c - (0.5 * slope);
+    n.R = n.c + (0.5 * slope);
   }
 
 private:
@@ -123,7 +138,7 @@ private:
      * returns zero, otherwise chooses the value
      * with the minimum magnitude.
      */
-  BoutReal _minmod(BoutReal a, BoutReal b) {
+  static BoutReal _minmod(BoutReal a, BoutReal b) {
     if (a * b <= 0.0) {
       return 0.0;
     }
@@ -145,17 +160,17 @@ private:
    */
 struct MC {
   void operator()(Stencil1D& n) {
-    BoutReal slope = minmod(2. * (n.p - n.c),  // 2*right difference
-                            0.5 * (n.p - n.m), // Central difference
-                            2. * (n.c - n.m)); // 2*left difference
-    n.L = n.c - 0.5 * slope;
-    n.R = n.c + 0.5 * slope;
+    const BoutReal slope = minmod(2. * (n.p - n.c),  // 2*right difference
+                                  0.5 * (n.p - n.m), // Central difference
+                                  2. * (n.c - n.m)); // 2*left difference
+    n.L = n.c - (0.5 * slope);
+    n.R = n.c + (0.5 * slope);
   }
 
 private:
   // Return zero if any signs are different
   // otherwise return the value with the minimum magnitude
-  BoutReal minmod(BoutReal a, BoutReal b, BoutReal c) {
+  static BoutReal minmod(BoutReal a, BoutReal b, BoutReal c) {
     // if any of the signs are different, return zero gradient
     if ((a * b <= 0.0) || (a * c <= 0.0)) {
       return 0.0;
@@ -166,6 +181,52 @@ private:
   }
 };
 
+/// Superbee slope limiter
+///
+/// Implements the limiter function
+///    φ(r) = max(0, min(2r, 1), min(r, 2))
+///
+/// The value at the right cell face (i.e. i + 1/2) is:
+///
+///   n.R = n.c - φ(r) (n.c - (n.p + n.c)/2)
+///       = n.c + φ(r) (n.p - n.c)/2
+///
+/// This gives four regimes:
+///  a) r < 1/2 -> φ(r) = 2r
+///     n.R = n.c + gL
+///  b) 1/2 < r < 1 -> φ(r) = 1
+///     n.R = n.c + gR/2
+///  c) 1 < r < 2 -> φ(r) = r
+///     n.R = n.c + gL/2
+///  d) 2 < r  -> φ(r) = 2
+///     n.R = n.c + gR
+///
+///  with the left and right gradients defined as:
+///   gL = n.c - n.m
+///   gR = n.p - n.c
+///
+struct Superbee {
+  void operator()(Stencil1D& n) {
+    const BoutReal left_diff = n.c - n.m;  // gL
+    const BoutReal right_diff = n.p - n.c; // gR
+
+    // The limiter is φ(r), with r = gL / gR
+    if (left_diff * right_diff < 0) {
+      // Opposite signs => zero gradient
+      n.L = n.c;
+      n.R = n.c;
+      return;
+    }
+    const BoutReal mag_l = fabs(left_diff);
+    const BoutReal mag_r = fabs(right_diff);
+    const BoutReal steep_l = BOUTMIN(mag_l, 0.5 * mag_r);
+    const BoutReal steep_r = BOUTMIN(mag_r, 0.5 * mag_l);
+    const BoutReal half_slope = SIGN(left_diff) * BOUTMAX(steep_l, steep_r);
+    n.L = n.c - half_slope;
+    n.R = n.c + half_slope;
+  }
+};
+
 /*!
    * Communicate fluxes between processors
    * Takes values in guard cells, and adds them to cells
@@ -189,13 +250,13 @@ void communicateFluxes(Field3D& f);
 ///
 /// NB: Uses to/from FieldAligned coordinates
 template <typename CellEdges = MC>
-const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
-                      const Field3D& wave_speed_in, bool fixflux = true) {
+Field3D Div_par(const Field3D& f_in, const Field3D& v_in, const Field3D& wave_speed_in,
+                bool fixflux = true) {
 
   ASSERT1_FIELDS_COMPATIBLE(f_in, v_in);
   ASSERT1_FIELDS_COMPATIBLE(f_in, wave_speed_in);
 
-  Mesh* mesh = f_in.getMesh();
+  Mesh const* mesh = f_in.getMesh();
 
   CellEdges cellboundary;
 
@@ -215,29 +276,17 @@ const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
 
   Field3D result{zeroFrom(f)};
 
-  // Only need one guard cell, so no need to communicate fluxes
-  // Instead calculate in guard cells to preserve fluxes
-  int ys = mesh->ystart - 1;
-  int ye = mesh->yend + 1;
-
   for (int i = mesh->xstart; i <= mesh->xend; i++) {
+    const bool is_periodic_y = mesh->periodicY(i);
+    const bool is_first_y = mesh->firstY(i);
+    const bool is_last_y = mesh->lastY(i);
 
-    if (!mesh->firstY(i) || mesh->periodicY(i)) {
-      // Calculate in guard cell to get fluxes consistent between processors
-      ys = mesh->ystart - 1;
-    } else {
-      // Don't include the boundary cell. Note that this implies special
-      // handling of boundaries later
-      ys = mesh->ystart;
-    }
-
-    if (!mesh->lastY(i) || mesh->periodicY(i)) {
-      // Calculate in guard cells
-      ye = mesh->yend + 1;
-    } else {
-      // Not in boundary cells
-      ye = mesh->yend;
-    }
+    // Only need one guard cell, so no need to communicate fluxes Instead
+    // calculate in guard cells to get fluxes consistent between processors, but
+    // don't include the boundary cell. Note that this implies special handling
+    // of boundaries later
+    const int ys = (!is_first_y || is_periodic_y) ? mesh->ystart - 1 : mesh->ystart;
+    const int ye = (!is_last_y || is_periodic_y) ? mesh->yend + 1 : mesh->yend;
 
     for (int j = ys; j <= ye; j++) {
       // Pre-calculate factors which multiply fluxes
@@ -246,16 +295,16 @@ const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
       BoutReal common_factor = (coord->J(i, j) + coord->J(i, j + 1))
                                / (sqrt(coord->g_22(i, j)) + sqrt(coord->g_22(i, j + 1)));
 
-      BoutReal flux_factor_rc = common_factor / (coord->dy(i, j) * coord->J(i, j));
-      BoutReal flux_factor_rp =
+      const BoutReal flux_factor_rc = common_factor / (coord->dy(i, j) * coord->J(i, j));
+      const BoutReal flux_factor_rp =
           common_factor / (coord->dy(i, j + 1) * coord->J(i, j + 1));
 
       // For left cell boundaries
       common_factor = (coord->J(i, j) + coord->J(i, j - 1))
                       / (sqrt(coord->g_22(i, j)) + sqrt(coord->g_22(i, j - 1)));
 
-      BoutReal flux_factor_lc = common_factor / (coord->dy(i, j) * coord->J(i, j));
-      BoutReal flux_factor_lm =
+      const BoutReal flux_factor_lc = common_factor / (coord->dy(i, j) * coord->J(i, j));
+      const BoutReal flux_factor_lm =
           common_factor / (coord->dy(i, j - 1) * coord->J(i, j - 1));
 #endif
       for (int k = 0; k < mesh->LocalNz; k++) {
@@ -298,23 +347,23 @@ const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
 
         // Calculate velocity at right boundary (y+1/2)
         BoutReal vpar = 0.5 * (v(i, j, k) + v(i, j + 1, k));
-        BoutReal flux;
+        BoutReal flux = NAN;
 
-        if (mesh->lastY(i) && (j == mesh->yend) && !mesh->periodicY(i)) {
+        if (is_last_y && (j == mesh->yend) && !is_periodic_y) {
           // Last point in domain
 
-          BoutReal bndryval = 0.5 * (s.c + s.p);
+          const BoutReal bndryval = 0.5 * (s.c + s.p);
           if (fixflux) {
             // Use mid-point to be consistent with boundary conditions
             flux = bndryval * vpar;
           } else {
             // Add flux due to difference in boundary values
-            flux = s.R * vpar + wave_speed(i, j, k) * (s.R - bndryval);
+            flux = (s.R * vpar) + (wave_speed(i, j, k) * (s.R - bndryval));
           }
         } else {
 
           // Maximum wave speed in the two cells
-          BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j + 1, k));
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j + 1, k));
 
           if (vpar > amax) {
             // Supersonic flow out of this cell
@@ -336,20 +385,20 @@ const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
 
         vpar = 0.5 * (v(i, j, k) + v(i, j - 1, k));
 
-        if (mesh->firstY(i) && (j == mesh->ystart) && !mesh->periodicY(i)) {
+        if (is_first_y && (j == mesh->ystart) && !is_periodic_y) {
           // First point in domain
-          BoutReal bndryval = 0.5 * (s.c + s.m);
+          const BoutReal bndryval = 0.5 * (s.c + s.m);
           if (fixflux) {
             // Use mid-point to be consistent with boundary conditions
             flux = bndryval * vpar;
           } else {
             // Add flux due to difference in boundary values
-            flux = s.L * vpar - wave_speed(i, j, k) * (s.L - bndryval);
+            flux = (s.L * vpar) - (wave_speed(i, j, k) * (s.L - bndryval));
           }
         } else {
 
           // Maximum wave speed in the two cells
-          BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j - 1, k));
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j - 1, k));
 
           if (vpar < -amax) {
             // Supersonic out of this cell
@@ -383,11 +432,11 @@ const Field3D Div_par(const Field3D& f_in, const Field3D& v_in,
    *
    */
 template <typename CellEdges = MC>
-const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
+Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
   ASSERT1(n_in.getLocation() == v.getLocation());
   ASSERT1_FIELDS_COMPATIBLE(n_in, v.x);
 
-  Mesh* mesh = n_in.getMesh();
+  const Mesh* mesh = n_in.getMesh();
 
   CellEdges cellboundary;
 
@@ -406,10 +455,10 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
 
   BOUT_FOR(i, result.getRegion("RGN_NOBNDRY")) {
     // Calculate velocities
-    BoutReal vU = 0.25 * (vz[i.zp()] + vz[i]) * (coord->J[i.zp()] + coord->J[i]);
-    BoutReal vD = 0.25 * (vz[i.zm()] + vz[i]) * (coord->J[i.zm()] + coord->J[i]);
-    BoutReal vL = 0.25 * (vx[i.xm()] + vx[i]) * (coord->J[i.xm()] + coord->J[i]);
-    BoutReal vR = 0.25 * (vx[i.xp()] + vx[i]) * (coord->J[i.xp()] + coord->J[i]);
+    const BoutReal vU = 0.25 * (vz[i.zp()] + vz[i]) * (coord->J[i.zp()] + coord->J[i]);
+    const BoutReal vD = 0.25 * (vz[i.zm()] + vz[i]) * (coord->J[i.zm()] + coord->J[i]);
+    const BoutReal vL = 0.25 * (vx[i.xm()] + vx[i]) * (coord->J[i.xm()] + coord->J[i]);
+    const BoutReal vR = 0.25 * (vx[i.xp()] + vx[i]) * (coord->J[i.xp()] + coord->J[i]);
 
     // X direction
     Stencil1D s;
@@ -424,7 +473,7 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
     if ((i.x() == mesh->xend) && (mesh->lastX())) {
       // At right boundary in X
       if (bndry_flux) {
-        BoutReal flux;
+        BoutReal flux = NAN;
         if (vR > 0.0) {
           // Flux to boundary
           flux = vR * s.R;
@@ -439,7 +488,7 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
       // Not at a boundary
       if (vR > 0.0) {
         // Flux out into next cell
-        BoutReal flux = vR * s.R;
+        const BoutReal flux = vR * s.R;
         result[i] += flux / (coord->dx[i] * coord->J[i]);
         result[i.xp()] -= flux / (coord->dx[i.xp()] * coord->J[i.xp()]);
       }
@@ -451,7 +500,7 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
       // At left boundary in X
 
       if (bndry_flux) {
-        BoutReal flux;
+        BoutReal flux = NAN;
         if (vL < 0.0) {
           // Flux to boundary
           flux = vL * s.L;
@@ -465,7 +514,7 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
     } else {
       // Not at a boundary
       if (vL < 0.0) {
-        BoutReal flux = vL * s.L;
+        const BoutReal flux = vL * s.L;
         result[i] -= flux / (coord->dx[i] * coord->J[i]);
         result[i.xm()] += flux / (coord->dx[i.xm()] * coord->J[i.xm()]);
       }
@@ -482,12 +531,12 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
     cellboundary(s);
 
     if (vU > 0.0) {
-      BoutReal flux = vU * s.R;
+      const BoutReal flux = vU * s.R;
       result[i] += flux / (coord->J[i] * coord->dz[i]);
       result[i.zp()] -= flux / (coord->J[i.zp()] * coord->dz[i.zp()]);
     }
     if (vD < 0.0) {
-      BoutReal flux = vD * s.L;
+      const BoutReal flux = vD * s.L;
       result[i] -= flux / (coord->J[i] * coord->dz[i]);
       result[i.zm()] += flux / (coord->J[i.zm()] * coord->dz[i.zm()]);
     }
@@ -507,13 +556,13 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
 
   BOUT_FOR(i, result.getRegion("RGN_NOBNDRY")) {
     // Y velocities on y boundaries
-    BoutReal vU = 0.25 * (vy[i] + vy[i.yp()]) * (coord->J[i] + coord->J[i.yp()]);
-    BoutReal vD = 0.25 * (vy[i] + vy[i.ym()]) * (coord->J[i] + coord->J[i.ym()]);
+    const BoutReal vU = 0.25 * (vy[i] + vy[i.yp()]) * (coord->J[i] + coord->J[i.yp()]);
+    const BoutReal vD = 0.25 * (vy[i] + vy[i.ym()]) * (coord->J[i] + coord->J[i.ym()]);
 
     // n (advected quantity) on y boundaries
     // Note: Use unshifted n_in variable
-    BoutReal nU = 0.5 * (n[i] + n[i.yp()]);
-    BoutReal nD = 0.5 * (n[i] + n[i.ym()]);
+    const BoutReal nU = 0.5 * (n[i] + n[i.yp()]);
+    const BoutReal nD = 0.5 * (n[i] + n[i.ym()]);
 
     yresult[i] = (nU * vU - nD * vD) / (coord->J[i] * coord->dy[i]);
   }
@@ -524,5 +573,448 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) {
    * X-Z Finite Volume diffusion operator
    */
 Field3D Div_Perp_Lap(const Field3D& a, const Field3D& f, CELL_LOC outloc = CELL_DEFAULT);
+
+/// Finite volume parallel divergence
+///
+/// NOTE: Modified version, applies limiter to velocity and field
+///       Performs better (smaller overshoots) than Div_par
+///
+/// Preserves the sum of f*J*dx*dy*dz over the domain
+///
+/// @param[in] f_in   The field being advected.
+///                   This will be reconstructed at cell faces
+///                   using the given CellEdges method
+/// @param[in] v_in   The advection velocity.
+///                   Also reconstructed at cell faces using CellEdges;
+///                   the mid-point average is used at Y boundaries
+/// @param[in] wave_speed_in  Local maximum speed of all waves in the system at each
+///                           point in space
+/// @param[in] fixflux     Fix the flux at the boundary to be the value at the
+///                        midpoint (for boundary conditions)
+///
+/// @param[out] flow_ylow    Flow at the lower Y cell boundary
+///                          Already includes area factor * flux
+template <typename CellEdges = MC>
+Field3D Div_par_mod(const Field3D& f_in, const Field3D& v_in,
+                    const Field3D& wave_speed_in, Field3D& flow_ylow,
+                    bool fixflux = true) {
+
+  Coordinates* coord = f_in.getCoordinates();
+
+  if (f_in.isFci()) {
+    // Use mid-point (cell boundary) averages; flow_ylow is not calculated here
+    if (flow_ylow.isAllocated()) {
+      flow_ylow = emptyFrom(flow_ylow);
+    }
+
+    ASSERT1(f_in.hasParallelSlices());
+    ASSERT1(v_in.hasParallelSlices());
+
+    const auto& f_up = f_in.yup();
+    const auto& f_down = f_in.ydown();
+
+    const auto& v_up = v_in.yup();
+    const auto& v_down = v_in.ydown();
+
+    Field3D result{emptyFrom(f_in)};
+    BOUT_FOR(i, f_in.getRegion("RGN_NOBNDRY")) {
+      const auto iyp = i.yp();
+      const auto iym = i.ym();
+
+      result[i] = (0.25 * (f_in[i] + f_up[iyp]) * (v_in[i] + v_up[iyp])
+                       * (coord->J[i] + coord->J.yup()[iyp])
+                       / (sqrt(coord->g_22[i]) + sqrt(coord->g_22.yup()[iyp]))
+                   - 0.25 * (f_in[i] + f_down[iym]) * (v_in[i] + v_down[iym])
+                         * (coord->J[i] + coord->J.ydown()[iym])
+                         / (sqrt(coord->g_22[i]) + sqrt(coord->g_22.ydown()[iym])))
+                  / (coord->dy[i] * coord->J[i]);
+    }
+    return result;
+  }
+  ASSERT1_FIELDS_COMPATIBLE(f_in, v_in);
+  ASSERT1_FIELDS_COMPATIBLE(f_in, wave_speed_in);
+
+  const Mesh* mesh = f_in.getMesh();
+
+  CellEdges cellboundary;
+
+  ASSERT2(f_in.getDirectionY() == v_in.getDirectionY());
+  ASSERT2(f_in.getDirectionY() == wave_speed_in.getDirectionY());
+  const bool are_unaligned =
+      ((f_in.getDirectionY() == YDirectionType::Standard)
+       and (v_in.getDirectionY() == YDirectionType::Standard)
+       and (wave_speed_in.getDirectionY() == YDirectionType::Standard));
+
+  const Field3D f = are_unaligned ? toFieldAligned(f_in, "RGN_NOX") : f_in;
+  const Field3D v = are_unaligned ? toFieldAligned(v_in, "RGN_NOX") : v_in;
+  const Field3D wave_speed =
+      are_unaligned ? toFieldAligned(wave_speed_in, "RGN_NOX") : wave_speed_in;
+
+  Field3D result{zeroFrom(f)};
+  flow_ylow = zeroFrom(f);
+
+  for (int i = mesh->xstart; i <= mesh->xend; i++) {
+    const bool is_periodic_y = mesh->periodicY(i);
+    const bool is_first_y = mesh->firstY(i);
+    const bool is_last_y = mesh->lastY(i);
+
+    // Only need one guard cell, so no need to communicate fluxes. Instead
+    // calculate in guard cells to get fluxes consistent between processors, but
+    // don't include the boundary cell. Note that this implies special handling
+    // of boundaries later
+    const int ys = (!is_first_y || is_periodic_y) ? mesh->ystart - 1 : mesh->ystart;
+    const int ye = (!is_last_y || is_periodic_y) ? mesh->yend + 1 : mesh->yend;
+
+    for (int j = ys; j <= ye; j++) {
+      // Pre-calculate factors which multiply fluxes
+#if not(BOUT_USE_METRIC_3D)
+      // For right cell boundaries
+      const BoutReal common_factor_r =
+          (coord->J(i, j) + coord->J(i, j + 1))
+          / (sqrt(coord->g_22(i, j)) + sqrt(coord->g_22(i, j + 1)));
+
+      const BoutReal flux_factor_rc =
+          common_factor_r / (coord->dy(i, j) * coord->J(i, j));
+      const BoutReal flux_factor_rp =
+          common_factor_r / (coord->dy(i, j + 1) * coord->J(i, j + 1));
+
+      const BoutReal area_rp =
+          common_factor_r * coord->dx(i, j + 1) * coord->dz(i, j + 1);
+
+      // For left cell boundaries
+      const BoutReal common_factor_l =
+          (coord->J(i, j) + coord->J(i, j - 1))
+          / (sqrt(coord->g_22(i, j)) + sqrt(coord->g_22(i, j - 1)));
+
+      const BoutReal flux_factor_lc =
+          common_factor_l / (coord->dy(i, j) * coord->J(i, j));
+      const BoutReal flux_factor_lm =
+          common_factor_l / (coord->dy(i, j - 1) * coord->J(i, j - 1));
+
+      const BoutReal area_lc = common_factor_l * coord->dx(i, j) * coord->dz(i, j);
+#endif
+      for (int k = 0; k < mesh->LocalNz; k++) {
+#if BOUT_USE_METRIC_3D
+        // For right cell boundaries
+        const BoutReal common_factor_r =
+            (coord->J(i, j, k) + coord->J(i, j + 1, k))
+            / (sqrt(coord->g_22(i, j, k)) + sqrt(coord->g_22(i, j + 1, k)));
+
+        const BoutReal flux_factor_rc =
+            common_factor_r / (coord->dy(i, j, k) * coord->J(i, j, k));
+        const BoutReal flux_factor_rp =
+            common_factor_r / (coord->dy(i, j + 1, k) * coord->J(i, j + 1, k));
+
+        const BoutReal area_rp =
+            common_factor_r * coord->dx(i, j + 1, k) * coord->dz(i, j + 1, k);
+
+        // For left cell boundaries
+        const BoutReal common_factor_l =
+            (coord->J(i, j, k) + coord->J(i, j - 1, k))
+            / (sqrt(coord->g_22(i, j, k)) + sqrt(coord->g_22(i, j - 1, k)));
+
+        const BoutReal flux_factor_lc =
+            common_factor_l / (coord->dy(i, j, k) * coord->J(i, j, k));
+        const BoutReal flux_factor_lm =
+            common_factor_l / (coord->dy(i, j - 1, k) * coord->J(i, j - 1, k));
+
+        const BoutReal area_lc =
+            common_factor_l * coord->dx(i, j, k) * coord->dz(i, j, k);
+#endif
+
+        ////////////////////////////////////////////
+        // Reconstruct f at the cell faces
+        // This calculates s.R and s.L for the Right and Left
+        // face values on this cell
+
+        // Only c, m and p are set here; cellboundary() fills in s.L and s.R
+        // TODO(peter): We can remove this #ifdef guard after switching to C++20
+#if __cpp_designated_initializers >= 201707L
+        Stencil1D s{.c = f(i, j, k), .m = f(i, j - 1, k), .p = f(i, j + 1, k)};
+#else
+        Stencil1D s{f(i, j, k), f(i, j - 1, k), f(i, j + 1, k), BoutNaN,
+                    BoutNaN,    BoutNaN,        BoutNaN};
+#endif
+        cellboundary(s); // Calculate s.R and s.L
+
+        ////////////////////////////////////////////
+        // Reconstruct v at the cell faces
+        // TODO(peter): We can remove this #ifdef guard after switching to C++20
+#if __cpp_designated_initializers >= 201707L
+        Stencil1D sv{.c = v(i, j, k), .m = v(i, j - 1, k), .p = v(i, j + 1, k)};
+#else
+        Stencil1D sv{v(i, j, k), v(i, j - 1, k), v(i, j + 1, k), BoutNaN,
+                     BoutNaN,    BoutNaN,        BoutNaN};
+#endif
+        cellboundary(sv); // Calculate sv.R and sv.L
+
+        ////////////////////////////////////////////
+        // Right boundary
+
+        BoutReal flux = BoutNaN;
+
+        if (is_last_y && (j == mesh->yend) && !is_periodic_y) {
+          // Last point in domain
+
+          // Calculate velocity at right boundary (y+1/2)
+          const BoutReal vpar = 0.5 * (v(i, j, k) + v(i, j + 1, k));
+
+          const BoutReal bndryval = 0.5 * (s.c + s.p);
+          if (fixflux) {
+            // Use mid-point to be consistent with boundary conditions
+            flux = bndryval * vpar;
+          } else {
+            // Add flux due to difference in boundary values
+            flux = (s.R * vpar) + (wave_speed(i, j, k) * (s.R - bndryval));
+          }
+
+        } else {
+          // Maximum wave speed in the two cells
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j + 1, k),
+                                        fabs(v(i, j, k)), fabs(v(i, j + 1, k)));
+
+          flux = s.R * 0.5 * (sv.R + amax);
+        }
+
+        result(i, j, k) += flux * flux_factor_rc;
+        result(i, j + 1, k) -= flux * flux_factor_rp;
+
+        flow_ylow(i, j + 1, k) += flux * area_rp;
+
+        ////////////////////////////////////////////
+        // Calculate at left boundary
+
+        if (is_first_y && (j == mesh->ystart) && !is_periodic_y) {
+          // First point in domain
+          const BoutReal bndryval = 0.5 * (s.c + s.m);
+          const BoutReal vpar = 0.5 * (v(i, j, k) + v(i, j - 1, k));
+          if (fixflux) {
+            // Use mid-point to be consistent with boundary conditions
+            flux = bndryval * vpar;
+          } else {
+            // Add flux due to difference in boundary values
+            flux = (s.L * vpar) - (wave_speed(i, j, k) * (s.L - bndryval));
+          }
+        } else {
+
+          // Maximum wave speed in the two cells
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j - 1, k),
+                                        fabs(v(i, j, k)), fabs(v(i, j - 1, k)));
+
+          flux = s.L * 0.5 * (sv.L - amax);
+        }
+
+        result(i, j, k) -= flux * flux_factor_lc;
+        result(i, j - 1, k) += flux * flux_factor_lm;
+
+        flow_ylow(i, j, k) += flux * area_lc;
+      }
+    }
+  }
+  if (are_unaligned) {
+    flow_ylow = fromFieldAligned(flow_ylow, "RGN_NOBNDRY");
+  }
+  return are_unaligned ? fromFieldAligned(result, "RGN_NOBNDRY") : result;
+}
+
+/// This operator calculates Div_par(f v v)
+/// It is used primarily (only?) in the parallel momentum equation.
+///
+/// This operator is used rather than Div(f fv) so that the values of
+/// f and v are consistent with other advection equations: The product
+/// fv is not interpolated to cell boundaries.
+///
+/// @param[in] f_in           The advected field
+/// @param[in] v_in           The advection velocity, which appears twice in the flux
+/// @param[in] wave_speed_in  Local wave speed; enters through the maximum speed amax
+/// @param[in] fixflux        Use mid-point values for the flux through Y boundaries
+template <typename CellEdges = MC>
+Field3D Div_par_fvv(const Field3D& f_in, const Field3D& v_in,
+                    const Field3D& wave_speed_in, bool fixflux = true) {
+  ASSERT1_FIELDS_COMPATIBLE(f_in, v_in);
+  const Mesh* mesh = f_in.getMesh();
+  const Coordinates* coord = f_in.getCoordinates();
+  CellEdges cellboundary;
+
+  if (f_in.isFci()) {
+    // FCI version, using yup/down fields
+    ASSERT1(f_in.hasParallelSlices());
+    ASSERT1(v_in.hasParallelSlices());
+
+    const auto& B = coord->Bxy;
+    const auto& B_up = coord->Bxy.yup();
+    const auto& B_down = coord->Bxy.ydown();
+
+    const auto& f_up = f_in.yup();
+    const auto& f_down = f_in.ydown();
+
+    const auto& v_up = v_in.yup();
+    const auto& v_down = v_in.ydown();
+
+    const auto& g_22 = coord->g_22;
+    const auto& dy = coord->dy;
+
+    Field3D result{emptyFrom(f_in)};
+    BOUT_FOR(i, f_in.getRegion("RGN_NOBNDRY")) {
+      const auto iyp = i.yp();
+      const auto iym = i.ym();
+
+      // Maximum local wave speed
+      const BoutReal amax =
+          BOUTMAX(wave_speed_in[i], fabs(v_in[i]), fabs(v_up[iyp]), fabs(v_down[iym]));
+
+      result[i] =
+          B[i]
+          * ((f_up[iyp] * v_up[iyp] * v_up[iyp] / B_up[iyp])
+             - (f_down[iym] * v_down[iym] * v_down[iym] / B_down[iym])
+             // Penalty terms. This implementation is very dissipative.
+             // Note: This version adds a viscosity that damps gradients of velocity
+             + amax * (f_in[i] + f_up[iyp]) * (v_in[i] - v_up[iyp]) / (B[i] + B_up[iyp])
+             + amax * (f_in[i] + f_down[iym]) * (v_in[i] - v_down[iym])
+                   / (B[i] + B_down[iym]))
+          / (2 * dy[i] * sqrt(g_22[i]));
+
+#if CHECK > 0
+      if (!std::isfinite(result[i])) {
+        throw BoutException("Non-finite value in Div_par_fvv at {}\n"
+                            "fup {} vup {} fdown {} vdown {} amax {}\n"
+                            "B {} Bup {} Bdown {} dy {} sqrt(g_22) {}",
+                            i, f_up[iyp], v_up[iyp], f_down[iym], v_down[iym], amax,
+                            B[i], B_up[iyp], B_down[iym], dy[i], sqrt(g_22[i]));
+      }
+#endif
+    }
+    return result;
+  }
+
+  ASSERT1_FIELDS_COMPATIBLE(f_in, wave_speed_in);
+
+  /// Ensure that f, v and wave_speed are field aligned
+  Field3D f = toFieldAligned(f_in, "RGN_NOX");
+  Field3D v = toFieldAligned(v_in, "RGN_NOX");
+  Field3D wave_speed = toFieldAligned(wave_speed_in, "RGN_NOX");
+
+  Field3D result{zeroFrom(f)};
+
+  for (int i = mesh->xstart; i <= mesh->xend; i++) {
+    const bool is_periodic_y = mesh->periodicY(i);
+    const bool is_first_y = mesh->firstY(i);
+    const bool is_last_y = mesh->lastY(i);
+
+    // Only need one guard cell, so no need to communicate fluxes. Instead
+    // calculate in guard cells to get fluxes consistent between processors, but
+    // don't include the boundary cell. Note that this implies special handling
+    // of boundaries later
+    const int ys = (!is_first_y || is_periodic_y) ? mesh->ystart - 1 : mesh->ystart;
+    const int ye = (!is_last_y || is_periodic_y) ? mesh->yend + 1 : mesh->yend;
+
+    for (int j = ys; j <= ye; j++) {
+      for (int k = 0; k < mesh->LocalNz; k++) {
+        // Factors which multiply fluxes through the right cell boundary
+        const BoutReal common_factor_r =
+            (coord->J(i, j, k) + coord->J(i, j + 1, k))
+            / (sqrt(coord->g_22(i, j, k)) + sqrt(coord->g_22(i, j + 1, k)));
+
+        const BoutReal flux_factor_rc =
+            common_factor_r / (coord->dy(i, j, k) * coord->J(i, j, k));
+        const BoutReal flux_factor_rp =
+            common_factor_r / (coord->dy(i, j + 1, k) * coord->J(i, j + 1, k));
+
+        // Factors which multiply fluxes through the left cell boundary
+        const BoutReal common_factor_l =
+            (coord->J(i, j, k) + coord->J(i, j - 1, k))
+            / (sqrt(coord->g_22(i, j, k)) + sqrt(coord->g_22(i, j - 1, k)));
+
+        const BoutReal flux_factor_lc =
+            common_factor_l / (coord->dy(i, j, k) * coord->J(i, j, k));
+        const BoutReal flux_factor_lm =
+            common_factor_l / (coord->dy(i, j - 1, k) * coord->J(i, j - 1, k));
+
+        ////////////////////////////////////////////
+        // Reconstruct f at the cell faces
+        // This calculates s.R and s.L for the Right and Left
+        // face values on this cell
+#if __cpp_designated_initializers >= 201707L
+        Stencil1D s{.c = f(i, j, k), .m = f(i, j - 1, k), .p = f(i, j + 1, k)};
+#else
+        Stencil1D s{f(i, j, k), f(i, j - 1, k), f(i, j + 1, k), BoutNaN,
+                    BoutNaN,    BoutNaN,        BoutNaN};
+#endif
+        cellboundary(s); // Calculate s.R and s.L
+
+        ////////////////////////////////////////////
+        // Reconstruct v at the cell faces
+        // TODO(peter): We can remove this #ifdef guard after switching to C++20
+#if __cpp_designated_initializers >= 201707L
+        Stencil1D sv{.c = v(i, j, k), .m = v(i, j - 1, k), .p = v(i, j + 1, k)};
+#else
+        Stencil1D sv{v(i, j, k), v(i, j - 1, k), v(i, j + 1, k), BoutNaN,
+                     BoutNaN,    BoutNaN,        BoutNaN};
+#endif
+        cellboundary(sv); // Calculate sv.R and sv.L
+
+        ////////////////////////////////////////////
+        // Right boundary
+
+        // Calculate velocity at right boundary (y+1/2)
+        const BoutReal v_mid_r = 0.5 * (sv.c + sv.p);
+        // And mid-point density at right boundary
+        const BoutReal n_mid_r = 0.5 * (s.c + s.p);
+        BoutReal flux = BoutNaN;
+
+        if (is_last_y && (j == mesh->yend) && !is_periodic_y) {
+          // Last point in domain
+          if (fixflux) {
+            // Use mid-point to be consistent with boundary conditions
+            flux = n_mid_r * v_mid_r * v_mid_r;
+          } else {
+            // Add flux due to difference in boundary values
+            flux = (s.R * sv.R * sv.R) // Use right cell edge values
+                   + (BOUTMAX(wave_speed(i, j, k), fabs(sv.c), fabs(sv.p)) * n_mid_r
+                      * (sv.R - v_mid_r)); // Damp differences in velocity, not flux
+          }
+        } else {
+          // Maximum wave speed in the two cells
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j + 1, k),
+                                        fabs(sv.c), fabs(sv.p));
+
+          flux = s.R * 0.5 * (sv.R + amax) * sv.R;
+        }
+
+        result(i, j, k) += flux * flux_factor_rc;
+        result(i, j + 1, k) -= flux * flux_factor_rp;
+
+        ////////////////////////////////////////////
+        // Calculate at left boundary
+
+        const BoutReal v_mid_l = 0.5 * (sv.c + sv.m);
+        const BoutReal n_mid_l = 0.5 * (s.c + s.m);
+
+        if (is_first_y && (j == mesh->ystart) && !is_periodic_y) {
+          // First point in domain
+          if (fixflux) {
+            // Use mid-point to be consistent with boundary conditions
+            flux = n_mid_l * v_mid_l * v_mid_l;
+          } else {
+            // Add flux due to difference in boundary values
+            flux = (s.L * sv.L * sv.L)
+                   - (BOUTMAX(wave_speed(i, j, k), fabs(sv.c), fabs(sv.m)) * n_mid_l
+                      * (sv.L - v_mid_l));
+          }
+        } else {
+          // Maximum wave speed in the two cells
+          const BoutReal amax = BOUTMAX(wave_speed(i, j, k), wave_speed(i, j - 1, k),
+                                        fabs(sv.c), fabs(sv.m));
+
+          flux = s.L * 0.5 * (sv.L - amax) * sv.L;
+        }
+
+        result(i, j, k) -= flux * flux_factor_lc;
+        result(i, j - 1, k) += flux * flux_factor_lm;
+      }
+    }
+  }
+  return fromFieldAligned(result, "RGN_NOBNDRY");
+}
 } // namespace FV
 #endif // BOUT_FV_OPS_H
diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx
index 6c7419f7e4..4dd24259fd 100644
--- a/include/bout/interpolation_xz.hxx
+++ b/include/bout/interpolation_xz.hxx
@@ -24,7 +24,9 @@
 #ifndef BOUT_INTERP_XZ_H
 #define BOUT_INTERP_XZ_H
 
-#include "bout/mask.hxx"
+#include <bout/bout_types.hxx>
+#include <bout/generic_factory.hxx>
+#include <bout/mask.hxx>
 
 #define USE_NEW_WEIGHTS 1
 #if BOUT_HAS_PETSC
@@ -166,7 +168,8 @@ protected:
 #endif
 
 public:
-  XZHermiteSpline(Mesh* mesh = nullptr) : XZHermiteSpline(0, mesh) {}
+  XZHermiteSpline(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr)
+      : XZHermiteSpline(0, mesh) {}
   XZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr);
   XZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr)
       : XZHermiteSpline(y_offset, mesh) {
@@ -210,9 +213,29 @@ public:
 /// but also degrades accuracy near maxima and minima.
 /// Perhaps should only impose near boundaries, since that is where
 /// problems most obviously occur.
+///
+/// You can control how tightly the result is clipped to the range of the
+/// neighbouring cell values through ``rtol`` and ``atol``:
+///
+///     diff = (max_of_neighbours - min_of_neighbours) * rtol + atol
+///
+/// and the interpolated value is instead clipped to the range
+/// ``[min_of_neighbours - diff, max_of_neighbours + diff]``
 class XZMonotonicHermiteSpline : public XZHermiteSpline {
+  /// Absolute tolerance for clipping
+  BoutReal atol = 0.0;
+  /// Relative tolerance for clipping
+  BoutReal rtol = 1.0;
+
 public:
-  XZMonotonicHermiteSpline(Mesh* mesh = nullptr) : XZHermiteSpline(0, mesh) {
+  XZMonotonicHermiteSpline(Mesh* mesh = nullptr, Options* options = nullptr)
+      : XZHermiteSpline(0, mesh), // options may be null: then use the defaults
+        atol{(options == nullptr) ? 0.0 : (*options)["atol"]
+                 .doc("Absolute tolerance for clipping overshoot")
+                 .withDefault(0.0)},
+        rtol{(options == nullptr) ? 1.0 : (*options)["rtol"]
+                 .doc("Relative tolerance for clipping overshoot")
+                 .withDefault(1.0)} {
     if (localmesh->getNXPE() > 1) {
       throw BoutException("Do not support MPI splitting in X");
     }
@@ -248,7 +271,8 @@ class XZLagrange4pt : public XZInterpolation {
   Field3D t_x, t_z;
 
 public:
-  XZLagrange4pt(Mesh* mesh = nullptr) : XZLagrange4pt(0, mesh) {}
+  XZLagrange4pt(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr)
+      : XZLagrange4pt(0, mesh) {}
   XZLagrange4pt(int y_offset = 0, Mesh* mesh = nullptr);
   XZLagrange4pt(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr)
       : XZLagrange4pt(y_offset, mesh) {
@@ -284,7 +308,8 @@ class XZBilinear : public XZInterpolation {
   Field3D w0, w1, w2, w3;
 
 public:
-  XZBilinear(Mesh* mesh = nullptr) : XZBilinear(0, mesh) {}
+  XZBilinear(Mesh* mesh = nullptr, [[maybe_unused]] Options* options = nullptr)
+      : XZBilinear(0, mesh) {}
   XZBilinear(int y_offset = 0, Mesh* mesh = nullptr);
   XZBilinear(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr)
       : XZBilinear(y_offset, mesh) {
@@ -308,7 +333,7 @@ public:
 };
 
 class XZInterpolationFactory
-    : public Factory<XZInterpolation, XZInterpolationFactory, Mesh*> {
+    : public Factory<XZInterpolation, XZInterpolationFactory, Mesh*, Options*> {
 public:
   static constexpr auto type_name = "XZInterpolation";
   static constexpr auto section_name = "xzinterpolation";
@@ -316,10 +341,10 @@ public:
   static constexpr auto default_type = "hermitespline";
 
   ReturnType create(Options* options = nullptr, Mesh* mesh = nullptr) const {
-    return Factory::create(getType(options), mesh);
+    return Factory::create(getType(options), mesh, options);
   }
-  ReturnType create(const std::string& type, [[maybe_unused]] Options* options) const {
-    return Factory::create(type, nullptr);
+  ReturnType create(const std::string& type, Options* options) const {
+    return Factory::create(type, nullptr, options);
   }
 
   static void ensureRegistered();
diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx
index 3dfee6a553..12e465ffb6 100644
--- a/src/mesh/coordinates.cxx
+++ b/src/mesh/coordinates.cxx
@@ -1577,7 +1577,7 @@ Field3D Coordinates::Div_par(const Field3D& f, CELL_LOC outloc,
 
   // Need Bxy at location of f, which might be different from location of this
   // Coordinates object
-  auto Bxy_floc = f.getCoordinates()->Bxy;
+  const auto& Bxy_floc = f.getCoordinates()->Bxy;
 
   if (!f.hasParallelSlices()) {
     // No yup/ydown fields. The Grad_par operator will
diff --git a/src/mesh/difops.cxx b/src/mesh/difops.cxx
index 42fa4d6ca5..8cc1a7c28e 100644
--- a/src/mesh/difops.cxx
+++ b/src/mesh/difops.cxx
@@ -25,20 +25,18 @@
 
 #include "bout/build_defines.hxx"
 
-#include <bout/assert.hxx>
-#include <bout/derivs.hxx>
-#include <bout/difops.hxx>
-#include <bout/fft.hxx>
-#include <bout/globals.hxx>
-#include <bout/msg_stack.hxx>
-#include <bout/solver.hxx>
-#include <bout/utils.hxx>
-#include <bout/vecops.hxx>
-
-#include <bout/invert_laplace.hxx> // Delp2 uses same coefficients as inversion code
-
-#include <bout/interpolation.hxx>
-#include <bout/unused.hxx>
+#include "bout/assert.hxx"
+#include "bout/derivs.hxx"
+#include "bout/difops.hxx"
+#include "bout/field2d.hxx"
+#include "bout/globals.hxx"
+#include "bout/interpolation.hxx"
+#include "bout/invert_laplace.hxx" // Delp2 uses same coefficients as inversion code
+#include "bout/msg_stack.hxx"
+#include "bout/region.hxx"
+#include "bout/solver.hxx"
+#include "bout/unused.hxx"
+#include "bout/utils.hxx"
 
 #include <cmath>
 
@@ -367,6 +365,126 @@ Field3D Div_par_K_Grad_par(const Field3D& kY, const Field3D& f, CELL_LOC outloc)
          + Div_par(kY, outloc) * Grad_par(f, outloc);
 }
 
+/// Flux-form parallel diffusion operator Div_par(K Grad_par(f)) that also
+/// reports the flow through the lower Y face of each cell.
+///
+/// If both \p Kin and \p fin have parallel slices, their yup/ydown fields are
+/// used directly. Otherwise both are shifted to field-aligned coordinates and
+/// the outputs are shifted back before returning.
+///
+/// @param[in]  Kin        Coefficient K, at the same cell location as \p fin
+/// @param[in]  fin        Field f to differentiate
+/// @param[out] flow_ylow  Overwritten with -(lower-face flux) * dx * dz
+/// @param[in]  bndry_flux If false, fluxes through non-periodic Y domain
+///                        boundaries are skipped. Only consulted in the
+///                        field-aligned branch.
+/// @return The divergence of the face fluxes, divided by dy * J
+Field3D Div_par_K_Grad_par_mod(const Field3D& Kin, const Field3D& fin, Field3D& flow_ylow,
+                               bool bndry_flux) {
+  TRACE("Div_par_K_Grad_par_mod");
+
+  ASSERT2(Kin.getLocation() == fin.getLocation());
+
+  const Mesh* mesh = Kin.getMesh();
+  const Coordinates* coord = fin.getCoordinates();
+
+  if (Kin.hasParallelSlices() && fin.hasParallelSlices()) {
+    // Using parallel slices.
+    // Note: Y slices may use different coordinate systems
+    //       -> Only B, dy and g_22 can be used in yup/ydown
+    //          Others (e.g J) may not be averaged between y planes.
+    // NOTE(review): J is nevertheless averaged with its yup/ydown slices
+    // below -- confirm that this is intended.
+
+    const auto& K_up = Kin.yup();
+    const auto& K_down = Kin.ydown();
+
+    const auto& f_up = fin.yup();
+    const auto& f_down = fin.ydown();
+
+    Field3D result{zeroFrom(fin)};
+    flow_ylow = zeroFrom(fin);
+
+    BOUT_FOR(i, result.getRegion("RGN_NOBNDRY")) {
+      const auto iyp = i.yp();
+      const auto iym = i.ym();
+
+      // Upper cell edge
+      const BoutReal c_up = 0.5 * (Kin[i] + K_up[iyp]); // K at the upper boundary
+      const BoutReal J_up =
+          0.5 * (coord->J[i] + coord->J.yup()[iyp]); // Jacobian at boundary
+      const BoutReal g_22_up = 0.5 * (coord->g_22[i] + coord->g_22.yup()[iyp]);
+      const BoutReal gradient_up =
+          2. * (f_up[iyp] - fin[i]) / (coord->dy[i] + coord->dy.yup()[iyp]);
+
+      const BoutReal flux_up = c_up * J_up * gradient_up / g_22_up;
+
+      // Lower cell edge
+      const BoutReal c_down = 0.5 * (Kin[i] + K_down[iym]); // K at the lower boundary
+      const BoutReal J_down =
+          0.5 * (coord->J[i] + coord->J.ydown()[iym]); // Jacobian at boundary
+      const BoutReal g_22_down = 0.5 * (coord->g_22[i] + coord->g_22.ydown()[iym]);
+      const BoutReal gradient_down =
+          2. * (fin[i] - f_down[iym]) / (coord->dy[i] + coord->dy.ydown()[iym]);
+
+      const BoutReal flux_down = c_down * J_down * gradient_down / g_22_down;
+
+      result[i] = (flux_up - flux_down) / (coord->dy[i] * coord->J[i]);
+      // Flow through the lower face, as in the field-aligned branch below
+      flow_ylow[i] = -flux_down * coord->dx[i] * coord->dz[i];
+    }
+
+    return result;
+  }
+
+  // Calculate in field-aligned coordinates
+  const auto& K = toFieldAligned(Kin, "RGN_NOX");
+  const auto& f = toFieldAligned(fin, "RGN_NOX");
+
+  Field3D result{zeroFrom(f)};
+  flow_ylow = zeroFrom(f);
+
+  BOUT_FOR(i, result.getRegion("RGN_NOBNDRY")) {
+    // Calculate flux at upper surface
+    const auto ix = i.x();
+    const auto iy = i.y();
+    const auto iyp = i.yp();
+    const auto iym = i.ym();
+
+    const bool is_periodic_y = mesh->periodicY(ix);
+
+    if (bndry_flux || is_periodic_y || !mesh->lastY(ix) || (iy != mesh->yend)) {
+      const BoutReal c = 0.5 * (K[i] + K[iyp]);               // K at the upper boundary
+      const BoutReal J = 0.5 * (coord->J[i] + coord->J[iyp]); // Jacobian at boundary
+      const BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iyp]);
+      const BoutReal gradient = 2. * (f[iyp] - f[i]) / (coord->dy[i] + coord->dy[iyp]);
+
+      const BoutReal flux = c * J * gradient / g_22;
+
+      result[i] += flux / (coord->dy[i] * coord->J[i]);
+    }
+
+    // Calculate flux at lower surface
+    if (bndry_flux || is_periodic_y || !mesh->firstY(ix) || (iy != mesh->ystart)) {
+      const BoutReal c = 0.5 * (K[i] + K[iym]);               // K at the lower boundary
+      const BoutReal J = 0.5 * (coord->J[i] + coord->J[iym]); // Jacobian at boundary
+      const BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iym]);
+      const BoutReal gradient = 2. * (f[i] - f[iym]) / (coord->dy[i] + coord->dy[iym]);
+
+      const BoutReal flux = c * J * gradient / g_22;
+
+      result[i] -= flux / (coord->dy[i] * coord->J[i]);
+      flow_ylow[i] = -flux * coord->dx[i] * coord->dz[i];
+    }
+  }
+
+  // Shifted to field aligned coordinates, so need to shift back
+  result = fromFieldAligned(result, "RGN_NOBNDRY");
+  flow_ylow = fromFieldAligned(flow_ylow);
+
+  return result;
+}
+
 /*******************************************************************************
 * Delp2
 * perpendicular Laplacian operator
diff --git a/src/mesh/fv_ops.cxx b/src/mesh/fv_ops.cxx
index fe5422b4d1..71e51561b0 100644
--- a/src/mesh/fv_ops.cxx
+++ b/src/mesh/fv_ops.cxx
@@ -1,8 +1,16 @@
-#include <bout/fv_ops.hxx>
-#include <bout/globals.hxx>
-#include <bout/msg_stack.hxx>
-#include <bout/output.hxx>
-#include <bout/utils.hxx>
+#include "bout/fv_ops.hxx"
+
+#include "bout/assert.hxx"
+#include "bout/bout_types.hxx"
+#include "bout/boutexception.hxx"
+#include "bout/build_config.hxx"
+#include "bout/coordinates.hxx"
+#include "bout/field2d.hxx"
+#include "bout/field3d.hxx"
+#include "bout/globals.hxx"
+#include "bout/msg_stack.hxx"
+#include "bout/region.hxx"
+#include "bout/utils.hxx"
 
 namespace {
 template <class T>
@@ -34,28 +42,19 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) {
 
   // Flux in x
 
-  int xs = mesh->xstart - 1;
-  int xe = mesh->xend;
-
-  /*
-    if(mesh->firstX())
-    xs += 1;
-  */
-  /*
-    if(mesh->lastX())
-    xe -= 1;
-  */
+  const int xs = mesh->xstart - 1;
+  const int xe = mesh->xend;
 
   for (int i = xs; i <= xe; i++) {
     for (int j = mesh->ystart; j <= mesh->yend; j++) {
       for (int k = 0; k < mesh->LocalNz; k++) {
         // Calculate flux from i to i+1
 
-        BoutReal fout = 0.5 * (a(i, j, k) + a(i + 1, j, k))
-                        * (coord->J(i, j, k) * coord->g11(i, j, k)
-                           + coord->J(i + 1, j, k) * coord->g11(i + 1, j, k))
-                        * (f(i + 1, j, k) - f(i, j, k))
-                        / (coord->dx(i, j, k) + coord->dx(i + 1, j, k));
+        const BoutReal fout = 0.5 * (a(i, j, k) + a(i + 1, j, k))
+                              * (coord->J(i, j, k) * coord->g11(i, j, k)
+                                 + coord->J(i + 1, j, k) * coord->g11(i + 1, j, k))
+                              * (f(i + 1, j, k) - f(i, j, k))
+                              / (coord->dx(i, j, k) + coord->dx(i + 1, j, k));
 
         result(i, j, k) += fout / (coord->dx(i, j, k) * coord->J(i, j, k));
         result(i + 1, j, k) -= fout / (coord->dx(i + 1, j, k) * coord->J(i + 1, j, k));
@@ -179,15 +178,14 @@ Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) {
   return result;
 }
 
-const Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin,
-                                 bool bndry_flux) {
+Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin, bool bndry_flux) {
   TRACE("FV::Div_par_K_Grad_par");
 
   ASSERT2(Kin.getLocation() == fin.getLocation());
 
-  Mesh* mesh = Kin.getMesh();
+  const Mesh* mesh = Kin.getMesh();
 
-  bool use_parallel_slices = (Kin.hasParallelSlices() && fin.hasParallelSlices());
+  const bool use_parallel_slices = (Kin.hasParallelSlices() && fin.hasParallelSlices());
 
   const auto& K = use_parallel_slices ? Kin : toFieldAligned(Kin, "RGN_NOX");
   const auto& f = use_parallel_slices ? fin : toFieldAligned(fin, "RGN_NOX");
@@ -211,13 +209,13 @@ const Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin,
     if (bndry_flux || mesh->periodicY(i.x()) || !mesh->lastY(i.x())
         || (i.y() != mesh->yend)) {
 
-      BoutReal c = 0.5 * (K[i] + Kup[iyp]);             // K at the upper boundary
-      BoutReal J = 0.5 * (coord->J[i] + coord->J[iyp]); // Jacobian at boundary
-      BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iyp]);
+      const BoutReal c = 0.5 * (K[i] + Kup[iyp]);             // K at the upper boundary
+      const BoutReal J = 0.5 * (coord->J[i] + coord->J[iyp]); // Jacobian at boundary
+      const BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iyp]);
 
-      BoutReal gradient = 2. * (fup[iyp] - f[i]) / (coord->dy[i] + coord->dy[iyp]);
+      const BoutReal gradient = 2. * (fup[iyp] - f[i]) / (coord->dy[i] + coord->dy[iyp]);
 
-      BoutReal flux = c * J * gradient / g_22;
+      const BoutReal flux = c * J * gradient / g_22;
 
       result[i] += flux / (coord->dy[i] * coord->J[i]);
     }
@@ -225,14 +223,15 @@ const Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin,
     // Calculate flux at lower surface
     if (bndry_flux || mesh->periodicY(i.x()) || !mesh->firstY(i.x())
         || (i.y() != mesh->ystart)) {
-      BoutReal c = 0.5 * (K[i] + Kdown[iym]);           // K at the lower boundary
-      BoutReal J = 0.5 * (coord->J[i] + coord->J[iym]); // Jacobian at boundary
+      const BoutReal c = 0.5 * (K[i] + Kdown[iym]);           // K at the lower boundary
+      const BoutReal J = 0.5 * (coord->J[i] + coord->J[iym]); // Jacobian at boundary
 
-      BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iym]);
+      const BoutReal g_22 = 0.5 * (coord->g_22[i] + coord->g_22[iym]);
 
-      BoutReal gradient = 2. * (f[i] - fdown[iym]) / (coord->dy[i] + coord->dy[iym]);
+      const BoutReal gradient =
+          2. * (f[i] - fdown[iym]) / (coord->dy[i] + coord->dy[iym]);
 
-      BoutReal flux = c * J * gradient / g_22;
+      const BoutReal flux = c * J * gradient / g_22;
 
       result[i] -= flux / (coord->dy[i] * coord->J[i]);
     }
@@ -246,10 +245,10 @@ const Field3D Div_par_K_Grad_par(const Field3D& Kin, const Field3D& fin,
   return result;
 }
 
-const Field3D D4DY4(const Field3D& d_in, const Field3D& f_in) {
+Field3D D4DY4(const Field3D& d_in, const Field3D& f_in) {
   ASSERT1_FIELDS_COMPATIBLE(d_in, f_in);
 
-  Mesh* mesh = d_in.getMesh();
+  const Mesh* mesh = d_in.getMesh();
 
   Coordinates* coord = f_in.getCoordinates();
 
@@ -265,9 +264,9 @@ const Field3D D4DY4(const Field3D& d_in, const Field3D& f_in) {
 
   for (int i = mesh->xstart; i <= mesh->xend; i++) {
     // Check for boundaries
-    bool yperiodic = mesh->periodicY(i);
-    bool has_upper_boundary = !yperiodic && mesh->lastY(i);
-    bool has_lower_boundary = !yperiodic && mesh->firstY(i);
+    const bool yperiodic = mesh->periodicY(i);
+    const bool has_upper_boundary = !yperiodic && mesh->lastY(i);
+    const bool has_lower_boundary = !yperiodic && mesh->firstY(i);
 
     // Always calculate fluxes at upper Y cell boundary
     const int ystart =
@@ -283,15 +282,15 @@ const Field3D D4DY4(const Field3D& d_in, const Field3D& f_in) {
 
     for (int j = ystart; j <= yend; j++) {
       for (int k = 0; k < mesh->LocalNz; k++) {
-        BoutReal dy3 = SQ(coord->dy(i, j, k)) * coord->dy(i, j, k);
+        const BoutReal dy3 = SQ(coord->dy(i, j, k)) * coord->dy(i, j, k);
         // 3rd derivative at upper boundary
 
-        BoutReal d3fdy3 =
+        const BoutReal d3fdy3 =
             (f(i, j + 2, k) - 3. * f(i, j + 1, k) + 3. * f(i, j, k) - f(i, j - 1, k))
             / dy3;
 
-        BoutReal flux = 0.5 * (d(i, j, k) + d(i, j + 1, k))
-                        * (coord->J(i, j, k) + coord->J(i, j + 1, k)) * d3fdy3;
+        const BoutReal flux = 0.5 * (d(i, j, k) + d(i, j + 1, k))
+                              * (coord->J(i, j, k) + coord->J(i, j + 1, k)) * d3fdy3;
 
         result(i, j, k) += flux / (coord->J(i, j, k) * coord->dy(i, j, k));
         result(i, j + 1, k) -= flux / (coord->J(i, j + 1, k) * coord->dy(i, j + 1, k));
@@ -303,8 +302,8 @@ const Field3D D4DY4(const Field3D& d_in, const Field3D& f_in) {
   return are_unaligned ? fromFieldAligned(result, "RGN_NOBNDRY") : result;
 }
 
-const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
-  Mesh* mesh = f_in.getMesh();
+Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
+  const Mesh* mesh = f_in.getMesh();
 
   // Convert to field aligned coordinates
   const bool is_unaligned = (f_in.getDirectionY() == YDirectionType::Standard);
@@ -315,10 +314,10 @@ const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
   Coordinates* coord = f_in.getCoordinates();
 
   for (int i = mesh->xstart; i <= mesh->xend; i++) {
-    bool yperiodic = mesh->periodicY(i);
+    const bool yperiodic = mesh->periodicY(i);
 
-    bool has_upper_boundary = !yperiodic && mesh->lastY(i);
-    bool has_lower_boundary = !yperiodic && mesh->firstY(i);
+    const bool has_upper_boundary = !yperiodic && mesh->lastY(i);
+    const bool has_lower_boundary = !yperiodic && mesh->firstY(i);
 
     for (int j = mesh->ystart; j <= mesh->yend; j++) {
 
@@ -343,8 +342,8 @@ const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
             // Not on domain boundary
             // 3rd derivative at right cell boundary
 
-            const BoutReal d3fdx3 =
-                (f(i, j + 2, k) - 3. * f(i, j + 1, k) + 3. * f(i, j, k) - f(i, j - 1, k));
+            const BoutReal d3fdx3 = (f(i, j + 2, k) - (3. * f(i, j + 1, k))
+                                     + (3. * f(i, j, k)) - f(i, j - 1, k));
 
             result(i, j, k) += d3fdx3 * factor_rc;
             result(i, j + 1, k) -= d3fdx3 * factor_rp;
@@ -365,10 +364,10 @@ const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
                 common_factor / (coord->J(i, j + 1, k) * coord->dy(i, j + 1, k));
 
             const BoutReal d3fdx3 =
-                -((16. / 5) * 0.5 * (f(i, j + 1, k) + f(i, j, k)) // Boundary value f_b
-                  - 6. * f(i, j, k)                               // f_0
-                  + 4. * f(i, j - 1, k)                           // f_1
-                  - (6. / 5) * f(i, j - 2, k)                     // f_2
+                -(((16. / 5) * 0.5 * (f(i, j + 1, k) + f(i, j, k))) // Boundary value f_b
+                  - (6. * f(i, j, k))                               // f_0
+                  + (4. * f(i, j - 1, k))                           // f_1
+                  - ((6. / 5) * f(i, j - 2, k))                     // f_2
                 );
 
             result(i, j, k) += d3fdx3 * factor_rc;
@@ -394,8 +393,8 @@ const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
                 common_factor / (coord->J(i, j - 1, k) * coord->dy(i, j - 1, k));
 
             // Not on a domain boundary
-            const BoutReal d3fdx3 =
-                (f(i, j + 1, k) - 3. * f(i, j, k) + 3. * f(i, j - 1, k) - f(i, j - 2, k));
+            const BoutReal d3fdx3 = (f(i, j + 1, k) - (3. * f(i, j, k))
+                                     + (3. * f(i, j - 1, k)) - f(i, j - 2, k));
 
             result(i, j, k) -= d3fdx3 * factor_lc;
             result(i, j - 1, k) += d3fdx3 * factor_lm;
@@ -412,10 +411,10 @@ const Field3D D4DY4_Index(const Field3D& f_in, bool bndry_flux) {
             const BoutReal factor_lm =
                 common_factor / (coord->J(i, j - 1, k) * coord->dy(i, j - 1, k));
             const BoutReal d3fdx3 =
-                -(-(16. / 5) * 0.5 * (f(i, j - 1, k) + f(i, j, k)) // Boundary value f_b
-                  + 6. * f(i, j, k)                                // f_0
-                  - 4. * f(i, j + 1, k)                            // f_1
-                  + (6. / 5) * f(i, j + 2, k)                      // f_2
+                -((-(16. / 5) * 0.5 * (f(i, j - 1, k) + f(i, j, k))) // Boundary value f_b
+                  + (6. * f(i, j, k))                                // f_0
+                  - (4. * f(i, j + 1, k))                            // f_1
+                  + ((6. / 5) * f(i, j + 2, k))                      // f_2
                 );
 
             result(i, j, k) -= d3fdx3 * factor_lc;
@@ -438,8 +437,9 @@ void communicateFluxes(Field3D& f) {
     throw BoutException("communicateFluxes: Sorry!");
   }
 
-  int size = mesh->LocalNy * mesh->LocalNz;
-  comm_handle xin, xout;
+  const int size = mesh->LocalNy * mesh->LocalNz;
+  comm_handle xin = nullptr;
+  comm_handle xout = nullptr;
   // Cache results to silence spurious compiler warning about xin,
   // xout possibly being uninitialised when used
   const bool not_first = mesh->periodicX || !mesh->firstX();
@@ -498,45 +498,45 @@ Field3D Div_Perp_Lap(const Field3D& a, const Field3D& f, CELL_LOC outloc) {
   //     o --- gD --- o
   //
   Coordinates* coords = a.getCoordinates(outloc);
-  Mesh* mesh = f.getMesh();
+  const Mesh* mesh = f.getMesh();
 
   for (int i = mesh->xstart; i <= mesh->xend; i++) {
     for (int j = mesh->ystart; j <= mesh->yend; j++) {
       for (int k = 0; k < mesh->LocalNz; k++) {
 
         // wrap k-index around as Z is (currently) periodic.
-        int kp = (k + 1) % (mesh->LocalNz);
-        int km = (k - 1 + mesh->LocalNz) % (mesh->LocalNz);
+        const int kp = (k + 1) % (mesh->LocalNz);
+        const int km = (k - 1 + mesh->LocalNz) % (mesh->LocalNz);
 
         // Calculate gradients on cell faces -- assumes constant grid spacing
 
-        BoutReal gR =
-            (coords->g11(i, j, k) + coords->g11(i + 1, j, k))
-                * (f(i + 1, j, k) - f(i, j, k))
-                / (coords->dx(i + 1, j, k) + coords->dx(i, j, k))
-            + 0.5 * (coords->g13(i, j, k) + coords->g13(i + 1, j, k))
-                  * (f(i + 1, j, kp) - f(i + 1, j, km) + f(i, j, kp) - f(i, j, km))
-                  / (4. * coords->dz(i, j, k));
-
-        BoutReal gL =
-            (coords->g11(i - 1, j, k) + coords->g11(i, j, k))
-                * (f(i, j, k) - f(i - 1, j, k))
-                / (coords->dx(i - 1, j, k) + coords->dx(i, j, k))
-            + 0.5 * (coords->g13(i - 1, j, k) + coords->g13(i, j, k))
-                  * (f(i - 1, j, kp) - f(i - 1, j, km) + f(i, j, kp) - f(i, j, km))
-                  / (4 * coords->dz(i, j, k));
-
-        BoutReal gD =
-            coords->g13(i, j, k)
-                * (f(i + 1, j, km) - f(i - 1, j, km) + f(i + 1, j, k) - f(i - 1, j, k))
-                / (4. * coords->dx(i, j, k))
-            + coords->g33(i, j, k) * (f(i, j, k) - f(i, j, km)) / coords->dz(i, j, k);
-
-        BoutReal gU =
-            coords->g13(i, j, k)
-                * (f(i + 1, j, kp) - f(i - 1, j, kp) + f(i + 1, j, k) - f(i - 1, j, k))
-                / (4. * coords->dx(i, j, k))
-            + coords->g33(i, j, k) * (f(i, j, kp) - f(i, j, k)) / coords->dz(i, j, k);
+        const BoutReal gR =
+            ((coords->g11(i, j, k) + coords->g11(i + 1, j, k))
+             * (f(i + 1, j, k) - f(i, j, k))
+             / (coords->dx(i + 1, j, k) + coords->dx(i, j, k)))
+            + (0.5 * (coords->g13(i, j, k) + coords->g13(i + 1, j, k))
+               * (f(i + 1, j, kp) - f(i + 1, j, km) + f(i, j, kp) - f(i, j, km))
+               / (4. * coords->dz(i, j, k)));
+
+        const BoutReal gL =
+            ((coords->g11(i - 1, j, k) + coords->g11(i, j, k))
+             * (f(i, j, k) - f(i - 1, j, k))
+             / (coords->dx(i - 1, j, k) + coords->dx(i, j, k)))
+            + (0.5 * (coords->g13(i - 1, j, k) + coords->g13(i, j, k))
+               * (f(i - 1, j, kp) - f(i - 1, j, km) + f(i, j, kp) - f(i, j, km))
+               / (4 * coords->dz(i, j, k)));
+
+        const BoutReal gD =
+            (coords->g13(i, j, k)
+             * (f(i + 1, j, km) - f(i - 1, j, km) + f(i + 1, j, k) - f(i - 1, j, k))
+             / (4. * coords->dx(i, j, k)))
+            + (coords->g33(i, j, k) * (f(i, j, k) - f(i, j, km)) / coords->dz(i, j, k));
+
+        const BoutReal gU =
+            (coords->g13(i, j, k)
+             * (f(i + 1, j, kp) - f(i - 1, j, kp) + f(i + 1, j, k) - f(i - 1, j, k))
+             / (4. * coords->dx(i, j, k)))
+            + (coords->g33(i, j, k) * (f(i, j, kp) - f(i, j, k)) / coords->dz(i, j, k));
 
         // Flow right
         BoutReal flux = gR * 0.25 * (coords->J(i + 1, j, k) + coords->J(i, j, k))
diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx
index 5020a5b9a3..c58c50ddb5 100644
--- a/src/mesh/interpolation/hermite_spline_xz.cxx
+++ b/src/mesh/interpolation/hermite_spline_xz.cxx
@@ -25,6 +25,8 @@
 #include "bout/globals.hxx"
 #include "bout/index_derivs_interface.hxx"
 #include "bout/interpolation_xz.hxx"
+#include "bout/openmpwrap.hxx"
+#include "bout/region.hxx"
 
 #include <vector>
 
@@ -171,7 +173,19 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z
     BoutReal t_x = delta_x(x, y, z) - static_cast<BoutReal>(i_corn);
     BoutReal t_z = delta_z(x, y, z) - static_cast<BoutReal>(k_corner(x, y, z));
 
-    // NOTE: A (small) hack to avoid one-sided differences
+    // NOTE: A (small) hack to avoid one-sided differences. We need at
+    // least 2 interior points due to an awkwardness with the
+    // boundaries. The splines need derivatives in x, but we don't
+    // know the value in the boundaries, so _any_ interpolation in the
+    // last interior cell can't be done. Instead, we fudge the
+    // interpolation in the last cell to be at the extreme right-hand
+    // edge of the previous cell (that is, exactly on the last
+    // interior point). However, this doesn't work with only one
+    // interior point, because we have to do something similar to the
+    // _first_ cell, and these two fudges cancel out and we end up
+    // indexing into the boundary anyway.
+    // TODO(peter): Can we remove this if we apply (dirichlet?) BCs to
+    // the X derivatives? Note that we need at least _2_ interior points.
     if (i_corn >= xend) {
       i_corn = xend - 1;
       t_x = 1.0;
@@ -284,6 +298,7 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z
                                         k_corner(x, y, z) - 1 + k);
         vals[k] = newWeights[j * 4 + k][i];
       }
+      BOUT_OMP(critical)
       MatSetValues(petscWeights, 1, idxn, 4, idxm, vals, INSERT_VALUES);
     }
 #endif
diff --git a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx
index f23bfd499e..f206ed1e0f 100644
--- a/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx
+++ b/src/mesh/interpolation/monotonic_hermite_spline_xz.cxx
@@ -25,7 +25,7 @@
 #include "bout/interpolation_xz.hxx"
 #include "bout/mesh.hxx"
 
-#include <vector>
+#include <algorithm>
 
 Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f,
                                               const std::string& region) const {
@@ -80,7 +80,6 @@ Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f,
     // Perhaps should only impose near boundaries, since that is where
     // problems most obviously occur.
     const BoutReal localmax = BOUTMAX(f[ic], f[icxp], f[iczp], f[icxpzp]);
-
     const BoutReal localmin = BOUTMIN(f[ic], f[icxp], f[iczp], f[icxpzp]);
 
     ASSERT2(std::isfinite(localmax) || i.x() < localmesh->xstart
@@ -88,12 +87,10 @@ Field3D XZMonotonicHermiteSpline::interpolate(const Field3D& f,
     ASSERT2(std::isfinite(localmin) || i.x() < localmesh->xstart
             || i.x() > localmesh->xend);
 
-    if (result > localmax) {
-      result = localmax;
-    }
-    if (result < localmin) {
-      result = localmin;
-    }
+    const auto diff = ((localmax - localmin) * rtol) + atol;
+
+    result = std::min(result, localmax + diff);
+    result = std::max(result, localmin - diff);
 
     f_interp[iyp] = result;
   }
diff --git a/src/mesh/parallel/fci.cxx b/src/mesh/parallel/fci.cxx
index e8d3af1cdb..6243bbb67a 100644
--- a/src/mesh/parallel/fci.cxx
+++ b/src/mesh/parallel/fci.cxx
@@ -37,79 +37,100 @@
  **************************************************************************/
 
 #include "fci.hxx"
+
+#include "bout/assert.hxx"
+#include "bout/bout_types.hxx"
+#include "bout/boutexception.hxx"
+#include "bout/field2d.hxx"
+#include "bout/field3d.hxx"
+#include "bout/field_data.hxx"
+#include "bout/mesh.hxx"
+#include "bout/msg_stack.hxx"
+#include "bout/options.hxx"
 #include "bout/parallel_boundary_op.hxx"
 #include "bout/parallel_boundary_region.hxx"
-#include <bout/bout_types.hxx>
-#include <bout/constants.hxx>
-#include <bout/mesh.hxx>
-#include <bout/msg_stack.hxx>
-#include <bout/utils.hxx>
+#include "bout/paralleltransform.hxx"
+#include "bout/region.hxx"
+
+#include <fmt/format.h>
 
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+#include <memory>
 #include <string>
+#include <string_view>
+
+using namespace std::string_view_literals;
 
-FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& options,
-               int offset_, const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
+FCIMap::FCIMap(Mesh& mesh, [[maybe_unused]] const Coordinates::FieldMetric& dy,
+               Options& options, int offset,
+               const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
                const std::shared_ptr<BoundaryRegionPar>& outer_boundary, bool zperiodic)
-    : map_mesh(mesh), offset(offset_),
-      region_no_boundary(map_mesh.getRegion("RGN_NOBNDRY")),
+    : map_mesh(&mesh), offset_(offset),
+      region_no_boundary(map_mesh->getRegion("RGN_NOBNDRY")),
       corner_boundary_mask(map_mesh) {
 
-  TRACE("Creating FCIMAP for direction {:d}", offset);
+  TRACE("Creating FCIMAP for direction {:d}", offset_);
 
-  if (offset == 0) {
+  if (offset_ == 0) {
     throw BoutException(
         "FCIMap called with offset = 0; You probably didn't mean to do that");
   }
 
   auto& interpolation_options = options["xzinterpolation"];
-  interp =
-      XZInterpolationFactory::getInstance().create(&interpolation_options, &map_mesh);
-  interp->setYOffset(offset);
+  interp = XZInterpolationFactory::getInstance().create(&interpolation_options, map_mesh);
+  interp->setYOffset(offset_);
 
   interp_corner =
-      XZInterpolationFactory::getInstance().create(&interpolation_options, &map_mesh);
-  interp_corner->setYOffset(offset);
+      XZInterpolationFactory::getInstance().create(&interpolation_options, map_mesh);
+  interp_corner->setYOffset(offset_);
 
   // Index-space coordinates of forward/backward points
-  Field3D xt_prime{&map_mesh}, zt_prime{&map_mesh};
+  Field3D xt_prime{map_mesh};
+  Field3D zt_prime{map_mesh};
 
   // Real-space coordinates of grid points
-  Field3D R{&map_mesh}, Z{&map_mesh};
+  Field3D R{map_mesh};
+  Field3D Z{map_mesh};
 
   // Real-space coordinates of forward/backward points
-  Field3D R_prime{&map_mesh}, Z_prime{&map_mesh};
+  Field3D R_prime{map_mesh};
+  Field3D Z_prime{map_mesh};
 
-  map_mesh.get(R, "R", 0.0, false);
-  map_mesh.get(Z, "Z", 0.0, false);
+  map_mesh->get(R, "R", 0.0, false);
+  map_mesh->get(Z, "Z", 0.0, false);
 
   // Get a unique name for a field based on the sign/magnitude of the offset
-  const auto parallel_slice_field_name = [&](std::string field) -> std::string {
-    const std::string direction = (offset > 0) ? "forward" : "backward";
+  const auto parallel_slice_field_name = [&](std::string_view field) -> std::string {
+    const auto direction = (offset_ > 0) ? "forward"sv : "backward"sv;
     // We only have a suffix for parallel slices beyond the first
     // This is for backwards compatibility
-    const std::string slice_suffix =
-        (std::abs(offset) > 1) ? "_" + std::to_string(std::abs(offset)) : "";
-    return direction + "_" + field + slice_suffix;
+    if (std::abs(offset_) == 1) {
+      return fmt::format("{}_{}", direction, field);
+    }
+    return fmt::format("{}_{}_{}", direction, field, std::abs(offset_));
   };
 
   // If we can't read in any of these fields, things will silently not
   // work, so best throw
-  if (map_mesh.get(xt_prime, parallel_slice_field_name("xt_prime"), 0.0, false) != 0) {
+  if (map_mesh->get(xt_prime, parallel_slice_field_name("xt_prime"), 0.0, false) != 0) {
     throw BoutException("Could not read {:s} from grid file!\n"
                         "  Either add it to the grid file, or reduce MYG",
                         parallel_slice_field_name("xt_prime"));
   }
-  if (map_mesh.get(zt_prime, parallel_slice_field_name("zt_prime"), 0.0, false) != 0) {
+  if (map_mesh->get(zt_prime, parallel_slice_field_name("zt_prime"), 0.0, false) != 0) {
     throw BoutException("Could not read {:s} from grid file!\n"
                         "  Either add it to the grid file, or reduce MYG",
                         parallel_slice_field_name("zt_prime"));
   }
-  if (map_mesh.get(R_prime, parallel_slice_field_name("R"), 0.0, false) != 0) {
+  if (map_mesh->get(R_prime, parallel_slice_field_name("R"), 0.0, false) != 0) {
     throw BoutException("Could not read {:s} from grid file!\n"
                         "  Either add it to the grid file, or reduce MYG",
                         parallel_slice_field_name("R"));
   }
-  if (map_mesh.get(Z_prime, parallel_slice_field_name("Z"), 0.0, false) != 0) {
+  if (map_mesh->get(Z_prime, parallel_slice_field_name("Z"), 0.0, false) != 0) {
     throw BoutException("Could not read {:s} from grid file!\n"
                         "  Either add it to the grid file, or reduce MYG",
                         parallel_slice_field_name("Z"));
@@ -157,25 +178,26 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options&
     interp->calcWeights(xt_prime, zt_prime);
   }
 
-  const int ncz = map_mesh.LocalNz;
+  const int ncz = map_mesh->LocalNz;
 
   BoutMask to_remove(map_mesh);
-  const int xend =
-      map_mesh.xstart + (map_mesh.xend - map_mesh.xstart + 1) * map_mesh.getNXPE() - 1;
+  const int xend = map_mesh->xstart
+                   + ((map_mesh->xend - map_mesh->xstart + 1) * map_mesh->getNXPE()) - 1;
   // Serial loop because call to BoundaryRegionPar::addPoint
   // (probably?) can't be done in parallel
   BOUT_FOR_SERIAL(i, xt_prime.getRegion("RGN_NOBNDRY")) {
     // z is periodic, so make sure the z-index wraps around
     if (zperiodic) {
-      zt_prime[i] = zt_prime[i]
-                    - ncz * (static_cast<int>(zt_prime[i] / static_cast<BoutReal>(ncz)));
+      zt_prime[i] =
+          zt_prime[i]
+          - (ncz * (static_cast<int>(zt_prime[i] / static_cast<BoutReal>(ncz))));
 
       if (zt_prime[i] < 0.0) {
         zt_prime[i] += ncz;
       }
     }
 
-    if ((xt_prime[i] >= map_mesh.xstart) and (xt_prime[i] <= xend)) {
+    if ((xt_prime[i] >= map_mesh->xstart) and (xt_prime[i] <= xend)) {
       // Not a boundary
       continue;
     }
@@ -215,7 +237,7 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options&
     const BoutReal dR_dz = 0.5 * (R[i_zp] - R[i_zm]);
     const BoutReal dZ_dz = 0.5 * (Z[i_zp] - Z[i_zm]);
 
-    const BoutReal det = dR_dx * dZ_dz - dR_dz * dZ_dx; // Determinant of 2x2 matrix
+    const BoutReal det = (dR_dx * dZ_dz) - (dR_dz * dZ_dx); // Determinant of 2x2 matrix
 
     const BoutReal dR = R_prime[i] - R[i];
     const BoutReal dZ = Z_prime[i] - Z[i];
@@ -228,9 +250,9 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options&
     // outer boundary. However, if any of the surrounding points are negative,
     // that also means inner. So to differentiate between inner and outer we
     // need at least 2 points in the domain.
-    ASSERT2(map_mesh.xend - map_mesh.xstart >= 2);
-    auto boundary = (xt_prime[i] < map_mesh.xstart) ? inner_boundary : outer_boundary;
-    boundary->add_point(x, y, z, x + dx, y + 0.5 * offset,
+    ASSERT2(map_mesh->xend - map_mesh->xstart >= 2);
+    auto boundary = (xt_prime[i] < map_mesh->xstart) ? inner_boundary : outer_boundary;
+    boundary->add_point(x, y, z, x + dx, y + (0.5 * offset_),
                         z + dz, // Intersection point in local index space
                         0.5,    // Distance to intersection
                         1       // Default to that there is a point in the other direction
@@ -240,13 +262,14 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options&
 
   interp->setRegion(region_no_boundary);
 
-  const auto region = fmt::format("RGN_YPAR_{:+d}", offset);
-  if (not map_mesh.hasRegion3D(region)) {
+  const auto region = fmt::format("RGN_YPAR_{:+d}", offset_);
+  if (not map_mesh->hasRegion3D(region)) {
     // The valid region for this slice
-    map_mesh.addRegion3D(
-        region, Region<Ind3D>(map_mesh.xstart, map_mesh.xend, map_mesh.ystart + offset,
-                              map_mesh.yend + offset, 0, map_mesh.LocalNz - 1,
-                              map_mesh.LocalNy, map_mesh.LocalNz));
+    map_mesh->addRegion3D(region, Region<Ind3D>(map_mesh->xstart, map_mesh->xend,
+                                                map_mesh->ystart + offset_,
+                                                map_mesh->yend + offset_, 0,
+                                                map_mesh->LocalNz - 1, map_mesh->LocalNy,
+                                                map_mesh->LocalNz));
   }
 }
 
@@ -254,7 +277,7 @@ Field3D FCIMap::integrate(Field3D& f) const {
   TRACE("FCIMap::integrate");
 
   ASSERT1(f.getDirectionY() == YDirectionType::Standard);
-  ASSERT1(&map_mesh == f.getMesh());
+  ASSERT1(map_mesh == f.getMesh());
 
   // Cell centre values
   Field3D centre = interp->interpolate(f);
@@ -269,7 +292,7 @@ Field3D FCIMap::integrate(Field3D& f) const {
 #endif
 
   BOUT_FOR(i, region_no_boundary) {
-    const auto inext = i.yp(offset);
+    const auto inext = i.yp(offset_);
     const BoutReal f_c = centre[inext];
     const auto izm = i.zm();
     const int x = i.x();
@@ -278,7 +301,7 @@ Field3D FCIMap::integrate(Field3D& f) const {
     const int zm = izm.z();
     if (corner_boundary_mask(x, y, z) || corner_boundary_mask(x - 1, y, z)
         || corner_boundary_mask(x, y, zm) || corner_boundary_mask(x - 1, y, zm)
-        || (x == map_mesh.xstart)) {
+        || (x == map_mesh->xstart)) {
       // One of the corners leaves the domain.
       // Use the cell centre value, since boundary conditions are not
       // currently applied to corners.
@@ -299,19 +322,70 @@ Field3D FCIMap::integrate(Field3D& f) const {
   return result;
 }
 
+FCITransform::FCITransform(Mesh& mesh, const Coordinates::FieldMetric& dy, bool zperiodic,
+                           Options* opt)
+    : ParallelTransform(mesh, opt), R{&mesh}, Z{&mesh} {
+
+  // check the coordinate system used for the grid data source
+  FCITransform::checkInputGrid();
+
+  // Real-space coordinates of grid cells
+  mesh.get(R, "R", 0.0, false);
+  mesh.get(Z, "Z", 0.0, false);
+
+  auto forward_boundary_xin =
+      std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh);
+  auto backward_boundary_xin =
+      std::make_shared<BoundaryRegionPar>("FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh);
+  auto forward_boundary_xout =
+      std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh);
+  auto backward_boundary_xout =
+      std::make_shared<BoundaryRegionPar>("FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
+
+  // Add the boundary region to the mesh's vector of parallel boundaries
+  mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd);
+  mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd);
+  mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd);
+  mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd);
+
+  field_line_maps.reserve(static_cast<std::size_t>(mesh.ystart) * 2);
+  for (int offset = 1; offset < mesh.ystart + 1; ++offset) {
+    field_line_maps.emplace_back(mesh, dy, options, offset, forward_boundary_xin,
+                                 forward_boundary_xout, zperiodic);
+    field_line_maps.emplace_back(mesh, dy, options, -offset, backward_boundary_xin,
+                                 backward_boundary_xout, zperiodic);
+  }
+  ASSERT0(mesh.ystart == 1);
+  const std::array bndries = {forward_boundary_xin, forward_boundary_xout,
+                              backward_boundary_xin, backward_boundary_xout};
+  for (const auto& bndry : bndries) {
+    for (const auto& bndry2 : bndries) {
+      if (bndry->dir == bndry2->dir) {
+        continue;
+      }
+      for (bndry->first(); !bndry->isDone(); bndry->next()) {
+        if (bndry2->contains(*bndry)) {
+          bndry->setValid(0);
+        }
+      }
+    }
+  }
+}
+
 void FCITransform::checkInputGrid() {
   std::string parallel_transform;
   if (mesh.isDataSourceGridFile()
-      && !mesh.get(parallel_transform, "parallel_transform")) {
+      && (mesh.get(parallel_transform, "parallel_transform") == 0)) {
     if (parallel_transform != "fci") {
       throw BoutException(
           "Incorrect parallel transform type '" + parallel_transform
           + "' used "
             "to generate metric components for FCITransform. Should be 'fci'.");
     }
-  } // else: parallel_transform variable not found in grid input, indicates older input
-    //       file or grid from options so must rely on the user having ensured the type is
-    //       correct
+  }
+  // else: parallel_transform variable not found in grid input, indicates older input
+  //       file or grid from options so must rely on the user having ensured the type is
+  //       correct
 }
 
 void FCITransform::calcParallelSlices(Field3D& f) {
@@ -327,8 +401,8 @@ void FCITransform::calcParallelSlices(Field3D& f) {
 
   // Interpolate f onto yup and ydown fields
   for (const auto& map : field_line_maps) {
-    f.ynext(map.offset) = map.interpolate(f);
-    f.ynext(map.offset).setRegion(fmt::format("RGN_YPAR_{:+d}", map.offset));
+    f.ynext(map.offset()) = map.interpolate(f);
+    f.ynext(map.offset()).setRegion(fmt::format("RGN_YPAR_{:+d}", map.offset()));
   }
 }
 
@@ -345,7 +419,7 @@ void FCITransform::integrateParallelSlices(Field3D& f) {
 
   // Integrate f onto yup and ydown fields
   for (const auto& map : field_line_maps) {
-    f.ynext(map.offset) = map.integrate(f);
+    f.ynext(map.offset()) = map.integrate(f);
   }
 }
 
diff --git a/src/mesh/parallel/fci.hxx b/src/mesh/parallel/fci.hxx
index 1a02f558e1..65529a4c4e 100644
--- a/src/mesh/parallel/fci.hxx
+++ b/src/mesh/parallel/fci.hxx
@@ -26,6 +26,11 @@
 #ifndef BOUT_FCITRANSFORM_H
 #define BOUT_FCITRANSFORM_H
 
+#include "bout/assert.hxx"
+#include "bout/bout_types.hxx"
+#include "bout/boutexception.hxx"
+#include "bout/coordinates.hxx"
+#include "bout/region.hxx"
 #include <bout/interpolation_xz.hxx>
 #include <bout/mask.hxx>
 #include <bout/parallel_boundary_region.hxx>
@@ -33,25 +38,26 @@
 #include <bout/unused.hxx>
 
 #include <memory>
+#include <string>
 #include <vector>
 
+class BoundaryRegionPar;
+class FieldPerp;
+class Field2D;
+class Field3D;
+class Options;
+
 /// Field line map - contains the coefficients for interpolation
 class FCIMap {
   /// Interpolation objects
   std::unique_ptr<XZInterpolation> interp;        // Cell centre
   std::unique_ptr<XZInterpolation> interp_corner; // Cell corner at (x+1, z+1)
 
-public:
-  FCIMap() = delete;
-  FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, int offset,
-         const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
-         const std::shared_ptr<BoundaryRegionPar>& outer_boundary, bool zperiodic);
-
   // The mesh this map was created on
-  Mesh& map_mesh;
+  Mesh* map_mesh;
 
   /// Direction of map
-  const int offset;
+  int offset_;
 
   /// region containing all points where the field line has not left the
   /// domain
@@ -59,8 +65,17 @@ public:
   /// If any of the integration area has left the domain
   BoutMask corner_boundary_mask;
 
+public:
+  FCIMap() = delete;
+  FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, int offset,
+         const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
+         const std::shared_ptr<BoundaryRegionPar>& outer_boundary, bool zperiodic);
+
+  /// Direction of map
+  int offset() const { return offset_; }
+
   Field3D interpolate(Field3D& f) const {
-    ASSERT1(&map_mesh == f.getMesh());
+    ASSERT1(map_mesh == f.getMesh());
     return interp->interpolate(f);
   }
 
@@ -72,55 +87,7 @@ class FCITransform : public ParallelTransform {
 public:
   FCITransform() = delete;
   FCITransform(Mesh& mesh, const Coordinates::FieldMetric& dy, bool zperiodic = true,
-               Options* opt = nullptr)
-    : ParallelTransform(mesh, opt), R{&mesh}, Z{&mesh} {
-
-    // check the coordinate system used for the grid data source
-    FCITransform::checkInputGrid();
-
-    // Real-space coordinates of grid cells
-    mesh.get(R, "R", 0.0, false);
-    mesh.get(Z, "Z", 0.0, false);
-
-    auto forward_boundary_xin =
-        std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh);
-    auto backward_boundary_xin = std::make_shared<BoundaryRegionPar>(
-        "FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh);
-    auto forward_boundary_xout =
-        std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh);
-    auto backward_boundary_xout = std::make_shared<BoundaryRegionPar>(
-        "FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
-
-    // Add the boundary region to the mesh's vector of parallel boundaries
-    mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd);
-    mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd);
-    mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd);
-    mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd);
-
-    field_line_maps.reserve(mesh.ystart * 2);
-    for (int offset = 1; offset < mesh.ystart + 1; ++offset) {
-      field_line_maps.emplace_back(mesh, dy, options, offset, forward_boundary_xin,
-                                   forward_boundary_xout, zperiodic);
-      field_line_maps.emplace_back(mesh, dy, options, -offset, backward_boundary_xin,
-                                   backward_boundary_xout, zperiodic);
-    }
-    ASSERT0(mesh.ystart == 1);
-    std::shared_ptr<BoundaryRegionPar> bndries[]{
-        forward_boundary_xin, forward_boundary_xout, backward_boundary_xin,
-        backward_boundary_xout};
-    for (auto& bndry : bndries) {
-      for (const auto& bndry2 : bndries) {
-        if (bndry->dir == bndry2->dir) {
-          continue;
-        }
-        for (bndry->first(); !bndry->isDone(); bndry->next()) {
-          if (bndry2->contains(*bndry)) {
-            bndry->setValid(0);
-          }
-        }
-      }
-    }
-  }
+               Options* opt = nullptr);
 
   void calcParallelSlices(Field3D& f) override;
 
diff --git a/tests/MMS/CMakeLists.txt b/tests/MMS/CMakeLists.txt
index 0c42da7074..cd639c9059 100644
--- a/tests/MMS/CMakeLists.txt
+++ b/tests/MMS/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(spatial/d2dx2)
 add_subdirectory(spatial/d2dz2)
 add_subdirectory(spatial/diffusion)
 add_subdirectory(spatial/fci)
+add_subdirectory(spatial/finite-volume)
 add_subdirectory(time)
 add_subdirectory(time-petsc)
 add_subdirectory(wave-1d)
diff --git a/tests/MMS/spatial/fci/data/BOUT.inp b/tests/MMS/spatial/fci/data/BOUT.inp
index 5f2001a906..76ac3035c9 100644
--- a/tests/MMS/spatial/fci/data/BOUT.inp
+++ b/tests/MMS/spatial/fci/data/BOUT.inp
@@ -1,15 +1,20 @@
-
 input_field = sin(y - 2*z) + sin(y - z)
-
-solution = (6.28318530717959*(0.01*x + 0.045)*(-2*cos(y - 2*z) - cos(y - z)) + 0.628318530717959*cos(y - 2*z) + 0.628318530717959*cos(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0)
-
-MXG = 1
-MYG = 1
-NXPE = 1
+grad_par_solution = (6.28318530717959*(0.01*x + 0.045)*(-2*cos(y - 2*z) - cos(y - z)) + 0.628318530717959*cos(y - 2*z) + 0.628318530717959*cos(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0)
+grad2_par2_solution = (6.28318530717959*(0.01*x + 0.045)*(6.28318530717959*(0.01*x + 0.045)*(-4*sin(y - 2*z) - sin(y - z)) + 1.25663706143592*sin(y - 2*z) + 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0) + 0.628318530717959*(6.28318530717959*(0.01*x + 0.045)*(2*sin(y - 2*z) + sin(y - z)) - 0.628318530717959*sin(y - 2*z) - 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0))/sqrt((0.01*x + 0.045)^2 + 1.0)
+div_par_solution = (0.01*x + 0.045)*(-12.5663706143592*cos(y - 2*z) - 6.28318530717959*cos(y - z) + 0.628318530717959*(cos(y - 2*z) + cos(y - z))/(0.01*x + 0.045))/sqrt((0.01*x + 0.045)^2 + 1.0)
+div_par_K_grad_par_solution = (0.01*x + 0.045)*(6.28318530717959*sin(y - z) - 0.628318530717959*sin(y - z)/(0.01*x + 0.045))*(6.28318530717959*(0.01*x + 0.045)*(-2*cos(y - 2*z) - cos(y - z)) + 0.628318530717959*cos(y - 2*z) + 0.628318530717959*cos(y - z))/((0.01*x + 0.045)^2 + 1.0) + (6.28318530717959*(0.01*x + 0.045)*(6.28318530717959*(0.01*x + 0.045)*(-4*sin(y - 2*z) - sin(y - z)) + 1.25663706143592*sin(y - 2*z) + 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0) + 0.628318530717959*(6.28318530717959*(0.01*x + 0.045)*(2*sin(y - 2*z) + sin(y - z)) - 0.628318530717959*sin(y - 2*z) - 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0))*cos(y - z)/sqrt((0.01*x + 0.045)^2 + 1.0)
+K = cos(y - z)
+laplace_par_solution = (0.01*x + 0.045)*(6.28318530717959*(6.28318530717959*(0.01*x + 0.045)*(-4*sin(y - 2*z) - sin(y - z)) + 1.25663706143592*sin(y - 2*z) + 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0) + 0.628318530717959*(6.28318530717959*(0.01*x + 0.045)*(2*sin(y - 2*z) + sin(y - z)) - 0.628318530717959*sin(y - 2*z) - 0.628318530717959*sin(y - z))/((0.01*x + 0.045)*sqrt((0.01*x + 0.045)^2 + 1.0)))/sqrt((0.01*x + 0.045)^2 + 1.0)
+FV_div_par_mod_solution = (0.01*x + 0.045)*(6.28318530717959*(0.01*x + 0.045)*((sin(y - 2*z) + sin(y - z))*sin(y - z)/(0.01*x + 0.045) + (-2*cos(y - 2*z) - cos(y - z))*cos(y - z)/(0.01*x + 0.045)) - 0.628318530717959*(sin(y - 2*z) + sin(y - z))*sin(y - z)/(0.01*x + 0.045) + 0.628318530717959*(cos(y - 2*z) + cos(y - z))*cos(y - z)/(0.01*x + 0.045))/sqrt((0.01*x + 0.045)^2 + 1.0)
+FV_div_par_fvv_solution = (0.01*x + 0.045)*(6.28318530717959*(0.01*x + 0.045)*(2*(sin(y - 2*z) + sin(y - z))*sin(y - z)*cos(y - z)/(0.01*x + 0.045) + (-2*cos(y - 2*z) - cos(y - z))*cos(y - z)^2/(0.01*x + 0.045)) - 1.25663706143592*(sin(y - 2*z) + sin(y - z))*sin(y - z)*cos(y - z)/(0.01*x + 0.045) + 0.628318530717959*(cos(y - 2*z) + cos(y - z))*cos(y - z)^2/(0.01*x + 0.045))/sqrt((0.01*x + 0.045)^2 + 1.0)
+div_par_K_grad_par_mod_solution = (0.01*x + 0.045)*(6.28318530717959*sin(y - z) - 0.628318530717959*sin(y - z)/(0.01*x + 0.045))*(6.28318530717959*(0.01*x + 0.045)*(-2*cos(y - 2*z) - cos(y - z)) + 0.628318530717959*cos(y - 2*z) + 0.628318530717959*cos(y - z))/((0.01*x + 0.045)^2 + 1.0) + (6.28318530717959*(0.01*x + 0.045)*(6.28318530717959*(0.01*x + 0.045)*(-4*sin(y - 2*z) - sin(y - z)) + 1.25663706143592*sin(y - 2*z) + 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0) + 0.628318530717959*(6.28318530717959*(0.01*x + 0.045)*(2*sin(y - 2*z) + sin(y - z)) - 0.628318530717959*sin(y - 2*z) - 0.628318530717959*sin(y - z))/sqrt((0.01*x + 0.045)^2 + 1.0))*cos(y - z)/sqrt((0.01*x + 0.045)^2 + 1.0)
 
 [mesh]
 symmetricglobalx = true
 file = fci.grid.nc
+MXG = 1
+MYG = 1
+NXPE = 1
 
 [mesh:ddy]
 first = C2
diff --git a/tests/MMS/spatial/fci/fci_mms.cxx b/tests/MMS/spatial/fci/fci_mms.cxx
index 18405a7f88..3a9b72070e 100644
--- a/tests/MMS/spatial/fci/fci_mms.cxx
+++ b/tests/MMS/spatial/fci/fci_mms.cxx
@@ -1,6 +1,42 @@
 #include "bout/bout.hxx"
-#include "bout/derivs.hxx"
+#include "bout/build_config.hxx"
+#include "bout/difops.hxx"
+#include "bout/field.hxx"
+#include "bout/field3d.hxx"
 #include "bout/field_factory.hxx"
+#include "bout/fv_ops.hxx"
+#include "bout/globals.hxx"
+#include "bout/options.hxx"
+#include "bout/options_io.hxx"
+#include "bout/utils.hxx"
+
+#include <fmt/format.h>
+
+#include <cmath>
+#include <string>
+
+namespace {
+void fci_op_test(const std::string& name, Options& dump, const Field3D& input,
+                 const Field3D& result) {
+  auto* mesh = input.getMesh();
+  const Field3D solution{FieldFactory::get()->create3D(fmt::format("{}_solution", name),
+                                                       Options::getRoot(), mesh)};
+  const Field3D error{result - solution};
+  // Error norms of (result - solution), taken over the "RGN_NOBNDRY" region
+  dump[fmt::format("{}_l_2", name)] = sqrt(mean(SQ(error), true, "RGN_NOBNDRY"));
+  dump[fmt::format("{}_l_inf", name)] = max(abs(error), true, "RGN_NOBNDRY");
+  // Full fields, stored under "<name>_*" keys alongside the norms
+  dump[fmt::format("{}_result", name)] = result;
+  dump[fmt::format("{}_error", name)] = error;
+  dump[fmt::format("{}_input", name)] = input;
+  dump[fmt::format("{}_solution", name)] = solution;
+  // NOTE(review): `<` excludes slice `ystart` itself -- confirm this is intended
+  for (int slice = 1; slice < mesh->ystart; ++slice) {
+    dump[fmt::format("{}_input.ynext(-{})", name, slice)] = input.ynext(-slice);
+    dump[fmt::format("{}_input.ynext({})", name, slice)] = input.ynext(slice);
+  }
+}
+} // namespace
 
 int main(int argc, char** argv) {
   BoutInitialise(argc, argv);
@@ -8,30 +44,38 @@ int main(int argc, char** argv) {
   using bout::globals::mesh;
 
   Field3D input{FieldFactory::get()->create3D("input_field", Options::getRoot(), mesh)};
-  Field3D solution{FieldFactory::get()->create3D("solution", Options::getRoot(), mesh)};
-
-  // Communicate to calculate parallel transform
-  mesh->communicate(input);
+  Field3D K{FieldFactory::get()->create3D("K", Options::getRoot(), mesh)};
 
-  Field3D result{Grad_par(input)};
-  Field3D error{result - solution};
+  // Communicate to calculate parallel transform.
+  if constexpr (bout::build::use_metric_3d) {
+    // Div_par operators require B parallel slices:
+    // Coordinates::geometry doesn't ensure this (yet)
+    auto& Bxy = mesh->getCoordinates()->Bxy;
+    auto& J = mesh->getCoordinates()->J;
+    auto& g_22 = mesh->getCoordinates()->g_22;
+    auto& dy = mesh->getCoordinates()->dy;
+    mesh->communicate(Bxy, J, g_22, dy);
+  }
+  mesh->communicate(input, K);
 
   Options dump;
   // Add mesh geometry variables
   mesh->outputVars(dump);
 
-  dump["l_2"] = sqrt(mean(SQ(error), true, "RGN_NOBNDRY"));
-  dump["l_inf"] = max(abs(error), true, "RGN_NOBNDRY");
+  // Dummy variable for *_mod overloads
+  Field3D flow_ylow;
 
-  dump["result"] = result;
-  dump["error"] = error;
-  dump["input"] = input;
-  dump["solution"] = solution;
+  fci_op_test("grad_par", dump, input, Grad_par(input));
+  fci_op_test("grad2_par2", dump, input, Grad2_par2(input));
+  fci_op_test("div_par", dump, input, Div_par(input));
+  fci_op_test("div_par_K_grad_par", dump, input, Div_par_K_Grad_par(K, input));
+  fci_op_test("div_par_K_grad_par_mod", dump, input,
+              Div_par_K_Grad_par_mod(K, input, flow_ylow));
+  fci_op_test("laplace_par", dump, input, Laplace_par(input));
 
-  for (int slice = 1; slice < mesh->ystart; ++slice) {
-    dump[fmt::format("input.ynext(-{})", slice)] = input.ynext(-slice);
-    dump[fmt::format("input.ynext({})", slice)] = input.ynext(slice);
-  }
+  // Finite volume methods
+  fci_op_test("FV_div_par_mod", dump, input, FV::Div_par_mod(input, K, K, flow_ylow));
+  fci_op_test("FV_div_par_fvv", dump, input, FV::Div_par_fvv(input, K, K));
 
   bout::writeDefaultOutputFile(dump);
 
diff --git a/tests/MMS/spatial/fci/mms.py b/tests/MMS/spatial/fci/mms.py
index 1e71135c90..801a8d3f26 100755
--- a/tests/MMS/spatial/fci/mms.py
+++ b/tests/MMS/spatial/fci/mms.py
@@ -3,13 +3,19 @@
 # Generate manufactured solution and sources for FCI test
 #
 
-from boutdata.mms import *
+from math import pi
+import warnings
 
-from sympy import sin, cos, sqrt
+from boututils.boutwarnings import AlwaysWarning
+from boutdata.data import BoutOptionsFile
+from boutdata.mms import diff, exprToStr, x, y, z
+from sympy import sin, cos, sqrt, Expr
 
-from math import pi
+warnings.simplefilter("ignore", AlwaysWarning)
 
 f = sin(y - z) + sin(y - 2 * z)
+K = cos(z - y)
+
 
 Lx = 0.1
 Ly = 10.0
@@ -23,12 +29,45 @@
 B = sqrt(Bpx**2 + Bt**2)
 
 
-def FCI_ddy(f):
+def FCI_grad_par(f: Expr) -> Expr:
     return (Bt * diff(f, y) * 2.0 * pi / Ly + Bpx * diff(f, z) * 2.0 * pi / Lz) / B
 
 
+def FCI_grad2_par2(f: Expr) -> Expr:
+    return FCI_grad_par(FCI_grad_par(f))
+
+
+def FCI_div_par(f: Expr) -> Expr:
+    return Bpx * FCI_grad_par(f / Bpx)
+
+
+def FCI_div_par_K_grad_par(f: Expr, K: Expr) -> Expr:
+    return (K * FCI_grad2_par2(f)) + (FCI_div_par(K) * FCI_grad_par(f))
+
+
+def FCI_Laplace_par(f: Expr) -> Expr:
+    return FCI_div_par(FCI_grad_par(f))
+
+
 ############################################
 # Equations solved
 
-print("input = " + exprToStr(f))
-print("solution = " + exprToStr(FCI_ddy(f)))
+options = BoutOptionsFile("data/BOUT.inp")
+
+for name, expr in (
+    ("input_field", f),
+    ("K", K),
+    ("grad_par_solution", FCI_grad_par(f)),
+    ("grad2_par2_solution", FCI_grad2_par2(f)),
+    ("div_par_solution", FCI_div_par(f)),
+    ("div_par_K_grad_par_solution", FCI_div_par_K_grad_par(f, K)),
+    ("div_par_K_grad_par_mod_solution", FCI_div_par_K_grad_par(f, K)),
+    ("laplace_par_solution", FCI_Laplace_par(f)),
+    ("FV_div_par_mod_solution", FCI_div_par(f * K)),
+    ("FV_div_par_fvv_solution", FCI_div_par(f * K * K)),
+):
+    expr_str = exprToStr(expr)
+    print(f"{name} = {expr_str}")
+    options[name] = expr_str
+
+options.write("data/BOUT.inp", overwrite=True)
diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest
index 7a9d6e655e..1e6d570c96 100755
--- a/tests/MMS/spatial/fci/runtest
+++ b/tests/MMS/spatial/fci/runtest
@@ -6,209 +6,276 @@
 # Cores: 2
 # requires: zoidberg
 
-from boututils.run_wrapper import build_and_log, launch_safe
-from boutdata.collect import collect
-import boutconfig as conf
-
-from numpy import array, log, polyfit, linspace, arange
-
-import pickle
-
-from sys import stdout
+import argparse
+import json
+import pathlib
+import sys
+from time import time
 
+import boutconfig as conf
 import zoidberg as zb
-
-nx = 4  # Not changed for these tests
-
+from boutdata.collect import collect
+from boututils.run_wrapper import build_and_log, launch_safe
+from numpy import arange, array, linspace, log, polyfit
+from scipy.interpolate import RectBivariateSpline as RBS
+
+# Global parameters
+DIRECTORY = "data"
+NPROC = 2
+MTHREAD = 2
+OPERATORS = {
+    "grad_par": 2,
+    "grad2_par2": 2,
+    "div_par": 2,
+    "div_par_K_grad_par": 2,
+    "div_par_K_grad_par_mod": 2,
+    "laplace_par": 2,
+    "FV_div_par_mod": 2,
+    "FV_div_par_fvv": 1,
+}
+# Note that we need at least _2_ interior points for hermite spline
+# interpolation due to an awkwardness with the boundaries
+NX = 4
 # Resolution in y and z
-nlist = [8, 16, 32, 64, 128]
-
-# Number of parallel slices (in each direction)
-nslices = [1]
-
-directory = "data"
-
-nproc = 2
-mthread = 2
-
-
-success = True
-
-error_2 = {}
-error_inf = {}
-method_orders = {}
-
-# Run with periodic Y?
-yperiodic = True
-
-failures = []
-
-build_and_log("FCI MMS test")
-
-for nslice in nslices:
-    for method in [
-        "hermitespline",
-        "lagrange4pt",
-        "bilinear",
-        # "monotonichermitespline",
-    ]:
-        error_2[nslice] = []
-        error_inf[nslice] = []
-
-        # Which central difference scheme to use and its expected order
-        order = nslice * 2
-        method_orders[nslice] = {"name": "C{}".format(order), "order": order}
-
-        for n in nlist:
-            # Define the magnetic field using new poloidal gridding method
-            # Note that the Bz and Bzprime parameters here must be the same as in mms.py
-            field = zb.field.Slab(Bz=0.05, Bzprime=0.1)
-            # Create rectangular poloidal grids
-            poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid(
-                nx, n, 0.1, 1.0, MXG=1
-            )
-            # Set the ylength and y locations
-            ylength = 10.0
-
-            if yperiodic:
-                ycoords = linspace(0.0, ylength, n, endpoint=False)
-            else:
-                # Doesn't include the end points
-                ycoords = (arange(n) + 0.5) * ylength / float(n)
-
-            # Create the grid
-            grid = zb.grid.Grid(poloidal_grid, ycoords, ylength, yperiodic=yperiodic)
-            # Make and write maps
-            maps = zb.make_maps(grid, field, nslice=nslice, quiet=True, MXG=1)
-            zb.write_maps(
-                grid,
-                field,
-                maps,
-                new_names=False,
-                metric2d=conf.isMetric2D(),
-                quiet=True,
-            )
-
-            args = " MZ={} MYG={} mesh:paralleltransform:y_periodic={} mesh:ddy:first={} NXPE={}".format(
-                n,
-                nslice,
-                yperiodic,
-                method_orders[nslice]["name"],
-                2 if conf.has["petsc"] and method == "hermitespline" else 1,
-            )
-            args += f" mesh:paralleltransform:xzinterpolation:type={method}"
-
-            # Command to run
-            cmd = "./fci_mms " + args
-
-            print("Running command: " + cmd)
-
-            # Launch using MPI
-            s, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True)
-
-            # Save output to log file
-            with open("run.log." + str(n), "w") as f:
-                f.write(out)
-
-            if s:
-                print("Run failed!\nOutput was:\n")
-                print(out)
-                exit(s)
-
-            # Collect data
-            l_2 = collect(
-                "l_2",
-                tind=[1, 1],
-                info=False,
-                path=directory,
-                xguards=False,
-                yguards=False,
-            )
-            l_inf = collect(
-                "l_inf",
-                tind=[1, 1],
-                info=False,
-                path=directory,
-                xguards=False,
-                yguards=False,
-            )
-
-            error_2[nslice].append(l_2)
-            error_inf[nslice].append(l_inf)
-
-            print("Errors : l-2 {:f} l-inf {:f}".format(l_2, l_inf))
-
-        dx = 1.0 / array(nlist)
-
-        # Calculate convergence order
-        fit = polyfit(log(dx), log(error_2[nslice]), 1)
-        order = fit[0]
-        stdout.write("Convergence order = {:f} (fit)".format(order))
-
-        order = log(error_2[nslice][-2] / error_2[nslice][-1]) / log(dx[-2] / dx[-1])
-        stdout.write(", {:f} (small spacing)".format(order))
-
-        # Should be close to the expected order
-        if order > method_orders[nslice]["order"] * 0.95:
-            print("............ PASS\n")
-        else:
-            print("............ FAIL\n")
-            success = False
-            failures.append(method_orders[nslice]["name"])
-
-
-with open("fci_mms.pkl", "wb") as output:
-    pickle.dump(nlist, output)
-    for nslice in nslices:
-        pickle.dump(error_2[nslice], output)
-        pickle.dump(error_inf[nslice], output)
-
-# Do we want to show the plot as well as save it to file.
-showPlot = True
-
-if False:
+NLIST = [8, 16, 32, 64]
+dx = 1.0 / array(NLIST)
+
+
+def myRBS(a, b, c):
+    """RectBivariateSpline, but automatically tune spline degree for small arrays"""
+    mx, _ = c.shape
+    kx = max(mx - 1, 1)
+    kx = min(kx, 3)
+    return RBS(a, b, c, kx=kx)
+
+
+zb.poloidal_grid.RectBivariateSpline = myRBS
+
+
+def quiet_collect(name: str) -> float:
+    # Index to return a plain (numpy) float rather than `BoutArray`
+    return collect(
+        name,
+        tind=[1, 1],
+        info=False,
+        path=DIRECTORY,
+        xguards=False,
+        yguards=False,
+    )[()]
+
+
+def assert_convergence(error, dx, name, expected) -> bool:
+    fit = polyfit(log(dx), log(error), 1)
+    order = fit[0]
+    print(f"{name} convergence order = {order:f} (fit)", end="")
+
+    order = log(error[-2] / error[-1]) / log(dx[-2] / dx[-1])
+    print(f", {order:f} (small spacing)", end="")
+
+    # Should be close to the expected order
+    success = order > expected * 0.95
+    print(f"\t............ {'PASS' if success else 'FAIL'}")
+
+    return success
+
+
+def run_fci_operators(
+    nslice: int, nz: int, yperiodic: bool, name: str
+) -> dict[str, dict[str, float]]:
+    # Define the magnetic field using new poloidal gridding method
+    # Note that the Bz and Bzprime parameters here must be the same as in mms.py
+    field = zb.field.Slab(Bz=0.05, Bzprime=0.1)
+    # Create rectangular poloidal grids
+    poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid(NX, nz, 0.1, 1.0, MXG=1)
+    # Set the ylength and y locations
+    ylength = 10.0
+
+    if yperiodic:
+        ycoords = linspace(0.0, ylength, nz, endpoint=False)
+    else:
+        # Doesn't include the end points
+        ycoords = (arange(nz) + 0.5) * ylength / float(nz)
+
+    # Create the grid
+    grid = zb.grid.Grid(poloidal_grid, ycoords, ylength, yperiodic=yperiodic)
+    maps = zb.make_maps(grid, field, nslice=nslice, quiet=True, MXG=1)
+    zb.write_maps(
+        grid,
+        field,
+        maps,
+        new_names=False,
+        metric2d=conf.isMetric2D(),
+        quiet=True,
+    )
+
+    # Command to run; `name` carries the extra command-line arguments for this case
+    args = f"MZ={nz} MYG={nslice} mesh:paralleltransform:y_periodic={yperiodic} {name}"
+    cmd = f"./fci_mms {args}"
+    print(f"Running command: {cmd}", end="")
+
+    # Launch using MPI
+    start = time()
+    status, out = launch_safe(cmd, nproc=NPROC, mthread=MTHREAD, pipe=True)
+    print(f" ... done in {time() - start:.3f}s")
+
+    # Save output to log file
+    pathlib.Path(f"run.log.{nz}").write_text(out)
+
+    if status:
+        print(f"Run failed!\nOutput was:\n{out}")
+        sys.exit(status)
+
+    return {
+        operator: {
+            "l_2": quiet_collect(f"{operator}_l_2"),
+            "l_inf": quiet_collect(f"{operator}_l_inf"),
+        }
+        for operator in OPERATORS
+    }
+
+
+def transpose(
+    errors: list[dict[str, dict[str, float]]],
+) -> dict[str, dict[str, list[float]]]:
+    """Turn a list of {operator: {norm: error}} into {operator: {norm: [errors]}}"""
+    # Every operator in OPERATORS gets a list per norm, even when `errors` is empty
+    kinds = ("l_2", "l_inf")
+    result = {operator: {kind: [] for kind in kinds} for operator in OPERATORS}
+    for error in errors:
+        for k, v in error.items():
+            for kind in kinds:
+                result[k][kind].append(v[kind])  # list order follows `errors` order
+    return result
+
+
+def check_fci_operators(name: str, case: dict) -> tuple[dict, list[str]]:
+    failures = []
+    # Unpack the settings of this case; `args` holds extra command-line options
+    nslice = case["nslice"]
+    yperiodic = case["yperiodic"]
+    order = case["order"]
+    args = case["args"]
+
+    all_errors = []
+    # Run once per resolution in NLIST and print both norms for every operator
+    for n in NLIST:
+        errors = run_fci_operators(nslice, n, yperiodic, args)
+        all_errors.append(errors)
+
+        for operator in OPERATORS:
+            l_2 = errors[operator]["l_2"]
+            l_inf = errors[operator]["l_inf"]
+
+            print(f"{operator} errors: l-2 {l_2:f} l-inf {l_inf:f}")
+
+    final_errors = transpose(all_errors)
+    for operator, operator_order in OPERATORS.items():
+        test_name = f"{operator} {name}"
+        expected_order = min(order, operator_order)  # lower of case and operator order
+        success = assert_convergence(
+            final_errors[operator]["l_2"], dx, test_name, expected_order
+        )
+        if not success:
+            failures.append(test_name)  # only the l_2 norm decides pass/fail
+
+    return final_errors, failures
+
+
+def make_plots(cases: dict[str, dict]) -> None:
     try:
-        # Plot using matplotlib if available
         import matplotlib.pyplot as plt
+    except ImportError:
+        print("No matplotlib")
+        return
 
-        fig, ax = plt.subplots(1, 1)
-
-        for nslice in nslices:
-            ax.plot(
-                dx,
-                error_2[nslice],
-                "-",
-                label="{} $l_2$".format(method_orders[nslice]["name"]),
-            )
-            ax.plot(
-                dx,
-                error_inf[nslice],
-                "--",
-                label="{} $l_\\inf$".format(method_orders[nslice]["name"]),
-            )
+    num_operators = len(OPERATORS)
+    fig, axes = plt.subplots(1, num_operators, figsize=(num_operators * 4, 4))
+    # One panel per operator; each panel overlays both error norms of every case
+    for ax, operator in zip(axes, OPERATORS):
+        for name, case in cases.items():
+            ax.loglog(dx, case[operator]["l_2"], "-", label=f"{name} $l_2$")
+            ax.loglog(dx, case[operator]["l_inf"], "--", label=f"{name} $l_\\inf$")
         ax.legend(loc="upper left")
         ax.grid()
-        ax.set_yscale("log")
-        ax.set_xscale("log")
-        ax.set_title("error scaling")
+        ax.set_title(f"Error scaling for {operator}")
         ax.set_xlabel(r"Mesh spacing $\delta x$")
         ax.set_ylabel("Error norm")
 
-        plt.savefig("fci_mms.pdf")
-
-        print("Plot saved to fci_mms.pdf")
-
-        if showPlot:
-            plt.show()
-        plt.close()
-    except ImportError:
-        print("No matplotlib")
-
-if success:
-    print("All tests passed")
-    exit(0)
-else:
-    print("Some tests failed:")
-    for failure in failures:
-        print("\t" + failure)
-    exit(1)
+    fig.tight_layout()
+    fig.savefig("fci_mms.pdf")
+    print("Plot saved to fci_mms.pdf")
+
+    if args.show_plots:  # reads the module-level `args` set in the main block
+        plt.show()
+    plt.close()
+
+
+if __name__ == "__main__":
+    build_and_log("FCI MMS test")
+
+    parser = argparse.ArgumentParser("Error scaling test for FCI operators")
+    parser.add_argument(
+        "--make-plots", action="store_true", help="Create plots of error scaling"
+    )
+    parser.add_argument(
+        "--show-plots",
+        action="store_true",
+        help="Stop and show plots, implies --make-plots",
+    )
+    parser.add_argument(
+        "--dump-errors",
+        type=str,
+        help="Output file to dump errors as JSON",
+        default="fci_operator_errors.json",
+    )
+
+    args = parser.parse_args()  # also read by make_plots as a module-level name
+
+    success = True
+    failures = []
+    cases = {  # per case: slice count, expected order, y-periodicity, extra options
+        "nslice=1 hermitespline": {
+            "nslice": 1,
+            "order": 2,
+            "yperiodic": True,
+            "args": "mesh:ddy:first=C2 mesh:paralleltransform:xzinterpolation:type=hermitespline",
+        },
+        "nslice=1 lagrange4pt": {
+            "nslice": 1,
+            "order": 2,
+            "yperiodic": True,
+            "args": "mesh:ddy:first=C2 mesh:paralleltransform:xzinterpolation:type=lagrange4pt",
+        },
+        "nslice=1 monotonichermitespline": {
+            "nslice": 1,
+            "order": 2,
+            "yperiodic": True,
+            "args": (
+                "mesh:ddy:first=C2 "
+                "mesh:paralleltransform:xzinterpolation:type=monotonichermitespline "
+                "mesh:paralleltransform:xzinterpolation:rtol=1e-3 "
+                "mesh:paralleltransform:xzinterpolation:atol=5e-3"
+            ),
+        },
+    }
+
+    for name, case in cases.items():
+        error2, failures_ = check_fci_operators(name, case)
+        case.update(error2)  # keep the errors next to the case settings
+        failures.extend(failures_)
+        success &= len(failures) == 0  # `failures` accumulates over all cases
+
+    if args.dump_errors:
+        pathlib.Path(args.dump_errors).write_text(json.dumps(cases))
+
+    if args.make_plots or args.show_plots:  # --show-plots implies --make-plots
+        make_plots(cases)
+
+    if success:
+        print("\nAll tests passed")
+    else:
+        print("\nSome tests failed:")
+        for failure in failures:
+            print(f"\t{failure}")
+
+    sys.exit(0 if success else 1)
diff --git a/tests/MMS/spatial/finite-volume/CMakeLists.txt b/tests/MMS/spatial/finite-volume/CMakeLists.txt
new file mode 100644
index 0000000000..6d9c839a05
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/CMakeLists.txt
@@ -0,0 +1,6 @@
+bout_add_mms_test(MMS-spatial-finite-volume
+  SOURCES fv_mms.cxx
+  USE_RUNTEST
+  USE_DATA_BOUT_INP
+  PROCESSORS 2
+)
diff --git a/tests/MMS/spatial/finite-volume/data/BOUT.inp b/tests/MMS/spatial/finite-volume/data/BOUT.inp
new file mode 100644
index 0000000000..029011e437
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/data/BOUT.inp
@@ -0,0 +1,23 @@
+input_field = 0.1*sin(2.0*y) + 1
+FV_Div_par_mod_solution = -0.188495559215388*sin(3.0*y)
+FV_Div_par_fvv_solution = -0.376991118430775*(0.1*cos(3.0*y) + 1)*sin(3.0*y)/(0.1*sin(2.0*y) + 1) - 0.125663706143592*(0.1*cos(3.0*y) + 1)^2*cos(2.0*y)/(0.1*sin(2.0*y) + 1)^2
+FV_Div_par_solution = -0.188495559215388*sin(3.0*y)
+FV_Div_par_K_Grad_par_solution = 0.125663706143592*(-0.188495559215388*sin(3.0*y)/(0.1*sin(2.0*y) + 1) - 0.125663706143592*(0.1*cos(3.0*y) + 1)*cos(2.0*y)/(0.1*sin(2.0*y) + 1)^2)*cos(2.0*y) - 0.15791367041743*(0.1*cos(3.0*y) + 1)*sin(2.0*y)/(0.1*sin(2.0*y) + 1)
+FV_Div_par_K_Grad_par_mod_solution = 0.125663706143592*(-0.188495559215388*sin(3.0*y)/(0.1*sin(2.0*y) + 1) - 0.125663706143592*(0.1*cos(3.0*y) + 1)*cos(2.0*y)/(0.1*sin(2.0*y) + 1)^2)*cos(2.0*y) - 0.15791367041743*(0.1*cos(3.0*y) + 1)*sin(2.0*y)/(0.1*sin(2.0*y) + 1)
+v = (0.1*cos(3.0*y) + 1)/(0.1*sin(2.0*y) + 1)
+
+[mesh]
+MXG = 0
+
+nx = 1
+ny = 128
+nz = 1
+
+Ly = 10
+
+dy = Ly / ny
+J = 1  # Identity metric
+
+[mesh:ddy]
+first = C2
+second = C2
diff --git a/tests/MMS/spatial/finite-volume/fv_mms.cxx b/tests/MMS/spatial/finite-volume/fv_mms.cxx
new file mode 100644
index 0000000000..bb999bcc66
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/fv_mms.cxx
@@ -0,0 +1,102 @@
+#include "bout/bout.hxx"
+#include "bout/build_config.hxx"
+#include "bout/difops.hxx"
+#include "bout/field.hxx"
+#include "bout/field3d.hxx"
+#include "bout/field_factory.hxx"
+#include "bout/fv_ops.hxx"
+#include "bout/globals.hxx"
+#include "bout/options.hxx"
+#include "bout/options_io.hxx"
+#include "bout/utils.hxx"
+
+#include <fmt/format.h>
+
+#include <cmath>
+#include <string>
+
+namespace {
+/// Compare `result` with the "<name>_solution" expression; dump norms and fields.
+void fv_op_test(const std::string& name, Options& dump, const Field3D& input,
+                const Field3D& result, const std::string& suffix = "") {
+  const Field3D solution{FieldFactory::get()->create3D(
+      fmt::format("{}_solution", name), Options::getRoot(), input.getMesh())};
+  const Field3D error{result - solution};
+  // Error norms over RGN_NOBNDRY; `suffix` keeps variants of one operator apart
+  dump[fmt::format("{}{}_l_2", name, suffix)] =
+      sqrt(mean(SQ(error), true, "RGN_NOBNDRY"));
+  dump[fmt::format("{}{}_l_inf", name, suffix)] = max(abs(error), true, "RGN_NOBNDRY");
+  // The full fields are stored alongside the norms under the same name + suffix
+  dump[fmt::format("{}{}_result", name, suffix)] = result;
+  dump[fmt::format("{}{}_error", name, suffix)] = error;
+  dump[fmt::format("{}{}_input", name, suffix)] = input;
+  dump[fmt::format("{}{}_solution", name, suffix)] = solution;
+}
+} // namespace
+
+int main(int argc, char** argv) {
+  BoutInitialise(argc, argv);
+
+  using bout::globals::mesh;
+  // Build the input field and the velocity from the "input_field" and "v" options
+  Field3D input{FieldFactory::get()->create3D("input_field", Options::getRoot(), mesh)};
+  Field3D v{FieldFactory::get()->create3D("v", Options::getRoot(), mesh)};
+
+  // Communicate to calculate parallel transform.
+  if constexpr (bout::build::use_metric_3d) {
+    // Div_par operators require B parallel slices:
+    // Coordinates::geometry doesn't ensure this (yet)
+    auto& Bxy = mesh->getCoordinates()->Bxy;
+    auto& J = mesh->getCoordinates()->J;
+    auto& dy = mesh->getCoordinates()->dy;
+    auto& g_22 = mesh->getCoordinates()->g_22;
+    mesh->communicate(Bxy, J, dy, g_22);
+  }
+  mesh->communicate(input, v);
+
+  Options dump;
+  // Add mesh geometry variables
+  mesh->outputVars(dump);
+  dump["v"] = v;
+
+  // Output argument required by the *_mod overloads; its contents are not checked
+  Field3D flow_ylow;
+  // One group per limiter: Div_par, Div_par_mod and Div_par_fvv, tagged by suffix
+  fv_op_test("FV_Div_par", dump, input, FV::Div_par<FV::MC>(input, v, v), "_MC");
+  fv_op_test("FV_Div_par_mod", dump, input,
+             FV::Div_par_mod<FV::MC>(input, v, v, flow_ylow), "_MC");
+  fv_op_test("FV_Div_par_fvv", dump, input, FV::Div_par_fvv<FV::MC>(input, v, v), "_MC");
+
+  fv_op_test("FV_Div_par", dump, input, FV::Div_par<FV::Upwind>(input, v, v), "_Upwind");
+  fv_op_test("FV_Div_par_mod", dump, input,
+             FV::Div_par_mod<FV::Upwind>(input, v, v, flow_ylow), "_Upwind");
+  fv_op_test("FV_Div_par_fvv", dump, input, FV::Div_par_fvv<FV::Upwind>(input, v, v),
+             "_Upwind");
+
+  fv_op_test("FV_Div_par", dump, input, FV::Div_par<FV::Fromm>(input, v, v), "_Fromm");
+  fv_op_test("FV_Div_par_mod", dump, input,
+             FV::Div_par_mod<FV::Fromm>(input, v, v, flow_ylow), "_Fromm");
+  fv_op_test("FV_Div_par_fvv", dump, input, FV::Div_par_fvv<FV::Fromm>(input, v, v),
+             "_Fromm");
+
+  fv_op_test("FV_Div_par", dump, input, FV::Div_par<FV::MinMod>(input, v, v), "_MinMod");
+  fv_op_test("FV_Div_par_mod", dump, input,
+             FV::Div_par_mod<FV::MinMod>(input, v, v, flow_ylow), "_MinMod");
+  fv_op_test("FV_Div_par_fvv", dump, input, FV::Div_par_fvv<FV::MinMod>(input, v, v),
+             "_MinMod");
+
+  fv_op_test("FV_Div_par", dump, input, FV::Div_par<FV::Superbee>(input, v, v),
+             "_Superbee");
+  fv_op_test("FV_Div_par_mod", dump, input,
+             FV::Div_par_mod<FV::Superbee>(input, v, v, flow_ylow), "_Superbee");
+  fv_op_test("FV_Div_par_fvv", dump, input, FV::Div_par_fvv<FV::Superbee>(input, v, v),
+             "_Superbee");
+
+  fv_op_test("FV_Div_par_K_Grad_par", dump, input, FV::Div_par_K_Grad_par(v, input));
+  fv_op_test("FV_Div_par_K_Grad_par_mod", dump, input,
+             Div_par_K_Grad_par_mod(v, input, flow_ylow));
+
+  bout::writeDefaultOutputFile(dump);
+
+  BoutFinalise();
+}
diff --git a/tests/MMS/spatial/finite-volume/makefile b/tests/MMS/spatial/finite-volume/makefile
new file mode 100644
index 0000000000..88ba6c77e7
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/makefile
@@ -0,0 +1,6 @@
+
+BOUT_TOP	= ../../../..
+# Must name the test source that lives in this directory
+SOURCEC		= fv_mms.cxx
+
+include $(BOUT_TOP)/make.config
diff --git a/tests/MMS/spatial/finite-volume/mms.py b/tests/MMS/spatial/finite-volume/mms.py
new file mode 100755
index 0000000000..a95ecc1328
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/mms.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Generate manufactured solution and sources for finite-volume test
+#
+
+from math import pi
+import warnings
+
+from boututils.boutwarnings import AlwaysWarning
+from boutdata.data import BoutOptionsFile
+from boutdata.mms import exprToStr, y, Grad_par, Div_par, Metric
+from sympy import sin, cos, Expr
+
+warnings.simplefilter("ignore", AlwaysWarning)
+
+# Length of the y domain
+Ly = 10.0
+
+# Identity
+metric = Metric()
+
+# Define solution in terms of input x,y,z
+f = 1 + 0.1 * sin(2 * y)
+fv = 1 + 0.1 * cos(3 * y)
+
+
+# Turn solution into the real y coordinate
+replace = [(y, metric.y * 2 * pi / Ly)]
+
+f = f.subs(replace)
+fv = fv.subs(replace)
+v = fv / f
+
+# Substitute back to get input y coordinates
+replace = [(metric.y, y * Ly / (2 * pi))]
+
+
+def Grad2_par2(f: Expr) -> Expr:
+    return Grad_par(Grad_par(f))  # Grad_par applied twice
+
+
+def Div_par_K_Grad_par(f: Expr, K: Expr) -> Expr:  # note the order: field f, then K
+    return (K * Grad2_par2(f)) + (Div_par(K) * Grad_par(f))
+
+
+############################################
+# Equations solved
+
+options = BoutOptionsFile("data/BOUT.inp")
+# Each expression is substituted back to input y, printed, and stored under its name
+for name, expr in (
+    ("input_field", f),
+    ("v", v),
+    ("FV_Div_par_solution", Div_par(f * v)),
+    ("FV_Div_par_K_Grad_par_solution", Div_par_K_Grad_par(f, v)),
+    ("FV_Div_par_K_Grad_par_mod_solution", Div_par_K_Grad_par(f, v)),
+    ("FV_Div_par_mod_solution", Div_par(f * v)),
+    ("FV_Div_par_fvv_solution", Div_par(f * v * v)),
+):
+    expr_str = exprToStr(expr.subs(replace))
+    print(f"{name} = {expr_str}")
+    options[name] = expr_str
+
+options.write("data/BOUT.inp", overwrite=True)  # rewrites the file that was read above
diff --git a/tests/MMS/spatial/finite-volume/runtest b/tests/MMS/spatial/finite-volume/runtest
new file mode 100755
index 0000000000..bcd4672545
--- /dev/null
+++ b/tests/MMS/spatial/finite-volume/runtest
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+#
+# Python script to run and analyse MMS test
+#
+
+import argparse
+import json
+import pathlib
+import sys
+from time import time
+
+from boutdata.collect import collect
+from boututils.run_wrapper import build_and_log, launch_safe
+from numpy import array, log, polyfit
+
+# Global parameters
+DIRECTORY = "data"
+NPROC = 2
+MTHREAD = 2
+OPERATORS = {
+    # Slope-limiters necessarily reduce the accuracy in places
+    "FV_Div_par_MC": 1.5,
+    "FV_Div_par_mod_MC": 1.5,
+    "FV_Div_par_fvv_MC": 1.5,
+    "FV_Div_par_Upwind": 1,
+    "FV_Div_par_mod_Upwind": 1,
+    "FV_Div_par_fvv_Upwind": 1,
+    "FV_Div_par_Fromm": 1.5,
+    "FV_Div_par_mod_Fromm": 1.5,
+    "FV_Div_par_fvv_Fromm": 1.5,
+    "FV_Div_par_MinMod": 1.5,
+    "FV_Div_par_mod_MinMod": 1.5,
+    "FV_Div_par_fvv_MinMod": 1.5,
+    "FV_Div_par_Superbee": 1.5,
+    "FV_Div_par_mod_Superbee": 1.5,
+    "FV_Div_par_fvv_Superbee": 1.5,
+    "FV_Div_par_K_Grad_par": 2,
+    "FV_Div_par_K_Grad_par_mod": 2,
+}
+# Resolution in y and z
+NLIST = [8, 16, 32, 64]
+dx = 1.0 / array(NLIST)
+
+
+def quiet_collect(name: str) -> float:
+    # `[()]` indexes the result to return a plain (numpy) float rather than `BoutArray`
+    return collect(
+        name,
+        tind=[1, 1],
+        info=False,
+        path=DIRECTORY,  # module-level constant, "data"
+        xguards=False,
+        yguards=False,
+    )[()]
+
+
+def assert_convergence(error, dx, name: str, expected: float) -> bool:
+    fit = polyfit(log(dx), log(error), 1)
+    order = fit[0]  # slope of the straight-line fit of log(error) against log(dx)
+    print(f"{name} convergence order = {order:f} (fit)", end="")
+    # The fitted order is only printed; the check below uses the last two entries
+    order = log(error[-2] / error[-1]) / log(dx[-2] / dx[-1])
+    print(f", {order:f} (small spacing)", end="")
+
+    # Pass when the order exceeds 95% of `expected` (anything higher also passes)
+    success = order > expected * 0.95
+    print(f"\t............ {'PASS' if success else 'FAIL'}")
+
+    return success
+
+
+def run_fv_operators(nz: int) -> dict[str, dict[str, float]]:
+    # Command to run: `nz` sets both MZ and mesh:ny
+    cmd = f"./fv_mms MZ={nz} mesh:ny={nz}"
+    print(f"Running command: {cmd}", end="")
+
+    # Launch using MPI
+    start = time()
+    status, out = launch_safe(cmd, nproc=NPROC, mthread=MTHREAD, pipe=True)
+    print(f" ... done in {time() - start:.3}s")
+
+    # Save output to log file, one file per resolution
+    pathlib.Path(f"run.log.{nz}").write_text(out)
+    # NOTE(review): launch_safe may already raise on a non-zero status -- confirm
+    if status:
+        print(f"Run failed!\nOutput was:\n{out}")
+        sys.exit(status)
+    # Read back both error norms for every operator in OPERATORS
+    return {
+        operator: {
+            "l_2": quiet_collect(f"{operator}_l_2"),
+            "l_inf": quiet_collect(f"{operator}_l_inf"),
+        }
+        for operator in OPERATORS
+    }
+
+
+def transpose(
+    errors: list[dict[str, dict[str, float]]],
+) -> dict[str, dict[str, list[float]]]:
+    """Turn a list of {operator: {norm: error}} into {operator: {norm: [errors]}}"""
+    # Every operator in OPERATORS gets a list per norm, even when `errors` is empty
+    kinds = ("l_2", "l_inf")
+    result = {operator: {kind: [] for kind in kinds} for operator in OPERATORS}
+    for error in errors:
+        for k, v in error.items():
+            for kind in kinds:
+                result[k][kind].append(v[kind])  # list order follows `errors` order
+    return result
+
+
+def test_fv_operators() -> tuple[dict, list[str]]:
+    failures = []
+
+    all_errors = []
+    # Run once per resolution in NLIST and print both norms for every operator
+    for n in NLIST:
+        errors = run_fv_operators(n)
+        all_errors.append(errors)
+
+        for operator in OPERATORS:
+            l_2 = errors[operator]["l_2"]
+            l_inf = errors[operator]["l_inf"]
+
+            print(f"{operator} errors: l-2 {l_2:f} l-inf {l_inf:f}")
+
+    final_errors = transpose(all_errors)
+    for operator, order in OPERATORS.items():  # `order` is the expected order per operator
+        success = assert_convergence(final_errors[operator]["l_2"], dx, operator, order)
+        if not success:
+            failures.append(operator)  # only the l_2 norm decides pass/fail
+
+    return final_errors, failures
+
+
+def make_plots(cases: dict[str, dict]) -> None:
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        print("No matplotlib")
+        return
+
+    num_operators = len(OPERATORS)
+    fig, axes = plt.subplots(1, num_operators, figsize=(num_operators * 4, 4))
+    # One panel per operator; `cases` is indexed by operator name, then by norm
+    for ax, operator in zip(axes, OPERATORS):
+        ax.loglog(dx, cases[operator]["l_2"], "-", label="$l_2$")
+        ax.loglog(dx, cases[operator]["l_inf"], "--", label="$l_\\inf$")
+        ax.legend(loc="upper left")
+        ax.grid()
+        ax.set_title(f"Error scaling for {operator}")
+        ax.set_xlabel(r"Mesh spacing $\delta x$")
+        ax.set_ylabel("Error norm")
+
+    fig.tight_layout()
+    fig.savefig("fv_mms.pdf")
+    print("Plot saved to fv_mms.pdf")
+
+    if args.show_plots:  # reads the module-level `args` set in the main block
+        plt.show()
+    plt.close()
+
+
+if __name__ == "__main__":
+    build_and_log("Finite volume MMS test")
+
+    parser = argparse.ArgumentParser("Error scaling test for finite volume operators")
+    parser.add_argument(
+        "--make-plots", action="store_true", help="Create plots of error scaling"
+    )
+    parser.add_argument(
+        "--show-plots",
+        action="store_true",
+        help="Stop and show plots, implies --make-plots",
+    )
+    parser.add_argument(
+        "--dump-errors",
+        type=str,
+        help="Output file to dump errors as JSON",
+        default="fv_operator_errors.json",
+    )
+
+    args = parser.parse_args()  # also read by make_plots as a module-level name
+    # `error2` holds both norms per operator; `failures` lists the failing operators
+    error2, failures = test_fv_operators()
+    success = len(failures) == 0
+
+    if args.dump_errors:
+        pathlib.Path(args.dump_errors).write_text(json.dumps(error2))
+
+    if args.make_plots or args.show_plots:  # --show-plots implies --make-plots
+        make_plots(error2)
+
+    if success:
+        print("\nAll tests passed")
+    else:
+        print("\nSome tests failed:")
+        for failure in failures:
+            print(f"\t{failure}")
+
+    sys.exit(0 if success else 1)
diff --git a/tests/integrated/test-fci-boundary/get_par_bndry.cxx b/tests/integrated/test-fci-boundary/get_par_bndry.cxx
index ac0f5de2a6..8e3cfac2f7 100644
--- a/tests/integrated/test-fci-boundary/get_par_bndry.cxx
+++ b/tests/integrated/test-fci-boundary/get_par_bndry.cxx
@@ -1,31 +1,39 @@
 #include "bout/bout.hxx"
-#include "bout/derivs.hxx"
+#include "bout/field3d.hxx"
 #include "bout/field_factory.hxx"
+#include "bout/globals.hxx"
+#include "bout/options.hxx"
+#include "bout/options_io.hxx"
+#include "bout/output.hxx"
 #include "bout/parallel_boundary_region.hxx"
 
+#include <fmt/format.h>
+
+#include <vector>
+
 int main(int argc, char** argv) {
   BoutInitialise(argc, argv);
 
   using bout::globals::mesh;
 
-  std::vector<Field3D> fields;
-  fields.resize(static_cast<int>(BoundaryParType::SIZE));
+  std::vector<Field3D> fields(static_cast<int>(BoundaryParType::SIZE), Field3D{0.0});
+
   Options dump;
   for (int i = 0; i < fields.size(); i++) {
-    fields[i] = Field3D{0.0};
+    fields[i].allocate();
+    const auto boundary = static_cast<BoundaryParType>(i);
+    const auto boundary_name = toString(boundary);
     mesh->communicate(fields[i]);
-    for (const auto& bndry_par :
-         mesh->getBoundariesPar(static_cast<BoundaryParType>(i))) {
-      output.write("{:s} region\n", toString(static_cast<BoundaryParType>(i)));
+    for (const auto& bndry_par : mesh->getBoundariesPar(boundary)) {
+      output.write("{:s} region\n", boundary_name);
       for (bndry_par->first(); !bndry_par->isDone(); bndry_par->next()) {
         fields[i][bndry_par->ind()] += 1;
-        output.write("{:s} increment\n", toString(static_cast<BoundaryParType>(i)));
+        output.write("{:s} increment\n", boundary_name);
       }
     }
-    output.write("{:s} done\n", toString(static_cast<BoundaryParType>(i)));
+    output.write("{:s} done\n", boundary_name);
 
-    dump[fmt::format("field_{:s}", toString(static_cast<BoundaryParType>(i)))] =
-        fields[i];
+    dump[fmt::format("field_{:s}", boundary_name)] = fields[i];
   }
 
   bout::writeDefaultOutputFile(dump);
diff --git a/tests/integrated/test-fci-boundary/runtest b/tests/integrated/test-fci-boundary/runtest
index 1b1460da53..e749055185 100755
--- a/tests/integrated/test-fci-boundary/runtest
+++ b/tests/integrated/test-fci-boundary/runtest
@@ -1,29 +1,15 @@
 #!/usr/bin/env python3
 #
 # Python script to run and analyse MMS test
-#
 
-# Cores: 2
-# only working with cmake
-# requires: False
 from boututils.run_wrapper import launch_safe
 from boututils.datafile import DataFile
-from boutdata.collect import collect as _collect
+from boutdata.collect import collect
 
 import numpy as np
 
 
-def collect(var):
-    return _collect(
-        var,
-        info=False,
-        path=directory,
-        xguards=False,
-        yguards=False,
-    )
-
-
-nprocs = [1]  # , 2, 4]
+nprocs = [1]
 mthread = 2
 
 directory = "data"
@@ -43,11 +29,6 @@ regions = {
 }
 regions = {k: v.astype(int) for k, v in regions.items()}
 
-# for x in "xout", "xin":
-#     regions[x] = np.logical_or(regions[f"{x}_fwd"], regions[f"{x}_bwd"])
-# for x in "fwd", "bwd":
-#     regions[x] = np.logical_or(regions[f"xin_{x}"], regions[f"xout_{x}"])
-# regions["all"] = np.logical_or(regions["xin"], regions["xout"])
 for x in "xout", "xin":
     regions[x] = regions[f"{x}_fwd"] + regions[f"{x}_bwd"]
 for x in "fwd", "bwd":
@@ -56,15 +37,18 @@ regions["all"] = regions["xin"] + regions["xout"]
 
 for nproc in nprocs:
     cmd = "./get_par_bndry"
-
-    # Launch using MPI
     _, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True)
 
     for k, v in regions.items():
-        # Collect data
-        data = collect(f"field_{k}")
+        data = collect(
+            f"field_{k}",
+            info=False,
+            path=directory,
+            xguards=False,
+            yguards=False,
+        )
         assert np.allclose(data, v), (
-            k + " does not match",
+            f"{k} does not match",
             np.sum(data),
             np.sum(v),
             np.max(data),
diff --git a/tests/integrated/test-fci-mpi/fci_mpi.cxx b/tests/integrated/test-fci-mpi/fci_mpi.cxx
index 94520dd4a6..94e8e878ef 100644
--- a/tests/integrated/test-fci-mpi/fci_mpi.cxx
+++ b/tests/integrated/test-fci-mpi/fci_mpi.cxx
@@ -1,38 +1,41 @@
+#include "fmt/format.h"
 #include "bout/bout.hxx"
-#include "bout/derivs.hxx"
+#include "bout/field3d.hxx"
 #include "bout/field_factory.hxx"
+#include "bout/globals.hxx"
+#include "bout/options.hxx"
+#include "bout/options_io.hxx"
+#include "bout/region.hxx"
+
+namespace {
+auto fci_mpi_test(int num, Options& dump) {
+  using bout::globals::mesh;
+  Field3D input{FieldFactory::get()->create3D(fmt::format("input_{:d}:function", num),
+                                              Options::getRoot(), mesh)};
+  mesh->communicate(input);
+  input.applyParallelBoundary("parallel_neumann_o2");
+  // For each non-zero slice offset, store the values read from that parallel slice
+  for (int slice = -mesh->ystart; slice <= mesh->ystart; ++slice) {
+    if (slice == 0) {
+      continue;
+    }
+    Field3D tmp{0.};
+    BOUT_FOR(i, tmp.getRegion("RGN_NOBNDRY")) {
+      tmp[i] = input.ynext(slice)[i.yp(slice)];
+    }
+    dump[fmt::format("output_{:d}_{:+d}", num, slice)] = tmp;
+  }
+}
+} // namespace
 
 int main(int argc, char** argv) {
   BoutInitialise(argc, argv);
-  {
-    using bout::globals::mesh;
-    Options* options = Options::getRoot();
-    int i = 0;
-    const std::string default_str{"not_set"};
-    Options dump;
-    while (true) {
-      std::string temp_str;
-      options->get(fmt::format("input_{:d}:function", i), temp_str, default_str);
-      if (temp_str == default_str) {
-        break;
-      }
-      Field3D input{FieldFactory::get()->create3D(fmt::format("input_{:d}:function", i),
-                                                  Options::getRoot(), mesh)};
-      // options->get(fmt::format("input_{:d}:boundary_perp", i), temp_str, s"free_o3");
-      mesh->communicate(input);
-      input.applyParallelBoundary("parallel_neumann_o2");
-      for (int slice = -mesh->ystart; slice <= mesh->ystart; ++slice) {
-        if (slice != 0) {
-          Field3D tmp{0.};
-          BOUT_FOR(i, tmp.getRegion("RGN_NOBNDRY")) {
-            tmp[i] = input.ynext(slice)[i.yp(slice)];
-          }
-          dump[fmt::format("output_{:d}_{:+d}", i, slice)] = tmp;
-        }
-      }
-      ++i;
-    }
-    bout::writeDefaultOutputFile(dump);
+  Options dump;
+  // Inputs 0-3 are always processed (no longer stopping at the first unset input)
+  for (auto num : {0, 1, 2, 3}) {
+    fci_mpi_test(num, dump);
   }
+
+  bout::writeDefaultOutputFile(dump);
   BoutFinalise();
 }
diff --git a/tests/integrated/test-fci-mpi/runtest b/tests/integrated/test-fci-mpi/runtest
index 6676f8f7a5..c18ab0391d 100755
--- a/tests/integrated/test-fci-mpi/runtest
+++ b/tests/integrated/test-fci-mpi/runtest
@@ -1,57 +1,82 @@
 #!/usr/bin/env python3
 #
 # Python script to run and analyse MMS test
-#
-
-# Cores: 8
-# requires: metric_3d
 
-from boututils.run_wrapper import build_and_log, launch_safe, shell_safe
+from boututils.run_wrapper import build_and_log, launch_safe
 from boutdata.collect import collect
-import boutconfig as conf
 import itertools
+import sys
 
-import numpy as np
+import numpy.testing as npt
 
 # Resolution in x and y
-nlist = [1, 2, 4]
+NLIST = [1, 2, 4]
+MAXCORES = 8
+NSLICES = [1]
 
-maxcores = 8
+build_and_log("FCI MMS test")
 
-nslices = [1]
+COLLECT_KW = dict(info=False, xguards=False, yguards=False, path="data")
 
-success = True
 
-build_and_log("FCI MMS test")
+def run_case(nxpe: int, nype: int, mthread: int) -> None:
+    cmd = f"./fci_mpi NXPE={nxpe} NYPE={nype}"
+    print(f"Running command: {cmd}")
+    # The returned status is discarded; only the captured output is kept
+    _, out = launch_safe(cmd, nproc=nxpe * nype, mthread=mthread, pipe=True)
+
+    # Save output to log file; `nslice` is the module-level loop variable, not a parameter
+    with open(f"run.log.{nxpe}.{nype}.{nslice}.log", "w") as f:
+        f.write(out)
+
+
+def test_case(nxpe: int, nype: int, mthread: int, ref: dict) -> list[tuple]:
+    run_case(nxpe, nype, mthread)
+
+    failures = []
+    # Compare every reference array with this run's output; record each mismatch
+    for name, val in ref.items():
+        try:
+            npt.assert_allclose(val, collect(name, **COLLECT_KW))
+        except AssertionError as e:
+            failures.append((nxpe, nype, name, e))
 
-for nslice in nslices:
-    for NXPE, NYPE in itertools.product(nlist, nlist):
-        if NXPE * NYPE > maxcores:
+    return failures
+
+
+failures = []
+
+for nslice in NSLICES:
+    # reference data!
+    run_case(1, 1, MAXCORES)
+
+    ref = {}
+    for i in range(4):
+        for yp in range(1, nslice + 1):
+            for y in [-yp, yp]:
+                name = f"output_{i}_{y:+d}"
+                ref[name] = collect(name, **COLLECT_KW)
+
+    for nxpe, nype in itertools.product(NLIST, NLIST):
+        if (nxpe, nype) == (1, 1):
+            # reference case, done above
             continue
 
-        args = f"NXPE={NXPE} NYPE={NYPE}"
-        # Command to run
-        cmd = f"./fci_mpi {args}"
-
-        print(f"Running command: {cmd}")
-
-        mthread = maxcores // (NXPE * NYPE)
-        # Launch using MPI
-        _, out = launch_safe(cmd, nproc=NXPE * NYPE, mthread=mthread, pipe=True)
-
-        # Save output to log file
-        with open(f"run.log.{NXPE}.{NYPE}.{nslice}.log", "w") as f:
-            f.write(out)
-
-        collect_kw = dict(info=False, xguards=False, yguards=False, path="data")
-        if NXPE == NYPE == 1:
-            # reference data!
-            ref = {}
-            for i in range(4):
-                for yp in range(1, nslice + 1):
-                    for y in [-yp, yp]:
-                        name = f"output_{i}_{y:+d}"
-                        ref[name] = collect(name, **collect_kw)
-        else:
-            for name, val in ref.items():
-                assert np.allclose(val, collect(name, **collect_kw))
+        if nxpe * nype > MAXCORES:
+            continue
+
+        mthread = MAXCORES // (nxpe * nype)
+        failures_ = test_case(nxpe, nype, mthread, ref)
+        failures.extend(failures_)
+
+
+success = len(failures) == 0
+if success:
+    print("\nAll tests passed")
+else:
+    print("\nSome tests failed:")
+    for nxpe, nype, name, error in failures:
+        print("----------")
+        print(f"case {nxpe=} {nype=} {name=}\n{error}")
+
+sys.exit(0 if success else 1)