diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml
index 8a977cfcb3..b3c9a6b5cc 100644
--- a/.github/workflows/fp-stability.yml
+++ b/.github/workflows/fp-stability.yml
@@ -24,8 +24,9 @@ name: FP Stability
 # On FAIL: verrou_dd_sym runs to identify the responsible function symbols.
 # Logs are uploaded as CI artifacts.
 #
-# Verrou (Valgrind 3.26.0 + edf-hpc/verrou@a58d434) is built once and cached.
-# Build takes ~20 min uncached; cached runs restore in ~30 s.
+# Verrou (the pinned Valgrind+Verrou pair; versions live in toolchain/bootstrap/verrou.sh)
+# is installed by fp-stability on first use and cached. The prebuilt download is seconds;
+# a cache miss with no prebuilt falls back to a ~20-min source build.
 
 on:
   push:
@@ -68,37 +69,21 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.local/verrou
-          key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }}
+          # Key off the installer's content so any version bump (or other edit) in
+          # verrou.sh auto-busts the cache and forces a fresh install — no hand-synced
+          # version string to drift out of date.
+          key: verrou-${{ hashFiles('toolchain/bootstrap/verrou.sh') }}-${{ runner.os }}
 
       - name: Install system dependencies
         run: |
           sudo apt-get update -y
           sudo apt-get install -y \
             build-essential automake python3 python3-numpy libc6-dbg \
-            cmake gfortran
+            cmake gfortran zstd
 
-      - name: Build Verrou
-        if: steps.cache-verrou.outputs.cache-hit != 'true'
-        run: |
-          cd /tmp
-          wget -q https://sourceware.org/pub/valgrind/valgrind-3.26.0.tar.bz2
-          tar xf valgrind-3.26.0.tar.bz2
-
-          git clone https://github.com/edf-hpc/verrou.git
-          git -C verrou checkout a58d434
-
-          # Merge Verrou into Valgrind source tree and patch
-          cp -r verrou valgrind-3.26.0/verrou
-          cd valgrind-3.26.0
-          cat verrou/valgrind.*diff | patch -p1
-
-          ./autogen.sh
-          ./configure --enable-only64bit --prefix="$HOME/.local/verrou"
-          make -j"$(nproc)"
-          make install
-
-      - name: Verify Verrou
-        run: ~/.local/verrou/bin/valgrind --version
+      # Verrou is installed by `fp-stability` itself on first use (prebuilt download,
+      # with a source build as fallback; aborts if the install fails). The cache above
+      # restores it across runs, so the install only happens on a cache miss.
 
       - name: Build MFC (debug, serial)
         # FFLAGS=-fno-inline prevents gfortran from inlining small functions into
diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh
new file mode 100755
index 0000000000..dd2a67c653
--- /dev/null
+++ b/toolchain/bootstrap/verrou.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+#
+# Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by
+# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package - it is a fork of
+# Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds);
+# if none is available for this tag/arch it falls back to a source build (~20 min).
+# fp-stability auto-runs this on first use when Verrou is absent (printing what it
+# does); it is also safe to run by hand. A failed install aborts, never a silent skip.
+#
+#   bash toolchain/bootstrap/verrou.sh            # install into $HOME/.local/verrou
+#   VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh
+#   bash toolchain/bootstrap/verrou.sh --force    # reinstall even if present
+#   VERROU_BUILD_FROM_SOURCE=1 bash toolchain/bootstrap/verrou.sh   # skip the prebuilt
+#
+# Versions are pinned here; the fp-stability CI workflow keys its cache on this file.
+
+set -euo pipefail
+
+VALGRIND_VERSION="3.26.0"
+VERROU_COMMIT="a58d434"
+# Prebuilt artifacts (built once per arch) live in a small companion repo. The tag
+# pins to the (valgrind, verrou) pair above - bump all three together.
+VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}"
+VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}"
+PREFIX="${VERROU_HOME:-$HOME/.local/verrou}"
+FORCE="${1:-}"
+
+echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}"
+
+# Idempotent: skip if already installed and working. Source env.sh first if present
+# (a prebuilt tree needs VALGRIND_LIB to run; a source build works either way).
+if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] \
+   && ( [ -f "${PREFIX}/env.sh" ] && . "${PREFIX}/env.sh"; "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
+    echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do."
+    exit 0
+fi
+
+# Platform: Valgrind has no working modern-macOS support; Linux only.
+if [ "$(uname -s)" != "Linux" ]; then
+    echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2
+    exit 1
+fi
+arch_tag=""
+case "$(uname -m)" in
+    x86_64) arch_tag="x86_64" ;;
+    aarch64|arm64)
+        arch_tag="aarch64"
+        echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2
+        echo "         best-validated on x86_64 - treat results as experimental on this arch." >&2
+        ;;
+    *)
+        echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." >&2
+        ;;
+esac
+
+# Fast path: download a prebuilt, hash-verified artifact and source its relocatable
+# env.sh, instead of building from source. Any failure (no asset for this arch/tag,
+# missing zstd/sha256sum, checksum mismatch, won't run) falls through to the build.
+try_prebuilt() {
+    [ -n "$arch_tag" ] || return 1
+    [ "${VERROU_BUILD_FROM_SOURCE:-}" = "1" ] && return 1
+    command -v sha256sum >/dev/null 2>&1 || return 1
+    tar --zstd --help >/dev/null 2>&1 || command -v zstd >/dev/null 2>&1 || return 1
+    command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || return 1
+
+    local asset base dl
+    asset="verrou-${VERROU_COMMIT}-valgrind-${VALGRIND_VERSION}-linux-${arch_tag}.tar.zst"
+    base="https://github.com/${VERROU_DIST_REPO}/releases/download/${VERROU_DIST_TAG}/${asset}"
+    dl="$(mktemp -d)"
+
+    echo "==> Trying prebuilt ${VERROU_DIST_REPO}@${VERROU_DIST_TAG} (${asset})"
+    _fetch() {  # url dest
+        if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi
+    }
+    if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then
+        echo "==> No prebuilt for this tag/arch - building from source instead."
+        rm -rf "$dl"; return 1
+    fi
+    if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then
+        echo "WARNING: prebuilt checksum mismatch - building from source instead." >&2
+        rm -rf "$dl"; return 1
+    fi
+
+    # Extract + verify in a staging dir, then move it into $PREFIX (rm -rf + mv, so not
+    # strictly atomic). set -e is suppressed inside a function used as an `if` condition,
+    # so check each step explicitly - otherwise a failed extract would fall through and the
+    # source build would install on top of a half-written tree (or a stale one on --force).
+    local stage="$dl/stage"
+    mkdir -p "$stage"
+    if tar --zstd --help >/dev/null 2>&1; then
+        tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; }
+    else
+        zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; }
+    fi
+
+    # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets
+    # VALGRIND_LIB relative to the tree so the relocated install works. Verify the
+    # staged tree runs before committing it.
+    if ! ( . "${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
+        echo "WARNING: prebuilt did not run - building from source instead." >&2
+        rm -rf "$dl"; return 1
+    fi
+
+    # Commit only now: remove any existing $PREFIX, then move the verified tree into place.
+    mkdir -p "$(dirname "$PREFIX")"
+    rm -rf "$PREFIX"
+    if ! mv "$stage" "$PREFIX"; then
+        echo "WARNING: could not install prebuilt to ${PREFIX} - building from source instead." >&2
+        rm -rf "$dl"; return 1
+    fi
+    rm -rf "$dl"
+    return 0
+}
+
+if try_prebuilt; then
+    echo "==> Verifying"
+    ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version )
+    echo "==> Done (prebuilt). Verrou installed at ${PREFIX}"
+    echo "    Run:  ./mfc.sh fp-stability   (or set VERROU_HOME=${PREFIX} if you used a custom prefix)"
+    exit 0
+fi
+
+# Build dependencies.
+missing=""
+for tool in tar git make patch autoconf automake; do
+    command -v "$tool" >/dev/null 2>&1 || missing="$missing $tool"
+done
+command -v cc >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || missing="$missing gcc"
+command -v wget >/dev/null 2>&1 || command -v curl >/dev/null 2>&1 || missing="$missing wget/curl"
+if [ -n "$missing" ]; then
+    echo "ERROR: missing build dependencies:$missing" >&2
+    echo "       Install them (e.g. apt: build-essential automake autoconf libtool; or load HPC modules) and retry." >&2
+    exit 1
+fi
+
+workdir="$(mktemp -d)"
+trap 'rm -rf "$workdir"' EXIT
+cd "$workdir"
+
+tarball="valgrind-${VALGRIND_VERSION}.tar.bz2"
+url="https://sourceware.org/pub/valgrind/${tarball}"
+echo "==> Downloading ${tarball}"
+if command -v wget >/dev/null 2>&1; then
+    wget -q "$url"
+else
+    curl -fsSL -o "$tarball" "$url"
+fi
+tar xf "$tarball"
+
+echo "==> Cloning Verrou @ ${VERROU_COMMIT}"
+git clone --quiet https://github.com/edf-hpc/verrou.git
+git -C verrou checkout --quiet "$VERROU_COMMIT"
+
+# Merge Verrou into the Valgrind tree and apply its patch.
+cp -r verrou "valgrind-${VALGRIND_VERSION}/verrou"
+cd "valgrind-${VALGRIND_VERSION}"
+cat verrou/valgrind.*diff | patch -p1
+
+echo "==> Building (this takes ~20 min)"
+./autogen.sh
+./configure --enable-only64bit --prefix="$PREFIX"
+make -j"$(nproc)"
+make install
+
+echo "==> Verifying"
+"${PREFIX}/bin/valgrind" --tool=verrou --version
+echo "==> Done. Verrou installed at ${PREFIX}"
+echo "    Run:  ./mfc.sh fp-stability   (or set VERROU_HOME=${PREFIX} if you used a custom prefix)"
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 54bbff4641..b0591fc9a6 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -898,27 +898,36 @@
     name="fp-stability",
     help="Run floating-point stability tests using Verrou.",
     description=(
-        "Runs each registered test case N times under Verrou's random IEEE-754 "
-        "rounding mode and compares against a nearest-rounding reference run. "
-        "Reports the max L∞ deviation and PASS/FAIL against per-case thresholds.\n\n"
-        "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind "
-        "(defaults to $HOME/.local/verrou). The simulation and pre_process "
-        "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n"
-        "Test cases:\n"
-        "  sod_standard      1-D standard Sod, p_L/p_R=10 (well-conditioned baseline)\n"
-        "  sod_strong        1-D Sod, p_L/p_R=100,000 — HLLC xi-factor cancellation\n"
-        "  water_stiffened   1-D water shock (pi_inf=4046) — pressure-recovery cancellation\n"
-        "  air_water_interface  1-D air/water contact (two-fluid) — mixed-cell cancellation\n\n"
-        "Additional features (skip with --no-* flags):\n"
+        "Runs Verrou random-rounding stability analysis on a built-in suite of small "
+        "1-D cases, or - given a case .py (positional INPUT) - on your own case. Each "
+        "case is run N times under Verrou's random IEEE-754 rounding and compared "
+        "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must "
+        "retain at least ~24 significant bits (single precision) under random rounding "
+        "(no per-case thresholds).\n\n"
+        "With a case .py, that case is run as a SINGLE serial CPU process under Verrou "
+        "(~30x slower, and run many times), so it must be a small, short proxy - large "
+        "grids or long runs are rejected with guidance; serial .dat I/O is forced. "
+        "Example: ./mfc.sh fp-stability my_case.py\n\n"
+        "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to "
+        "$HOME/.local/verrou); if absent it is installed automatically (a pinned, "
+        "hash-verified prebuilt is downloaded, with a source build as fallback) - "
+        "aborts if that install fails. The simulation and pre_process binaries must "
+        "be serial (no-MPI, no-GPU) debug builds.\n\n"
+        "Analysis passes (skip with --no-* flags):\n"
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
-        "  dd_sym         verrou_dd_sym bisection to responsible functions (on failure)\n"
-        "  dd_line        verrou_dd_line bisection to responsible source lines (on failure)\n"
-        "  cancellation   --check-cancellation detection of catastrophic cancellation sites\n"
-        "  mca-sigbits    Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n"
-        "  float-max      --check-max-float detection of double→float overflow sites\n"
+        "  cancellation   --check-cancellation origins, ranked by significant digits lost\n"
+        "  float-max      --check-max-float detection of double->float overflow sites\n"
     ),
     include_common=["mfc_config", "verbose", "debug_log"],
+    positionals=[
+        Positional(
+            name="input",
+            help="Optional case .py to analyze instead of the built-in suite (run as a single serial CPU process under Verrou; must be small/short).",
+            nargs="?",
+            completion=Completion(type=CompletionType.FILES_PY),
+        ),
+    ],
     arguments=[
         Argument(
             name="sim-binary",
@@ -960,20 +969,6 @@
             default=False,
             dest="no_vprec",
         ),
-        Argument(
-            name="no-dd-sym",
-            help="Skip verrou_dd_sym function-level delta-debug on failure.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_dd_sym",
-        ),
-        Argument(
-            name="no-dd-line",
-            help="Skip verrou_dd_line source-line delta-debug on failure.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_dd_line",
-        ),
         Argument(
             name="no-cancellation",
             help="Skip --check-cancellation catastrophic-cancellation detection.",
@@ -981,13 +976,6 @@
             default=False,
             dest="no_cancellation",
         ),
-        Argument(
-            name="no-mca",
-            help="Skip Monte Carlo Arithmetic (mcaquad) significant-bits estimate.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_mca",
-        ),
         Argument(
             name="no-float-max",
             help="Skip --check-max-float float32 overflow detection.",
@@ -997,14 +985,15 @@
         ),
     ],
     examples=[
-        Example("./mfc.sh fp-stability", "Auto-discover binaries and run all cases"),
+        Example("./mfc.sh fp-stability", "Auto-discover binaries and run the built-in suite"),
+        Example("./mfc.sh fp-stability my_case.py", "Analyze your own case (small/short, serial, CPU)"),
         Example(
             "./mfc.sh fp-stability --sim-binary build/install/abc123/bin/simulation",
             "Specify simulation binary explicitly",
         ),
         Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"),
-        Example("./mfc.sh fp-stability --no-vprec --no-dd-line", "Skip VPREC sweep and line debug"),
-        Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip new analysis passes"),
+        Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"),
+        Example("./mfc.sh fp-stability --no-cancellation --no-float-max", "Skip analysis passes"),
     ],
     key_options=[
         ("--sim-binary PATH", "Serial simulation binary (debug, no-MPI)"),
@@ -1013,10 +1002,7 @@
         ("-N, --samples N", "Random-rounding samples per case (default: 5)"),
         ("--no-float-proxy", "Skip float-rounding proxy run"),
         ("--no-vprec", "Skip VPREC mantissa-bit sweep"),
-        ("--no-dd-sym", "Skip verrou_dd_sym on failure"),
-        ("--no-dd-line", "Skip verrou_dd_line on failure"),
         ("--no-cancellation", "Skip cancellation detection"),
-        ("--no-mca", "Skip MCA significant-bits estimate"),
         ("--no-float-max", "Skip float32 overflow detection"),
     ],
 )
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index dd848f046c..200748203c 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -4,35 +4,27 @@
 Features
 --------
 A. Stability suite (always)
-   N random-rounding samples per case, threshold-based PASS/FAIL.
+   N random-rounding samples per case; PASS/FAIL on significant bits retained
+   (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold).
 
 B. Float proxy (--no-float-proxy to skip)
-   One run with --rounding-mode=float — deterministic proxy for
+   One run with --rounding-mode=float - deterministic proxy for
    single-precision sensitivity without recompiling.
 
 C. VPREC precision sweep (--no-vprec to skip)
    One run per mantissa-bit level [52,23,16,10] with
    --backend=vprec --vprec-mode=full; shows where each case breaks.
 
-D. verrou_dd_sym on failure (--no-dd-sym to skip)
-   Delta-debug bisection isolates the minimal set of *functions* causing
-   instability.
-
-E. verrou_dd_line on failure, after dd_sym (--no-dd-line to skip)
-   Further bisects to exact *source lines* within the responsible functions.
-
-F. Cancellation detection (--no-cancellation to skip)
+D. Cancellation detection (--no-cancellation to skip)
    One run with --check-cancellation=yes; reports MFC source lines that
    produce catastrophic cancellation (subtraction of nearly-equal doubles).
-   Uses --cc-gen-file for structured per-line output.
-
-G. MCA significant-bits estimate (--no-mca to skip)
-   N runs with --backend=mcaquad; max deviation vs nearest-rounding
-   reference gives a lower bound on significant bits: s = -log2(dev/scale).
+   Uses --cc-gen-file for structured per-line output.  A cancellation site whose
+   .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous
+   (the line maps to multiple generated instances).
 
-H. Float-max overflow detection (--no-float-max to skip)
+E. Float-max overflow detection (--no-float-max to skip)
    One run with --check-max-float=yes; reports locations where a
-   double→float conversion would overflow to ±Inf.
+   double->float conversion would overflow to +/-Inf.
 
 Logs are saved to fp-stability-logs/ and uploaded as CI artifacts.
 On GitHub Actions: a step summary table and ::warning:: file annotations
@@ -45,119 +37,53 @@
   - A serial pre_process binary (to generate initial conditions)
 
 Usage:
-  ./mfc.sh fp-stability
-  ./mfc.sh fp-stability --no-vprec --no-dd-line
+  ./mfc.sh fp-stability                       # built-in 1-D suite
+  ./mfc.sh fp-stability my_case.py            # your own case (small/short, serial, CPU)
+  ./mfc.sh fp-stability --no-vprec --no-cancellation
   ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH
+
+A user case .py is run as a single serial CPU process under Verrou, so it must be
+a small, short proxy (a feasibility guard rejects large grids / long runs); output
+is forced to serial .dat I/O and the files to diff are auto-detected.
 """
 
-import glob
 import math
 import os
-import re
 import shutil
-import stat
 import subprocess
 import sys
 import tempfile
-import textwrap
 import time
 
 from .common import MFC_ROOT_DIR, MFCException
+from .fp_stability_metrics import (
+    CANCEL_BIT_LEVELS,
+    MIN_SIG_BITS,
+    _autodetect_compare,
+    _cancellation_severity,
+    _macro_context,
+    _max_abs_np,
+    _max_diff_np,
+    _sig_bits,
+)
+from .fp_stability_report import (
+    _emit_github_annotations,
+    _emit_github_summary,
+)
+from .fp_stability_runners import (
+    _find_binary,
+    _find_verrou,
+    _run_cancellation_check,
+    _run_float_max_check,
+    _run_float_proxy,
+    _run_preprocess,
+    _run_simulation_verrou,
+    _run_vprec_sweep,
+    _write_inp,
+)
 from .printer import cons
 from .state import ARG
 
-# Mantissa-bit levels for the VPREC sweep (C).
-# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
-VPREC_MANTISSA_BITS = [52, 23, 16, 10]
-
-# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
-_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
-
-# Files to exclude from cancellation / float-max reports (runtime loaders, XALT).
-_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")
-
-# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)".
-_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")
-
-# Lines that are clearly control-flow delimiters rather than arithmetic.
-# dd_line sometimes reports these when the responsible arithmetic is on the
-# preceding line but shares DWARF debug info with the delimiter (e.g. loop
-# boundaries in #:for-expanded code, or inlined functions at call sites).
-_CONTROL_FLOW_RE = re.compile(
-    r"^\s*("
-    r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b"
-    r"|do\s+\w+\s*=\s*[\w,\s]+"  # naked do-loop header (no arithmetic)
-    r"|else(\s+if\s*\(.*\)\s*then)?\s*$"  # else / else if (...) then
-    r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+"  # subroutine declaration
-    r"|\$:END_GPU\w+"  # fypp GPU macro closers
-    r"|#:end\w*"  # fypp directive closers (#:endfor, #:enddef, etc.)
-    r"|\s*!\s*$"  # comment-only lines
-    r"|\s*$"  # blank lines
-    r")",
-    re.IGNORECASE,
-)
-
-
-def _read_source_line(fname: str, lineno: int) -> str:
-    """Return the raw source line at lineno (1-based), or '' if unavailable."""
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return ""
-    try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-        return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
-    except OSError:
-        return ""
-
-
-def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
-    """Return True if any line in [start, end] contains non-trivial arithmetic.
-
-    Filters out loop delimiters and fypp directive lines that dd_line sometimes
-    reports when the responsible arithmetic shares DWARF info with its enclosing
-    control-flow boundary (inlining, #:for template expansion, etc.).
-    Returns True (keep) when uncertain so we never silently drop real hotspots.
-    """
-    for lineno in range(start, end + 1):
-        line = _read_source_line(fname, lineno)
-        if not line:
-            return True  # can't read — keep to be safe
-        if not _CONTROL_FLOW_RE.match(line):
-            return True
-    return False
-
-
-def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
-    """Return a annotated source snippet around lineno, or '' if file not found.
-
-    fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path.
-    Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree.
-    """
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-        if not candidates:
-            candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return ""
-    try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-    except OSError:
-        return ""
-    start = max(0, lineno - context - 1)
-    end = min(len(lines), lineno + context)
-    rows = []
-    for i, line in enumerate(lines[start:end], start=start + 1):
-        marker = ">" if i == lineno else " "
-        rows.append(f"{marker}{i:5d} | {line.rstrip()}")
-    return "\n".join(rows)
-
 
 def _merge(*dicts):
     """Merge dicts left-to-right; later entries override earlier ones."""
@@ -225,8 +151,9 @@ def _merge(*dicts):
 #   name      - unique identifier used in log paths and console output
 #   description - human-readable summary
 #   compare   - D/ output files compared between reference and perturbed runs
-#   threshold - max L∞ deviation allowed before the case is declared FAIL
 #   ill_cond  - known source of cancellation (empty string = none expected)
+#   (no threshold field: pass/fail is scale-free - >= MIN_SIG_BITS significant bits
+#   retained - so cases need no per-case deviation limit regardless of field magnitude)
 #   pre       - parameters for pre_process (generates initial conditions)
 #   sim       - parameters for simulation
 CASES = [
@@ -234,7 +161,6 @@ def _merge(*dicts):
         "name": "sod_standard",
         "description": "1-D standard Sod, p_L/p_R=10, ideal gas (well-conditioned baseline)",
         "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
-        "threshold": 1e-13,
         "ill_cond": "",
         "pre": _merge(
             _BASE_PRE,
@@ -257,7 +183,6 @@ def _merge(*dicts):
         "name": "sod_strong",
         "description": "1-D Sod, p_L/p_R=100,000, ideal gas",
         "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
-        "threshold": 1e-10,
         "ill_cond": "HLLC xi factor: (s_L - vel_L)/(s_L - s_S) cancels near sonic contact",
         "pre": _merge(
             _BASE_PRE,
@@ -280,8 +205,7 @@ def _merge(*dicts):
         "name": "water_stiffened",
         "description": "1-D water shock, stiffened EOS (pi_inf=4046)",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 1e-8,
-        "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000) [threshold loosened until reduced-energy (Etilde) scheme is merged]",
+        "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000)",
         "pre": _merge(
             _BASE_PRE,
             _WATER_EOS,
@@ -303,7 +227,6 @@ def _merge(*dicts):
         "name": "air_water_interface",
         "description": "1-D air/water isobaric contact (two-fluid, pi_inf=4046)",
         "compare": ["cons.1.00.000050.dat", "cons.4.00.000050.dat", "cons.5.00.000050.dat"],
-        "threshold": 1e-10,
         "ill_cond": "Mixed-cell pressure recovery: E-alpha_w*gamma_w*pi_inf cancels when alpha_w<<1",
         "pre": _merge(
             _BASE_PRE,
@@ -344,7 +267,6 @@ def _merge(*dicts):
         "name": "bubble_rp",
         "description": "1-D bubbly water, pressure step 2:1 driving Rayleigh-Plesset oscillations (nb=1, Keller-Miksis)",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 1e-8,
         "ill_cond": "RP ODE: (p_bub - p_ext) cancels near bubble equilibrium",
         "pre": _merge(
             _BASE_PRE,
@@ -412,8 +334,7 @@ def _merge(*dicts):
         "name": "low_mach",
         "description": "1-D water shock with low_Mach=1 HLLC correction active",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 2e-7,
-        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0 (threshold loosened to 2e-7 to absorb MCA sampling variance)",
+        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M~0",
         "pre": _merge(
             _BASE_PRE,
             _WATER_EOS,
@@ -434,511 +355,20 @@ def _merge(*dicts):
 ]
 
 
-def _find_verrou() -> str:
-    verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou"))
-    candidate = os.path.join(verrou_home, "bin", "valgrind")
-    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
-        return candidate
-    return shutil.which("valgrind") or ""
-
-
-def _find_binary(name: str) -> str:
-    install_dir = os.path.join(MFC_ROOT_DIR, "build", "install")
-    candidates = glob.glob(os.path.join(install_dir, "*", "bin", name))
-    return max(candidates, key=os.path.getmtime) if candidates else ""
-
-
-def _find_dd_sym(verrou_bin: str) -> str:
-    c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_sym")
-    return c if os.path.isfile(c) else ""
-
-
-def _find_dd_line(verrou_bin: str) -> str:
-    c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_line")
-    return c if os.path.isfile(c) else ""
-
-
-def _verrou_pythonpath(verrou_bin: str) -> str:
-    """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir)."""
-    verrou_home = os.path.dirname(os.path.dirname(verrou_bin))
-    matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind"))
-    return matches[0] if matches else ""
-
-
-def _write_inp(params: dict, target_name: str, work_dir: str) -> None:
-    """Write a Fortran namelist .inp file from a Python params dict."""
-    from .run import case_dicts
-
-    master_keys = case_dicts.get_input_dict_keys(target_name)
-    lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys]
-    with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh:
-        fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n")
-
-
-def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str):
-    _write_inp(pre_params, "pre_process", work_dir)
-    with open(os.path.join(work_dir, "pre.log"), "w") as f:
-        result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False)
-    if result.returncode != 0:
-        raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log")
-
-
-def _run_simulation_verrou(
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    run_dir: str,
-    rounding_mode: str = None,
-    extra_flags: list = None,
-):
-    """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output.
-
-    rounding_mode is passed as --rounding-mode=<mode> when not None.
-    extra_flags are appended before the binary (e.g. --backend=vprec ...).
-    """
-    with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir:
-        for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]:
-            src = os.path.join(work_dir, fname)
-            if os.path.exists(src):
-                shutil.copy2(src, tmpdir)
-        shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all"))
-        os.makedirs(os.path.join(tmpdir, "D"))
-
-        log_path = os.path.join(run_dir, "verrou.log")
-        cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"]
-        if rounding_mode:
-            cmd.append(f"--rounding-mode={rounding_mode}")
-        cmd.extend(extra_flags or [])
-        cmd.append(sim_bin)
-
-        with open(os.path.join(run_dir, "sim.out"), "w") as f:
-            result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False)
-
-        if result.returncode != 0:
-            tag = rounding_mode or "vprec"
-            raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out")
-
-        os.makedirs(run_dir, exist_ok=True)
-        for fn in os.listdir(os.path.join(tmpdir, "D")):
-            shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir)
-
-
-def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
-    import numpy as np
-
-    total = 0.0
-    for fname in compare_files:
-        ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
-        if not os.path.exists(ref_p) or not os.path.exists(run_p):
-            return float("inf")
-        ref = np.loadtxt(ref_p)[:, 1]
-        run = np.loadtxt(run_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref - run))))
-    return total
-
-
-def _max_abs_np(ref_dir: str, compare_files: list) -> float:
-    """Return the maximum absolute value across all reference output files."""
-    import numpy as np
-
-    total = 0.0
-    for fname in compare_files:
-        ref_p = os.path.join(ref_dir, fname)
-        if not os.path.exists(ref_p):
-            continue
-        ref = np.loadtxt(ref_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref))))
-    return total
-
-
-def _parse_cancel_gen(gen_path: str) -> list:
-    """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources."""
-    if not os.path.isfile(gen_path):
-        return []
-    locs = []
-    seen = set()
-    with open(gen_path) as fh:
-        for raw in fh:
-            parts = raw.rstrip("\n").split("\t")
-            if len(parts) < 2:
-                continue
-            fname = parts[0].strip()
-            if any(ext in fname for ext in _EXTERNAL_SRCS):
-                continue
-            if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")):
-                continue
-            try:
-                lineno = int(parts[1].strip())
-            except ValueError:
-                continue
-            key = (fname, lineno)
-            if key not in seen:
-                seen.add(key)
-                locs.append(key)
-    return locs
-
-
-def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
-    """Extract first MFC-source frame from each Valgrind error matching error_keyword."""
-    if not os.path.isfile(log_path):
-        return []
-    locs = []
-    seen = set()
-    in_error = False
-    with open(log_path) as fh:
-        for raw in fh:
-            line = re.sub(r"^==\d+== ?", "", raw)
-            if error_keyword in line:
-                in_error = True
-                continue
-            if in_error:
-                if "   at " in line or "   by " in line:
-                    m = _VGFRAME_RE.search(line)
-                    if m:
-                        fname = m.group(1)
-                        if any(ext in fname for ext in _EXTERNAL_SRCS):
-                            continue
-                        lineno = int(m.group(2))
-                        key = (fname, lineno)
-                        if key not in seen:
-                            seen.add(key)
-                            locs.append(key)
-                        in_error = False
-                elif line.strip() == "":
-                    in_error = False
-    return locs
-
-
-def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list:
-    """Run with --check-cancellation=yes; return [(fname, line)] of MFC cancellation sites."""
-    run_dir = os.path.join(work_dir, "cancellation")
-    os.makedirs(run_dir, exist_ok=True)
-    gen_path = os.path.join(run_dir, "cancel_gen.txt")
-    flags = [
-        "--check-cancellation=yes",
-        "--cc-threshold-double=10",
-        f"--cc-gen-file={gen_path}",
-    ]
-    try:
-        _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags)
-    except MFCException:
-        pass
-    raw = _parse_cancel_gen(gen_path)
-    filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
-    skipped = len(raw) - len(filtered)
-    if skipped:
-        cons.print(f"  [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
-    return filtered
-
-
-def _run_mca_samples(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    ref_dir: str,
-    n_mca: int,
-) -> tuple:
-    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound)."""
-    compare = case["compare"]
-    ref_scale = _max_abs_np(ref_dir, compare)
-    max_dev = 0.0
-    flags = ["--backend=mcaquad", "--mca-mode=mca"]
-    for i in range(n_mca):
-        run_dir = os.path.join(work_dir, f"mca_{i:02d}")
-        os.makedirs(run_dir, exist_ok=True)
-        try:
-            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
-            max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
-        except MFCException:
-            pass
-    sig_bits = None
-    if max_dev > 0.0 and ref_scale > 0.0:
-        sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale))))
-    return max_dev, sig_bits
-
-
-def _run_float_max_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list:
-    """Run with --check-max-float=yes; return [(fname, line)] of overflow sites."""
-    run_dir = os.path.join(work_dir, "float_max")
-    os.makedirs(run_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="nearest",
-            extra_flags=["--check-max-float=yes"],
-        )
-    except MFCException:
-        pass
-    return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float")
-
-
-def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
-    """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref."""
-    run_dir = os.path.join(work_dir, "float_proxy")
-    os.makedirs(run_dir)
-    _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float")
-    return _max_diff_np(ref_dir, run_dir, case["compare"])
-
-
-def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list:
-    """Run at each mantissa-bit level. Returns [(bits, dev), ...]."""
-    results = []
-    for bits in VPREC_MANTISSA_BITS:
-        run_dir = os.path.join(work_dir, f"vprec_{bits}")
-        os.makedirs(run_dir)
-        flags = [
-            "--backend=vprec",
-            "--vprec-mode=full",
-            f"--vprec-precision-binary64={bits}",
-            "--vprec-range-binary64=11",
-        ]
-        try:
-            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
-            dev = _max_diff_np(ref_dir, run_dir, case["compare"])
-        except MFCException:
-            dev = float("inf")
-        results.append((bits, dev))
-    return results
-
-
-def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str):
-    """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via
-    VERROU_EXCLUDE / VERROU_SOURCE environment variables.  For test runs, we use
-    --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices).
-    For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the
-    environment — we honour that so the reference is a stable nearest-rounding baseline
-    to compare against.  CLI --rounding-mode would override the env var and break the
-    reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env bash
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        VERROU_BIN={verrou_bin!r}
-        SIM_BIN={sim_bin!r}
-        IC_DIR={ic_dir!r}
-
-        RUNDIR="$1"
-        TMPDIR_RUN=$(mktemp -d)
-        trap 'rm -rf "$TMPDIR_RUN"' EXIT
-
-        cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all"
-        cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp"
-        for fname in indices.dat pre_time_data.dat io_time_data.dat; do
-            [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/"
-        done
-        mkdir -p "$TMPDIR_RUN/D"
-
-        # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and
-        # leaves it unset for test runs.  Defaulting to float gives deterministic
-        # test steps while letting the reference use nearest-rounding.
-        ROUND="${{VERROU_ROUNDING_MODE:-float}}"
-
-        # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation).
-        # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to).
-        # Forward them as valgrind flags when set.
-        EXTRA=""
-        [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE"
-        [ -n "${{VERROU_SOURCE:-}}" ]  && EXTRA="$EXTRA --source=$VERROU_SOURCE"
-
-        cd "$TMPDIR_RUN"
-        "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN"
-        rc=$?
-
-        [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/"
-        exit $rc
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
-    """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR
-    Exits 0 (stable) or 1 (unstable) based on threshold.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env python3
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        import sys, os, numpy as np
-
-        COMPARE_FILES = {compare_files!r}
-        THRESHOLD = {threshold!r}
-
-        ref_dir, run_dir = sys.argv[1], sys.argv[2]
-        max_dev = 0.0
-        for fname in COMPARE_FILES:
-            ref_p = os.path.join(ref_dir, fname)
-            run_p = os.path.join(run_dir, fname)
-            if not os.path.exists(ref_p) or not os.path.exists(run_p):
-                print(f"MISSING: {{fname}}")
-                sys.exit(1)
-            ref = np.loadtxt(ref_p)[:, 1]
-            run = np.loadtxt(run_p)[:, 1]
-            dev = float(np.max(np.abs(ref - run)))
-            max_dev = max(max_dev, dev)
-
-        print(f"max_dev={{max_dev:.3e}}  threshold={{THRESHOLD:.0e}}")
-        sys.exit(0 if max_dev <= THRESHOLD else 1)
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _dd_env(verrou_bin: str) -> dict:
-    """Environment with PYTHONPATH set for verrou_dd_* imports."""
-    py_pkg = _verrou_pythonpath(verrou_bin)
-    env = os.environ.copy()
-    if py_pkg:
-        existing = env.get("PYTHONPATH", "")
-        env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing]))
-    return env
-
-
-def _parse_rddmin_locs(summary_path: str) -> list:
-    """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary.
-
-    Filters out locations whose source lines are pure control-flow delimiters
-    (loop boundaries, fypp directive closers, blank/comment lines).  These can
-    appear when the responsible arithmetic shares DWARF debug info with an
-    enclosing boundary due to inlining or #:for template expansion.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    locs = []
-    skipped = []
-    with open(summary_path) as fh:
-        for line in fh:
-            m = _LOC_RE.search(line)
-            if not m:
-                continue
-            path = m.group(1)
-            start = int(m.group(2))
-            end = int(m.group(3)) if m.group(3) else start
-            try:
-                rel = os.path.relpath(path, MFC_ROOT_DIR)
-                if rel.startswith(".."):
-                    rel = path
-            except ValueError:
-                rel = path
-            rel = rel.replace("\\", "/")
-            if _is_arithmetic_loc(path, start, end):
-                locs.append((rel, start, end))
-            else:
-                skipped.append((rel, start, end))
-    for rel, start, end in skipped:
-        loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}"
-        cons.print(f"  [dim]dd_line: skipped control-flow boundary {loc}[/dim]")
-    return locs
-
-
-def _parse_rddmin_syms(summary_path: str) -> list:
-    """Extract symbol/function names from a dd_sym rddmin_summary.
-
-    rddmin_summary format:
-      ddmin0:\\tFail Ratio: ...\\tFail indexes: ...
-      \\t<funcname>\\t<binary_path>
-      ddmin1:\\t...
-      \\t<funcname>\\t<binary_path>
-
-    Lines starting with 'ddmin' are metadata; function names are on the
-    indented (tab-prefixed) lines as the first tab-delimited field.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    syms = []
-    with open(summary_path) as fh:
-        for ln in fh:
-            stripped = ln.strip()
-            if not stripped or stripped.startswith("ddmin"):
-                continue
-            sym = stripped.split("\t")[0].strip()
-            if sym:
-                syms.append(sym)
-    return syms
-
-
-def _run_dd_tool(
-    dd_bin: str,
-    dd_dir: str,
-    dd_run_sh: str,
-    dd_cmp_py: str,
-    env: dict,
-    log_name: str,
-    summary_subdir: str,
-    label: str,
-) -> list:
-    """Generic runner for verrou_dd_sym / verrou_dd_line. Returns raw summary lines."""
-    log_file = os.path.join(dd_dir, log_name)
-    cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py]
-    cons.print(f"  [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]")
-    with open(log_file, "w") as f:
-        result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False)
-    summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary")
-    summary_lines = []
-    if result.returncode == 0:
-        if os.path.isfile(summary_path):
-            with open(summary_path) as f:
-                summary_lines = f.readlines()
-            cons.print(f"  [bold yellow]{label} result[/bold yellow]:")
-            for line in summary_lines:
-                cons.print(f"    {line.rstrip()}")
-        else:
-            cons.print(f"  [dim]{label} done; see {log_file}[/dim]")
-    else:
-        cons.print(f"  [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})")
-    return summary_lines
-
-
-def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list:
-    """Run verrou_dd_sym; return list of responsible symbol names."""
-    dd_bin = _find_dd_sym(verrou_bin)
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_sym not found; skipping delta-debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else case["threshold"])
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
-    cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
-    return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
-
-
-def _run_dd_line(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    log_dir: str,
-    threshold: float = None,
-) -> list:
-    """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples."""
-    dd_bin = _find_dd_line(verrou_bin)
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    effective_threshold = threshold if threshold is not None else case["threshold"]
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold)
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
-    return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
+def _blank_result(name: str) -> dict:
+    """Return a fresh result dict for case ``name`` with every field at its empty/unmeasured default."""
+    return {
+        "name": name,
+        "passed": False,  # _run_case sets this from sig_bits >= MIN_SIG_BITS
+        "max_dev": float("inf"),  # replaced by the largest _max_diff_np deviation over the random-rounding runs
+        "sig_bits": None,  # filled from _sig_bits(max_dev, ref_scale)
+        "float_proxy": None,
+        "vprec": [],
+        "cancellation_locs": [],  # sites reported at CANCEL_BIT_LEVELS[0]
+        "cancellation_bits": {},  # output of _cancellation_severity
+        "cancellation_macro": {},  # (path, line) -> _macro_context(path, line), kept only where it is truthy
+        "float_max_locs": [],  # sites from _run_float_max_check
+    }
 
 
 def _run_case(
@@ -947,64 +377,58 @@ def _run_case(
     sim_bin: str,
     pp_bin: str,
     n_samples: int,
-    log_dir: str,
     run_float: bool,
     run_vprec: bool,
-    run_dd_sym: bool,
-    run_dd_line: bool,
     run_cancellation: bool,
-    run_mca: bool,
     run_float_max: bool,
 ) -> dict:
     name = case["name"]
-    threshold = case["threshold"]
     compare = case["compare"]
 
     cons.print(f"[bold]{name}[/bold]: {case['description']}")
     cons.indent()
     if case["ill_cond"]:
         cons.print(f"  ill-conditioning: {case['ill_cond']}")
-    cons.print(f"  threshold: {threshold:.0e}")
+    cons.print(f"  pass floor: >= {MIN_SIG_BITS} significant bits retained")
 
     work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-")
-    result = {
-        "name": name,
-        "passed": False,
-        "max_dev": float("inf"),
-        "threshold": threshold,
-        "float_proxy": None,
-        "vprec": [],
-        "dd_sym_syms": [],
-        "dd_line_locs": [],
-        "cancellation_locs": [],
-        "mca_dev": None,
-        "mca_sigbits": None,
-        "float_max_locs": [],
-    }
+    result = _blank_result(name)
     try:
         cons.print("  [dim]running pre_process...[/dim]")
         _write_inp(case["sim"], "simulation", work_dir)
         _run_preprocess(pp_bin, case["pre"], work_dir)
 
         ref_dir = os.path.join(work_dir, "ref")
-        os.makedirs(ref_dir)
         cons.print("  [dim]reference run (rounding=nearest)...[/dim]")
         _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest")
 
+        # For a user case with no fixed compare list, diff whatever the reference
+        # run actually wrote (conserved vars at the final step).
+        if not compare:
+            compare = _autodetect_compare(os.listdir(ref_dir))
+            case["compare"] = compare
+            if not compare:
+                raise MFCException("case produced no cons.*/prim.* output to compare (check t_step_save/t_step_stop and parallel_io)")
+            cons.print(f"  [dim]comparing: {', '.join(compare)}[/dim]")
+
         # --- A: random-rounding stability samples ---
+        # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale),
+        # vs one global floor (no per-case hand-tuned absolute threshold).
+        ref_scale = _max_abs_np(ref_dir, compare)
         max_dev = 0.0
         cons.print(f"  [dim]random-rounding runs (N={n_samples})...[/dim]")
         for i in range(n_samples):
             run_dir = os.path.join(work_dir, f"run_{i:02d}")
-            os.makedirs(run_dir)
             _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random")
             max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
 
-        passed = max_dev <= threshold
+        sig_bits = _sig_bits(max_dev, ref_scale)
+        passed = sig_bits >= MIN_SIG_BITS
         result["passed"] = passed
         result["max_dev"] = max_dev
+        result["sig_bits"] = sig_bits
         tag = "[bold green]PASS[/bold green]" if passed else "[bold red]FAIL[/bold red]"
-        cons.print(f"  {tag}  max_dev={max_dev:.3e}  threshold={threshold:.0e}")
+        cons.print(f"  {tag}  {sig_bits:.1f} bits retained (floor {MIN_SIG_BITS})  max_dev={max_dev:.3e}")
 
         # --- B: float proxy ---
         if run_float:
@@ -1027,77 +451,52 @@ def _run_case(
                 marker = ""
                 if dev == float("inf"):
                     marker = "  [red]crashed[/red]"
-                elif dev > threshold:
+                elif _sig_bits(dev, ref_scale) < MIN_SIG_BITS:
                     marker = "  [red]FAIL[/red]"
                 cons.print(f"    {bits:2d} bits{label_str}: dev={dev:.3e}{marker}")
 
-        # --- D/E: delta-debug with float mode to find FP hotspots.
-        # dd_run.sh uses --rounding-mode=float (deterministic single-precision),
-        # so each bisection step is consistent and --nruns=1 suffices.  Threshold
-        # = float_proxy/10: the full instrumented set produces ~float_proxy
-        # deviation; excluding the responsible function drops it to near zero;
-        # any subset missing the responsible function gives SAME.
-        # Skip when float_proxy is unavailable or too small to localize.
-        float_proxy = result.get("float_proxy")
-        _DD_FLOAT_MIN = 1e-6
-        dd_threshold = float_proxy / 10.0 if float_proxy and float_proxy >= _DD_FLOAT_MIN else 0.0
-        if dd_threshold > 0 and (run_dd_sym or run_dd_line):
-            cons.print(f"  [dim]dd threshold: {dd_threshold:.1e} (float_proxy={float_proxy:.1e})[/dim]")
-        elif run_dd_sym or run_dd_line:
-            cons.print(f"  [dim]skipping dd: float_proxy={float_proxy} < {_DD_FLOAT_MIN:.0e}[/dim]")
-        if dd_threshold > 0 and run_dd_sym:
-            try:
-                result["dd_sym_syms"] = _run_dd_sym(case, verrou_bin, sim_bin, work_dir, log_dir, threshold=dd_threshold)
-            except Exception as exc:
-                cons.print(f"  [bold yellow]dd_sym error[/bold yellow]: {exc}")
-        if dd_threshold > 0 and run_dd_line:
-            try:
-                result["dd_line_locs"] = _run_dd_line(
-                    case,
-                    verrou_bin,
-                    sim_bin,
-                    work_dir,
-                    log_dir,
-                    threshold=dd_threshold,
-                )
-            except Exception as exc:
-                cons.print(f"  [bold yellow]dd_line error[/bold yellow]: {exc}")
-
-        # --- F: cancellation detection ---
+        # --- D: cancellation detection ---
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
             try:
-                locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir)
-                result["cancellation_locs"] = locs
-                if locs:
-                    cons.print(f"  cancellation: {len(locs)} unique source location(s)")
+                # sweep bit thresholds to get per-site severity (bits lost); each
+                # run returns None if it failed (distinct from [] = ran, found none)
+                level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
+                locs = next((s for lvl, s in level_sites if lvl == CANCEL_BIT_LEVELS[0]), None)
+                if locs is None:
+                    cons.print("  [bold yellow]cancellation: detection run failed (see logs); not reported[/bold yellow]")
                 else:
-                    cons.print("  cancellation: none detected")
+                    bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None])
+                    result["cancellation_locs"] = locs
+                    result["cancellation_bits"] = bits
+                    # flag cancellation sites whose .fpp line is inside a #:for/#:def
+                    # expansion: the line maps to multiple generated instances, so the
+                    # report cannot pin it to a unique runtime instance.
+                    result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))}
+                    if locs:
+                        worst = max(bits.values()) if bits else 0
+                        cons.print(f"  cancellation: {len(locs)} site(s), worst loses >= {worst / math.log2(10):.0f} of ~16 digits")
+                        n_macro = len(result["cancellation_macro"])
+                        if n_macro:
+                            cons.print(f"  [dim]{n_macro} inside fypp expansions - line maps to multiple instances[/dim]")
+                    else:
+                        cons.print("  cancellation: none detected")
             except Exception as exc:
                 cons.print(f"  [bold yellow]cancellation check error[/bold yellow]: {exc}")
 
-        # --- G: MCA significant-bits estimate ---
-        if run_mca:
-            cons.print(f"  [dim]MCA significant-bits estimate (N={n_samples})...[/dim]")
-            try:
-                mca_dev, mca_sigbits = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples)
-                result["mca_dev"] = mca_dev
-                result["mca_sigbits"] = mca_sigbits
-                bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a"
-                cons.print(f"  MCA: dev={mca_dev:.3e}  ({bits_str})")
-            except Exception as exc:
-                cons.print(f"  [bold yellow]MCA error[/bold yellow]: {exc}")
-
-        # --- H: float-max overflow detection ---
+        # --- E: float-max overflow detection ---
         if run_float_max:
             cons.print("  [dim]float-max overflow check...[/dim]")
             try:
-                locs = _run_float_max_check(case, verrou_bin, sim_bin, work_dir)
-                result["float_max_locs"] = locs
-                if locs:
-                    cons.print(f"  [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)")
+                locs = _run_float_max_check(verrou_bin, sim_bin, work_dir)
+                if locs is None:
+                    cons.print("  [bold yellow]float-max: run failed (see logs); not reported[/bold yellow]")
                 else:
-                    cons.print("  float-max: no overflows")
+                    result["float_max_locs"] = locs
+                    if locs:
+                        cons.print(f"  [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)")
+                    else:
+                        cons.print("  float-max: no overflows")
             except Exception as exc:
                 cons.print(f"  [bold yellow]float-max check error[/bold yellow]: {exc}")
 
@@ -1108,150 +507,71 @@ def _run_case(
     return result
 
 
-def _emit_github_annotations(results: list):
-    """Emit GitHub annotations for FP hotspots.
-
-    Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations
-    appear inline on the responsible source lines in the PR diff view.
-
-    Up to 3 dd_line locations are emitted as ::warning:: per case (minimal
-    responsible lines from delta-debug).  Up to 3 cancellation sites per case
-    are emitted as ::notice:: so the diff also highlights subtraction-
-    cancellation hotspots identified by --check-cancellation.
-    """
-    if not os.environ.get("GITHUB_ACTIONS"):
-        return
-    for r in results:
-        status = "FAIL" if not r["passed"] else "hotspot"
-        dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})"
-
-        for rel_path, start, end in r.get("dd_line_locs", [])[:3]:
-            loc = f"file={rel_path},line={start}"
-            if end != start:
-                loc += f",endLine={end}"
-            title = f"FP {status} [{r['name']}]"
-            print(f"::warning {loc},title={title}::{dev_str}", flush=True)
-
-        for fname, lineno in r.get("cancellation_locs", [])[:3]:
-            loc = f"file={fname},line={lineno}"
-            title = f"FP cancellation [{r['name']}]"
-            print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True)
+# Verrou is ~30x slower and the suite runs the simulation many times, so a user
+# case must be a small, short, single-process proxy. Work = cells x time steps;
+# both a huge grid and a long run are rejected (built-in cases are ~1k cell-steps).
+FP_CASE_MAX_CELLS = 100_000
+FP_CASE_MAX_WORK = 200_000  # cells x t_step_stop
 
 
-def _emit_github_summary(results: list, n_samples: int):
-    """Write a markdown results table to GITHUB_STEP_SUMMARY.
+def _load_user_case(input_path: str) -> dict:
+    """Build a single fp-stability case from a user case .py.
 
-    Visible directly in the Actions run UI without downloading artifacts.
-    Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels),
-    and dd_line source locations for any failing cases.
+    The case is run as ONE serial CPU process under Verrou (so it must be small
+    and short - a coarsened proxy of a production run, not the real thing); a grid
+    too large to be feasible errors. The output files to compare are auto-detected
+    from the reference run, so 'compare' is left empty here.
     """
-    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
-    if not summary_path:
-        return
-
-    n_pass = sum(1 for r in results if r["passed"])
-    n_fail = len(results) - n_pass
-
-    md = []
-    md.append("## FP Stability Results\n")
-    md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n")
-
-    # Main results table
-    md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |")
-    md.append("|------|:------:|--------:|--------:|--------:|:------:|")
-    for r in results:
-        status = "✅" if r["passed"] else "❌"
-        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
-        sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
-        md.append(f"| `{r['name']}` | {status} | {r['max_dev']:.2e} | {r['threshold']:.0e} | {fp} | {sb} |")
-    md.append("")
-
-    # VPREC sweep — one column per bit level, ❌ where dev > threshold
-    if any(r["vprec"] for r in results):
-        _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
-        header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
-        sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS)
-        md.append("### VPREC precision sweep\n")
-        md.append(f"| Case | {header} |")
-        md.append(f"|------|{sep}|")
-        for r in results:
-            vmap = {b: d for b, d in r["vprec"]}
-            cols = []
-            for b in VPREC_MANTISSA_BITS:
-                d = vmap.get(b)
-                if d is None:
-                    cols.append("—")
-                elif d == float("inf"):
-                    cols.append("💥 crash")
-                else:
-                    cols.append(f"{d:.2e}")
-            md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
-        md.append("")
-
-    # dd_line hotspot sources — always shown (top 10 per case) with source context
-    cases_with_locs = [r for r in results if r["dd_line_locs"]]
-    if cases_with_locs:
-        md.append("### Top FP hotspots (dd\\_line)\n")
-        for r in cases_with_locs:
-            status = "❌ FAIL" if not r["passed"] else "✅ pass"
-            md.append(f"**`{r['name']}`** ({status})\n")
-            for rel_path, start, end in r["dd_line_locs"][:10]:
-                loc = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
-                md.append(f"- `{loc}`")
-                snippet = _get_source_context(rel_path, start)
-                if snippet:
-                    md.append("  ```fortran")
-                    for line in snippet.splitlines():
-                        md.append(f"  {line}")
-                    md.append("  ```")
-            md.append("")
-
-    # dd_sym function names (collapsed, since less actionable than dd_line)
-    cases_with_syms = [r for r in results if r["dd_sym_syms"]]
-    if cases_with_syms:
-        md.append("<details>")
-        md.append("<summary>Responsible functions (dd_sym)</summary>\n")
-        for r in cases_with_syms:
-            md.append(f"\n**`{r['name']}`**\n")
-            for sym in r["dd_sym_syms"]:
-                md.append(f"- `{sym}`")
-        md.append("\n</details>\n")
-
-    # Cancellation hotspots
-    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
-    if cases_with_cancel:
-        md.append("### Catastrophic cancellation sites\n")
-        for r in cases_with_cancel:
-            md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s)\n")
-            for fname, lineno in r["cancellation_locs"][:15]:
-                md.append(f"- `{fname}:{lineno}`")
-                snippet = _get_source_context(fname, lineno)
-                if snippet:
-                    md.append("  ```fortran")
-                    for line in snippet.splitlines():
-                        md.append(f"  {line}")
-                    md.append("  ```")
-            md.append("")
+    from .run import input as run_input  # lazy import: avoids a circular import
+
+    params = run_input.load(input_path, None, {}, do_print=False).params
+    # Force serial .dat I/O: the suite runs the no-MPI binary as one process and
+    # diffs serial cons.*/prim.* files (not the parallel SILO/HDF5 path).
+    params["parallel_io"] = "F"
+    m, n, p = (int(params.get(k, 0) or 0) for k in ("m", "n", "p"))
+    cells = (m + 1) * (n + 1) * (p + 1)
+    t_stop = int(params.get("t_step_stop", 0) or 0)
+    work = cells * max(t_stop, 1)
+    if cells > FP_CASE_MAX_CELLS:
+        raise MFCException(f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).")
+    if work > FP_CASE_MAX_WORK:
+        raise MFCException(
+            f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under "
+            f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)."
+        )
+    stem = os.path.splitext(os.path.basename(input_path))[0]
+    if stem == "case":  # examples/<name>/case.py - the dir name is more telling
+        stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem
+    return {
+        "name": stem,
+        "description": f"user case {input_path} ({cells} cells, run single-rank on CPU)",
+        "compare": [],  # auto-detected from the reference run's output
+        "ill_cond": "",
+        "pre": params,
+        "sim": params,
+    }
 
-    # Float-max overflow sites
-    cases_with_fmax = [r for r in results if r.get("float_max_locs")]
-    if cases_with_fmax:
-        md.append("### Float32 overflow sites (check\\_max\\_float)\n")
-        for r in cases_with_fmax:
-            md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
-            for fname, lineno in r["float_max_locs"][:10]:
-                md.append(f"- `{fname}:{lineno}`")
-            md.append("")
 
-    with open(summary_path, "a") as f:
-        f.write("\n".join(md) + "\n")
+def _install_verrou() -> str:
+    """Run toolchain/bootstrap/verrou.sh under bash, then return the valgrind path located by
+    _find_verrou(). Raises MFCException if the script fails or no binary is found afterwards."""
+    cons.print("[bold]Verrou not found - installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]")
+    installer = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh")
+    exit_code = subprocess.run(["bash", installer], check=False).returncode
+    if exit_code != 0:
+        raise MFCException("Verrou install failed (see output above). Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.")
+    found = _find_verrou()
+    if not (found and os.path.isfile(found)):
+        raise MFCException("Verrou install reported success but no valgrind binary was found under $VERROU_HOME.")
+    return found
 
 
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
-    if not verrou_bin or not os.path.isfile(verrou_bin):
-        cons.print("[bold yellow]SKIP[/bold yellow]: verrou not found. Install at $HOME/.local/verrou or set VERROU_HOME.")
-        sys.exit(0)
+    if not verrou_bin or not (os.path.isfile(verrou_bin) and os.access(verrou_bin, os.X_OK)):
+        if ARG("verrou_binary"):
+            raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.")
+        verrou_bin = _install_verrou()
 
     sim_bin = ARG("sim_binary") or _find_binary("simulation")
     if not sim_bin or not os.path.isfile(sim_bin):
@@ -1264,12 +584,11 @@ def fp_stability():
     n_samples = ARG("samples")
     run_float = not ARG("no_float_proxy")
     run_vprec = not ARG("no_vprec")
-    run_dd_sym = not ARG("no_dd_sym")
-    run_dd_line = not ARG("no_dd_line")
     run_cancellation = not ARG("no_cancellation")
-    run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
 
+    cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES
+
     log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs")
     os.makedirs(log_dir, exist_ok=True)
 
@@ -1278,20 +597,16 @@ def fp_stability():
     cons.print(f"  verrou:      {verrou_bin}")
     cons.print(f"  simulation:  {sim_bin}")
     cons.print(f"  pre_process: {pp_bin}")
+    if ARG("input"):
+        cons.print(f"  case:        {ARG('input')}  (single serial CPU run under Verrou)")
     cons.print(f"  samples:     {n_samples}")
     features = []
     if run_float:
         features.append("float-proxy")
     if run_vprec:
         features.append("vprec-sweep")
-    if run_dd_sym:
-        features.append("dd_sym")
-    if run_dd_line:
-        features.append("dd_line")
     if run_cancellation:
         features.append("cancellation")
-    if run_mca:
-        features.append("mca-sigbits")
     if run_float_max:
         features.append("float-max")
     cons.print(f"  features:    {', '.join(features) if features else 'stability only'}")
@@ -1300,7 +615,7 @@ def fp_stability():
 
     start = time.time()
     results = []
-    for case in CASES:
+    for case in cases_to_run:
         try:
             r = _run_case(
                 case,
@@ -1308,31 +623,14 @@ def fp_stability():
                 sim_bin,
                 pp_bin,
                 n_samples,
-                log_dir,
                 run_float,
                 run_vprec,
-                run_dd_sym,
-                run_dd_line,
                 run_cancellation,
-                run_mca,
                 run_float_max,
             )
         except MFCException as exc:
             cons.print(f"  [bold red]ERROR[/bold red]: {exc}")
-            r = {
-                "name": case["name"],
-                "passed": False,
-                "max_dev": float("inf"),
-                "threshold": case["threshold"],
-                "float_proxy": None,
-                "vprec": [],
-                "dd_sym_syms": [],
-                "dd_line_locs": [],
-                "cancellation_locs": [],
-                "mca_dev": None,
-                "mca_sigbits": None,
-                "float_max_locs": [],
-            }
+            r = _blank_result(case["name"])
         results.append(r)
 
     elapsed = time.time() - start
@@ -1341,11 +639,8 @@ def fp_stability():
 
     cons.print(f"[bold]Results[/bold] ({elapsed:.0f}s):  [green]{n_pass} passed[/green]  [red]{n_fail} failed[/red]")
     for r in results:
-        mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]"
-        cons.print(f"  {mark} {r['name']}")
-
-    if n_fail > 0:
-        cons.print(f"\n  dd_sym/dd_line logs in: {log_dir}")
+        mark = "[green]PASS[/green]" if r["passed"] else "[red]FAIL[/red]"
+        cons.print(f"  {mark}  {r['name']}")
 
     _emit_github_summary(results, n_samples)
     _emit_github_annotations(results)
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
new file mode 100644
index 0000000000..4917e293f5
--- /dev/null
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -0,0 +1,237 @@
+"""Pure metrics, source-resolution, and parsing helpers for the FP-stability suite.
+
+Leaf module: imports only stdlib + MFC_ROOT_DIR. No sibling fp_stability*
+imports, so the runners/report/orchestrator modules can all depend on it.
+"""
+
+import glob
+import math
+import os
+import re
+
+from .common import MFC_ROOT_DIR
+
+# Mantissa-bit levels for the VPREC sweep (C).
+# 52 = full double, 23 = single, 16 = between single and half, 10 = IEEE half (binary16).
+VPREC_MANTISSA_BITS = [52, 23, 16, 10]
+
+_OUTPUT_DAT = re.compile(r"^(cons|prim)\.\d+\.\d+\.(\d+)\.dat$")
+
+
+def _autodetect_compare(filenames: list) -> list:
+    """Choose which output files of a user case to diff: the sorted basenames of the
+    cons.* files at the highest time step seen, or of that step's prim.* files when it
+    has no cons.* file. Names not shaped like an output .dat are ignored; [] if none match."""
+    steps = {}
+    for base in map(os.path.basename, filenames):
+        hit = _OUTPUT_DAT.match(base)
+        if hit is None:
+            continue
+        kind, step = hit.group(1), int(hit.group(2))
+        steps.setdefault(step, {"cons": [], "prim": []})[kind].append(base)
+    newest = steps[max(steps)] if steps else {"cons": [], "prim": []}
+    return sorted(newest["cons"] or newest["prim"])
+
+
+# Stability pass/fail (stage A) is scale-free: a case must retain at least this
+# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
+# 24 ~= single precision. One global floor replaces per-case absolute thresholds
+# (which spanned 6 orders of magnitude purely from field scale + conditioning);
+# normalising by the field scale collapses that, so a single number suffices.
+MIN_SIG_BITS = 24
+
+
+def _sig_bits(max_dev: float, ref_scale: float) -> float:
+    """Significant bits retained = -log2(max_dev / ref_scale).
+
+    Scale-free: dividing the deviation by the field's peak magnitude removes the
+    absolute scale. A non-finite deviation (NaN/inf: failed or diverged run) returns
+    -inf, never a pass; zero deviation or zero scale returns 53.0 (full double).
+    """
+    if not math.isfinite(max_dev):
+        return float("-inf")
+    return 53.0 if not (max_dev > 0) or not (ref_scale > 0) else -math.log2(max_dev / ref_scale)
+
+
+# Files to exclude from cancellation / float-max reports (runtime loaders, XALT).
+_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")
+
+# Matches the "(file.fpp:LINE)" source location in a Valgrind stack frame ("at" or "by" line).
+_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")
+
+# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def
+# defines a macro instantiated at multiple call sites) collapse many distinct
+# generated computations onto a single .fpp source line, so a cancellation site
+# inside one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute
+# select code but do not duplicate it, so they are tracked for balance but not flagged.
+_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
+_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
+_FYPP_DUPLICATING = ("for", "def", "block", "call")
+
+
+def _resolve_source(fname: str) -> str:
+    """Resolve a (possibly bare) source filename to an existing path, or '' if not found.
+    An absolute existing path is used as-is; otherwise the basename is searched recursively
+    under src/ (glob-escaped, so it matches literally) and the sorted-first hit is returned."""
+    if os.path.isabs(fname) and os.path.isfile(fname):
+        return fname
+    pattern = os.path.join(glob.escape(os.path.join(MFC_ROOT_DIR, "src")), "**", glob.escape(os.path.basename(fname)))
+    return min(glob.glob(pattern, recursive=True), default="")
+
+
+def _read_source_lines(fname: str) -> list:
+    """Resolve `fname` and return its lines (newlines kept), or [] if unresolved or unreadable."""
+    path = _resolve_source(fname)
+    if not path:
+        return []
+    try:
+        with open(path, encoding="utf-8", errors="replace") as fh:  # never UnicodeDecodeError, whatever the locale
+            return fh.readlines()
+    except OSError:
+        return []
+
+
+def _macro_context_in_lines(lines: list, lineno: int) -> str:
+    """Name the innermost code-duplicating fypp block open just before `lineno` (1-based).
+
+    Only the lines preceding `lineno` are scanned: each block opener pushes its keyword
+    and each closing directive pops one entry (closers are not matched to the kind they
+    close; a closer met with nothing open is ignored). Returns '#:<kw>' for the deepest
+    open for/def/block/call, or None when no such block encloses the line.
+    """
+    open_blocks = []
+    for text in lines[: max(0, lineno - 1)]:
+        opener = _FYPP_BLOCK_OPEN.match(text)
+        if opener is not None:
+            open_blocks.append(opener.group(1).lower())
+        elif open_blocks and _FYPP_BLOCK_CLOSE.match(text):
+            open_blocks.pop()
+    # The stack is outermost-first, so the last duplicating keyword is the innermost one.
+    duplicating = [kw for kw in open_blocks if kw in _FYPP_DUPLICATING]
+    if duplicating:
+        return f"#:{duplicating[-1]}"
+    return None
+
+
+def _macro_context(fname: str, lineno: int) -> str:
+    """Like _macro_context_in_lines, but reads the lines of `fname` first; None if unreadable."""
+    source = _read_source_lines(fname)
+    if source:
+        return _macro_context_in_lines(source, lineno)
+    return None
+
+
+def _dat_column(path: str):
+    """Load column 1 (the field value) from an MFC .dat file as a 1-D array. ndmin=2 keeps
+    rows on axis 0 for single-row and single-column files alike, so a file lacking a second
+    column raises IndexError instead of silently yielding a value taken from another row."""
+    import numpy as np
+    return np.loadtxt(path, ndmin=2)[:, 1]
+
+
+def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
+    """Largest |ref - run| over `compare_files`; inf if a file is missing or any difference is NaN."""
+    import numpy as np
+    total = 0.0
+    for fname in compare_files:
+        ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
+        if not os.path.exists(ref_p) or not os.path.exists(run_p):
+            return float("inf")
+        total = float(np.maximum(total, np.max(np.abs(_dat_column(ref_p) - _dat_column(run_p)))))  # propagates NaN; builtin max() drops it
+    return float("inf") if math.isnan(total) else total
+
+
+def _max_abs_np(ref_dir: str, compare_files: list) -> float:
+    """Peak |value| over the reference copies of `compare_files` (0.0 if none exist).
+    Files missing from `ref_dir` are skipped rather than treated as an error."""
+    import numpy as np
+
+    peak = 0.0
+    present = (p for p in (os.path.join(ref_dir, name) for name in compare_files) if os.path.exists(p))
+    for ref_path in present:
+        file_peak = float(np.abs(_dat_column(ref_path)).max())
+        peak = max(peak, file_peak)
+    return peak
+
+
+def _parse_cancel_gen(gen_path: str) -> list:
+    """Parse cc-gen-file TSV (file\\tline\\tsymbol) -> unique [(fname, line)] for source files,
+    in first-seen order (not sorted). Returns [] if the file is absent; rows that are short,
+    external, not a recognised source extension, or carry a non-integer line are skipped."""
+    if not os.path.isfile(gen_path):
+        return []
+    locs = []
+    seen = set()
+    with open(gen_path, encoding="utf-8", errors="replace") as fh:  # one bad byte must not abort the parse
+        for raw in fh:
+            parts = raw.rstrip("\n").split("\t")
+            if len(parts) < 2:
+                continue
+            fname = parts[0].strip()
+            if any(ext in fname for ext in _EXTERNAL_SRCS) or not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")):
+                continue
+            try:
+                lineno = int(parts[1].strip())
+            except ValueError:
+                continue
+            key = (fname, lineno)
+            if key not in seen:
+                seen.add(key)
+                locs.append(key)
+    return locs
+
+
+def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
+    """Return unique (file, line) pairs in first-seen order: the first non-external source frame after each log line containing `error_keyword` ([] if the log is absent)."""
+    if not os.path.isfile(log_path):
+        return []
+    locs = []
+    seen = set()
+    in_error = False
+    with open(log_path) as fh:
+        for raw in fh:
+            line = re.sub(r"^==\d+== ?", "", raw)  # drop the leading "==<digits>== " prefix
+            if error_keyword in line:
+                in_error = True  # header of a matching error: its frames follow
+                continue
+            if in_error:
+                if "   at " in line or "   by " in line:  # stack-frame lines
+                    m = _VGFRAME_RE.search(line)
+                    if m:
+                        fname = m.group(1)
+                        if any(ext in fname for ext in _EXTERNAL_SRCS):
+                            continue  # external frame: keep scanning this error's later frames
+                        lineno = int(m.group(2))
+                        key = (fname, lineno)
+                        if key not in seen:
+                            seen.add(key)
+                            locs.append(key)
+                        in_error = False  # at most one location per error, even if already seen
+                elif line.strip() == "":
+                    in_error = False  # a blank line ends this error's frames
+    return locs
+
+
+# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
+# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
+# levels and taking the highest each site survives gives a per-site "bits lost"
+# severity (a lower bound - no false positives). 48 is near the full 53-bit
+# double mantissa (the top of the sweep), not the mantissa width itself.
+CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
+
+
+def _cancellation_severity(level_sites: list) -> dict:
+    """Map every site to the largest threshold it was reported at, from [(threshold, [sites])].
+    That threshold is the site's bits-lost severity (a lower bound); levels <= 0 are ignored."""
+    worst = {}
+    flattened = ((threshold, site) for threshold, group in level_sites for site in group)
+    for threshold, site in flattened:
+        if worst.get(site, 0) < threshold:
+            worst[site] = threshold
+    return worst
+
+
+def _digits_left(bits_lost: float) -> float:
+    """Decimal digits left after `bits_lost` of a double's 53 mantissa bits are gone (never below 0.0; ~15.95 when none are lost)."""
+    remaining_bits = 53 - bits_lost
+    return max(0.0, remaining_bits / math.log2(10))
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
new file mode 100644
index 0000000000..2ca469b9e9
--- /dev/null
+++ b/toolchain/mfc/fp_stability_report.py
@@ -0,0 +1,158 @@
+"""GitHub-output emitters for the FP-stability suite (step summary + annotations).
+
+Pure formatting of the result dicts produced by the runners; the metric helpers
+it uses (digit math) live in fp_stability_metrics.
+"""
+
+import math
+import os
+
+from .fp_stability_metrics import (
+    MIN_SIG_BITS,
+    VPREC_MANTISSA_BITS,
+    _digits_left,
+)
+
+
+def _emit_github_annotations(results: list):
+    """Print GitHub ::notice:: workflow commands for cancellation sites.
+
+    Does nothing unless the GITHUB_ACTIONS environment variable is set.
+
+    Per result, the first 3 cancellation_locs entries each become a notice carrying
+    file= and line=. A site with a non-zero entry in cancellation_bits has its digit
+    loss (bits / log2(10)) appended; a site present in cancellation_macro is flagged
+    as an expanded line that may stand for several instances. Beyond 3 sites, one
+    further notice without a location refers the reader to the step summary.
+    """
+    if not os.environ.get("GITHUB_ACTIONS"):
+        return
+    for r in results:
+        sites = r.get("cancellation_locs", [])
+        bits_by_site = r.get("cancellation_bits") or {}
+        macro_by_site = r.get("cancellation_macro") or {}
+        for fname, lineno in sites[:3]:
+            key = (fname, lineno)
+            lost_bits, macro = bits_by_site.get(key), macro_by_site.get(key)
+            note = "catastrophic cancellation site"
+            if lost_bits:
+                note += f" - loses >= {lost_bits / math.log2(10):.0f} of ~16 digits"
+            if macro:
+                note += f" - inside a {macro}-expanded line, may represent multiple instances"
+            print(f"::notice file={fname},line={lineno},title=FP cancellation [{r['name']}]::{note}", flush=True)
+        # Everything past the inline cap is only counted, not located.
+        hidden = len(sites) - 3
+        if hidden > 0:
+            print(f"::notice title=FP cancellation [{r['name']}]::{hidden} more cancellation site(s) not annotated inline; see the step summary", flush=True)
+
+
+def _more_md(total: int, shown: int, noun: str) -> str:
+    """Markdown bullet stating how many of `total` items were left out of a list capped
+    at `shown` entries, with a pointer to fp-stability-logs/. Returns '' when the list
+    was not cut, i.e. when `total` does not exceed `shown`."""
+    truncated = total > shown
+    return f"- ...and {total - shown} more {noun}; see `fp-stability-logs/`" if truncated else ""
+
+
+def _emit_github_summary(results: list, n_samples: int):
+    """Append a markdown report of `results` to the file named by GITHUB_STEP_SUMMARY.
+
+    No-op when that variable is unset. Sections: the pass/fail table (bits retained,
+    max_dev, float proxy), cancellation sites ranked by digits lost, the VPREC sweep
+    (deviation per mantissa-bit level) and float-max overflow sites; empty ones are omitted.
+    """
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        return
+
+    n_pass = sum(1 for r in results if r["passed"])
+    n_fail = len(results) - n_pass
+
+    md = []
+    md.append("## FP Stability Results\n")
+    md.append(f"**{n_pass} passed, {n_fail} failed** - {n_samples} random-rounding samples per case\n")
+    md.append(
+        f"> **Coverage:** {len(results)} one-dimensional case(s) "
+        f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these "
+        "cases exercise - not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
+        "they do not reach.\n"
+    )
+
+    # Main results table - pass/fail is scale-free: bits retained vs a single floor
+    md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
+    md.append("| Case | Status | bits retained | max\\_dev | Float proxy |")
+    md.append("|------|:------:|:------:|--------:|--------:|")
+    for r in results:
+        status = "PASS" if r["passed"] else "FAIL"
+        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "-"
+        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "-"
+        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |")
+    md.append("")
+
+    # Cancellation ORIGINS - where ill-conditioning actually arises, led with the
+    # most severe (most bits lost).
+    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
+    if cases_with_cancel:
+        md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
+        md.append(
+            "> Subtraction of nearly-equal values loses leading significant digits. A double carries "
+            "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
+            "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~9 leaves only "
+            "single-precision trust (~7 digits). Site *count* is not severity - one site losing many digits outweighs "
+            "many mild ones.\n"
+        )
+        for r in cases_with_cancel:
+            site_bits = r.get("cancellation_bits") or {}
+            macro_sites = r.get("cancellation_macro") or {}
+            sites = [{"where": f"{fname}:{lineno}", "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"]]
+            ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"]))
+            if ordered:
+                w = ordered[0]
+                md.append(f"**`{r['name']}`** - {len(ordered)} site(s); worst loses >= {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
+            for e in ordered[:15]:
+                lost = e["bits"] / math.log2(10)
+                ambiguous = f" - _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else ""
+                md.append(f"- **>= {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) - `{e['where']}`{ambiguous}")
+            footer = _more_md(len(ordered), 15, "site(s)")
+            if footer:
+                md.append(footer)
+            md.append("")
+
+    # VPREC sweep - one column per mantissa-bit level showing the Linf deviation at
+    # that reduced precision ("crash" = run diverged/failed; dash = not measured).
+    if any(r["vprec"] for r in results):
+        # Column labels are derived from the levels, so editing VPREC_MANTISSA_BITS cannot raise KeyError.
+        header = " | ".join(f"{b}b" for b in VPREC_MANTISSA_BITS)
+        sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS)
+        md.append("### VPREC precision sweep\n")
+        md.append(f"| Case | {header} |")
+        md.append(f"|------|{sep}|")
+        for r in results:
+            vmap = dict(r["vprec"])
+            cols = []
+            for b in VPREC_MANTISSA_BITS:
+                d = vmap.get(b)
+                if d is None:
+                    cols.append("-")
+                elif d == float("inf"):
+                    cols.append("crash")
+                else:
+                    cols.append(f"{d:.2e}")
+            md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
+        md.append("")
+
+    # Float-max overflow sites
+    cases_with_fmax = [r for r in results if r.get("float_max_locs")]
+    if cases_with_fmax:
+        md.append("### Float32 overflow sites (check\\_max\\_float)\n")
+        for r in cases_with_fmax:
+            md.append(f"**`{r['name']}`** - {len(r['float_max_locs'])} site(s)\n")
+            for fname, lineno in r["float_max_locs"][:10]:
+                md.append(f"- `{fname}:{lineno}`")
+            footer = _more_md(len(r["float_max_locs"]), 10, "site(s)")
+            if footer:
+                md.append(footer)
+            md.append("")
+
+    with open(summary_path, "a", encoding="utf-8") as f:
+        f.write("\n".join(md) + "\n")
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
new file mode 100644
index 0000000000..3809ee9992
--- /dev/null
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -0,0 +1,194 @@
+"""Verrou subprocess runners for the FP-stability suite.
+
+Each routine drives the verrou/valgrind binary and returns parsed results.  Pure
+parsing / metric helpers live in fp_stability_metrics, which this module imports.
+"""
+
+import glob
+import os
+import shutil
+import subprocess
+import tempfile
+
+from .common import MFC_ROOT_DIR, MFCException
+from .fp_stability_metrics import (
+    VPREC_MANTISSA_BITS,
+    _max_diff_np,
+    _parse_cancel_gen,
+    _parse_vg_error_locs,
+)
+from .printer import cons
+
+
+def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool:
+    """True if `valgrind_bin --tool=verrou --version` exits 0, i.e. this valgrind
+    actually provides the 'verrou' tool (a plain system valgrind does not). Pass
+    env (with VALGRIND_LIB) to verify a relocated prebuilt tree. A binary that
+    cannot be launched, or that is still running after 60 s, reads as False."""
+    try:  # stdin detached + bounded wait: a stale wrapper script must not stall discovery
+        return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], env=env, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60, check=False).returncode == 0
+    except (OSError, subprocess.TimeoutExpired):
+        return False
+
+
+def _find_verrou() -> str:
+    """Locate a Verrou-enabled valgrind; return its path, or "" when none qualifies.
+
+    Checked in order: $VERROU_HOME/bin/valgrind (default home ~/.local/verrou), then
+    the first valgrind on PATH. Each must pass _has_verrou_tool under _verrou_env,
+    so a relocated prebuilt still verifies while a broken, stale or plain system
+    valgrind reads as "absent" (to be installed) rather than failing on every run.
+    """
+    default_home = os.path.join(os.path.expanduser("~"), ".local", "verrou")
+    home_vg = os.path.join(os.environ.get("VERROU_HOME", default_home), "bin", "valgrind")
+    if os.path.isfile(home_vg) and os.access(home_vg, os.X_OK):
+        if _has_verrou_tool(home_vg, _verrou_env(home_vg)):
+            return home_vg
+    on_path = shutil.which("valgrind")
+    verified = bool(on_path) and _has_verrou_tool(on_path, _verrou_env(on_path))
+    return on_path if verified else ""
+
+
+def _find_binary(name: str) -> str:
+    """Most recently modified build/install/*/bin/<name> under MFC_ROOT_DIR, or ""."""
+    hits = glob.glob(os.path.join(MFC_ROOT_DIR, "build", "install", "*", "bin", name))
+    return max(hits, key=os.path.getmtime) if hits else ""
+
+
+def _verrou_env(verrou_bin: str) -> dict:
+    """Copy of os.environ for launching verrou_bin. When <prefix>/libexec/valgrind
+    exists (prefix = two directories above verrou_bin) it is exported as
+    VALGRIND_LIB so a relocated install tree (e.g. an extracted prebuilt) can locate
+    its tool. A VALGRIND_LIB already set in the environment is never overridden."""
+    child_env = dict(os.environ)
+    if "VALGRIND_LIB" in child_env:
+        return child_env
+    prefix = os.path.dirname(os.path.dirname(verrou_bin))
+    tool_dir = os.path.join(prefix, "libexec", "valgrind")
+    return {**child_env, "VALGRIND_LIB": tool_dir} if os.path.isdir(tool_dir) else child_env
+
+
+def _write_inp(params: dict, target_name: str, work_dir: str) -> None:
+    """Write <work_dir>/<target_name>.inp as a &user_inputs namelist, keeping only keys listed by get_input_dict_keys(target_name)."""
+    from .run import case_dicts
+
+    allowed = case_dicts.get_input_dict_keys(target_name)
+    body = "\n".join(f"{key} = {val}" for key, val in params.items() if key in allowed)
+    with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as inp:
+        inp.write("&user_inputs\n" + body + "\n&end/\n")
+
+
+def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str):
+    _write_inp(pre_params, "pre_process", work_dir)  # the input namelist is written before launch
+    with open(os.path.join(work_dir, "pre.log"), "w") as log:  # stdout and stderr are captured together
+        exit_code = subprocess.run([pp_bin], stdout=log, stderr=subprocess.STDOUT, cwd=work_dir, check=False).returncode
+    if exit_code != 0:  # surface the failure and point at the captured log
+        raise MFCException(f"pre_process failed (rc={exit_code}). See {work_dir}/pre.log")
+
+
+def _run_simulation_verrou(
+    verrou_bin: str,
+    sim_bin: str,
+    work_dir: str,
+    run_dir: str,
+    rounding_mode: str = None,
+    extra_flags: list = None,
+):
+    """Run sim_bin under verrou in a throwaway directory and harvest its D/ output.
+
+    run_dir is created here (callers need not pre-create it) and receives
+    verrou.log, sim.out and every file the run leaves in D/. The inputs staged
+    from work_dir are p_all/ plus whichever of the .inp/.dat files below exist.
+    rounding_mode adds --rounding-mode=<mode> when truthy; extra_flags go just
+    before the binary. Raises MFCException when the run exits nonzero.
+    """
+    os.makedirs(run_dir, exist_ok=True)  # must exist before --log-file / sim.out are opened
+    optional_inputs = ("simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat")
+    with tempfile.TemporaryDirectory(prefix="mfc-fps-") as scratch:
+        for path in (os.path.join(work_dir, base) for base in optional_inputs):
+            if os.path.exists(path):
+                shutil.copy2(path, scratch)
+        shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(scratch, "p_all"))
+        out_dir = os.path.join(scratch, "D")
+        os.makedirs(out_dir)
+
+        # flag order: fixed options, rounding mode, caller extras, then the binary
+        mode_flag = [f"--rounding-mode={rounding_mode}"] if rounding_mode else []
+        log_flag = f"--log-file={os.path.join(run_dir, 'verrou.log')}"
+        argv = [verrou_bin, "--tool=verrou", "--error-limit=no", log_flag, *mode_flag, *(extra_flags or []), sim_bin]
+
+        with open(os.path.join(run_dir, "sim.out"), "w") as captured:
+            status = subprocess.run(argv, cwd=scratch, env=_verrou_env(verrou_bin), stdout=captured, stderr=subprocess.STDOUT, check=False).returncode
+
+        if status != 0:
+            label = rounding_mode or "vprec"
+            raise MFCException(f"simulation ({label}) exited {status}. See {run_dir}/sim.out")
+
+        for produced in os.listdir(out_dir):
+            shutil.copy2(os.path.join(out_dir, produced), run_dir)
+
+
+def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10):
+    """Cancellation pass at one bit threshold, run in <work_dir>/cancellation_<threshold>.
+
+    Runs the simulation in "nearest" mode with --check-cancellation=yes and has
+    verrou write cancel_gen.txt. Returns _parse_cancel_gen() of that file - the
+    [(fname, line)] cancellation sites - or None when the run itself failed (a
+    notice is printed), as distinct from [] = ran and found none.
+    """
+    run_dir = os.path.join(work_dir, f"cancellation_{threshold}")
+    gen_path = os.path.join(run_dir, "cancel_gen.txt")
+    flags = ["--check-cancellation=yes", f"--cc-threshold-double={threshold}", f"--cc-gen-file={gen_path}"]
+    try:
+        _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags)
+    except MFCException as exc:
+        cons.print(f"  [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]")
+        return None
+    else:
+        return _parse_cancel_gen(gen_path)
+
+
+def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
+    """Overflow pass: one "nearest" run with --check-max-float=yes in <work_dir>/float_max.
+
+    Returns what _parse_vg_error_locs extracts from that run's verrou.log for
+    "Max float" errors - the [(fname, line)] overflow sites - or None when the
+    run failed (a notice is printed), as distinct from [] = ran and found none.
+    """
+    out_dir = os.path.join(work_dir, "float_max")
+    overflow_flags = ["--check-max-float=yes"]
+    try:
+        _run_simulation_verrou(verrou_bin, sim_bin, work_dir, out_dir, rounding_mode="nearest", extra_flags=overflow_flags)
+    except MFCException as err:
+        cons.print(f"  [yellow]float-max run failed: {err}[/yellow]")
+        return None
+    verrou_log = os.path.join(out_dir, "verrou.log")
+    # the log is only parsed once the run has completed successfully
+    return _parse_vg_error_locs(verrou_log, "Max float")
+
+
+def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
+    """Single --rounding-mode=float run in <work_dir>/float_proxy; returns its Linf deviation from ref_dir over case["compare"]."""
+    proxy_dir = os.path.join(work_dir, "float_proxy")
+    _run_simulation_verrou(verrou_bin, sim_bin, work_dir, proxy_dir, rounding_mode="float")
+    return _max_diff_np(ref_dir, proxy_dir, case["compare"])
+
+
+def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list:
+    """Sweep the vprec backend over every level in VPREC_MANTISSA_BITS.
+
+    Each level runs in <work_dir>/vprec_<bits> with --vprec-precision-binary64=<bits>.
+    Returns [(bits, dev), ...] in sweep order; dev is _max_diff_np against ref_dir
+    over case["compare"], or float("inf") when that level raised MFCException.
+    """
+
+    def _deviation_at(bits: int) -> float:
+        level_dir = os.path.join(work_dir, f"vprec_{bits}")
+        flags = ["--backend=vprec", "--vprec-mode=full", f"--vprec-precision-binary64={bits}", "--vprec-range-binary64=11"]
+        try:
+            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, level_dir, extra_flags=flags)
+            return _max_diff_np(ref_dir, level_dir, case["compare"])
+        except MFCException:
+            return float("inf")
+
+    return [(bits, _deviation_at(bits)) for bits in VPREC_MANTISSA_BITS]
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
new file mode 100644
index 0000000000..6521705b96
--- /dev/null
+++ b/toolchain/mfc/test_fp_stability.py
@@ -0,0 +1,220 @@
+"""Unit tests for the pure helpers behind the FP-stability cancellation pass, its
+fypp macro-expansion flagging, scale-free pass/fail, and Verrou discovery/install.
+
+The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job;
+here we test only the pure functions that decide what to instrument and how to
+label results, so they can run without Verrou or built binaries. We keep the tests
+that pin a real behavioral contract or a subtle edge, not every micro-variation.
+"""
+
+from mfc.fp_stability_metrics import (
+    _autodetect_compare,
+    _cancellation_severity,
+    _macro_context_in_lines,
+    _sig_bits,
+)
+
+# --- fypp macro-expansion context detection (a #:for/#:def line maps to N instances) ---
+
+
+def test_macro_context_inside_for_loop_body():
+    # line 2 sits between "#:for" and "#:endfor", so it is attributed to the loop
+    opener = "#:for i in [1, 2, 3]\n"
+    body = "  q(${i}$) = a - b\n"
+    closer = "#:endfor\n"
+    context = _macro_context_in_lines([opener, body, closer], 2)
+    assert context == "#:for"
+
+
+def test_macro_context_if_block_is_not_duplicating():
+    # An #:if block only selects code - it never duplicates it - so a line inside
+    # one must come back with no macro context.
+    opener, guarded_line, closer = "#:if FOO\n", "  a = b - c\n", "#:endif\n"
+    source = [opener, guarded_line, closer]
+    line_no = 2
+    found = _macro_context_in_lines(source, line_no)
+    assert found is None
+
+
+def test_macro_context_unbalanced_close_is_safe():
+    stray_close = ["#:endfor\n", "  a = b - c\n"]  # closer with nothing open: no crash, no context
+    assert _macro_context_in_lines(stray_close, 2) is None
+
+
+# --- per-site cancellation severity (highest bit-threshold a site survives) ---
+
+
+def test_cancellation_severity_takes_highest_surviving_threshold():
+    # a.fpp:1 is reported at 10, 20 and 30 bits; b.fpp:2 only at 10
+    site_a = ("a.fpp", 1)
+    site_b = ("b.fpp", 2)
+    per_level = [(10, [site_a, site_b]), (20, [site_a]), (30, [site_a])]
+    severity = _cancellation_severity(per_level)
+    # each site maps to the highest threshold it was still reported at
+    assert severity == {site_a: 30, site_b: 10}
+
+
+# --- auto-detect which output files to compare (for a user case) ---
+
+
+def test_autodetect_compare_picks_cons_at_latest_step():
+    # cons files exist at steps 0 and 50, plus one prim file at step 50
+    early = ["cons.1.00.000000.dat"]
+    latest_cons = ["cons.1.00.000050.dat", "cons.2.00.000050.dat"]
+    latest_prim = ["prim.1.00.000050.dat"]
+    picked = _autodetect_compare(early + latest_cons + latest_prim)
+    # only the cons files of the latest step are kept
+    assert picked == latest_cons
+
+
+def test_autodetect_compare_falls_back_to_prim_when_no_cons():
+    prim_only = ["prim.1.00.000010.dat", "prim.3.00.000010.dat"]  # no cons.* present
+    assert _autodetect_compare(list(prim_only)) == prim_only
+
+
+# --- scale-free pass/fail: significant bits retained ---
+
+
+def test_sig_bits_is_scale_free():
+    small, large = _sig_bits(1e-9, 1.0), _sig_bits(1e-4, 1e5)  # both are a 1e-9 relative deviation
+    assert abs(small - large) < 1e-9
+
+
+def test_sig_bits_zero_scale_is_safe():
+    degenerate_scale = 0.0  # must not divide by zero; full precision (53 bits) is expected
+    assert _sig_bits(1e-12, degenerate_scale) == 53.0
+
+
+# --- report emitters: must survive the CI-only path without KeyError / regressions ---
+
+
+def _emit_to_tmp(results, tmp_path, monkeypatch):
+    """Call _emit_github_summary(results, 5) with GITHUB_ACTIONS=1 and GITHUB_STEP_SUMMARY aimed at a temp file; return that file's text."""
+    from mfc.fp_stability_report import _emit_github_summary
+
+    summary_file = tmp_path / "summary.md"
+    for var, value in (("GITHUB_STEP_SUMMARY", str(summary_file)), ("GITHUB_ACTIONS", "1")):
+        monkeypatch.setenv(var, value)
+    _emit_github_summary(results, 5)
+    return summary_file.read_text()
+
+
+def test_emit_summary_survives_blank_result(tmp_path, monkeypatch):
+    # _blank_result is the dict built on the per-case error path; the emitter must
+    # render it (as one failure) rather than die with a KeyError
+    from mfc.fp_stability import _blank_result
+
+    assert "0 passed, 1 failed" in _emit_to_tmp([_blank_result("x")], tmp_path, monkeypatch)
+
+
+def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys):
+    from mfc import fp_stability_report as report
+    from mfc.fp_stability import _blank_result
+
+    # one cancellation site at 40 bits, sitting inside a #:for expansion
+    site = ("src/x/m_a.fpp", 5)
+    result = _blank_result("demo")
+    result["cancellation_locs"] = [site]
+    result["cancellation_bits"] = {site: 40}
+    result["cancellation_macro"] = {site: "#:for"}
+
+    monkeypatch.setenv("GITHUB_ACTIONS", "1")
+    report._emit_github_annotations([result])
+    printed = capsys.readouterr().out
+    assert "::notice" in printed and "multiple instances" in printed  # fypp-expanded site flagged
+
+
+# --- Verrou discovery: a bare/broken valgrind must read as "Verrou absent" ---
+
+
+def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    # fake an executable $VERROU_HOME/bin/valgrind
+    bin_dir = tmp_path / "bin"
+    bin_dir.mkdir(parents=True)
+    fake_valgrind = bin_dir / "valgrind"
+    fake_valgrind.write_text("#!/bin/sh\n")
+    fake_valgrind.chmod(0o755)
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True)  # stub the probe: test precedence only
+    assert runners._find_verrou() == str(fake_valgrind)
+
+
+def test_find_verrou_rejects_broken_verrou_home_tree(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    # executable valgrind under $VERROU_HOME that fails the verrou probe, nothing on PATH -> ""
+    bin_dir = tmp_path / "bin"
+    bin_dir.mkdir(parents=True)
+    stale = bin_dir / "valgrind"
+    stale.write_text("#!/bin/sh\n")
+    stale.chmod(0o755)
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    monkeypatch.setattr(runners.shutil, "which", lambda _name: None)
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False)
+    assert runners._find_verrou() == ""
+
+
+def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    # empty $VERROU_HOME; PATH offers a plain valgrind that fails the verrou probe
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False)
+    monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind")
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    assert runners._find_verrou() == ""
+
+
+def test_has_verrou_tool_reflects_exit_code(monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    def _exits_with(code):
+        return lambda *args, **kwargs: type("R", (), {"returncode": code})()
+
+    def _cannot_launch(*args, **kwargs):
+        raise OSError("not executable")
+
+    scenarios = [  # exit 0 -> tool present; nonzero exit or a launch failure -> absent
+        (_exits_with(0), "/any/valgrind", True),
+        (_exits_with(1), "/any/valgrind", False),
+        (_cannot_launch, "/stale/valgrind", False),
+    ]
+    for fake_run, binary, expected in scenarios:
+        monkeypatch.setattr(runners.subprocess, "run", fake_run)
+        assert runners._has_verrou_tool(binary) is expected
+
+
+# --- env composition for relocated (prebuilt) Verrou trees ---
+
+
+def test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    tool_dir = tmp_path / "libexec" / "valgrind"
+    tool_dir.mkdir(parents=True)
+    monkeypatch.delenv("VALGRIND_LIB", raising=False)  # start without a user-provided value
+    assert runners._verrou_env(str(tmp_path / "bin" / "valgrind"))["VALGRIND_LIB"] == str(tool_dir)
+
+
+def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    user_choice = "/user/chosen/lib"
+    monkeypatch.setenv("VALGRIND_LIB", user_choice)
+    (tmp_path / "libexec" / "valgrind").mkdir(parents=True)  # would be picked if the variable were unset
+    assert runners._verrou_env(str(tmp_path / "bin" / "valgrind"))["VALGRIND_LIB"] == user_choice
+
+
+# --- auto-install hard-fail guard (a green bootstrap that produced no binary) ---
+
+
+def test_install_verrou_raises_when_no_binary_appears(monkeypatch):
+    import pytest
+
+    from mfc import fp_stability as fps
+
+    monkeypatch.setattr(fps, "_find_verrou", lambda: "")  # no Verrou ever shows up ...
+    monkeypatch.setattr(fps.subprocess, "run", lambda *args, **kwargs: type("R", (), {"returncode": 0})())  # ... though every command "succeeds"
+    with pytest.raises(fps.MFCException, match="no valgrind binary"):
+        fps._install_verrou()