From ac398abecac8ed76418ac2df10080157e1f6db1c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 1 Jun 2026 19:01:52 -0400
Subject: [PATCH 01/25] fp-stability: confirm, rank, and disambiguate dd_line
 hotspots

dd_line reports a minimal set of source lines, but until now they were presented as a flat, equally-weighted list of confident warnings. Three problems: (1) there was no check that the reported lines actually reproduce the instability; (2) fypp #:for/#:def expansion collapses many generated computations onto one .fpp line, so a hit can be the wrong instance; (3) the report for a multi-op line did not say which op was at fault.

This adds, reusing the verified Verrou --source mechanism (matches file+line+symbol, captured via --gen-source):

- Confirmation: perturb only the suspect lines; lines that fail to reproduce the deviation are downgraded from ::warning:: to ::notice:: (unconfirmed).

- Per-line ranking: perturb each line alone and rank by the share of float-proxy it reproduces, so the dominant computation is named (e.g. m_time_steppers.fpp:510 = 100%).

- Cancellation cross-reference: label dd_line hotspots that coincide with a stage-F catastrophic-cancellation site.

- Macro-expansion flag: mark hotspots whose .fpp line sits inside a #:for/#:def expansion as instance-ambiguous.

Surfaced in console, the GitHub step summary (ranked, tagged list), and inline annotations. Pure helpers covered by toolchain/mfc/test_fp_stability.py (22 tests, TDD). Verified end-to-end on a serial debug build.
---
 toolchain/mfc/fp_stability.py      | 300 +++++++++++++++++++++++++++--
 toolchain/mfc/test_fp_stability.py | 198 +++++++++++++++++++
 2 files changed, 480 insertions(+), 18 deletions(-)
 create mode 100644 toolchain/mfc/test_fp_stability.py

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index dd848f046c..fde268170c 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -20,6 +20,16 @@
 
 E. verrou_dd_line on failure, after dd_sym (--no-dd-line to skip)
    Further bisects to exact *source lines* within the responsible functions.
+   Each reported line is then *confirmed* by a positive control: --gen-source
+   captures the symbol-correct executed lines, those are filtered to the suspect
+   set, and a float-mode run with --source restricted to just them must
+   reproduce the instability.  Lines that do not reproduce it are reported as
+   unconfirmed (downgraded from ::warning:: to ::notice::).  Each line is then
+   perturbed alone and ranked by the share of the single-precision deviation it
+   reproduces, so the most flagrant computation is identified rather than a flat
+   list.  Hotspots are additionally cross-referenced against the stage-F
+   cancellation sites (to name the offending subtraction) and flagged as
+   instance-ambiguous when the .fpp line sits inside a #:for/#:def expansion.
 
 F. Cancellation detection (--no-cancellation to skip)
    One run with --check-cancellation=yes; reports MFC source lines that
@@ -79,6 +89,15 @@
 # Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)".
 _VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")
 
+# Fypp block directives. The duplicating ones (#:for expands to N copies; #:def
+# defines a macro instantiated at multiple call sites; #:block/#:call are treated
+# alike) collapse many generated computations onto one .fpp source line, so a
+# dd_line hit inside one cannot be pinned to a unique runtime instance. #:if/#:with/
+# #:mute select code but do not duplicate it: tracked for balance, not flagged.
+_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
+_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
+_FYPP_DUPLICATING = ("for", "def", "block", "call")
+
 # Lines that are clearly control-flow delimiters rather than arithmetic.
 # dd_line sometimes reports these when the responsible arithmetic is on the
 # preceding line but shares DWARF debug info with the delimiter (e.g. loop
@@ -114,6 +133,44 @@ def _read_source_line(fname: str, lineno: int) -> str:
         return ""
 
 
+def _macro_context_in_lines(lines: list, lineno: int) -> "str | None":
+    """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that
+    encloses `lineno` (1-based) in `lines`, or None if none does.
+
+    Used to flag dd_line hotspots whose .fpp line is shared across multiple
+    expanded instances (a #:for body, a #:def macro used in many places), where
+    line-level attribution cannot identify which instance is responsible.
+    """
+    stack = []  # keywords of the fypp blocks still open, outermost first
+    for raw in lines[: max(0, lineno - 1)]:  # only lines strictly before lineno are scanned
+        mo = _FYPP_BLOCK_OPEN.match(raw)
+        if mo:
+            stack.append(mo.group(1).lower())
+            continue
+        if _FYPP_BLOCK_CLOSE.match(raw) and stack:
+            stack.pop()  # closers are not matched to their opener: assumes well-nested input
+    for kw in reversed(stack):  # innermost open block first
+        if kw in _FYPP_DUPLICATING:
+            return f"#:{kw}"
+    return None
+
+
+def _macro_context(fname: str, lineno: int) -> "str | None":
+    """File-backed wrapper around _macro_context_in_lines; returns None when fname cannot be found or read."""
+    if os.path.isabs(fname) and os.path.isfile(fname):
+        candidates = [fname]
+    else:
+        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
+    if not candidates:
+        return None
+    try:
+        with open(candidates[0], encoding="utf-8", errors="replace") as fh:
+            lines = fh.readlines()  # undecodable bytes are replaced, so no UnicodeDecodeError escapes
+    except OSError:
+        return None
+    return _macro_context_in_lines(lines, lineno)
+
+
 def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
     """Return True if any line in [start, end] contains non-trivial arithmetic.
 
@@ -804,7 +861,9 @@ def _dd_env(verrou_bin: str) -> dict:
 
 
 def _parse_rddmin_locs(summary_path: str) -> list:
-    """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary.
+    """Extract dd_line locations from an rddmin_summary as
+    [{path, start, end, macro}] dicts (path is repo-relative; macro is the
+    enclosing fypp duplicating block, e.g. '#:for', or None).
 
     Filters out locations whose source lines are pure control-flow delimiters
     (loop boundaries, fypp directive closers, blank/comment lines).  These can
@@ -831,7 +890,7 @@ def _parse_rddmin_locs(summary_path: str) -> list:
                 rel = path
             rel = rel.replace("\\", "/")
             if _is_arithmetic_loc(path, start, end):
-                locs.append((rel, start, end))
+                locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)})
             else:
                 skipped.append((rel, start, end))
     for rel, start, end in skipped:
@@ -866,6 +925,75 @@ def _parse_rddmin_syms(summary_path: str) -> list:
     return syms
 
 
+def _build_source_filter(gen_lines: list, suspect_locs: list) -> list:
+    """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) on a suspect dd_line location.
+
+    gen_lines come from a --gen-source run and carry the exact symbol Verrou
+    requires (--source matches on file+line+symbol, not file+line alone).
+    suspect_locs are (path, start, end) tuples with an inclusive line range; the
+    path may be repo-relative while gen-source emits a basename, so matching is by
+    basename + line.  Unparsable gen_lines are skipped; matches keep input order.
+    """
+    ranges = {}  # basename -> [(start, end), ...]
+    for path, start, end in suspect_locs:
+        ranges.setdefault(os.path.basename(path), []).append((start, end))
+    out = []
+    for raw in gen_lines:
+        parts = raw.rstrip("\n").split("\t")
+        if len(parts) < 2:  # need at least FILE and LINE
+            continue
+        base = os.path.basename(parts[0].strip())
+        try:
+            ln = int(parts[1].strip())
+        except ValueError:  # non-numeric LINE field
+            continue
+        if any(s <= ln <= e for s, e in ranges.get(base, [])):
+            out.append(raw if raw.endswith("\n") else raw + "\n")  # keep the record verbatim, newline-terminated
+    return out
+
+
+def _confirm_decision(suspect_dev, dd_threshold: float):
+    """Decide whether perturbing only the suspect lines reproduces the instability.
+
+    Returns True if suspect_dev >= dd_threshold (confirmed), False if it is lower (inert
+    lines -> attribution suspect, e.g. macro-collapse misattribution), None if unmeasured.
+    """
+    if suspect_dev is None:
+        return None
+    return suspect_dev >= dd_threshold
+
+
+def _rank_locs(locs: list, total: float) -> list:
+    """Attach a 'share' (per-line deviation / total) to each loc dict — using the
+    'share_dev' a single-line positive control stored on it — and return a new
+    list of the same dicts sorted by that deviation, most flagrant first.
+
+    'total' is normally float_proxy, so share is the fraction of the full
+    single-precision deviation that perturbing that one line alone reproduces.
+    A falsy or non-positive total, or a missing/None share_dev, yields share=None.
+    """
+    for loc in locs:  # mutates the caller's dicts in place
+        dev = loc.get("share_dev")
+        loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None
+    return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True)  # None/missing sorts as 0.0
+
+
+def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
+    """Set loc['cancellation'] on every dd_line loc: True if its inclusive line range covers a
+    catastrophic-cancellation site (stage F, (fname, lineno) pairs), matched by basename + line; else False.
+
+    This pins the flagrant operation on a multi-op line to the subtraction that
+    cancels, rather than just naming the line.  Mutates and returns dd_line_locs.
+    """
+    by_base = {}  # basename -> set of cancellation line numbers
+    for fname, lineno in cancellation_locs:
+        by_base.setdefault(os.path.basename(fname), set()).add(lineno)
+    for loc in dd_line_locs:
+        lines = by_base.get(os.path.basename(loc["path"]), set())
+        loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1))
+    return dd_line_locs
+
+
 def _run_dd_tool(
     dd_bin: str,
     dd_dir: str,
@@ -924,7 +1052,7 @@ def _run_dd_line(
     log_dir: str,
     threshold: float = None,
 ) -> list:
-    """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples."""
+    """Run verrou_dd_line; return [{path, start, end, macro}] location dicts."""
     dd_bin = _find_dd_line(verrou_bin)
     if not dd_bin:
         cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
@@ -941,6 +1069,86 @@ def _run_dd_line(
     return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
 
 
+def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag):
+    """Perturb only the lines in src_lines (deterministic float mode, via a --source file written under conf_dir) and
+    return the L-inf deviation from the nearest-rounding reference, or None if the run raises MFCException."""
+    src_path = os.path.join(conf_dir, f"source_{tag}.txt")  # per-tag file, so runs do not overwrite each other
+    with open(src_path, "w") as fh:
+        fh.writelines(src_lines)
+    run_dir = os.path.join(conf_dir, f"perturb_{tag}")
+    os.makedirs(run_dir, exist_ok=True)
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            run_dir,
+            rounding_mode="float",
+            extra_flags=[f"--source={src_path}"],
+        )
+    except MFCException:  # run failed -> deviation unmeasured
+        return None
+    return _max_diff_np(ref_dir, run_dir, compare)
+
+
+def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy):
+    """Positive control for dd_line: perturb ONLY the suspect lines and confirm
+    the instability reproduces, then rank each line by its individual share.
+
+    Verrou's --source matches file+line+symbol (not file+line alone), so we first
+    capture the symbol-correct executed source lines via --gen-source, filter them
+    to the suspect set, then run deterministic float-mode restricted to just those
+    lines.  If the suspect-only deviation reaches dd_threshold the attribution is
+    confirmed; if it stays near zero the reported lines do not actually carry the
+    instability (e.g. a #:for-expanded line blamed for the wrong instance).
+
+    Each line is then perturbed alone so its 'share_dev' (and 'share' of
+    float_proxy) shows which computation dominates.
+
+    Returns (confirmed, suspect_dev, ranked_locs); (None, None, dd_line_locs) when nothing could be measured.
+    """
+    if not dd_line_locs:
+        return None, None, dd_line_locs
+    conf_dir = os.path.join(work_dir, "confirm")
+    os.makedirs(conf_dir, exist_ok=True)
+    gen_path = os.path.join(conf_dir, "gen_source.txt")
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            conf_dir,
+            rounding_mode="nearest",
+            extra_flags=[f"--gen-source={gen_path}"],
+        )
+    except MFCException:  # gen-source run failed -> unmeasured
+        return None, None, dd_line_locs
+    if not os.path.isfile(gen_path):  # run succeeded but produced no gen-source file
+        return None, None, dd_line_locs
+    with open(gen_path) as fh:
+        gen_lines = fh.readlines()
+    compare = case["compare"]
+
+    # whole-set positive control
+    suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs]
+    set_src = _build_source_filter(gen_lines, suspects)
+    if not set_src:
+        # none of the reported lines performs an instrumented FP op -> not reproduced
+        return False, 0.0, dd_line_locs
+    set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set")
+    confirmed = _confirm_decision(set_dev, dd_threshold)
+
+    # per-line ranking (a single line trivially owns the whole set deviation)
+    if len(dd_line_locs) == 1:
+        dd_line_locs[0]["share_dev"] = set_dev
+    else:
+        for i, loc in enumerate(dd_line_locs):
+            one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])])
+            loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0
+    ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev))  # falls back to set_dev when float_proxy is None or 0
+    return confirmed, set_dev, ranked
+
+
 def _run_case(
     case: dict,
     verrou_bin: str,
@@ -976,6 +1184,8 @@ def _run_case(
         "vprec": [],
         "dd_sym_syms": [],
         "dd_line_locs": [],
+        "dd_line_confirmed": None,
+        "dd_line_confirm_dev": None,
         "cancellation_locs": [],
         "mca_dev": None,
         "mca_sigbits": None,
@@ -1060,9 +1270,30 @@ def _run_case(
                     log_dir,
                     threshold=dd_threshold,
                 )
+                macro_n = sum(1 for loc in result["dd_line_locs"] if loc["macro"])
+                if macro_n:
+                    cons.print(f"  [dim]dd_line: {macro_n} hotspot(s) inside fypp-expanded code (instance-ambiguous)[/dim]")
             except Exception as exc:
                 cons.print(f"  [bold yellow]dd_line error[/bold yellow]: {exc}")
 
+        # --- E2: confirm dd_line hotspots and rank each by its individual share ---
+        if dd_threshold > 0 and run_dd_line and result["dd_line_locs"]:
+            cons.print("  [dim]confirming + ranking dd_line hotspots (per-line perturbation)...[/dim]")
+            try:
+                confirmed, cdev, ranked = _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, result["dd_line_locs"], dd_threshold, float_proxy)
+                result["dd_line_locs"] = ranked
+                result["dd_line_confirmed"] = confirmed
+                result["dd_line_confirm_dev"] = cdev
+                if confirmed is True:
+                    cons.print(f"  [bold green]dd_line confirmed[/bold green]: suspect-only dev={cdev:.3e} >= {dd_threshold:.1e}")
+                elif confirmed is False:
+                    cons.print(f"  [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)")
+                top = ranked[0] if ranked else None
+                if top and top.get("share") is not None:
+                    cons.print(f"  most flagrant: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)")
+            except Exception as exc:
+                cons.print(f"  [bold yellow]dd_line confirmation error[/bold yellow]: {exc}")
+
         # --- F: cancellation detection ---
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
@@ -1073,6 +1304,12 @@ def _run_case(
                     cons.print(f"  cancellation: {len(locs)} unique source location(s)")
                 else:
                     cons.print("  cancellation: none detected")
+                # cross-reference: label dd_line hotspots that sit on a cancellation site
+                if result["dd_line_locs"] and locs:
+                    _mark_cancellation(result["dd_line_locs"], locs)
+                    n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation"))
+                    if n_xref:
+                        cons.print(f"  {n_xref} hotspot(s) coincide with a catastrophic-cancellation site")
             except Exception as exc:
                 cons.print(f"  [bold yellow]cancellation check error[/bold yellow]: {exc}")
 
@@ -1114,23 +1351,37 @@ def _emit_github_annotations(results: list):
     Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations
     appear inline on the responsible source lines in the PR diff view.
 
-    Up to 3 dd_line locations are emitted as ::warning:: per case (minimal
-    responsible lines from delta-debug).  Up to 3 cancellation sites per case
-    are emitted as ::notice:: so the diff also highlights subtraction-
-    cancellation hotspots identified by --check-cancellation.
+    Up to 3 dd_line locations are emitted per case (minimal responsible lines
+    from delta-debug).  Confirmed hotspots (suspect-only perturbation reproduced
+    the instability) are ::warning::; unconfirmed ones are downgraded to
+    ::notice:: so a suspect attribution is not presented as fact.  Up to 3
+    cancellation sites per case are emitted as ::notice:: so the diff also
+    highlights subtraction-cancellation hotspots from --check-cancellation.
     """
     if not os.environ.get("GITHUB_ACTIONS"):
         return
     for r in results:
         status = "FAIL" if not r["passed"] else "hotspot"
         dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})"
-
-        for rel_path, start, end in r.get("dd_line_locs", [])[:3]:
-            loc = f"file={rel_path},line={start}"
-            if end != start:
-                loc += f",endLine={end}"
-            title = f"FP {status} [{r['name']}]"
-            print(f"::warning {loc},title={title}::{dev_str}", flush=True)
+        unconfirmed = r.get("dd_line_confirmed") is False
+
+        for loc in r.get("dd_line_locs", [])[:3]:
+            location = f"file={loc['path']},line={loc['start']}"
+            if loc["end"] != loc["start"]:
+                location += f",endLine={loc['end']}"
+            note = dev_str
+            if loc.get("share") is not None:
+                note += f" — reproduces {loc['share'] * 100:.0f}% of float-proxy alone"
+            if loc.get("cancellation"):
+                note += " — catastrophic cancellation site"
+            if loc.get("macro"):
+                note += f" — {loc['macro']}-expanded line, may represent multiple instances"
+            if unconfirmed:
+                title = f"FP candidate (unconfirmed) [{r['name']}]"
+                print(f"::notice {location},title={title}::{note}", flush=True)
+            else:
+                title = f"FP {status} [{r['name']}]"
+                print(f"::warning {location},title={title}::{note}", flush=True)
 
         for fname, lineno in r.get("cancellation_locs", [])[:3]:
             loc = f"file={fname},line={lineno}"
@@ -1192,12 +1443,23 @@ def _emit_github_summary(results: list, n_samples: int):
     cases_with_locs = [r for r in results if r["dd_line_locs"]]
     if cases_with_locs:
         md.append("### Top FP hotspots (dd\\_line)\n")
+        _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
         for r in cases_with_locs:
             status = "❌ FAIL" if not r["passed"] else "✅ pass"
-            md.append(f"**`{r['name']}`** ({status})\n")
-            for rel_path, start, end in r["dd_line_locs"][:10]:
-                loc = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
-                md.append(f"- `{loc}`")
+            md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}")
+            md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n")
+            for loc in r["dd_line_locs"][:10]:
+                rel_path, start, end = loc["path"], loc["start"], loc["end"]
+                where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
+                tags = []
+                if loc.get("share") is not None:
+                    tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy")
+                if loc.get("cancellation"):
+                    tags.append("catastrophic cancellation")
+                if loc.get("macro"):
+                    tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
+                suffix = f" — {', '.join(tags)}" if tags else ""
+                md.append(f"- `{where}`{suffix}")
                 snippet = _get_source_context(rel_path, start)
                 if snippet:
                     md.append("  ```fortran")
@@ -1328,6 +1590,8 @@ def fp_stability():
                 "vprec": [],
                 "dd_sym_syms": [],
                 "dd_line_locs": [],
+                "dd_line_confirmed": None,
+                "dd_line_confirm_dev": None,
                 "cancellation_locs": [],
                 "mca_dev": None,
                 "mca_sigbits": None,
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
new file mode 100644
index 0000000000..694da7d906
--- /dev/null
+++ b/toolchain/mfc/test_fp_stability.py
@@ -0,0 +1,198 @@
+"""Unit tests for the pure helpers behind the FP-stability dd_line confirmation
+pass (#1), macro-expansion flagging (#2), per-line ranking (Tier 1) and cancellation cross-reference (Tier 1b).
+
+The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job;
+here we test only the pure functions that decide what to instrument and how to
+label results, so they can run without Verrou or built binaries.
+"""
+
+from mfc.fp_stability import (
+    _build_source_filter,
+    _confirm_decision,
+    _macro_context_in_lines,
+    _mark_cancellation,
+    _rank_locs,
+)
+
+# --- #2: fypp macro-expansion context detection ---
+
+
+def test_macro_context_none_outside_any_block():
+    lines = [
+        "subroutine s_foo()\n",
+        "  a = b - c\n",
+        "end subroutine\n",
+    ]
+    assert _macro_context_in_lines(lines, 2) is None
+
+
+def test_macro_context_inside_for_loop_body():
+    lines = [
+        "#:for i in [1, 2, 3]\n",
+        "  q(${i}$) = a - b\n",
+        "#:endfor\n",
+    ]
+    assert _macro_context_in_lines(lines, 2) == "#:for"
+
+
+def test_macro_context_if_block_is_not_duplicating():
+    lines = [
+        "#:if FOO\n",
+        "  a = b - c\n",
+        "#:endif\n",
+    ]
+    assert _macro_context_in_lines(lines, 2) is None
+
+
+def test_macro_context_reports_innermost_duplicating_block():
+    lines = [
+        "#:def MACRO(x)\n",
+        "  #:if cond\n",
+        "    #:for j in range(3)\n",
+        "      y = ${x}$ - z\n",
+        "    #:endfor\n",
+        "  #:endif\n",
+        "#:enddef\n",
+    ]
+    assert _macro_context_in_lines(lines, 4) == "#:for"
+
+
+def test_macro_context_balances_closers():
+    lines = [
+        "#:for i in [1, 2]\n",
+        "  a = b - c\n",
+        "#:endfor\n",
+        "d = e - f\n",
+    ]
+    # line 4 is after the loop closed -> not in any duplicating block
+    assert _macro_context_in_lines(lines, 4) is None
+
+
+def test_macro_context_def_body_when_no_inner_loop():
+    lines = [
+        "#:def GEOM(n)\n",
+        "  r = x - y\n",
+        "#:enddef\n",
+    ]
+    assert _macro_context_in_lines(lines, 2) == "#:def"
+
+
+# --- #1: building the symbol-correct --source filter from --gen-source output ---
+
+
+def test_build_source_filter_keeps_matching_file_and_line_with_symbol():
+    gen = [
+        "m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n",
+        "m_riemann_solvers.fpp\t999\ts_other\n",
+    ]
+    suspects = [("src/simulation/m_riemann_solvers.fpp", 512, 512)]
+    out = _build_source_filter(gen, suspects)
+    assert out == ["m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n"]
+
+
+def test_build_source_filter_matches_inclusive_range():
+    gen = [
+        "m_foo.fpp\t10\tsym\n",
+        "m_foo.fpp\t11\tsym\n",
+        "m_foo.fpp\t12\tsym\n",
+        "m_foo.fpp\t13\tsym\n",
+    ]
+    suspects = [("m_foo.fpp", 11, 12)]
+    out = _build_source_filter(gen, suspects)
+    assert out == ["m_foo.fpp\t11\tsym\n", "m_foo.fpp\t12\tsym\n"]
+
+
+def test_build_source_filter_excludes_other_basenames():
+    gen = ["m_bar.fpp\t5\tsym\n"]
+    suspects = [("m_foo.fpp", 5, 5)]
+    assert _build_source_filter(gen, suspects) == []
+
+
+def test_build_source_filter_matches_on_basename_not_full_path():
+    # gen-source emits a basename; dd_line locs are repo-relative paths.
+    gen = ["m_foo.fpp\t5\tsym\n"]
+    suspects = [("src/common/m_foo.fpp", 5, 5)]
+    assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"]
+
+
+def test_build_source_filter_skips_malformed_lines():
+    gen = ["garbage-no-tab\n", "m_foo.fpp\tnotanumber\tsym\n", "m_foo.fpp\t5\tsym\n"]
+    suspects = [("m_foo.fpp", 5, 5)]
+    assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"]
+
+
+# --- #1: confirmation decision ---
+
+
+def test_confirm_decision_true_when_suspect_reproduces_deviation():
+    # perturbing only the suspect lines yields >= dd_threshold deviation
+    assert _confirm_decision(suspect_dev=1e-3, dd_threshold=1e-5) is True
+
+
+def test_confirm_decision_false_when_suspect_is_inert():
+    # suspect lines barely move the result -> attribution not reproduced
+    assert _confirm_decision(suspect_dev=1e-9, dd_threshold=1e-5) is False
+
+
+def test_confirm_decision_none_when_measurement_unavailable():
+    assert _confirm_decision(suspect_dev=None, dd_threshold=1e-5) is None
+
+
+# --- Tier 1: per-line confirmation ranking ---
+
+
+def test_rank_locs_sorts_by_share_dev_descending():
+    locs = [
+        {"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.1},
+        {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.9},
+    ]
+    ranked = _rank_locs(locs, total=1.0)
+    assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"]
+
+
+def test_rank_locs_computes_share_as_fraction_of_total():
+    locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}]
+    ranked = _rank_locs(locs, total=0.5)
+    assert ranked[0]["share"] == 0.5
+
+
+def test_rank_locs_share_none_when_total_nonpositive():
+    locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}]
+    ranked = _rank_locs(locs, total=0.0)
+    assert ranked[0]["share"] is None
+
+
+def test_rank_locs_treats_missing_share_dev_as_zero_and_sorts_last():
+    locs = [
+        {"path": "a.fpp", "start": 1, "end": 1, "share_dev": None},
+        {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.3},
+    ]
+    ranked = _rank_locs(locs, total=1.0)
+    assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"]
+
+
+# --- Tier 1b: dd_line x cancellation cross-reference ---
+
+
+def test_mark_cancellation_flags_loc_on_a_cancellation_line():
+    locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}]
+    _mark_cancellation(locs, [("m_foo.fpp", 11)])
+    assert locs[0]["cancellation"] is True
+
+
+def test_mark_cancellation_false_when_no_site_in_range():
+    locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}]
+    _mark_cancellation(locs, [("m_foo.fpp", 99)])
+    assert locs[0]["cancellation"] is False
+
+
+def test_mark_cancellation_matches_on_basename_not_full_path():
+    locs = [{"path": "src/common/m_foo.fpp", "start": 5, "end": 5}]
+    _mark_cancellation(locs, [("/abs/build/m_foo.fpp", 5)])
+    assert locs[0]["cancellation"] is True
+
+
+def test_mark_cancellation_false_for_different_basename():
+    locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}]
+    _mark_cancellation(locs, [("m_bar.fpp", 5)])
+    assert locs[0]["cancellation"] is False

From 196aff5e461e01f8fbdaf15f92b2f93c3229dba6 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 1 Jun 2026 20:12:13 -0400
Subject: [PATCH 02/25] fp-stability: per-instance disambiguation of
 fypp-expanded hotspots (Tier 2)

dd_line attributes to .fpp source lines, but a #:for/#:def expansion collapses many generated computations onto one line, so a macro-ambiguous hotspot cannot be pinned to a single runtime instance. This adds an opt-in precision path that resolves it.

Mechanism (validated against gfortran+Verrou): a new build flag --fp-precision-lines strips the fypp line markers from each generated .f90 so the compiler attributes every expanded instance to a distinct physical line, emitting a .linemap.json sidecar mapping each line back to (.fpp file, line, instance). Marker renumbering was tried first but hit gfortran's DWARF line-number ceiling (~300k) and 700-line shadow runs; stripping avoids both and survives the cpp #if layer.

fp-stability gains --precision-sim-binary: for the most flagrant macro-ambiguous hotspot, each expanded instance is perturbed alone (Verrou --source) on the precision binary and ranked, naming the responsible instance and showing its concrete generated code. The strip is gated to the simulation target only (pre/post run on CPU).

Validated end-to-end: m_weno.fpp:238 (3 #:for instances) resolved to instance #0 = s_cb(i+3)-s_cb(i+1). toolchain/mfc/fp_precision_lines.py is pure + TDD'd (12 tests); normal build path is byte-identical and unaffected.
---
 CMakeLists.txt                           |  34 ++++++-
 toolchain/mfc/build.py                   |   1 +
 toolchain/mfc/cli/commands.py            |  14 +++
 toolchain/mfc/fp_precision_lines.py      | 123 +++++++++++++++++++++++
 toolchain/mfc/fp_stability.py            |  98 ++++++++++++++++++
 toolchain/mfc/test_fp_precision_lines.py | 112 +++++++++++++++++++++
 6 files changed, 380 insertions(+), 2 deletions(-)
 create mode 100644 toolchain/mfc/fp_precision_lines.py
 create mode 100644 toolchain/mfc/test_fp_precision_lines.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83bbb8fe0e..532c377702 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@ option(MFC_DOCUMENTATION "Build documentation"                               OFF
 option(MFC_ALL           "Build everything"                                  OFF)
 option(MFC_SINGLE_PRECISION "Build single precision"                         OFF)
 option(MFC_MIXED_PRECISION "Build mixed precision"                           OFF)
+option(MFC_FP_PRECISION_LINES "Strip fypp markers for per-instance fp-stability attribution" OFF)
 
 if (MFC_ALL)
     set(MFC_PRE_PROCESS   ON FORCE)
@@ -433,8 +434,24 @@ macro(HANDLE_SOURCES target useCommon)
         cmake_path(GET fpp FILENAME fpp_filename)
         set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90")
 
+        # In a precision-lines build, Fypp writes a marked intermediate that is
+        # then stripped of its line markers (so each expanded instance compiles
+        # to a distinct physical line) before compilation; the strip step emits a
+        # .linemap.json sidecar.  Otherwise Fypp writes ${f90} directly.  Only the
+        # simulation target is analyzed by fp-stability, so pre/post_process are
+        # always built normally.
+        set(_precision_lines OFF)
+        if (MFC_FP_PRECISION_LINES AND "${target}" STREQUAL "simulation")
+            set(_precision_lines ON)
+        endif()
+        if (_precision_lines)
+            set(f90_out "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.marked.f90")
+        else()
+            set(f90_out "${f90}")
+        endif()
+
         add_custom_command(
-            OUTPUT   ${f90}
+            OUTPUT   ${f90_out}
             COMMAND  ${FYPP_EXE} -m re
                                  -I "${CMAKE_BINARY_DIR}/include/${target}"
                                  -I "${${target}_DIR}/include"
@@ -450,12 +467,25 @@ macro(HANDLE_SOURCES target useCommon)
 								 --line-length=999
 		 						 --line-numbering-mode=nocontlines
                                  ${FYPP_GCOV_OPTS}
-                                 "${fpp}" "${f90}"
+                                 "${fpp}" "${f90_out}"
             DEPENDS  "${fpp};${${target}_incs}"
             COMMENT  "Preprocessing (Fypp) ${fpp_filename}"
             VERBATIM
         )
 
+        if (_precision_lines)
+            add_custom_command(
+                OUTPUT   ${f90}
+                COMMAND  ${Python3_EXECUTABLE}
+                         "${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py"
+                         "${f90_out}" "${f90}"
+                         "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.linemap.json"
+                DEPENDS  "${f90_out};${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py"
+                COMMENT  "Stripping markers (fp-precision-lines) ${fpp_filename}"
+                VERBATIM
+            )
+        endif()
+
         list(APPEND ${target}_SRCs ${f90})
     endforeach()
 endmacro()
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 01efb1a9b1..01a0c8ece3 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -421,6 +421,7 @@ def configure(self, case: Case):
             flags.append(f"-DMFC_GCov={'ON' if ARG('gcov') else 'OFF'}")
             flags.append(f"-DMFC_Unified={'ON' if ARG('unified') else 'OFF'}")
             flags.append(f"-DMFC_Fastmath={'ON' if ARG('fastmath') else 'OFF'}")
+            flags.append(f"-DMFC_FP_PRECISION_LINES={'ON' if ARG('fp_precision_lines') else 'OFF'}")
 
         command = ["cmake"] + flags + ["-S", cmake_dirpath, "-B", build_dirpath]
 
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 54bbff4641..cff47c3ecf 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -141,6 +141,13 @@
             default=False,
             dest="deps_only",
         ),
+        Argument(
+            name="fp-precision-lines",
+            help="(fp-stability) Strip fypp line markers so each expanded instance gets a distinct line; emits sidecars for per-instance attribution.",
+            action=ArgAction.STORE_TRUE,
+            default=False,
+            dest="fp_precision_lines",
+        ),
     ],
     examples=[
         Example("./mfc.sh build", "Build all default targets (CPU)"),
@@ -938,6 +945,13 @@
             default=None,
             metavar="PATH",
         ),
+        Argument(
+            name="precision-sim-binary",
+            help="Path to a simulation binary built with --fp-precision-lines. When given, macro-ambiguous hotspots are disambiguated to the individual fypp-expanded instance.",
+            default=None,
+            dest="precision_sim_binary",
+            metavar="PATH",
+        ),
         Argument(
             name="samples",
             short="N",
diff --git a/toolchain/mfc/fp_precision_lines.py b/toolchain/mfc/fp_precision_lines.py
new file mode 100644
index 0000000000..6dc1df04c1
--- /dev/null
+++ b/toolchain/mfc/fp_precision_lines.py
@@ -0,0 +1,123 @@
+"""FP-stability precision-lines transform (Tier 2).
+
+A fypp #:for/#:def expansion emits many generated computations that all carry
+the same cpp line marker (`# N "file.fpp"`), so DWARF — and therefore Verrou —
+collapse every expanded instance onto one .fpp line.  This transform removes the
+fypp line markers from a generated .f90 so the compiler attributes each statement
+to the generated file's own physical line (which *is* distinct per expanded
+instance), and records a sidecar mapping each surviving physical line back to
+(file, original .fpp line, instance index).  Genuine cpp directives
+(#if/#define/#endif/...) are preserved so conditional compilation is unchanged.
+
+When the stripped .f90 is compiled, Verrou attributes — and fp-stability ranks
+and isolates via --source — per expanded instance rather than per source line.
+Used only by a dedicated precision build (MFC_FP_PRECISION_LINES); the normal
+build is unaffected.  The mechanism (stripped markers -> instance-distinct
+physical-line attribution -> per-instance Verrou --source isolation, surviving
+the cpp #if layer) is validated against gfortran + Verrou.
+"""
+
+import json
+import os
+import re
+
+# A fypp line marker: "# <number> "<file>"" possibly with trailing flags.  A cpp
+# conditional/define directive (#if, #define, #endif, ...) has a word, not a
+# number, after the '#', so the two are unambiguous.
+_FYPP_MARKER = re.compile(r'^#\s+(\d+)\s+"([^"]+)"')
+# Any other preprocessor directive line (kept, but it is not a .fpp source line,
+# so it neither consumes a source-line increment nor gets a sidecar entry).
+_CPP_DIRECTIVE = re.compile(r"^\s*#")
+
+
+def strip_markers(lines: list) -> tuple:
+    """Strip fypp line markers; return (output_lines, sidecar).
+
+    sidecar maps each 1-based physical output line number to
+    {"file", "line", "instance"}: the .fpp file, the .fpp line that physical
+    line came from (auto-incremented within a marker region), and how many times
+    that marker's (file, line) had been seen before (0 = first/real occurrence,
+    >=1 = an expanded instance).
+    """
+    seen = {}
+    out = []
+    sidecar = {}
+    cur_file = None
+    cur_line = None
+    cur_instance = None
+    for raw in lines:
+        m = _FYPP_MARKER.match(raw)
+        if m:
+            cur_file = m.group(2)
+            cur_line = int(m.group(1))
+            cur_instance = seen.get((cur_file, cur_line), 0)
+            seen[(cur_file, cur_line)] = cur_instance + 1
+            continue  # drop the marker line
+        out.append(raw)
+        if cur_file is None or _CPP_DIRECTIVE.match(raw):
+            # cpp directives are kept verbatim but are not .fpp source lines
+            continue
+        sidecar[len(out)] = {"file": cur_file, "line": cur_line, "instance": cur_instance}
+        cur_line += 1  # subsequent physical source lines map to the next .fpp line
+    return out, sidecar
+
+
+def transform_file(in_path: str, out_path: str, sidecar_path: str) -> int:
+    """Strip a generated .f90 to its precision-lines variant.
+
+    Reads in_path, writes the marker-stripped source to out_path and the sidecar
+    JSON to sidecar_path.  Returns the number of mapped physical lines.
+    """
+    with open(in_path) as fh:
+        lines = fh.readlines()
+    out, sidecar = strip_markers(lines)
+    with open(out_path, "w") as fh:
+        fh.writelines(out)
+    with open(sidecar_path, "w") as fh:
+        json.dump({str(k): v for k, v in sidecar.items()}, fh)
+    return len(sidecar)
+
+
+# --- consumption side (Tier 2): locating and querying the sidecars ---
+
+
+def sidecar_dir_for_binary(sim_bin: str) -> str:
+    """Map a precision simulation binary path to its sidecar directory.
+
+    .../build/install/<hash>/bin/simulation -> .../build/staging/<hash>/fypp/simulation
+    """
+    bin_dir = os.path.dirname(os.path.abspath(sim_bin))  # .../install/<hash>/bin
+    hash_dir = os.path.dirname(bin_dir)  # .../install/<hash>
+    cfg_hash = os.path.basename(hash_dir)
+    build_root = os.path.dirname(os.path.dirname(hash_dir))  # .../build
+    return os.path.join(build_root, "staging", cfg_hash, "fypp", "simulation")
+
+
+def sidecar_path(sidecar_dir: str, fpp_file: str) -> str:
+    """Sidecar JSON path for a .fpp file: <dir>/<basename>.linemap.json."""
+    return os.path.join(sidecar_dir, os.path.basename(fpp_file) + ".linemap.json")
+
+
+def load_sidecar(path: str) -> dict:
+    """Load a sidecar JSON into {physical_line:int -> {file, line, instance}}; returns {} if the file is missing."""
+    if not os.path.isfile(path):
+        return {}
+    with open(path) as fh:
+        raw = json.load(fh)
+    return {int(k): v for k, v in raw.items()}
+
+
+def instances_of(sidecar: dict, fpp_file: str, fpp_line: int) -> list:
+    """Return [(physical_line, instance), ...] (sorted by physical line) for every
+    expanded instance of fpp_file:fpp_line, matched by basename."""
+    base = os.path.basename(fpp_file)
+    hits = [(physline, entry["instance"]) for physline, entry in sidecar.items() if os.path.basename(entry["file"]) == base and entry["line"] == fpp_line]
+    return sorted(hits)
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) != 4:
+        sys.exit("usage: fp_precision_lines.py <in.f90> <out.f90> <sidecar.json>")
+    transform_file(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index fde268170c..8ac03d2b87 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -44,6 +44,15 @@
    One run with --check-max-float=yes; reports locations where a
    double→float conversion would overflow to ±Inf.
 
+I. Per-instance disambiguation (--precision-sim-binary PATH; opt-in)
+   A fypp #:for/#:def expansion collapses many generated computations onto one
+   .fpp line, so a macro-ambiguous hotspot cannot be pinned to a single runtime
+   instance.  Given a simulation binary built with `--fp-precision-lines` (markers
+   stripped so each instance is a distinct line, plus .linemap.json sidecars), the
+   most flagrant macro-ambiguous hotspot is disambiguated: each expanded instance
+   is perturbed alone on the precision binary and the instances are ranked by
+   deviation, naming the responsible instance and showing its generated code.
+
 Logs are saved to fp-stability-logs/ and uploaded as CI artifacts.
 On GitHub Actions: a step summary table and ::warning:: file annotations
 are emitted automatically so failing source lines appear in the PR diff.
@@ -1149,6 +1158,67 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs
     return confirmed, set_dev, ranked
 
 
+def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line):
+    """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot.
+
+    Uses a precision binary (built with --fp-precision-lines) in which each
+    expanded instance of hotspot_file:hotspot_line compiles to a distinct
+    physical .f90 line.  The sidecar enumerates those physical lines; each is
+    perturbed alone (float mode, vs the precision binary's own nearest-rounding
+    reference) so the dominant instance is identified.
+
+    Returns a list of {instance, physline, dev, snippet} sorted most-flagrant
+    first (empty if no sidecar entries, a failed Verrou run, no gen-source output, or no instrumented instances).
+    """
+    from . import fp_precision_lines as fpl
+
+    sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin)
+    sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file))
+    instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line)
+    if not instances:
+        return []
+
+    prec_dir = os.path.join(work_dir, "precision")
+    ref_dir = os.path.join(prec_dir, "ref")
+    os.makedirs(ref_dir, exist_ok=True)
+    gen_path = os.path.join(prec_dir, "gen_source.txt")
+    try:
+        _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest")
+        _run_simulation_verrou(
+            verrou_bin,
+            prec_sim_bin,
+            work_dir,
+            prec_dir,
+            rounding_mode="nearest",
+            extra_flags=[f"--gen-source={gen_path}"],
+        )
+    except MFCException:
+        return []
+    if not os.path.isfile(gen_path):
+        return []
+    with open(gen_path) as fh:
+        gen_lines = fh.readlines()
+
+    f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90")
+    compare = case["compare"]
+    results = []
+    for physline, instance in instances:
+        src = _build_source_filter(gen_lines, [(f90_file, physline, physline)])
+        if not src:
+            continue  # this instance performs no instrumented FP op
+        dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}")
+        results.append(
+            {
+                "instance": instance,
+                "physline": physline,
+                "dev": dev or 0.0,
+                "snippet": _read_source_line(f90_file, physline).strip(),
+            }
+        )
+    results.sort(key=lambda r: r["dev"], reverse=True)
+    return results
+
+
 def _run_case(
     case: dict,
     verrou_bin: str,
@@ -1163,6 +1233,7 @@ def _run_case(
     run_cancellation: bool,
     run_mca: bool,
     run_float_max: bool,
+    prec_sim_bin: str = None,
 ) -> dict:
     name = case["name"]
     threshold = case["threshold"]
@@ -1294,6 +1365,24 @@ def _run_case(
             except Exception as exc:
                 cons.print(f"  [bold yellow]dd_line confirmation error[/bold yellow]: {exc}")
 
+        # --- E3: per-instance disambiguation of the most flagrant macro-ambiguous hotspot ---
+        if prec_sim_bin and result["dd_line_locs"]:
+            macro_loc = next((loc for loc in result["dd_line_locs"] if loc.get("macro")), None)
+            if macro_loc:
+                cons.print(f"  [dim]disambiguating fypp instances of {macro_loc['path']}:{macro_loc['start']} (precision binary)...[/dim]")
+                try:
+                    insts = _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, macro_loc["path"], macro_loc["start"])
+                    macro_loc["instances"] = insts
+                    if insts and insts[0]["dev"] > 0:
+                        win = insts[0]
+                        cons.print(f"  flagrant instance: #{win['instance']} (.f90:{win['physline']}, dev={win['dev']:.3e})  {win['snippet']}")
+                    elif insts:
+                        cons.print(f"  [dim]{len(insts)} instance(s) enumerated; none perturbed measurably (hotspot inert)[/dim]")
+                    else:
+                        cons.print("  [dim]no sidecar instances found for this hotspot[/dim]")
+                except Exception as exc:
+                    cons.print(f"  [bold yellow]instance disambiguation error[/bold yellow]: {exc}")
+
         # --- F: cancellation detection ---
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
@@ -1460,6 +1549,9 @@ def _emit_github_summary(results: list, n_samples: int):
                     tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
                 suffix = f" — {', '.join(tags)}" if tags else ""
                 md.append(f"- `{where}`{suffix}")
+                for inst in loc.get("instances", [])[:8]:
+                    flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else ""
+                    md.append(f"  - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`")
                 snippet = _get_source_context(rel_path, start)
                 if snippet:
                     md.append("  ```fortran")
@@ -1531,6 +1623,9 @@ def fp_stability():
     run_cancellation = not ARG("no_cancellation")
     run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
+    prec_sim_bin = ARG("precision_sim_binary")
+    if prec_sim_bin and not os.path.isfile(prec_sim_bin):
+        raise MFCException(f"precision simulation binary not found: {prec_sim_bin}")
 
     log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs")
     os.makedirs(log_dir, exist_ok=True)
@@ -1540,6 +1635,8 @@ def fp_stability():
     cons.print(f"  verrou:      {verrou_bin}")
     cons.print(f"  simulation:  {sim_bin}")
     cons.print(f"  pre_process: {pp_bin}")
+    if prec_sim_bin:
+        cons.print(f"  precision:   {prec_sim_bin}  (per-instance disambiguation)")
     cons.print(f"  samples:     {n_samples}")
     features = []
     if run_float:
@@ -1578,6 +1675,7 @@ def fp_stability():
                 run_cancellation,
                 run_mca,
                 run_float_max,
+                prec_sim_bin,
             )
         except MFCException as exc:
             cons.print(f"  [bold red]ERROR[/bold red]: {exc}")
diff --git a/toolchain/mfc/test_fp_precision_lines.py b/toolchain/mfc/test_fp_precision_lines.py
new file mode 100644
index 0000000000..ddb139af2d
--- /dev/null
+++ b/toolchain/mfc/test_fp_precision_lines.py
@@ -0,0 +1,112 @@
+"""Unit tests for the fp-stability precision-lines transform (Tier 2, P1).
+
+A fypp #:for/#:def expansion re-marks many generated computations with the same
+cpp line marker (`# N "file.fpp"`), so DWARF — and Verrou — collapse every
+expanded instance onto one .fpp line.  strip_markers removes the fypp line
+markers so the compiler attributes to the generated .f90's own (instance-
+distinct) physical lines, and emits a sidecar mapping each surviving physical
+line back to (file, original .fpp line, instance index).  Genuine cpp directives
+(#if/#define/...) are kept so conditional compilation still works.
+"""
+
+import os
+
+from mfc.fp_precision_lines import (
+    instances_of,
+    sidecar_dir_for_binary,
+    sidecar_path,
+    strip_markers,
+)
+
+
+def test_strips_fypp_markers_and_keeps_code():
+    out, sidecar = strip_markers(['# 700 "real.fpp"\n', "  x = a - b\n"])
+    assert out == ["  x = a - b\n"]
+    assert sidecar == {1: {"file": "real.fpp", "line": 700, "instance": 0}}
+
+
+def test_keeps_cpp_conditional_directives():
+    lines = ['# 700 "real.fpp"\n', "#if defined(FOO)\n", "  x = 1\n", "#endif\n"]
+    out, _ = strip_markers(lines)
+    assert out == ["#if defined(FOO)\n", "  x = 1\n", "#endif\n"]
+
+
+def test_repeated_marker_increments_instance():
+    lines = ['# 700 "real.fpp"\n', "  s1 = x\n", '# 700 "real.fpp"\n', "  s2 = y\n"]
+    out, sidecar = strip_markers(lines)
+    assert out == ["  s1 = x\n", "  s2 = y\n"]
+    assert sidecar[1] == {"file": "real.fpp", "line": 700, "instance": 0}
+    assert sidecar[2] == {"file": "real.fpp", "line": 700, "instance": 1}
+
+
+def test_distinguishes_fypp_marker_from_cpp_directive():
+    # no fypp line markers here -> nothing stripped, no origin recorded
+    lines = ["#define X 1\n", "#if X\n", "  a = 1\n", "#endif\n"]
+    out, sidecar = strip_markers(lines)
+    assert out == lines
+    assert sidecar == {}
+
+
+def test_source_line_auto_increments_within_a_region():
+    lines = ['# 700 "real.fpp"\n', "  a = 1\n", "  b = 2\n"]
+    _, sidecar = strip_markers(lines)
+    assert sidecar[1]["line"] == 700
+    assert sidecar[2]["line"] == 701
+
+
+# --- Tier 2 consumption: locating + querying sidecars ---
+
+
+def test_instances_of_returns_physical_lines_for_a_source_line():
+    sidecar = {
+        7: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 0},
+        11: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 1},
+        20: {"file": "/abs/src/simulation/m_weno.fpp", "line": 999, "instance": 0},
+    }
+    # matched by basename; the repo-relative path from a dd_line hotspot still matches
+    assert instances_of(sidecar, "src/simulation/m_weno.fpp", 241) == [(7, 0), (11, 1)]
+
+
+def test_instances_of_empty_when_no_match():
+    sidecar = {7: {"file": "m_weno.fpp", "line": 241, "instance": 0}}
+    assert instances_of(sidecar, "m_weno.fpp", 999) == []
+    assert instances_of(sidecar, "m_other.fpp", 241) == []
+
+
+def test_instances_of_sorted_by_physical_line():
+    sidecar = {
+        30: {"file": "f.fpp", "line": 5, "instance": 2},
+        10: {"file": "f.fpp", "line": 5, "instance": 0},
+        20: {"file": "f.fpp", "line": 5, "instance": 1},
+    }
+    assert instances_of(sidecar, "f.fpp", 5) == [(10, 0), (20, 1), (30, 2)]
+
+
+def test_sidecar_dir_for_binary_maps_install_to_staging():
+    got = sidecar_dir_for_binary("/x/build/install/HASH/bin/simulation")
+    assert got == os.path.join("/x/build/staging/HASH/fypp/simulation")
+
+
+def test_sidecar_path_uses_fpp_basename_and_linemap_suffix():
+    got = sidecar_path("/x/staging/HASH/fypp/simulation", "src/simulation/m_weno.fpp")
+    assert got == os.path.join("/x/staging/HASH/fypp/simulation", "m_weno.fpp.linemap.json")
+
+
+def test_cpp_directives_do_not_consume_a_source_line_increment():
+    # the #else line must not advance the .fpp source line nor get a sidecar entry
+    lines = ['# 700 "real.fpp"\n', "  a = 1\n", "#else\n", "  b = 2\n"]
+    out, sidecar = strip_markers(lines)
+    assert out == ["  a = 1\n", "#else\n", "  b = 2\n"]
+    assert sidecar[1]["line"] == 700  # a = 1
+    assert 2 not in sidecar  # #else: kept, but not a source line
+    assert sidecar[3]["line"] == 701  # b = 2 (not 702)
+
+
+def test_sidecar_line_numbers_are_physical_output_lines():
+    # output physical line numbers (1-based, after stripping) are the keys
+    lines = ['# 10 "f"\n', "  a = 1\n", '# 20 "f"\n', "  b = 2\n", "  c = 3\n"]
+    out, sidecar = strip_markers(lines)
+    assert out == ["  a = 1\n", "  b = 2\n", "  c = 3\n"]
+    assert sidecar[1] == {"file": "f", "line": 10, "instance": 0}
+    assert sidecar[2] == {"file": "f", "line": 20, "instance": 0}
+    assert sidecar[3] == {"file": "f", "line": 21, "instance": 0}

From bc7e516fd2fa065cf87be635019fd30e83abbaeb Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 1 Jun 2026 20:52:12 -0400
Subject: [PATCH 03/25] fp-stability: distinguish precision-sensitivity from
 cancellation-origin; surface caps + coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-line --source ranking measures sensitivity (where reduced precision most moves the output), which is structurally dominated by the time integrator / final accumulation: perturbing the last write to q_cons hits the output 1:1, while upstream errors get re-rounded there. Empirically, sod_standard's cancellation concentrates in m_weno.fpp (14 sites) and m_riemann_solvers.fpp (5), with m_time_steppers.fpp at just 1 — yet the time-stepper led the share ranking at 100%. Presenting it as 'most flagrant' conflated sensitivity with where ill-conditioning originates.

Reframe: the dd_line/share view is relabeled 'single-precision sensitivity' with an explicit caveat (typically the time integrator, expected/benign, not a cancellation-origin finder); a new per-file cancellation-density line (_cancellation_by_file) headlines where cancellation actually concentrates; console + GitHub summary + inline annotations updated to keep the two signals distinct.

Also: no silent caps (truncated dd_line/cancellation/float-max lists now report '…and N more'; annotations emit a dropped-count notice), and a coverage caveat in the summary header (N 1-D cases; a pass is not a guarantee for unexercised multi-D/viscous/MHD/IGR/bubble paths). _cancellation_by_file is pure + TDD'd.
---
 toolchain/mfc/fp_stability.py      | 79 +++++++++++++++++++++++++-----
 toolchain/mfc/test_fp_stability.py | 22 +++++++++
 2 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 8ac03d2b87..440134b04c 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -26,10 +26,13 @@
    reproduce the instability.  Lines that do not reproduce it are reported as
    unconfirmed (downgraded from ::warning:: to ::notice::).  Each line is then
    perturbed alone and ranked by the share of the single-precision deviation it
-   reproduces, so the most flagrant computation is identified rather than a flat
-   list.  Hotspots are additionally cross-referenced against the stage-F
-   cancellation sites (to name the offending subtraction) and flagged as
-   instance-ambiguous when the .fpp line sits inside a #:for/#:def expansion.
+   reproduces.  NOTE: this is a *sensitivity* measure — where reduced precision
+   most moves the output — and is typically dominated by the time integrator /
+   final accumulation, NOT by where cancellation originates.  Stage F (and its
+   per-file density) is the cancellation-origin view; the two usually differ.
+   Hotspots are cross-referenced against the stage-F cancellation sites and
+   flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def
+   expansion.
 
 F. Cancellation detection (--no-cancellation to skip)
    One run with --check-cancellation=yes; reports MFC source lines that
@@ -1003,6 +1006,22 @@ def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
     return dd_line_locs
 
 
+def _cancellation_by_file(cancellation_locs: list) -> list:
+    """Aggregate cancellation sites by source file → [(basename, count)] sorted by
+    count (desc), ties by name.
+
+    This is the cancellation-*origin* view (where ill-conditioning concentrates),
+    as opposed to the per-line --source share, which is a *sensitivity* view
+    (where reduced precision most moves the output — typically the time
+    integrator / final accumulation, regardless of where error originates).
+    """
+    counts = {}
+    for fname, _lineno in cancellation_locs:
+        base = os.path.basename(fname)
+        counts[base] = counts.get(base, 0) + 1
+    return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
+
+
 def _run_dd_tool(
     dd_bin: str,
     dd_dir: str,
@@ -1361,7 +1380,9 @@ def _run_case(
                     cons.print(f"  [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)")
                 top = ranked[0] if ranked else None
                 if top and top.get("share") is not None:
-                    cons.print(f"  most flagrant: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)")
+                    cons.print(f"  highest single-precision sensitivity: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)")
+                    cons.print("  [dim](sensitivity = where reduced precision most moves the output, often the time")
+                    cons.print("  [dim] integrator; not necessarily where cancellation originates — see cancellation sites)[/dim]")
             except Exception as exc:
                 cons.print(f"  [bold yellow]dd_line confirmation error[/bold yellow]: {exc}")
 
@@ -1450,7 +1471,7 @@ def _emit_github_annotations(results: list):
     if not os.environ.get("GITHUB_ACTIONS"):
         return
     for r in results:
-        status = "FAIL" if not r["passed"] else "hotspot"
+        status = "FAIL" if not r["passed"] else "sensitivity"
         dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})"
         unconfirmed = r.get("dd_line_confirmed") is False
 
@@ -1460,9 +1481,9 @@ def _emit_github_annotations(results: list):
                 location += f",endLine={loc['end']}"
             note = dev_str
             if loc.get("share") is not None:
-                note += f" — reproduces {loc['share'] * 100:.0f}% of float-proxy alone"
+                note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)"
             if loc.get("cancellation"):
-                note += " — catastrophic cancellation site"
+                note += " — also a catastrophic cancellation site"
             if loc.get("macro"):
                 note += f" — {loc['macro']}-expanded line, may represent multiple instances"
             if unconfirmed:
@@ -1471,11 +1492,17 @@ def _emit_github_annotations(results: list):
             else:
                 title = f"FP {status} [{r['name']}]"
                 print(f"::warning {location},title={title}::{note}", flush=True)
+        n_dd = len(r.get("dd_line_locs", []))
+        if n_dd > 3:
+            print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True)
 
         for fname, lineno in r.get("cancellation_locs", [])[:3]:
             loc = f"file={fname},line={lineno}"
             title = f"FP cancellation [{r['name']}]"
             print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True)
+        n_cc = len(r.get("cancellation_locs", []))
+        if n_cc > 3:
+            print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True)
 
 
 def _emit_github_summary(results: list, n_samples: int):
@@ -1495,6 +1522,12 @@ def _emit_github_summary(results: list, n_samples: int):
     md = []
     md.append("## FP Stability Results\n")
     md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n")
+    md.append(
+        f"> **Coverage:** {len(results)} one-dimensional case(s) "
+        f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these "
+        "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
+        "they do not reach.\n"
+    )
 
     # Main results table
     md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |")
@@ -1528,10 +1561,19 @@ def _emit_github_summary(results: list, n_samples: int):
             md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
         md.append("")
 
-    # dd_line hotspot sources — always shown (top 10 per case) with source context
+    # dd_line — single-precision SENSITIVITY (where precision most affects the
+    # output). This is distinct from cancellation origin (reported separately):
+    # the leader is typically the time integrator / final accumulation, because
+    # perturbing the last write moves the output directly while upstream errors
+    # get re-rounded there. Not a culprit-finder for ill-conditioning.
     cases_with_locs = [r for r in results if r["dd_line_locs"]]
     if cases_with_locs:
-        md.append("### Top FP hotspots (dd\\_line)\n")
+        md.append("### Single-precision sensitivity (dd\\_line)\n")
+        md.append(
+            "> Where reduced precision most moves the output — **typically the time integrator / "
+            "final accumulation, which is expected and benign**. This is *not* the same as where "
+            "cancellation originates; see **Catastrophic cancellation sites** below for that.\n"
+        )
         _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
         for r in cases_with_locs:
             status = "❌ FAIL" if not r["passed"] else "✅ pass"
@@ -1558,6 +1600,8 @@ def _emit_github_summary(results: list, n_samples: int):
                     for line in snippet.splitlines():
                         md.append(f"  {line}")
                     md.append("  ```")
+            if len(r["dd_line_locs"]) > 10:
+                md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_")
             md.append("")
 
     # dd_sym function names (collapsed, since less actionable than dd_line)
@@ -1571,12 +1615,19 @@ def _emit_github_summary(results: list, n_samples: int):
                 md.append(f"- `{sym}`")
         md.append("\n</details>\n")
 
-    # Cancellation hotspots
+    # Cancellation hotspots — the ORIGIN view (where ill-conditioning concentrates).
     cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
     if cases_with_cancel:
         md.append("### Catastrophic cancellation sites\n")
+        md.append(
+            "> Where cancellation actually originates (subtraction of nearly-equal values). This is "
+            "the numerically interesting signal — and it usually differs from the sensitivity leader "
+            "above.\n"
+        )
         for r in cases_with_cancel:
-            md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s)\n")
+            by_file = _cancellation_by_file(r["cancellation_locs"])
+            density = ", ".join(f"`{f}` ({n})" for f, n in by_file[:6])
+            md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s); concentrates in: {density}\n")
             for fname, lineno in r["cancellation_locs"][:15]:
                 md.append(f"- `{fname}:{lineno}`")
                 snippet = _get_source_context(fname, lineno)
@@ -1585,6 +1636,8 @@ def _emit_github_summary(results: list, n_samples: int):
                     for line in snippet.splitlines():
                         md.append(f"  {line}")
                     md.append("  ```")
+            if len(r["cancellation_locs"]) > 15:
+                md.append(f"- _…and {len(r['cancellation_locs']) - 15} more site(s); see fp-stability-logs/_")
             md.append("")
 
     # Float-max overflow sites
@@ -1595,6 +1648,8 @@ def _emit_github_summary(results: list, n_samples: int):
             md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
             for fname, lineno in r["float_max_locs"][:10]:
                 md.append(f"- `{fname}:{lineno}`")
+            if len(r["float_max_locs"]) > 10:
+                md.append(f"- _…and {len(r['float_max_locs']) - 10} more site(s); see fp-stability-logs/_")
             md.append("")
 
     with open(summary_path, "a") as f:
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 694da7d906..ae188054f0 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -8,6 +8,7 @@
 
 from mfc.fp_stability import (
     _build_source_filter,
+    _cancellation_by_file,
     _confirm_decision,
     _macro_context_in_lines,
     _mark_cancellation,
@@ -196,3 +197,24 @@ def test_mark_cancellation_false_for_different_basename():
     locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}]
     _mark_cancellation(locs, [("m_bar.fpp", 5)])
     assert locs[0]["cancellation"] is False
+
+
+# --- cancellation-origin view: where cancellation concentrates ---
+
+
+def test_cancellation_by_file_counts_and_sorts_by_density():
+    locs = [
+        ("src/simulation/m_weno.fpp", 10),
+        ("m_weno.fpp", 20),
+        ("a/m_riemann_solvers.fpp", 5),
+    ]
+    assert _cancellation_by_file(locs) == [("m_weno.fpp", 2), ("m_riemann_solvers.fpp", 1)]
+
+
+def test_cancellation_by_file_breaks_ties_by_name():
+    locs = [("z.fpp", 1), ("a.fpp", 2)]
+    assert _cancellation_by_file(locs) == [("a.fpp", 1), ("z.fpp", 1)]
+
+
+def test_cancellation_by_file_empty():
+    assert _cancellation_by_file([]) == []

From 1825dd9a84cc16a7a37904a550e2c2c97ca2229e Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 1 Jun 2026 21:05:26 -0400
Subject: [PATCH 04/25] fp-stability: rank cancellation by severity (bits
 lost), not count; resolve continuations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes prompted by review: (1) site COUNT is not severity — one catastrophic cancellation outweighs many mild ones; (2) attribution can land on a continuation fragment, so the labelled line was unclear.

Severity: Verrou exposes no per-site bit-count, but --cc-threshold-double is itself a severity filter (a site is only reported if it lost >= threshold bits). A second pass at 26 bits identifies SEVERE sites with no false positives (may under-count). Severe sites are listed first and labelled; the count-by-file breakdown is dropped from the summary and replaced by an explicit 'count is not severity' caveat. On sod_standard this surfaces the real origins — flux divergence (m_rhs), divided differences and smoothness indicators (m_weno), HLLC wave speeds (m_riemann) — and correctly omits the time integrator.

Continuations: _statement_bounds_in_lines follows free-form '&' continuations (leading or trailing) to the logical-statement start; cancellation sites are de-duplicated and displayed as the full statement at its canonical start line, so a hit on a fragment resolves to the whole expression. Pure helpers TDD'd (60 toolchain tests).
---
 toolchain/mfc/fp_stability.py      | 110 +++++++++++++++++++++++------
 toolchain/mfc/test_fp_stability.py |  39 ++++++++++
 2 files changed, 127 insertions(+), 22 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 440134b04c..4d8b07b378 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -183,6 +183,53 @@ def _macro_context(fname: str, lineno: int) -> str:
     return _macro_context_in_lines(lines, lineno)
 
 
+def _ends_with_continuation(line: str) -> bool:
+    """True if a free-form Fortran line ends with a continuation '&' (the last
+    non-blank token before any trailing comment)."""
+    code = line.split("!", 1)[0].rstrip()  # drop trailing comment (string-'!' is rare; fine here)
+    return code.endswith("&")
+
+
+def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
+    """Return the (start, end) 1-based physical line range of the Fortran logical
+    statement containing lineno, following '&' continuations in both directions.
+
+    A hit reported on a continuation fragment thus resolves to the whole
+    statement, so the labelled location is the full expression rather than a
+    mid-statement piece.
+    """
+    n = len(lines)
+    start = lineno
+    while start > 1 and _ends_with_continuation(lines[start - 2]):
+        start -= 1
+    end = lineno
+    while end < n and _ends_with_continuation(lines[end - 1]):
+        end += 1
+    return start, end
+
+
+def _statement_at(fname: str, lineno: int) -> tuple:
+    """File-backed (start, end, text) for the logical statement at fname:lineno;
+    text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
+    if os.path.isabs(fname) and os.path.isfile(fname):
+        candidates = [fname]
+    else:
+        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
+    if not candidates:
+        return lineno, lineno, ""
+    try:
+        with open(candidates[0]) as fh:
+            lines = fh.readlines()
+    except OSError:
+        return lineno, lineno, ""
+    if not 0 < lineno <= len(lines):
+        return lineno, lineno, ""
+    start, end = _statement_bounds_in_lines(lines, lineno)
+    # join physical lines, dropping the continuation '&' that may lead or trail each
+    text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end])
+    return start, end, text
+
+
 def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
     """Return True if any line in [start, end] contains non-trivial arithmetic.
 
@@ -678,14 +725,22 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
     return locs
 
 
-def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list:
-    """Run with --check-cancellation=yes; return [(fname, line)] of MFC cancellation sites."""
-    run_dir = os.path.join(work_dir, "cancellation")
+# A site reported at this bit threshold has lost at least this many significant
+# bits to cancellation — a *severity* floor (Verrou only reports a site when it
+# exceeds the threshold, so a high-threshold pass has no false positives).
+CANCEL_SEVERE_BITS = 26
+
+
+def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
+    """Run --check-cancellation at the given bit threshold; return [(fname, line)]
+    of MFC cancellation sites (subtractions losing >= `threshold` significant bits)."""
+    tag = f"cancellation_{threshold}"
+    run_dir = os.path.join(work_dir, tag)
     os.makedirs(run_dir, exist_ok=True)
     gen_path = os.path.join(run_dir, "cancel_gen.txt")
     flags = [
         "--check-cancellation=yes",
-        "--cc-threshold-double=10",
+        f"--cc-threshold-double={threshold}",
         f"--cc-gen-file={gen_path}",
     ]
     try:
@@ -695,7 +750,7 @@ def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir:
     raw = _parse_cancel_gen(gen_path)
     filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
     skipped = len(raw) - len(filtered)
-    if skipped:
+    if skipped and threshold == 10:
         cons.print(f"  [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
     return filtered
 
@@ -1277,6 +1332,7 @@ def _run_case(
         "dd_line_confirmed": None,
         "dd_line_confirm_dev": None,
         "cancellation_locs": [],
+        "cancellation_severe": set(),
         "mca_dev": None,
         "mca_sigbits": None,
         "float_max_locs": [],
@@ -1411,7 +1467,10 @@ def _run_case(
                 locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir)
                 result["cancellation_locs"] = locs
                 if locs:
-                    cons.print(f"  cancellation: {len(locs)} unique source location(s)")
+                    # severity pass: which sites lose >= CANCEL_SEVERE_BITS bits
+                    severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS))
+                    result["cancellation_severe"] = severe
+                    cons.print(f"  cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)")
                 else:
                     cons.print("  cancellation: none detected")
                 # cross-reference: label dd_line hotspots that sit on a cancellation site
@@ -1620,24 +1679,30 @@ def _emit_github_summary(results: list, n_samples: int):
     if cases_with_cancel:
         md.append("### Catastrophic cancellation sites\n")
         md.append(
-            "> Where cancellation actually originates (subtraction of nearly-equal values). This is "
-            "the numerically interesting signal — and it usually differs from the sensitivity leader "
-            "above.\n"
+            "> Where cancellation actually originates (subtraction of nearly-equal values). "
+            f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is "
+            "not severity — one severe site outweighs many mild ones, so the severe sites are listed "
+            "first. (Severe detection has no false positives but may under-count.)\n"
         )
         for r in cases_with_cancel:
-            by_file = _cancellation_by_file(r["cancellation_locs"])
-            density = ", ".join(f"`{f}` ({n})" for f, n in by_file[:6])
-            md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s); concentrates in: {density}\n")
-            for fname, lineno in r["cancellation_locs"][:15]:
-                md.append(f"- `{fname}:{lineno}`")
-                snippet = _get_source_context(fname, lineno)
-                if snippet:
-                    md.append("  ```fortran")
-                    for line in snippet.splitlines():
-                        md.append(f"  {line}")
-                    md.append("  ```")
-            if len(r["cancellation_locs"]) > 15:
-                md.append(f"- _…and {len(r['cancellation_locs']) - 15} more site(s); see fp-stability-logs/_")
+            severe = r.get("cancellation_severe") or set()
+            # collapse continuation fragments to one entry per logical statement,
+            # severe statements first (the ones that matter)
+            stmts = {}  # (basename, stmt_start) -> {where, severe, text}
+            for fname, lineno in sorted(r["cancellation_locs"]):
+                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
+                key = (os.path.basename(fname), stmt_start)
+                entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text})
+                if (fname, lineno) in severe:
+                    entry["severe"] = True
+            ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"]))
+            n_severe_stmt = sum(1 for e in ordered if e["severe"])
+            md.append(f"**`{r['name']}`** — {len(stmts)} statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n")
+            for e in ordered[:15]:
+                sev = " **severe**" if e["severe"] else ""
+                md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else ""))
+            if len(ordered) > 15:
+                md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
             md.append("")
 
     # Float-max overflow sites
@@ -1746,6 +1811,7 @@ def fp_stability():
                 "dd_line_confirmed": None,
                 "dd_line_confirm_dev": None,
                 "cancellation_locs": [],
+                "cancellation_severe": set(),
                 "mca_dev": None,
                 "mca_sigbits": None,
                 "float_max_locs": [],
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index ae188054f0..41b4502579 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -13,6 +13,7 @@
     _macro_context_in_lines,
     _mark_cancellation,
     _rank_locs,
+    _statement_bounds_in_lines,
 )
 
 # --- #2: fypp macro-expansion context detection ---
@@ -218,3 +219,41 @@ def test_cancellation_by_file_breaks_ties_by_name():
 
 def test_cancellation_by_file_empty():
     assert _cancellation_by_file([]) == []
+
+
+# --- Fortran line-continuation handling (correct-line labeling) ---
+
+
+def test_statement_bounds_single_line():
+    lines = ["  a = b - c\n"]
+    assert _statement_bounds_in_lines(lines, 1) == (1, 1)
+
+
+def test_statement_bounds_spans_continuation_from_first_line():
+    lines = ["  poly = (s_cb(i+3) - s_cb(i+1)) * &\n", "         (s_cb(i+2) - s_cb(i))\n"]
+    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
+
+
+def test_statement_bounds_from_middle_continuation_line():
+    # a hit on the continuation fragment must resolve to the statement start
+    lines = ["  x = a + &\n", "      b + &\n", "      c\n"]
+    assert _statement_bounds_in_lines(lines, 2) == (1, 3)
+    assert _statement_bounds_in_lines(lines, 3) == (1, 3)
+
+
+def test_statement_bounds_ignores_ampersand_in_trailing_comment_logic():
+    # a real continuation '&' before a trailing comment still continues
+    lines = ["  x = a & ! note\n", "      + b\n"]
+    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
+
+
+def test_statement_bounds_non_continuation_neighbors():
+    lines = ["  x = 1\n", "  y = 2\n", "  z = 3\n"]
+    assert _statement_bounds_in_lines(lines, 2) == (2, 2)
+
+
+def test_statement_bounds_with_leading_ampersand_continuation():
+    # the MFC WENO style: line ends with '&' and the next line *starts* with '&'
+    lines = ["  beta = x**2 &\n", "       & + eps\n"]
+    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
+    assert _statement_bounds_in_lines(lines, 2) == (1, 2)

From b9e790f5bedeeb2d2c36668c29087afedf181f8c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 1 Jun 2026 21:19:26 -0400
Subject: [PATCH 05/25] fp-stability: scale-free pass/fail via significant
 bits, replacing 6 hand-tuned thresholds

Each case had a hand-tuned absolute L-inf threshold spanning 1e-13..2e-7 (six orders), driven by field magnitude and conditioning. Maintaining per-case thresholds is fragile. Normalizing the deviation by the field's peak magnitude removes the scale, so a single global criterion suffices.

Pass/fail is now sig_bits = -log2(max_dev / max|ref|) >= MIN_SIG_BITS (24 = single precision retained under random rounding); zero deviation or a zero reference scale is reported as 53 bits, i.e. full double precision retained. The per-case 'threshold' field is removed from CASES; pass/fail, the VPREC FAIL marker, console, summary table, and inline annotations all report bits-retained vs the floor. The dd_sym/dd_line oracle keeps its own float-proxy-derived threshold (unchanged).

Validated: max_dev spans 1e-14..7e-8 across the 6 cases but sig_bits is a tight 30.3..48.7 band, all >= 24 with margin; classification matches the prior thresholds (6/6 pass). Pure _sig_bits/_stability_pass are TDD'd (67 toolchain tests). A per-case auto-measured baseline + regression delta would add sensitivity for moderate drops; deferred as a heavier change.
---
 toolchain/mfc/fp_stability.py      | 83 +++++++++++++++++++++---------
 toolchain/mfc/test_fp_stability.py | 40 ++++++++++++++
 2 files changed, 98 insertions(+), 25 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 4d8b07b378..a5cd636562 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -4,7 +4,8 @@
 Features
 --------
 A. Stability suite (always)
-   N random-rounding samples per case, threshold-based PASS/FAIL.
+   N random-rounding samples per case; PASS/FAIL on significant bits retained
+   (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold).
 
 B. Float proxy (--no-float-proxy to skip)
    One run with --rounding-mode=float — deterministic proxy for
@@ -92,6 +93,35 @@
 # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
 VPREC_MANTISSA_BITS = [52, 23, 16, 10]
 
+# Stability pass/fail (stage A) is scale-free: a case must retain at least this
+# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
+# 24 ~= single precision. One global floor replaces per-case absolute thresholds
+# (which spanned 6 orders of magnitude purely from field scale + conditioning);
+# normalising by the field scale collapses that, so a single number suffices.
+MIN_SIG_BITS = 24
+
+# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy-
+# derived threshold is supplied (callers always pass one, so this is only a guard).
+_DD_FALLBACK_THRESHOLD = 1e-12
+
+
+def _sig_bits(max_dev: float, ref_scale: float) -> float:
+    """Significant bits retained = -log2(max_dev / ref_scale).
+
+    Scale-free: dividing the deviation by the field's peak magnitude removes the
+    absolute scale, leaving only the conditioning.  Zero deviation (or zero
+    scale) returns 53.0 = full double precision retained.
+    """
+    if not (max_dev > 0) or not (ref_scale > 0):
+        return 53.0
+    return -math.log2(max_dev / ref_scale)
+
+
+def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
+    """A case passes when it retains at least `floor` significant bits."""
+    return _sig_bits(max_dev, ref_scale) >= floor
+
+
 # Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
 _LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
 
@@ -341,8 +371,9 @@ def _merge(*dicts):
 #   name      - unique identifier used in log paths and console output
 #   description - human-readable summary
 #   compare   - D/ output files compared between reference and perturbed runs
-#   threshold - max L∞ deviation allowed before the case is declared FAIL
 #   ill_cond  - known source of cancellation (empty string = none expected)
+# Pass/fail is scale-free (>= MIN_SIG_BITS significant bits retained), so cases
+# need no per-case deviation threshold regardless of field magnitude.
 #   pre       - parameters for pre_process (generates initial conditions)
 #   sim       - parameters for simulation
 CASES = [
@@ -350,7 +381,6 @@ def _merge(*dicts):
         "name": "sod_standard",
         "description": "1-D standard Sod, p_L/p_R=10, ideal gas (well-conditioned baseline)",
         "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
-        "threshold": 1e-13,
         "ill_cond": "",
         "pre": _merge(
             _BASE_PRE,
@@ -373,7 +403,6 @@ def _merge(*dicts):
         "name": "sod_strong",
         "description": "1-D Sod, p_L/p_R=100,000, ideal gas",
         "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
-        "threshold": 1e-10,
         "ill_cond": "HLLC xi factor: (s_L - vel_L)/(s_L - s_S) cancels near sonic contact",
         "pre": _merge(
             _BASE_PRE,
@@ -396,8 +425,7 @@ def _merge(*dicts):
         "name": "water_stiffened",
         "description": "1-D water shock, stiffened EOS (pi_inf=4046)",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 1e-8,
-        "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000) [threshold loosened until reduced-energy (Etilde) scheme is merged]",
+        "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000)",
         "pre": _merge(
             _BASE_PRE,
             _WATER_EOS,
@@ -419,7 +447,6 @@ def _merge(*dicts):
         "name": "air_water_interface",
         "description": "1-D air/water isobaric contact (two-fluid, pi_inf=4046)",
         "compare": ["cons.1.00.000050.dat", "cons.4.00.000050.dat", "cons.5.00.000050.dat"],
-        "threshold": 1e-10,
         "ill_cond": "Mixed-cell pressure recovery: E-alpha_w*gamma_w*pi_inf cancels when alpha_w<<1",
         "pre": _merge(
             _BASE_PRE,
@@ -460,7 +487,6 @@ def _merge(*dicts):
         "name": "bubble_rp",
         "description": "1-D bubbly water, pressure step 2:1 driving Rayleigh-Plesset oscillations (nb=1, Keller-Miksis)",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 1e-8,
         "ill_cond": "RP ODE: (p_bub - p_ext) cancels near bubble equilibrium",
         "pre": _merge(
             _BASE_PRE,
@@ -528,8 +554,7 @@ def _merge(*dicts):
         "name": "low_mach",
         "description": "1-D water shock with low_Mach=1 HLLC correction active",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "threshold": 2e-7,
-        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0 (threshold loosened to 2e-7 to absorb MCA sampling variance)",
+        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0",
         "pre": _merge(
             _BASE_PRE,
             _WATER_EOS,
@@ -1121,7 +1146,7 @@ def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_di
     dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
     dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
     _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else case["threshold"])
+    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
     _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
     cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
     return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
@@ -1145,7 +1170,7 @@ def _run_dd_line(
     os.makedirs(dd_dir, exist_ok=True)
     dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
     dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    effective_threshold = threshold if threshold is not None else case["threshold"]
+    effective_threshold = threshold if threshold is not None else _DD_FALLBACK_THRESHOLD
     _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
     _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold)
     _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
@@ -1310,21 +1335,20 @@ def _run_case(
     prec_sim_bin: str = None,
 ) -> dict:
     name = case["name"]
-    threshold = case["threshold"]
     compare = case["compare"]
 
     cons.print(f"[bold]{name}[/bold]: {case['description']}")
     cons.indent()
     if case["ill_cond"]:
         cons.print(f"  ill-conditioning: {case['ill_cond']}")
-    cons.print(f"  threshold: {threshold:.0e}")
+    cons.print(f"  pass floor: >= {MIN_SIG_BITS} significant bits retained")
 
     work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-")
     result = {
         "name": name,
         "passed": False,
         "max_dev": float("inf"),
-        "threshold": threshold,
+        "sig_bits": None,
         "float_proxy": None,
         "vprec": [],
         "dd_sym_syms": [],
@@ -1348,6 +1372,9 @@ def _run_case(
         _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest")
 
         # --- A: random-rounding stability samples ---
+        # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale),
+        # vs one global floor (no per-case hand-tuned absolute threshold).
+        ref_scale = _max_abs_np(ref_dir, compare)
         max_dev = 0.0
         cons.print(f"  [dim]random-rounding runs (N={n_samples})...[/dim]")
         for i in range(n_samples):
@@ -1356,11 +1383,13 @@ def _run_case(
             _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random")
             max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
 
-        passed = max_dev <= threshold
+        sig_bits = _sig_bits(max_dev, ref_scale)
+        passed = sig_bits >= MIN_SIG_BITS
         result["passed"] = passed
         result["max_dev"] = max_dev
+        result["sig_bits"] = sig_bits
         tag = "[bold green]PASS[/bold green]" if passed else "[bold red]FAIL[/bold red]"
-        cons.print(f"  {tag}  max_dev={max_dev:.3e}  threshold={threshold:.0e}")
+        cons.print(f"  {tag}  {sig_bits:.1f} bits retained (floor {MIN_SIG_BITS})  max_dev={max_dev:.3e}")
 
         # --- B: float proxy ---
         if run_float:
@@ -1383,7 +1412,7 @@ def _run_case(
                 marker = ""
                 if dev == float("inf"):
                     marker = "  [red]crashed[/red]"
-                elif dev > threshold:
+                elif _sig_bits(dev, ref_scale) < MIN_SIG_BITS:
                     marker = "  [red]FAIL[/red]"
                 cons.print(f"    {bits:2d} bits{label_str}: dev={dev:.3e}{marker}")
 
@@ -1531,7 +1560,9 @@ def _emit_github_annotations(results: list):
         return
     for r in results:
         status = "FAIL" if not r["passed"] else "sensitivity"
-        dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})"
+        _sb = r.get("sig_bits")
+        _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a"
+        dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}"
         unconfirmed = r.get("dd_line_confirmed") is False
 
         for loc in r.get("dd_line_locs", [])[:3]:
@@ -1588,17 +1619,19 @@ def _emit_github_summary(results: list, n_samples: int):
         "they do not reach.\n"
     )
 
-    # Main results table
-    md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |")
-    md.append("|------|:------:|--------:|--------:|--------:|:------:|")
+    # Main results table — pass/fail is scale-free: bits retained vs a single floor
+    md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
+    md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |")
+    md.append("|------|:------:|:------:|--------:|--------:|:------:|")
     for r in results:
         status = "✅" if r["passed"] else "❌"
+        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
         fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
         sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
-        md.append(f"| `{r['name']}` | {status} | {r['max_dev']:.2e} | {r['threshold']:.0e} | {fp} | {sb} |")
+        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
     md.append("")
 
-    # VPREC sweep — one column per bit level, ❌ where dev > threshold
+    # VPREC sweep — one column per bit level, ❌ where bits retained < floor
     if any(r["vprec"] for r in results):
         _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
         header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
@@ -1803,7 +1836,7 @@ def fp_stability():
                 "name": case["name"],
                 "passed": False,
                 "max_dev": float("inf"),
-                "threshold": case["threshold"],
+                "sig_bits": None,
                 "float_proxy": None,
                 "vprec": [],
                 "dd_sym_syms": [],
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 41b4502579..056193c519 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -7,12 +7,15 @@
 """
 
 from mfc.fp_stability import (
+    MIN_SIG_BITS,
     _build_source_filter,
     _cancellation_by_file,
     _confirm_decision,
     _macro_context_in_lines,
     _mark_cancellation,
     _rank_locs,
+    _sig_bits,
+    _stability_pass,
     _statement_bounds_in_lines,
 )
 
@@ -221,6 +224,43 @@ def test_cancellation_by_file_empty():
     assert _cancellation_by_file([]) == []
 
 
+# --- scale-free pass/fail: significant bits retained ---
+
+
+def test_sig_bits_relative_deviation():
+    # max_dev/ref_scale = 1e-14 -> ~46.5 retained bits
+    assert 46 < _sig_bits(1e-14, 1.0) < 47
+
+
+def test_sig_bits_is_scale_free():
+    # same relative deviation -> same bits regardless of absolute magnitude
+    assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9
+
+
+def test_sig_bits_zero_deviation_is_full_precision():
+    assert _sig_bits(0.0, 1.0) == 53.0
+
+
+def test_sig_bits_zero_scale_is_safe():
+    assert _sig_bits(1e-12, 0.0) == 53.0
+
+
+def test_sig_bits_deviation_at_scale_is_unstable():
+    # deviation as large as the field -> <= 0 retained bits
+    assert _sig_bits(1.0, 1.0) <= 0.0
+
+
+def test_stability_pass_uses_global_floor():
+    # well-conditioned: ~46 bits >= floor
+    assert _stability_pass(1e-14, 1.0, MIN_SIG_BITS) is True
+    # catastrophic: deviation at field scale -> fails
+    assert _stability_pass(0.5, 1.0, MIN_SIG_BITS) is False
+
+
+def test_min_sig_bits_is_single_precision_floor():
+    assert MIN_SIG_BITS == 24
+
+
 # --- Fortran line-continuation handling (correct-line labeling) ---
 
 

From 84bec6d3b30d1575887e8c415e924557551d7e95 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 07:41:12 -0400
Subject: [PATCH 06/25] fp-stability: lead with cancellation origins, report
 digits lost (not 'severe'), collapse sensitivity

Review feedback on the summary: (1) it buried the interesting cancellation origins below the long, mostly-expected sensitivity list; (2) 'severe' is a binary label when an actual magnitude is far more useful; (3) 'bits lost' is not intuitive.

Reorder: the cancellation-origins section now leads (right after the results table), ranked worst-first; the single-precision sensitivity list (dominated by the benign time integrator) is collapsed into a <details>.

Severity as a number: a sweep of --cc-threshold-double levels [10,20,30,40,48] buckets each site by the highest threshold at which it is still reported (_cancellation_severity), giving per-site bits lost (a lower bound). Bits are translated to decimal digits (a double carries ~16; _digits_left) so each entry reads e.g. '>= 12 digits lost (~4 of 16 left)' with the full statement. On sod_standard the worst origins (flux divergence, divided differences, HLLC wave speeds) lose ~14 of 16 digits; the sweep discriminates between sites (23 sites lose >=10 bits, 11 lose >=48). 69 toolchain tests.
---
 toolchain/mfc/fp_stability.py      | 115 ++++++++++++++++++-----------
 toolchain/mfc/test_fp_stability.py |  18 +++++
 2 files changed, 88 insertions(+), 45 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index a5cd636562..d826ca56c9 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -750,10 +750,28 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
     return locs
 
 
-# A site reported at this bit threshold has lost at least this many significant
-# bits to cancellation — a *severity* floor (Verrou only reports a site when it
-# exceeds the threshold, so a high-threshold pass has no false positives).
-CANCEL_SEVERE_BITS = 26
+# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
+# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
+# levels and taking the highest each site survives gives a per-site "bits lost"
+# severity (a lower bound — no false positives). 48 ~ full double mantissa.
+CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
+
+
+def _cancellation_severity(level_sites: list) -> dict:
+    """Given [(threshold, [sites])], return {site: highest threshold it survives}
+    = the per-site bits-lost severity (a lower bound)."""
+    sev = {}
+    for level, sites in level_sites:
+        for site in sites:
+            if level > sev.get(site, 0):
+                sev[site] = level
+    return sev
+
+
+def _digits_left(bits_lost: float) -> float:
+    """Approximate trustworthy decimal digits remaining after losing `bits_lost`
+    bits of a double's 53-bit mantissa (~15.95 digits full)."""
+    return max(0.0, (53 - bits_lost) / math.log2(10))
 
 
 def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
@@ -1356,7 +1374,7 @@ def _run_case(
         "dd_line_confirmed": None,
         "dd_line_confirm_dev": None,
         "cancellation_locs": [],
-        "cancellation_severe": set(),
+        "cancellation_bits": {},
         "mca_dev": None,
         "mca_sigbits": None,
         "float_max_locs": [],
@@ -1493,13 +1511,15 @@ def _run_case(
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
             try:
-                locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir)
+                # sweep bit thresholds to get per-site severity (bits lost)
+                level_sites = [(level, _run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
+                locs = level_sites[0][1]  # lowest threshold = full list
+                bits = _cancellation_severity(level_sites)
                 result["cancellation_locs"] = locs
+                result["cancellation_bits"] = bits
                 if locs:
-                    # severity pass: which sites lose >= CANCEL_SEVERE_BITS bits
-                    severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS))
-                    result["cancellation_severe"] = severe
-                    cons.print(f"  cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)")
+                    worst = max(bits.values()) if bits else 0
+                    cons.print(f"  cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
                 else:
                     cons.print("  cancellation: none detected")
                 # cross-reference: label dd_line hotspots that sit on a cancellation site
@@ -1631,6 +1651,40 @@ def _emit_github_summary(results: list, n_samples: int):
         md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
     md.append("")
 
+    # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
+    # most severe (most bits lost). The numerically interesting signal; the
+    # sensitivity list further down is dominated by the (benign) time integrator.
+    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
+    if cases_with_cancel:
+        md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
+        md.append(
+            "> Subtraction of nearly-equal values loses leading significant digits. A double carries "
+            "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
+            "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only "
+            "single-precision trust. Site *count* is not severity — one site losing many digits outweighs "
+            "many mild ones.\n"
+        )
+        for r in cases_with_cancel:
+            site_bits = r.get("cancellation_bits") or {}
+            # collapse continuation fragments to one entry per logical statement,
+            # keeping the worst bits-lost seen on that statement
+            stmts = {}  # (basename, stmt_start) -> {where, bits, text}
+            for fname, lineno in r["cancellation_locs"]:
+                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
+                key = (os.path.basename(fname), stmt_start)
+                e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text})
+                e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0))
+            ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"]))
+            if ordered:
+                w = ordered[0]
+                md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
+            for e in ordered[:15]:
+                lost = e["bits"] / math.log2(10)
+                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
+            if len(ordered) > 15:
+                md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
+            md.append("")
+
     # VPREC sweep — one column per bit level, ❌ where bits retained < floor
     if any(r["vprec"] for r in results):
         _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
@@ -1660,11 +1714,12 @@ def _emit_github_summary(results: list, n_samples: int):
     # get re-rounded there. Not a culprit-finder for ill-conditioning.
     cases_with_locs = [r for r in results if r["dd_line_locs"]]
     if cases_with_locs:
-        md.append("### Single-precision sensitivity (dd\\_line)\n")
+        md.append("<details>")
+        md.append("<summary>Single-precision sensitivity (dd_line) — usually the time integrator; expand for details</summary>\n")
         md.append(
             "> Where reduced precision most moves the output — **typically the time integrator / "
-            "final accumulation, which is expected and benign**. This is *not* the same as where "
-            "cancellation originates; see **Catastrophic cancellation sites** below for that.\n"
+            "final accumulation, which is expected and benign**. This is *not* where cancellation "
+            "originates (that's the section above); it shows where precision matters most.\n"
         )
         _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
         for r in cases_with_locs:
@@ -1695,6 +1750,7 @@ def _emit_github_summary(results: list, n_samples: int):
             if len(r["dd_line_locs"]) > 10:
                 md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_")
             md.append("")
+        md.append("</details>\n")
 
     # dd_sym function names (collapsed, since less actionable than dd_line)
     cases_with_syms = [r for r in results if r["dd_sym_syms"]]
@@ -1707,37 +1763,6 @@ def _emit_github_summary(results: list, n_samples: int):
                 md.append(f"- `{sym}`")
         md.append("\n</details>\n")
 
-    # Cancellation hotspots — the ORIGIN view (where ill-conditioning concentrates).
-    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
-    if cases_with_cancel:
-        md.append("### Catastrophic cancellation sites\n")
-        md.append(
-            "> Where cancellation actually originates (subtraction of nearly-equal values). "
-            f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is "
-            "not severity — one severe site outweighs many mild ones, so the severe sites are listed "
-            "first. (Severe detection has no false positives but may under-count.)\n"
-        )
-        for r in cases_with_cancel:
-            severe = r.get("cancellation_severe") or set()
-            # collapse continuation fragments to one entry per logical statement,
-            # severe statements first (the ones that matter)
-            stmts = {}  # (basename, stmt_start) -> {where, severe, text}
-            for fname, lineno in sorted(r["cancellation_locs"]):
-                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
-                key = (os.path.basename(fname), stmt_start)
-                entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text})
-                if (fname, lineno) in severe:
-                    entry["severe"] = True
-            ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"]))
-            n_severe_stmt = sum(1 for e in ordered if e["severe"])
-            md.append(f"**`{r['name']}`** — {len(stmts)} statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n")
-            for e in ordered[:15]:
-                sev = " **severe**" if e["severe"] else ""
-                md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else ""))
-            if len(ordered) > 15:
-                md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
-            md.append("")
-
     # Float-max overflow sites
     cases_with_fmax = [r for r in results if r.get("float_max_locs")]
     if cases_with_fmax:
@@ -1844,7 +1869,7 @@ def fp_stability():
                 "dd_line_confirmed": None,
                 "dd_line_confirm_dev": None,
                 "cancellation_locs": [],
-                "cancellation_severe": set(),
+                "cancellation_bits": {},
                 "mca_dev": None,
                 "mca_sigbits": None,
                 "float_max_locs": [],
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 056193c519..e89694d19b 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -10,6 +10,7 @@
     MIN_SIG_BITS,
     _build_source_filter,
     _cancellation_by_file,
+    _cancellation_severity,
     _confirm_decision,
     _macro_context_in_lines,
     _mark_cancellation,
@@ -224,6 +225,23 @@ def test_cancellation_by_file_empty():
     assert _cancellation_by_file([]) == []
 
 
+# --- per-site cancellation severity (bits lost), from a threshold sweep ---
+
+
+def test_cancellation_severity_takes_highest_surviving_threshold():
+    level_sites = [
+        (10, [("a.fpp", 1), ("b.fpp", 2)]),
+        (20, [("a.fpp", 1)]),
+        (30, [("a.fpp", 1)]),
+    ]
+    # a.fpp:1 survives to 30 bits; b.fpp:2 only at 10
+    assert _cancellation_severity(level_sites) == {("a.fpp", 1): 30, ("b.fpp", 2): 10}
+
+
+def test_cancellation_severity_empty():
+    assert _cancellation_severity([]) == {}
+
+
 # --- scale-free pass/fail: significant bits retained ---
 
 

From d45bc5bb8819dadde8414c84cffc26ea4537809b Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 07:58:36 -0400
Subject: [PATCH 07/25] fp-stability: de-duplicate helpers from the review
 additions (no behavior change)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cleanup pass over the ~680 lines this branch added to fp_stability.py. Extracts shared helpers that had accreted across the six feature commits, with identical behavior (69 toolchain tests + ruff + precheck green; emitted console/summary text unchanged):

- _resolve_source / _read_source_lines: the 'abs-path-or-glob-under-src(-then-tree)' + readlines block was repeated in _read_source_line, _macro_context, _statement_at, _get_source_context. A search_whole_tree flag preserves the one difference (only _get_source_context fell back to the whole tree).

- _blank_result(name): the 15-field result dict was written verbatim twice. _find_dd_tool(verrou_bin, tool): merges _find_dd_sym/_find_dd_line. _setup_dd_run: shared dd_run.sh/dd_cmp.py setup + threshold-default for dd_sym and dd_line. _capture_gen_source: shared nearest --gen-source capture for confirmation and disambiguation. _more_md: the '…and N more' truncation footer used in three summary sections.
---
 toolchain/mfc/fp_stability.py | 244 ++++++++++++++++------------------
 1 file changed, 113 insertions(+), 131 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index d826ca56c9..961e48d001 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -159,20 +159,34 @@ def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
 )
 
 
-def _read_source_line(fname: str, lineno: int) -> str:
-    """Return the raw source line at lineno (1-based), or '' if unavailable."""
+def _resolve_source(fname: str, search_whole_tree: bool = False) -> str:
+    """Resolve a (possibly bare) source filename to an existing path, or '' if not
+    found.  An absolute existing path is used as-is; otherwise the basename is
+    located recursively under src/ (then the whole tree if `search_whole_tree`)."""
     if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return ""
+        return fname
+    candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
+    if not candidates and search_whole_tree:
+        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
+    return candidates[0] if candidates else ""
+
+
+def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list:
+    """Resolve `fname` and return its lines (with newlines), or [] if unreadable."""
+    path = _resolve_source(fname, search_whole_tree)
+    if not path:
+        return []
     try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-        return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
+        with open(path) as fh:
+            return fh.readlines()
     except OSError:
-        return ""
+        return []
+
+
+def _read_source_line(fname: str, lineno: int) -> str:
+    """Return the raw source line at lineno (1-based), or '' if unavailable."""
+    lines = _read_source_lines(fname)
+    return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
 
 
 def _macro_context_in_lines(lines: list, lineno: int) -> str:
@@ -199,16 +213,8 @@ def _macro_context_in_lines(lines: list, lineno: int) -> str:
 
 def _macro_context(fname: str, lineno: int) -> str:
     """File-backed wrapper around _macro_context_in_lines; '' path safe."""
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return None
-    try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-    except OSError:
+    lines = _read_source_lines(fname)
+    if not lines:
         return None
     return _macro_context_in_lines(lines, lineno)
 
@@ -241,17 +247,7 @@ def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
 def _statement_at(fname: str, lineno: int) -> tuple:
     """File-backed (start, end, text) for the logical statement at fname:lineno;
     text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return lineno, lineno, ""
-    try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-    except OSError:
-        return lineno, lineno, ""
+    lines = _read_source_lines(fname)
     if not 0 < lineno <= len(lines):
         return lineno, lineno, ""
     start, end = _statement_bounds_in_lines(lines, lineno)
@@ -283,18 +279,8 @@ def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
     fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path.
     Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree.
     """
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        candidates = [fname]
-    else:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-        if not candidates:
-            candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
-    if not candidates:
-        return ""
-    try:
-        with open(candidates[0]) as fh:
-            lines = fh.readlines()
-    except OSError:
+    lines = _read_source_lines(fname, search_whole_tree=True)
+    if not lines:
         return ""
     start = max(0, lineno - context - 1)
     end = min(len(lines), lineno + context)
@@ -589,13 +575,10 @@ def _find_binary(name: str) -> str:
     return max(candidates, key=os.path.getmtime) if candidates else ""
 
 
-def _find_dd_sym(verrou_bin: str) -> str:
-    c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_sym")
-    return c if os.path.isfile(c) else ""
-
-
-def _find_dd_line(verrou_bin: str) -> str:
-    c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_line")
+def _find_dd_tool(verrou_bin: str, tool: str) -> str:
+    """Path to a verrou_dd_* tool (e.g. 'verrou_dd_sym') next to the verrou binary,
+    or '' if absent."""
+    c = os.path.join(os.path.dirname(verrou_bin), tool)
     return c if os.path.isfile(c) else ""
 
 
@@ -1152,19 +1135,26 @@ def _run_dd_tool(
     return summary_lines
 
 
+def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float):
+    """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their
+    paths.  The threshold falls back to _DD_FALLBACK_THRESHOLD when unset."""
+    os.makedirs(dd_dir, exist_ok=True)
+    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
+    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
+    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
+    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
+    return dd_run_sh, dd_cmp_py
+
+
 def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list:
     """Run verrou_dd_sym; return list of responsible symbol names."""
-    dd_bin = _find_dd_sym(verrou_bin)
+    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym")
     if not dd_bin:
         cons.print("  [dim]verrou_dd_sym not found; skipping delta-debug[/dim]")
         return []
 
     dd_dir = os.path.join(log_dir, case["name"])
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
+    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
     _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
     cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
     return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
@@ -1179,18 +1169,13 @@ def _run_dd_line(
     threshold: float = None,
 ) -> list:
     """Run verrou_dd_line; return [{path, start, end, macro}] location dicts."""
-    dd_bin = _find_dd_line(verrou_bin)
+    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line")
     if not dd_bin:
         cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
         return []
 
     dd_dir = os.path.join(log_dir, case["name"])
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    effective_threshold = threshold if threshold is not None else _DD_FALLBACK_THRESHOLD
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold)
+    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
     _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
     return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
 
@@ -1217,6 +1202,26 @@ def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_li
     return _max_diff_np(ref_dir, run_dir, compare)
 
 
+def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path):
+    """Run nearest-rounding with --gen-source to capture the symbol-correct
+    executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure."""
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            run_dir,
+            rounding_mode="nearest",
+            extra_flags=[f"--gen-source={gen_path}"],
+        )
+    except MFCException:
+        return None
+    if not os.path.isfile(gen_path):
+        return None
+    with open(gen_path) as fh:
+        return fh.readlines()
+
+
 def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy):
     """Positive control for dd_line: perturb ONLY the suspect lines and confirm
     the instability reproduces, then rank each line by its individual share.
@@ -1237,22 +1242,9 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs
         return None, None, dd_line_locs
     conf_dir = os.path.join(work_dir, "confirm")
     os.makedirs(conf_dir, exist_ok=True)
-    gen_path = os.path.join(conf_dir, "gen_source.txt")
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            conf_dir,
-            rounding_mode="nearest",
-            extra_flags=[f"--gen-source={gen_path}"],
-        )
-    except MFCException:
+    gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt"))
+    if gen_lines is None:
         return None, None, dd_line_locs
-    if not os.path.isfile(gen_path):
-        return None, None, dd_line_locs
-    with open(gen_path) as fh:
-        gen_lines = fh.readlines()
     compare = case["compare"]
 
     # whole-set positive control
@@ -1298,23 +1290,13 @@ def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_fi
     prec_dir = os.path.join(work_dir, "precision")
     ref_dir = os.path.join(prec_dir, "ref")
     os.makedirs(ref_dir, exist_ok=True)
-    gen_path = os.path.join(prec_dir, "gen_source.txt")
     try:
         _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest")
-        _run_simulation_verrou(
-            verrou_bin,
-            prec_sim_bin,
-            work_dir,
-            prec_dir,
-            rounding_mode="nearest",
-            extra_flags=[f"--gen-source={gen_path}"],
-        )
     except MFCException:
         return []
-    if not os.path.isfile(gen_path):
+    gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt"))
+    if gen_lines is None:
         return []
-    with open(gen_path) as fh:
-        gen_lines = fh.readlines()
 
     f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90")
     compare = case["compare"]
@@ -1336,6 +1318,27 @@ def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_fi
     return results
 
 
+def _blank_result(name: str) -> dict:
+    """A result dict with every field at its empty/unmeasured default."""
+    return {
+        "name": name,
+        "passed": False,
+        "max_dev": float("inf"),
+        "sig_bits": None,
+        "float_proxy": None,
+        "vprec": [],
+        "dd_sym_syms": [],
+        "dd_line_locs": [],
+        "dd_line_confirmed": None,
+        "dd_line_confirm_dev": None,
+        "cancellation_locs": [],
+        "cancellation_bits": {},
+        "mca_dev": None,
+        "mca_sigbits": None,
+        "float_max_locs": [],
+    }
+
+
 def _run_case(
     case: dict,
     verrou_bin: str,
@@ -1362,23 +1365,7 @@ def _run_case(
     cons.print(f"  pass floor: >= {MIN_SIG_BITS} significant bits retained")
 
     work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-")
-    result = {
-        "name": name,
-        "passed": False,
-        "max_dev": float("inf"),
-        "sig_bits": None,
-        "float_proxy": None,
-        "vprec": [],
-        "dd_sym_syms": [],
-        "dd_line_locs": [],
-        "dd_line_confirmed": None,
-        "dd_line_confirm_dev": None,
-        "cancellation_locs": [],
-        "cancellation_bits": {},
-        "mca_dev": None,
-        "mca_sigbits": None,
-        "float_max_locs": [],
-    }
+    result = _blank_result(name)
     try:
         cons.print("  [dim]running pre_process...[/dim]")
         _write_inp(case["sim"], "simulation", work_dir)
@@ -1615,6 +1602,14 @@ def _emit_github_annotations(results: list):
             print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True)
 
 
+def _more_md(total: int, shown: int, noun: str) -> str:
+    """Markdown bullet noting `total - shown` further items elided from a list,
+    or '' when nothing was truncated."""
+    if total <= shown:
+        return ""
+    return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_"
+
+
 def _emit_github_summary(results: list, n_samples: int):
     """Write a markdown results table to GITHUB_STEP_SUMMARY.
 
@@ -1681,8 +1676,9 @@ def _emit_github_summary(results: list, n_samples: int):
             for e in ordered[:15]:
                 lost = e["bits"] / math.log2(10)
                 md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
-            if len(ordered) > 15:
-                md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
+            footer = _more_md(len(ordered), 15, "statement(s)")
+            if footer:
+                md.append(footer)
             md.append("")
 
     # VPREC sweep — one column per bit level, ❌ where bits retained < floor
@@ -1747,8 +1743,9 @@ def _emit_github_summary(results: list, n_samples: int):
                     for line in snippet.splitlines():
                         md.append(f"  {line}")
                     md.append("  ```")
-            if len(r["dd_line_locs"]) > 10:
-                md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_")
+            footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)")
+            if footer:
+                md.append(footer)
             md.append("")
         md.append("</details>\n")
 
@@ -1771,8 +1768,9 @@ def _emit_github_summary(results: list, n_samples: int):
             md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
             for fname, lineno in r["float_max_locs"][:10]:
                 md.append(f"- `{fname}:{lineno}`")
-            if len(r["float_max_locs"]) > 10:
-                md.append(f"- _…and {len(r['float_max_locs']) - 10} more site(s); see fp-stability-logs/_")
+            footer = _more_md(len(r["float_max_locs"]), 10, "site(s)")
+            if footer:
+                md.append(footer)
             md.append("")
 
     with open(summary_path, "a") as f:
@@ -1857,23 +1855,7 @@ def fp_stability():
             )
         except MFCException as exc:
             cons.print(f"  [bold red]ERROR[/bold red]: {exc}")
-            r = {
-                "name": case["name"],
-                "passed": False,
-                "max_dev": float("inf"),
-                "sig_bits": None,
-                "float_proxy": None,
-                "vprec": [],
-                "dd_sym_syms": [],
-                "dd_line_locs": [],
-                "dd_line_confirmed": None,
-                "dd_line_confirm_dev": None,
-                "cancellation_locs": [],
-                "cancellation_bits": {},
-                "mca_dev": None,
-                "mca_sigbits": None,
-                "float_max_locs": [],
-            }
+            r = _blank_result(case["name"])
         results.append(r)
 
     elapsed = time.time() - start

From 9f868c742679a0301f0b3c6e12ddd31073820dbb Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 09:03:26 -0400
Subject: [PATCH 08/25] fp-stability: split the 1876-line module into
 metrics/runners/report (no behavior change)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PR had grown fp_stability.py well past the 1000-line soft guideline. This splits it into a clean dependency chain by pure relocation — apart from the unused-parameter removal noted below, no function body, string, constant, or logic changed:

- fp_stability_metrics.py (474, leaf): regexes/constants + pure parsing, source-reading, sig-bits, cancellation, ranking, statement-bounds helpers. Imports no sibling.

- fp_stability_runners.py (530): Verrou subprocess orchestration (run/dd/vprec/cancellation/confirmation/disambiguation). Imports metrics.

- fp_stability_report.py (244): GitHub summary + annotation emitters. Imports metrics.

- fp_stability.py (715): CLI entry, CASES, _run_case, _blank_result; imports explicitly from the three. No import cycles.

Also dropped the unused 'case' parameter from _run_cancellation_check/_run_float_max_check. Verified: 69 toolchain tests, ruff (incl. F-rules: no undefined names), precheck all 7, and a live fp-stability run confirming the cross-module orchestration (sig-bits pass/fail + cancellation sweep) is unchanged. Test import repointed to fp_stability_metrics.
---
 toolchain/mfc/fp_stability.py         | 1223 +------------------------
 toolchain/mfc/fp_stability_metrics.py |  474 ++++++++++
 toolchain/mfc/fp_stability_report.py  |  244 +++++
 toolchain/mfc/fp_stability_runners.py |  530 +++++++++++
 toolchain/mfc/test_fp_stability.py    |    2 +-
 5 files changed, 1280 insertions(+), 1193 deletions(-)
 create mode 100644 toolchain/mfc/fp_stability_metrics.py
 create mode 100644 toolchain/mfc/fp_stability_report.py
 create mode 100644 toolchain/mfc/fp_stability_runners.py

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 961e48d001..0579502910 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -73,223 +73,46 @@
   ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH
 """
 
-import glob
 import math
 import os
-import re
 import shutil
-import stat
-import subprocess
 import sys
 import tempfile
-import textwrap
 import time
 
 from .common import MFC_ROOT_DIR, MFCException
+from .fp_stability_metrics import (
+    CANCEL_BIT_LEVELS,
+    MIN_SIG_BITS,
+    _cancellation_severity,
+    _mark_cancellation,
+    _max_abs_np,
+    _max_diff_np,
+    _sig_bits,
+)
+from .fp_stability_report import (
+    _emit_github_annotations,
+    _emit_github_summary,
+)
+from .fp_stability_runners import (
+    _disambiguate_instances,
+    _find_binary,
+    _find_verrou,
+    _run_cancellation_check,
+    _run_confirmation,
+    _run_dd_line,
+    _run_dd_sym,
+    _run_float_max_check,
+    _run_float_proxy,
+    _run_mca_samples,
+    _run_preprocess,
+    _run_simulation_verrou,
+    _run_vprec_sweep,
+    _write_inp,
+)
 from .printer import cons
 from .state import ARG
 
-# Mantissa-bit levels for the VPREC sweep (C).
-# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
-VPREC_MANTISSA_BITS = [52, 23, 16, 10]
-
-# Stability pass/fail (stage A) is scale-free: a case must retain at least this
-# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
-# 24 ~= single precision. One global floor replaces per-case absolute thresholds
-# (which spanned 6 orders of magnitude purely from field scale + conditioning);
-# normalising by the field scale collapses that, so a single number suffices.
-MIN_SIG_BITS = 24
-
-# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy-
-# derived threshold is supplied (callers always pass one, so this is only a guard).
-_DD_FALLBACK_THRESHOLD = 1e-12
-
-
-def _sig_bits(max_dev: float, ref_scale: float) -> float:
-    """Significant bits retained = -log2(max_dev / ref_scale).
-
-    Scale-free: dividing the deviation by the field's peak magnitude removes the
-    absolute scale, leaving only the conditioning.  Zero deviation (or zero
-    scale) returns 53.0 = full double precision retained.
-    """
-    if not (max_dev > 0) or not (ref_scale > 0):
-        return 53.0
-    return -math.log2(max_dev / ref_scale)
-
-
-def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
-    """A case passes when it retains at least `floor` significant bits."""
-    return _sig_bits(max_dev, ref_scale) >= floor
-
-
-# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
-_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
-
-# Files to exclude from cancellation / float-max reports (runtime loaders, XALT).
-_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")
-
-# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)".
-_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")
-
-# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def
-# defines a macro instantiated at multiple call sites) collapse many distinct
-# generated computations onto a single .fpp source line, so a dd_line hit inside
-# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select
-# code but do not duplicate it, so they are tracked for balance but not flagged.
-_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
-_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
-_FYPP_DUPLICATING = ("for", "def", "block", "call")
-
-# Lines that are clearly control-flow delimiters rather than arithmetic.
-# dd_line sometimes reports these when the responsible arithmetic is on the
-# preceding line but shares DWARF debug info with the delimiter (e.g. loop
-# boundaries in #:for-expanded code, or inlined functions at call sites).
-_CONTROL_FLOW_RE = re.compile(
-    r"^\s*("
-    r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b"
-    r"|do\s+\w+\s*=\s*[\w,\s]+"  # naked do-loop header (no arithmetic)
-    r"|else(\s+if\s*\(.*\)\s*then)?\s*$"  # else / else if (...) then
-    r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+"  # subroutine declaration
-    r"|\$:END_GPU\w+"  # fypp GPU macro closers
-    r"|#:end\w*"  # fypp directive closers (#:endfor, #:enddef, etc.)
-    r"|\s*!\s*$"  # comment-only lines
-    r"|\s*$"  # blank lines
-    r")",
-    re.IGNORECASE,
-)
-
-
-def _resolve_source(fname: str, search_whole_tree: bool = False) -> str:
-    """Resolve a (possibly bare) source filename to an existing path, or '' if not
-    found.  An absolute existing path is used as-is; otherwise the basename is
-    located recursively under src/ (then the whole tree if `search_whole_tree`)."""
-    if os.path.isabs(fname) and os.path.isfile(fname):
-        return fname
-    candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates and search_whole_tree:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
-    return candidates[0] if candidates else ""
-
-
-def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list:
-    """Resolve `fname` and return its lines (with newlines), or [] if unreadable."""
-    path = _resolve_source(fname, search_whole_tree)
-    if not path:
-        return []
-    try:
-        with open(path) as fh:
-            return fh.readlines()
-    except OSError:
-        return []
-
-
-def _read_source_line(fname: str, lineno: int) -> str:
-    """Return the raw source line at lineno (1-based), or '' if unavailable."""
-    lines = _read_source_lines(fname)
-    return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
-
-
-def _macro_context_in_lines(lines: list, lineno: int) -> str:
-    """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that
-    encloses `lineno` (1-based) in `lines`, or None if none does.
-
-    Used to flag dd_line hotspots whose .fpp line is shared across multiple
-    expanded instances (a #:for body, a #:def macro used in many places), where
-    line-level attribution cannot identify which instance is responsible.
-    """
-    stack = []
-    for raw in lines[: max(0, lineno - 1)]:
-        mo = _FYPP_BLOCK_OPEN.match(raw)
-        if mo:
-            stack.append(mo.group(1).lower())
-            continue
-        if _FYPP_BLOCK_CLOSE.match(raw) and stack:
-            stack.pop()
-    for kw in reversed(stack):
-        if kw in _FYPP_DUPLICATING:
-            return f"#:{kw}"
-    return None
-
-
-def _macro_context(fname: str, lineno: int) -> str:
-    """File-backed wrapper around _macro_context_in_lines; '' path safe."""
-    lines = _read_source_lines(fname)
-    if not lines:
-        return None
-    return _macro_context_in_lines(lines, lineno)
-
-
-def _ends_with_continuation(line: str) -> bool:
-    """True if a free-form Fortran line ends with a continuation '&' (the last
-    non-blank token before any trailing comment)."""
-    code = line.split("!", 1)[0].rstrip()  # drop trailing comment (string-'!' is rare; fine here)
-    return code.endswith("&")
-
-
-def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
-    """Return the (start, end) 1-based physical line range of the Fortran logical
-    statement containing lineno, following '&' continuations in both directions.
-
-    A hit reported on a continuation fragment thus resolves to the whole
-    statement, so the labelled location is the full expression rather than a
-    mid-statement piece.
-    """
-    n = len(lines)
-    start = lineno
-    while start > 1 and _ends_with_continuation(lines[start - 2]):
-        start -= 1
-    end = lineno
-    while end < n and _ends_with_continuation(lines[end - 1]):
-        end += 1
-    return start, end
-
-
-def _statement_at(fname: str, lineno: int) -> tuple:
-    """File-backed (start, end, text) for the logical statement at fname:lineno;
-    text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
-    lines = _read_source_lines(fname)
-    if not 0 < lineno <= len(lines):
-        return lineno, lineno, ""
-    start, end = _statement_bounds_in_lines(lines, lineno)
-    # join physical lines, dropping the continuation '&' that may lead or trail each
-    text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end])
-    return start, end, text
-
-
-def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
-    """Return True if any line in [start, end] contains non-trivial arithmetic.
-
-    Filters out loop delimiters and fypp directive lines that dd_line sometimes
-    reports when the responsible arithmetic shares DWARF info with its enclosing
-    control-flow boundary (inlining, #:for template expansion, etc.).
-    Returns True (keep) when uncertain so we never silently drop real hotspots.
-    """
-    for lineno in range(start, end + 1):
-        line = _read_source_line(fname, lineno)
-        if not line:
-            return True  # can't read — keep to be safe
-        if not _CONTROL_FLOW_RE.match(line):
-            return True
-    return False
-
-
-def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
-    """Return a annotated source snippet around lineno, or '' if file not found.
-
-    fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path.
-    Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree.
-    """
-    lines = _read_source_lines(fname, search_whole_tree=True)
-    if not lines:
-        return ""
-    start = max(0, lineno - context - 1)
-    end = min(len(lines), lineno + context)
-    rows = []
-    for i, line in enumerate(lines[start:end], start=start + 1):
-        marker = ">" if i == lineno else " "
-        rows.append(f"{marker}{i:5d} | {line.rstrip()}")
-    return "\n".join(rows)
-
 
 def _merge(*dicts):
     """Merge dicts left-to-right; later entries override earlier ones."""
@@ -561,763 +384,6 @@ def _merge(*dicts):
 ]
 
 
-def _find_verrou() -> str:
-    verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou"))
-    candidate = os.path.join(verrou_home, "bin", "valgrind")
-    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
-        return candidate
-    return shutil.which("valgrind") or ""
-
-
-def _find_binary(name: str) -> str:
-    install_dir = os.path.join(MFC_ROOT_DIR, "build", "install")
-    candidates = glob.glob(os.path.join(install_dir, "*", "bin", name))
-    return max(candidates, key=os.path.getmtime) if candidates else ""
-
-
-def _find_dd_tool(verrou_bin: str, tool: str) -> str:
-    """Path to a verrou_dd_* tool (e.g. 'verrou_dd_sym') next to the verrou binary,
-    or '' if absent."""
-    c = os.path.join(os.path.dirname(verrou_bin), tool)
-    return c if os.path.isfile(c) else ""
-
-
-def _verrou_pythonpath(verrou_bin: str) -> str:
-    """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir)."""
-    verrou_home = os.path.dirname(os.path.dirname(verrou_bin))
-    matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind"))
-    return matches[0] if matches else ""
-
-
-def _write_inp(params: dict, target_name: str, work_dir: str) -> None:
-    """Write a Fortran namelist .inp file from a Python params dict."""
-    from .run import case_dicts
-
-    master_keys = case_dicts.get_input_dict_keys(target_name)
-    lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys]
-    with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh:
-        fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n")
-
-
-def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str):
-    _write_inp(pre_params, "pre_process", work_dir)
-    with open(os.path.join(work_dir, "pre.log"), "w") as f:
-        result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False)
-    if result.returncode != 0:
-        raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log")
-
-
-def _run_simulation_verrou(
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    run_dir: str,
-    rounding_mode: str = None,
-    extra_flags: list = None,
-):
-    """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output.
-
-    rounding_mode is passed as --rounding-mode=<mode> when not None.
-    extra_flags are appended before the binary (e.g. --backend=vprec ...).
-    """
-    with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir:
-        for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]:
-            src = os.path.join(work_dir, fname)
-            if os.path.exists(src):
-                shutil.copy2(src, tmpdir)
-        shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all"))
-        os.makedirs(os.path.join(tmpdir, "D"))
-
-        log_path = os.path.join(run_dir, "verrou.log")
-        cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"]
-        if rounding_mode:
-            cmd.append(f"--rounding-mode={rounding_mode}")
-        cmd.extend(extra_flags or [])
-        cmd.append(sim_bin)
-
-        with open(os.path.join(run_dir, "sim.out"), "w") as f:
-            result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False)
-
-        if result.returncode != 0:
-            tag = rounding_mode or "vprec"
-            raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out")
-
-        os.makedirs(run_dir, exist_ok=True)
-        for fn in os.listdir(os.path.join(tmpdir, "D")):
-            shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir)
-
-
-def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
-    import numpy as np
-
-    total = 0.0
-    for fname in compare_files:
-        ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
-        if not os.path.exists(ref_p) or not os.path.exists(run_p):
-            return float("inf")
-        ref = np.loadtxt(ref_p)[:, 1]
-        run = np.loadtxt(run_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref - run))))
-    return total
-
-
-def _max_abs_np(ref_dir: str, compare_files: list) -> float:
-    """Return the maximum absolute value across all reference output files."""
-    import numpy as np
-
-    total = 0.0
-    for fname in compare_files:
-        ref_p = os.path.join(ref_dir, fname)
-        if not os.path.exists(ref_p):
-            continue
-        ref = np.loadtxt(ref_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref))))
-    return total
-
-
-def _parse_cancel_gen(gen_path: str) -> list:
-    """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources."""
-    if not os.path.isfile(gen_path):
-        return []
-    locs = []
-    seen = set()
-    with open(gen_path) as fh:
-        for raw in fh:
-            parts = raw.rstrip("\n").split("\t")
-            if len(parts) < 2:
-                continue
-            fname = parts[0].strip()
-            if any(ext in fname for ext in _EXTERNAL_SRCS):
-                continue
-            if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")):
-                continue
-            try:
-                lineno = int(parts[1].strip())
-            except ValueError:
-                continue
-            key = (fname, lineno)
-            if key not in seen:
-                seen.add(key)
-                locs.append(key)
-    return locs
-
-
-def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
-    """Extract first MFC-source frame from each Valgrind error matching error_keyword."""
-    if not os.path.isfile(log_path):
-        return []
-    locs = []
-    seen = set()
-    in_error = False
-    with open(log_path) as fh:
-        for raw in fh:
-            line = re.sub(r"^==\d+== ?", "", raw)
-            if error_keyword in line:
-                in_error = True
-                continue
-            if in_error:
-                if "   at " in line or "   by " in line:
-                    m = _VGFRAME_RE.search(line)
-                    if m:
-                        fname = m.group(1)
-                        if any(ext in fname for ext in _EXTERNAL_SRCS):
-                            continue
-                        lineno = int(m.group(2))
-                        key = (fname, lineno)
-                        if key not in seen:
-                            seen.add(key)
-                            locs.append(key)
-                        in_error = False
-                elif line.strip() == "":
-                    in_error = False
-    return locs
-
-
-# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
-# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
-# levels and taking the highest each site survives gives a per-site "bits lost"
-# severity (a lower bound — no false positives). 48 ~ full double mantissa.
-CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
-
-
-def _cancellation_severity(level_sites: list) -> dict:
-    """Given [(threshold, [sites])], return {site: highest threshold it survives}
-    = the per-site bits-lost severity (a lower bound)."""
-    sev = {}
-    for level, sites in level_sites:
-        for site in sites:
-            if level > sev.get(site, 0):
-                sev[site] = level
-    return sev
-
-
-def _digits_left(bits_lost: float) -> float:
-    """Approximate trustworthy decimal digits remaining after losing `bits_lost`
-    bits of a double's 53-bit mantissa (~15.95 digits full)."""
-    return max(0.0, (53 - bits_lost) / math.log2(10))
-
-
-def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
-    """Run --check-cancellation at the given bit threshold; return [(fname, line)]
-    of MFC cancellation sites (subtractions losing >= `threshold` significant bits)."""
-    tag = f"cancellation_{threshold}"
-    run_dir = os.path.join(work_dir, tag)
-    os.makedirs(run_dir, exist_ok=True)
-    gen_path = os.path.join(run_dir, "cancel_gen.txt")
-    flags = [
-        "--check-cancellation=yes",
-        f"--cc-threshold-double={threshold}",
-        f"--cc-gen-file={gen_path}",
-    ]
-    try:
-        _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags)
-    except MFCException:
-        pass
-    raw = _parse_cancel_gen(gen_path)
-    filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
-    skipped = len(raw) - len(filtered)
-    if skipped and threshold == 10:
-        cons.print(f"  [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
-    return filtered
-
-
-def _run_mca_samples(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    ref_dir: str,
-    n_mca: int,
-) -> tuple:
-    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound)."""
-    compare = case["compare"]
-    ref_scale = _max_abs_np(ref_dir, compare)
-    max_dev = 0.0
-    flags = ["--backend=mcaquad", "--mca-mode=mca"]
-    for i in range(n_mca):
-        run_dir = os.path.join(work_dir, f"mca_{i:02d}")
-        os.makedirs(run_dir, exist_ok=True)
-        try:
-            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
-            max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
-        except MFCException:
-            pass
-    sig_bits = None
-    if max_dev > 0.0 and ref_scale > 0.0:
-        sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale))))
-    return max_dev, sig_bits
-
-
-def _run_float_max_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list:
-    """Run with --check-max-float=yes; return [(fname, line)] of overflow sites."""
-    run_dir = os.path.join(work_dir, "float_max")
-    os.makedirs(run_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="nearest",
-            extra_flags=["--check-max-float=yes"],
-        )
-    except MFCException:
-        pass
-    return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float")
-
-
-def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
-    """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref."""
-    run_dir = os.path.join(work_dir, "float_proxy")
-    os.makedirs(run_dir)
-    _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float")
-    return _max_diff_np(ref_dir, run_dir, case["compare"])
-
-
-def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list:
-    """Run at each mantissa-bit level. Returns [(bits, dev), ...]."""
-    results = []
-    for bits in VPREC_MANTISSA_BITS:
-        run_dir = os.path.join(work_dir, f"vprec_{bits}")
-        os.makedirs(run_dir)
-        flags = [
-            "--backend=vprec",
-            "--vprec-mode=full",
-            f"--vprec-precision-binary64={bits}",
-            "--vprec-range-binary64=11",
-        ]
-        try:
-            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
-            dev = _max_diff_np(ref_dir, run_dir, case["compare"])
-        except MFCException:
-            dev = float("inf")
-        results.append((bits, dev))
-    return results
-
-
-def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str):
-    """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via
-    VERROU_EXCLUDE / VERROU_SOURCE environment variables.  For test runs, we use
-    --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices).
-    For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the
-    environment — we honour that so the reference is a stable nearest-rounding baseline
-    to compare against.  CLI --rounding-mode would override the env var and break the
-    reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env bash
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        VERROU_BIN={verrou_bin!r}
-        SIM_BIN={sim_bin!r}
-        IC_DIR={ic_dir!r}
-
-        RUNDIR="$1"
-        TMPDIR_RUN=$(mktemp -d)
-        trap 'rm -rf "$TMPDIR_RUN"' EXIT
-
-        cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all"
-        cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp"
-        for fname in indices.dat pre_time_data.dat io_time_data.dat; do
-            [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/"
-        done
-        mkdir -p "$TMPDIR_RUN/D"
-
-        # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and
-        # leaves it unset for test runs.  Defaulting to float gives deterministic
-        # test steps while letting the reference use nearest-rounding.
-        ROUND="${{VERROU_ROUNDING_MODE:-float}}"
-
-        # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation).
-        # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to).
-        # Forward them as valgrind flags when set.
-        EXTRA=""
-        [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE"
-        [ -n "${{VERROU_SOURCE:-}}" ]  && EXTRA="$EXTRA --source=$VERROU_SOURCE"
-
-        cd "$TMPDIR_RUN"
-        "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN"
-        rc=$?
-
-        [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/"
-        exit $rc
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
-    """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR
-    Exits 0 (stable) or 1 (unstable) based on threshold.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env python3
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        import sys, os, numpy as np
-
-        COMPARE_FILES = {compare_files!r}
-        THRESHOLD = {threshold!r}
-
-        ref_dir, run_dir = sys.argv[1], sys.argv[2]
-        max_dev = 0.0
-        for fname in COMPARE_FILES:
-            ref_p = os.path.join(ref_dir, fname)
-            run_p = os.path.join(run_dir, fname)
-            if not os.path.exists(ref_p) or not os.path.exists(run_p):
-                print(f"MISSING: {{fname}}")
-                sys.exit(1)
-            ref = np.loadtxt(ref_p)[:, 1]
-            run = np.loadtxt(run_p)[:, 1]
-            dev = float(np.max(np.abs(ref - run)))
-            max_dev = max(max_dev, dev)
-
-        print(f"max_dev={{max_dev:.3e}}  threshold={{THRESHOLD:.0e}}")
-        sys.exit(0 if max_dev <= THRESHOLD else 1)
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _dd_env(verrou_bin: str) -> dict:
-    """Environment with PYTHONPATH set for verrou_dd_* imports."""
-    py_pkg = _verrou_pythonpath(verrou_bin)
-    env = os.environ.copy()
-    if py_pkg:
-        existing = env.get("PYTHONPATH", "")
-        env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing]))
-    return env
-
-
-def _parse_rddmin_locs(summary_path: str) -> list:
-    """Extract dd_line locations from an rddmin_summary as
-    [{path, start, end, macro}] dicts (path is repo-relative; macro is the
-    enclosing fypp duplicating block, e.g. '#:for', or None).
-
-    Filters out locations whose source lines are pure control-flow delimiters
-    (loop boundaries, fypp directive closers, blank/comment lines).  These can
-    appear when the responsible arithmetic shares DWARF debug info with an
-    enclosing boundary due to inlining or #:for template expansion.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    locs = []
-    skipped = []
-    with open(summary_path) as fh:
-        for line in fh:
-            m = _LOC_RE.search(line)
-            if not m:
-                continue
-            path = m.group(1)
-            start = int(m.group(2))
-            end = int(m.group(3)) if m.group(3) else start
-            try:
-                rel = os.path.relpath(path, MFC_ROOT_DIR)
-                if rel.startswith(".."):
-                    rel = path
-            except ValueError:
-                rel = path
-            rel = rel.replace("\\", "/")
-            if _is_arithmetic_loc(path, start, end):
-                locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)})
-            else:
-                skipped.append((rel, start, end))
-    for rel, start, end in skipped:
-        loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}"
-        cons.print(f"  [dim]dd_line: skipped control-flow boundary {loc}[/dim]")
-    return locs
-
-
-def _parse_rddmin_syms(summary_path: str) -> list:
-    """Extract symbol/function names from a dd_sym rddmin_summary.
-
-    rddmin_summary format:
-      ddmin0:\\tFail Ratio: ...\\tFail indexes: ...
-      \\t<funcname>\\t<binary_path>
-      ddmin1:\\t...
-      \\t<funcname>\\t<binary_path>
-
-    Lines starting with 'ddmin' are metadata; function names are on the
-    indented (tab-prefixed) lines as the first tab-delimited field.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    syms = []
-    with open(summary_path) as fh:
-        for ln in fh:
-            stripped = ln.strip()
-            if not stripped or stripped.startswith("ddmin"):
-                continue
-            sym = stripped.split("\t")[0].strip()
-            if sym:
-                syms.append(sym)
-    return syms
-
-
-def _build_source_filter(gen_lines: list, suspect_locs: list) -> list:
-    """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a
-    suspect dd_line location.
-
-    gen_lines come from a --gen-source run and carry the exact symbol Verrou
-    requires (--source matches on file+line+symbol, not file+line alone).
-    suspect_locs are (path, start, end) tuples whose path may be a repo-relative
-    path while gen-source emits a basename, so matching is by basename + line.
-    """
-    ranges = {}
-    for path, start, end in suspect_locs:
-        ranges.setdefault(os.path.basename(path), []).append((start, end))
-    out = []
-    for raw in gen_lines:
-        parts = raw.rstrip("\n").split("\t")
-        if len(parts) < 2:
-            continue
-        base = os.path.basename(parts[0].strip())
-        try:
-            ln = int(parts[1].strip())
-        except ValueError:
-            continue
-        if any(s <= ln <= e for s, e in ranges.get(base, [])):
-            out.append(raw if raw.endswith("\n") else raw + "\n")
-    return out
-
-
-def _confirm_decision(suspect_dev, dd_threshold: float):
-    """Decide whether perturbing only the suspect lines reproduces the instability.
-
-    Returns True (confirmed), False (suspect lines are inert -> attribution
-    suspect, e.g. macro-collapse misattribution), or None if unmeasured.
-    """
-    if suspect_dev is None:
-        return None
-    return suspect_dev >= dd_threshold
-
-
-def _rank_locs(locs: list, total: float) -> list:
-    """Attach a 'share' (per-line deviation / total) to each loc dict — which
-    must already carry 'share_dev' from a single-line positive control — and
-    return the locs sorted by that deviation, most flagrant first.
-
-    'total' is normally float_proxy, so share is the fraction of the full
-    single-precision deviation that perturbing that one line alone reproduces.
-    A non-positive total yields share=None (cannot normalize).
-    """
-    for loc in locs:
-        dev = loc.get("share_dev")
-        loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None
-    return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True)
-
-
-def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
-    """Set loc['cancellation']=True for each dd_line loc whose line range covers a
-    catastrophic-cancellation site (stage F), matched by basename + line.
-
-    This pins the flagrant operation on a multi-op line to the subtraction that
-    cancels, rather than just naming the line.
-    """
-    by_base = {}
-    for fname, lineno in cancellation_locs:
-        by_base.setdefault(os.path.basename(fname), set()).add(lineno)
-    for loc in dd_line_locs:
-        lines = by_base.get(os.path.basename(loc["path"]), set())
-        loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1))
-    return dd_line_locs
-
-
-def _cancellation_by_file(cancellation_locs: list) -> list:
-    """Aggregate cancellation sites by source file → [(basename, count)] sorted by
-    count (desc), ties by name.
-
-    This is the cancellation-*origin* view (where ill-conditioning concentrates),
-    as opposed to the per-line --source share, which is a *sensitivity* view
-    (where reduced precision most moves the output — typically the time
-    integrator / final accumulation, regardless of where error originates).
-    """
-    counts = {}
-    for fname, _lineno in cancellation_locs:
-        base = os.path.basename(fname)
-        counts[base] = counts.get(base, 0) + 1
-    return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
-
-
-def _run_dd_tool(
-    dd_bin: str,
-    dd_dir: str,
-    dd_run_sh: str,
-    dd_cmp_py: str,
-    env: dict,
-    log_name: str,
-    summary_subdir: str,
-    label: str,
-) -> list:
-    """Generic runner for verrou_dd_sym / verrou_dd_line. Returns raw summary lines."""
-    log_file = os.path.join(dd_dir, log_name)
-    cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py]
-    cons.print(f"  [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]")
-    with open(log_file, "w") as f:
-        result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False)
-    summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary")
-    summary_lines = []
-    if result.returncode == 0:
-        if os.path.isfile(summary_path):
-            with open(summary_path) as f:
-                summary_lines = f.readlines()
-            cons.print(f"  [bold yellow]{label} result[/bold yellow]:")
-            for line in summary_lines:
-                cons.print(f"    {line.rstrip()}")
-        else:
-            cons.print(f"  [dim]{label} done; see {log_file}[/dim]")
-    else:
-        cons.print(f"  [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})")
-    return summary_lines
-
-
-def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float):
-    """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their
-    paths.  The threshold falls back to _DD_FALLBACK_THRESHOLD when unset."""
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
-    return dd_run_sh, dd_cmp_py
-
-
-def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list:
-    """Run verrou_dd_sym; return list of responsible symbol names."""
-    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym")
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_sym not found; skipping delta-debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
-    cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
-    return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
-
-
-def _run_dd_line(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    log_dir: str,
-    threshold: float = None,
-) -> list:
-    """Run verrou_dd_line; return [{path, start, end, macro}] location dicts."""
-    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line")
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
-    return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
-
-
-def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag):
-    """Perturb only the lines in src_lines (deterministic float mode) and return
-    the L-inf deviation from the nearest-rounding reference, or None on failure."""
-    src_path = os.path.join(conf_dir, f"source_{tag}.txt")
-    with open(src_path, "w") as fh:
-        fh.writelines(src_lines)
-    run_dir = os.path.join(conf_dir, f"perturb_{tag}")
-    os.makedirs(run_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="float",
-            extra_flags=[f"--source={src_path}"],
-        )
-    except MFCException:
-        return None
-    return _max_diff_np(ref_dir, run_dir, compare)
-
-
-def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path):
-    """Run nearest-rounding with --gen-source to capture the symbol-correct
-    executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure."""
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="nearest",
-            extra_flags=[f"--gen-source={gen_path}"],
-        )
-    except MFCException:
-        return None
-    if not os.path.isfile(gen_path):
-        return None
-    with open(gen_path) as fh:
-        return fh.readlines()
-
-
-def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy):
-    """Positive control for dd_line: perturb ONLY the suspect lines and confirm
-    the instability reproduces, then rank each line by its individual share.
-
-    Verrou's --source matches file+line+symbol (not file+line alone), so we first
-    capture the symbol-correct executed source lines via --gen-source, filter them
-    to the suspect set, then run deterministic float-mode restricted to just those
-    lines.  If the suspect-only deviation reaches dd_threshold the attribution is
-    confirmed; if it stays near zero the reported lines do not actually carry the
-    instability (e.g. a #:for-expanded line blamed for the wrong instance).
-
-    Each line is then perturbed alone so its 'share_dev' (and 'share' of
-    float_proxy) shows which computation dominates.
-
-    Returns (confirmed, suspect_dev, ranked_locs).
-    """
-    if not dd_line_locs:
-        return None, None, dd_line_locs
-    conf_dir = os.path.join(work_dir, "confirm")
-    os.makedirs(conf_dir, exist_ok=True)
-    gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt"))
-    if gen_lines is None:
-        return None, None, dd_line_locs
-    compare = case["compare"]
-
-    # whole-set positive control
-    suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs]
-    set_src = _build_source_filter(gen_lines, suspects)
-    if not set_src:
-        # none of the reported lines performs an instrumented FP op -> not reproduced
-        return False, 0.0, dd_line_locs
-    set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set")
-    confirmed = _confirm_decision(set_dev, dd_threshold)
-
-    # per-line ranking (a single line trivially owns the whole set deviation)
-    if len(dd_line_locs) == 1:
-        dd_line_locs[0]["share_dev"] = set_dev
-    else:
-        for i, loc in enumerate(dd_line_locs):
-            one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])])
-            loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0
-    ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev))
-    return confirmed, set_dev, ranked
-
-
-def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line):
-    """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot.
-
-    Uses a precision binary (built with --fp-precision-lines) in which each
-    expanded instance of hotspot_file:hotspot_line compiles to a distinct
-    physical .f90 line.  The sidecar enumerates those physical lines; each is
-    perturbed alone (float mode, vs the precision binary's own nearest-rounding
-    reference) so the dominant instance is identified.
-
-    Returns a list of {instance, physline, dev, snippet} sorted most-flagrant
-    first (empty if no sidecar / no instrumented instances).
-    """
-    from . import fp_precision_lines as fpl
-
-    sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin)
-    sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file))
-    instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line)
-    if not instances:
-        return []
-
-    prec_dir = os.path.join(work_dir, "precision")
-    ref_dir = os.path.join(prec_dir, "ref")
-    os.makedirs(ref_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest")
-    except MFCException:
-        return []
-    gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt"))
-    if gen_lines is None:
-        return []
-
-    f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90")
-    compare = case["compare"]
-    results = []
-    for physline, instance in instances:
-        src = _build_source_filter(gen_lines, [(f90_file, physline, physline)])
-        if not src:
-            continue  # this instance performs no instrumented FP op
-        dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}")
-        results.append(
-            {
-                "instance": instance,
-                "physline": physline,
-                "dev": dev or 0.0,
-                "snippet": _read_source_line(f90_file, physline).strip(),
-            }
-        )
-    results.sort(key=lambda r: r["dev"], reverse=True)
-    return results
-
-
 def _blank_result(name: str) -> dict:
     """A result dict with every field at its empty/unmeasured default."""
     return {
@@ -1499,7 +565,7 @@ def _run_case(
             cons.print("  [dim]cancellation detection...[/dim]")
             try:
                 # sweep bit thresholds to get per-site severity (bits lost)
-                level_sites = [(level, _run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
+                level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
                 locs = level_sites[0][1]  # lowest threshold = full list
                 bits = _cancellation_severity(level_sites)
                 result["cancellation_locs"] = locs
@@ -1534,7 +600,7 @@ def _run_case(
         if run_float_max:
             cons.print("  [dim]float-max overflow check...[/dim]")
             try:
-                locs = _run_float_max_check(case, verrou_bin, sim_bin, work_dir)
+                locs = _run_float_max_check(verrou_bin, sim_bin, work_dir)
                 result["float_max_locs"] = locs
                 if locs:
                     cons.print(f"  [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)")
@@ -1550,233 +616,6 @@ def _run_case(
     return result
 
 
-def _emit_github_annotations(results: list):
-    """Emit GitHub annotations for FP hotspots.
-
-    Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations
-    appear inline on the responsible source lines in the PR diff view.
-
-    Up to 3 dd_line locations are emitted per case (minimal responsible lines
-    from delta-debug).  Confirmed hotspots (suspect-only perturbation reproduced
-    the instability) are ::warning::; unconfirmed ones are downgraded to
-    ::notice:: so a suspect attribution is not presented as fact.  Up to 3
-    cancellation sites per case are emitted as ::notice:: so the diff also
-    highlights subtraction-cancellation hotspots from --check-cancellation.
-    """
-    if not os.environ.get("GITHUB_ACTIONS"):
-        return
-    for r in results:
-        status = "FAIL" if not r["passed"] else "sensitivity"
-        _sb = r.get("sig_bits")
-        _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a"
-        dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}"
-        unconfirmed = r.get("dd_line_confirmed") is False
-
-        for loc in r.get("dd_line_locs", [])[:3]:
-            location = f"file={loc['path']},line={loc['start']}"
-            if loc["end"] != loc["start"]:
-                location += f",endLine={loc['end']}"
-            note = dev_str
-            if loc.get("share") is not None:
-                note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)"
-            if loc.get("cancellation"):
-                note += " — also a catastrophic cancellation site"
-            if loc.get("macro"):
-                note += f" — {loc['macro']}-expanded line, may represent multiple instances"
-            if unconfirmed:
-                title = f"FP candidate (unconfirmed) [{r['name']}]"
-                print(f"::notice {location},title={title}::{note}", flush=True)
-            else:
-                title = f"FP {status} [{r['name']}]"
-                print(f"::warning {location},title={title}::{note}", flush=True)
-        n_dd = len(r.get("dd_line_locs", []))
-        if n_dd > 3:
-            print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True)
-
-        for fname, lineno in r.get("cancellation_locs", [])[:3]:
-            loc = f"file={fname},line={lineno}"
-            title = f"FP cancellation [{r['name']}]"
-            print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True)
-        n_cc = len(r.get("cancellation_locs", []))
-        if n_cc > 3:
-            print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True)
-
-
-def _more_md(total: int, shown: int, noun: str) -> str:
-    """Markdown bullet noting `total - shown` further items elided from a list,
-    or '' when nothing was truncated."""
-    if total <= shown:
-        return ""
-    return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_"
-
-
-def _emit_github_summary(results: list, n_samples: int):
-    """Write a markdown results table to GITHUB_STEP_SUMMARY.
-
-    Visible directly in the Actions run UI without downloading artifacts.
-    Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels),
-    and dd_line source locations for any failing cases.
-    """
-    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
-    if not summary_path:
-        return
-
-    n_pass = sum(1 for r in results if r["passed"])
-    n_fail = len(results) - n_pass
-
-    md = []
-    md.append("## FP Stability Results\n")
-    md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n")
-    md.append(
-        f"> **Coverage:** {len(results)} one-dimensional case(s) "
-        f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these "
-        "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
-        "they do not reach.\n"
-    )
-
-    # Main results table — pass/fail is scale-free: bits retained vs a single floor
-    md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
-    md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |")
-    md.append("|------|:------:|:------:|--------:|--------:|:------:|")
-    for r in results:
-        status = "✅" if r["passed"] else "❌"
-        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
-        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
-        sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
-        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
-    md.append("")
-
-    # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
-    # most severe (most bits lost). The numerically interesting signal; the
-    # sensitivity list further down is dominated by the (benign) time integrator.
-    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
-    if cases_with_cancel:
-        md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
-        md.append(
-            "> Subtraction of nearly-equal values loses leading significant digits. A double carries "
-            "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
-            "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only "
-            "single-precision trust. Site *count* is not severity — one site losing many digits outweighs "
-            "many mild ones.\n"
-        )
-        for r in cases_with_cancel:
-            site_bits = r.get("cancellation_bits") or {}
-            # collapse continuation fragments to one entry per logical statement,
-            # keeping the worst bits-lost seen on that statement
-            stmts = {}  # (basename, stmt_start) -> {where, bits, text}
-            for fname, lineno in r["cancellation_locs"]:
-                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
-                key = (os.path.basename(fname), stmt_start)
-                e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text})
-                e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0))
-            ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"]))
-            if ordered:
-                w = ordered[0]
-                md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
-            for e in ordered[:15]:
-                lost = e["bits"] / math.log2(10)
-                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
-            footer = _more_md(len(ordered), 15, "statement(s)")
-            if footer:
-                md.append(footer)
-            md.append("")
-
-    # VPREC sweep — one column per bit level, ❌ where bits retained < floor
-    if any(r["vprec"] for r in results):
-        _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
-        header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
-        sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS)
-        md.append("### VPREC precision sweep\n")
-        md.append(f"| Case | {header} |")
-        md.append(f"|------|{sep}|")
-        for r in results:
-            vmap = {b: d for b, d in r["vprec"]}
-            cols = []
-            for b in VPREC_MANTISSA_BITS:
-                d = vmap.get(b)
-                if d is None:
-                    cols.append("—")
-                elif d == float("inf"):
-                    cols.append("💥 crash")
-                else:
-                    cols.append(f"{d:.2e}")
-            md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
-        md.append("")
-
-    # dd_line — single-precision SENSITIVITY (where precision most affects the
-    # output). This is distinct from cancellation origin (reported separately):
-    # the leader is typically the time integrator / final accumulation, because
-    # perturbing the last write moves the output directly while upstream errors
-    # get re-rounded there. Not a culprit-finder for ill-conditioning.
-    cases_with_locs = [r for r in results if r["dd_line_locs"]]
-    if cases_with_locs:
-        md.append("<details>")
-        md.append("<summary>Single-precision sensitivity (dd_line) — usually the time integrator; expand for details</summary>\n")
-        md.append(
-            "> Where reduced precision most moves the output — **typically the time integrator / "
-            "final accumulation, which is expected and benign**. This is *not* where cancellation "
-            "originates (that's the section above); it shows where precision matters most.\n"
-        )
-        _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
-        for r in cases_with_locs:
-            status = "❌ FAIL" if not r["passed"] else "✅ pass"
-            md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}")
-            md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n")
-            for loc in r["dd_line_locs"][:10]:
-                rel_path, start, end = loc["path"], loc["start"], loc["end"]
-                where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
-                tags = []
-                if loc.get("share") is not None:
-                    tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy")
-                if loc.get("cancellation"):
-                    tags.append("catastrophic cancellation")
-                if loc.get("macro"):
-                    tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
-                suffix = f" — {', '.join(tags)}" if tags else ""
-                md.append(f"- `{where}`{suffix}")
-                for inst in loc.get("instances", [])[:8]:
-                    flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else ""
-                    md.append(f"  - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`")
-                snippet = _get_source_context(rel_path, start)
-                if snippet:
-                    md.append("  ```fortran")
-                    for line in snippet.splitlines():
-                        md.append(f"  {line}")
-                    md.append("  ```")
-            footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)")
-            if footer:
-                md.append(footer)
-            md.append("")
-        md.append("</details>\n")
-
-    # dd_sym function names (collapsed, since less actionable than dd_line)
-    cases_with_syms = [r for r in results if r["dd_sym_syms"]]
-    if cases_with_syms:
-        md.append("<details>")
-        md.append("<summary>Responsible functions (dd_sym)</summary>\n")
-        for r in cases_with_syms:
-            md.append(f"\n**`{r['name']}`**\n")
-            for sym in r["dd_sym_syms"]:
-                md.append(f"- `{sym}`")
-        md.append("\n</details>\n")
-
-    # Float-max overflow sites
-    cases_with_fmax = [r for r in results if r.get("float_max_locs")]
-    if cases_with_fmax:
-        md.append("### Float32 overflow sites (check\\_max\\_float)\n")
-        for r in cases_with_fmax:
-            md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
-            for fname, lineno in r["float_max_locs"][:10]:
-                md.append(f"- `{fname}:{lineno}`")
-            footer = _more_md(len(r["float_max_locs"]), 10, "site(s)")
-            if footer:
-                md.append(footer)
-            md.append("")
-
-    with open(summary_path, "a") as f:
-        f.write("\n".join(md) + "\n")
-
-
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
     if not verrou_bin or not os.path.isfile(verrou_bin):
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
new file mode 100644
index 0000000000..01940618d5
--- /dev/null
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -0,0 +1,474 @@
+"""Pure metrics, source-resolution, and parsing helpers for the FP-stability suite.
+
+Leaf module: imports only stdlib + MFC_ROOT_DIR + cons. No sibling fp_stability*
+imports, so the runners/report/orchestrator modules can all depend on it.
+"""
+
+import glob
+import math
+import os
+import re
+
+from .common import MFC_ROOT_DIR
+from .printer import cons
+
+# Mantissa-bit levels for the VPREC sweep (C).
+# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
+VPREC_MANTISSA_BITS = [52, 23, 16, 10]
+
+# Stability pass/fail (stage A) is scale-free: a case must retain at least this
+# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
+# 24 ~= single precision. One global floor replaces per-case absolute thresholds
+# (which spanned 6 orders of magnitude purely from field scale + conditioning);
+# normalising by the field scale collapses that, so a single number suffices.
+MIN_SIG_BITS = 24
+
+# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy-
+# derived threshold is supplied (callers always pass one, so this is only a guard).
+_DD_FALLBACK_THRESHOLD = 1e-12
+
+
+def _sig_bits(max_dev: float, ref_scale: float) -> float:
+    """Significant bits retained = -log2(max_dev / ref_scale).
+
+    Scale-free: dividing the deviation by the field's peak magnitude removes the
+    absolute scale, leaving only the conditioning.  A non-positive or NaN
+    deviation or scale returns 53.0 (reported as full double precision).
+    """
+    if not (max_dev > 0) or not (ref_scale > 0):
+        return 53.0
+    return -math.log2(max_dev / ref_scale)
+
+
+def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
+    """A case passes when it retains at least `floor` significant bits."""
+    return _sig_bits(max_dev, ref_scale) >= floor
+
+
+# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
+_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
+
+# Files to exclude from cancellation / float-max reports (runtime loaders, XALT).
+_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")
+
+# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)".
+_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")
+
+# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def
+# defines a macro instantiated at multiple call sites) collapse many distinct
+# generated computations onto a single .fpp source line, so a dd_line hit inside
+# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select
+# code but do not duplicate it, so they are tracked for balance but not flagged.
+_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
+_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
+_FYPP_DUPLICATING = ("for", "def", "block", "call")
+
+# Lines that are clearly control-flow delimiters rather than arithmetic.
+# dd_line sometimes reports these when the responsible arithmetic is on the
+# preceding line but shares DWARF debug info with the delimiter (e.g. loop
+# boundaries in #:for-expanded code, or inlined functions at call sites).
+_CONTROL_FLOW_RE = re.compile(
+    r"^\s*("
+    r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b"
+    r"|do\s+\w+\s*=\s*[\w,\s]+"  # naked do-loop header (no arithmetic)
+    r"|else(\s+if\s*\(.*\)\s*then)?\s*$"  # else / else if (...) then
+    r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+"  # subroutine declaration
+    r"|\$:END_GPU\w+"  # fypp GPU macro closers
+    r"|#:end\w*"  # fypp directive closers (#:endfor, #:enddef, etc.)
+    r"|\s*!\s*$"  # empty comment lines (a bare '!'); comments with text do not match
+    r"|\s*$"  # blank lines
+    r")",
+    re.IGNORECASE,
+)
+
+
+def _resolve_source(fname: str, search_whole_tree: bool = False) -> str:
+    """Resolve a (possibly bare) source filename to an existing path, or '' if not
+    found.  An absolute existing path is used as-is; otherwise the basename is
+    located recursively under src/ (then the whole tree if `search_whole_tree`)."""
+    if os.path.isabs(fname) and os.path.isfile(fname):
+        return fname
+    candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
+    if not candidates and search_whole_tree:
+        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
+    return candidates[0] if candidates else ""
+
+
+def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list:
+    """Resolve `fname` and return its lines (with newlines), or [] if unreadable."""
+    path = _resolve_source(fname, search_whole_tree)
+    if not path:
+        return []
+    try:
+        with open(path) as fh:
+            return fh.readlines()
+    except OSError:
+        return []
+
+
+def _read_source_line(fname: str, lineno: int) -> str:
+    """Return the raw source line at lineno (1-based), or '' if unavailable."""
+    lines = _read_source_lines(fname)
+    return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
+
+
+def _macro_context_in_lines(lines: list, lineno: int) -> str:
+    """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that
+    encloses `lineno` (1-based) in `lines`, or None if none does.
+
+    Used to flag dd_line hotspots whose .fpp line is shared across multiple
+    expanded instances (a #:for body, a #:def macro used in many places), where
+    line-level attribution cannot identify which instance is responsible.
+    """
+    stack = []
+    for raw in lines[: max(0, lineno - 1)]:
+        mo = _FYPP_BLOCK_OPEN.match(raw)
+        if mo:
+            stack.append(mo.group(1).lower())
+            continue
+        if _FYPP_BLOCK_CLOSE.match(raw) and stack:
+            stack.pop()
+    for kw in reversed(stack):
+        if kw in _FYPP_DUPLICATING:
+            return f"#:{kw}"
+    return None
+
+
+def _macro_context(fname: str, lineno: int) -> str:
+    """File-backed wrapper around _macro_context_in_lines; None if fname is unreadable or empty."""
+    lines = _read_source_lines(fname)
+    if not lines:
+        return None
+    return _macro_context_in_lines(lines, lineno)
+
+
+def _ends_with_continuation(line: str) -> bool:
+    """True if a free-form Fortran line ends with a continuation '&' (the last
+    non-blank token before any trailing comment)."""
+    code = line.split("!", 1)[0].rstrip()  # drop trailing comment (string-'!' is rare; fine here)
+    return code.endswith("&")
+
+
+def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
+    """Return the (start, end) 1-based physical line range of the Fortran logical
+    statement containing lineno, following '&' continuations in both directions.
+
+    A hit reported on a continuation fragment thus resolves to the whole
+    statement, so the labelled location is the full expression rather than a
+    mid-statement piece.
+    """
+    n = len(lines)
+    start = lineno
+    while start > 1 and _ends_with_continuation(lines[start - 2]):
+        start -= 1
+    end = lineno
+    while end < n and _ends_with_continuation(lines[end - 1]):
+        end += 1
+    return start, end
+
+
+def _statement_at(fname: str, lineno: int) -> tuple:
+    """File-backed (start, end, text) for the logical statement at fname:lineno;
+    text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
+    lines = _read_source_lines(fname)
+    if not 0 < lineno <= len(lines):
+        return lineno, lineno, ""
+    start, end = _statement_bounds_in_lines(lines, lineno)
+    # join physical lines, dropping the continuation '&' that may lead or trail each
+    text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end])
+    return start, end, text
+
+
+def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
+    """Return True if any line in [start, end] contains non-trivial arithmetic.
+
+    Filters out loop delimiters and fypp directive lines that dd_line sometimes
+    reports when the responsible arithmetic shares DWARF info with its enclosing
+    control-flow boundary (inlining, #:for template expansion, etc.).
+    Returns True (keep) when uncertain so we never silently drop real hotspots.
+    """
+    for lineno in range(start, end + 1):
+        line = _read_source_line(fname, lineno)
+        if not line:
+            return True  # can't read — keep to be safe
+        if not _CONTROL_FLOW_RE.match(line):
+            return True
+    return False
+
+
+def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
+    """Return an annotated source snippet around lineno, or '' if file not found.
+
+    fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path.
+    Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree.
+    """
+    lines = _read_source_lines(fname, search_whole_tree=True)
+    if not lines:
+        return ""
+    start = max(0, lineno - context - 1)  # 0-based slice start, clamped at the first line
+    end = min(len(lines), lineno + context)  # exclusive slice end, clamped at the last line
+    rows = []
+    for i, line in enumerate(lines[start:end], start=start + 1):  # i is the 1-based line number
+        marker = ">" if i == lineno else " "  # '>' flags the requested line
+        rows.append(f"{marker}{i:5d} | {line.rstrip()}")
+    return "\n".join(rows)
+
+
+def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
+    """Max |ref - run| over column 1 of compare_files; inf if a file is missing or any difference is NaN/inf."""
+    import numpy as np
+
+    total = 0.0
+    for fname in compare_files:
+        ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
+        if not os.path.exists(ref_p) or not os.path.exists(run_p):
+            return float("inf")
+        diff = np.abs(np.loadtxt(ref_p)[:, 1] - np.loadtxt(run_p)[:, 1])
+        total = max(total, float(np.max(diff)) if np.all(np.isfinite(diff)) else float("inf"))  # max(total, nan) would keep total and hide a NaN output
+    return total
+
+
+def _max_abs_np(ref_dir: str, compare_files: list) -> float:
+    """Return the maximum absolute value across all reference output files."""
+    import numpy as np
+
+    total = 0.0
+    for fname in compare_files:
+        ref_p = os.path.join(ref_dir, fname)
+        if not os.path.exists(ref_p):
+            continue
+        ref = np.loadtxt(ref_p)[:, 1]
+        total = max(total, float(np.max(np.abs(ref))))
+    return total
+
+
+def _parse_cancel_gen(gen_path: str) -> list:
+    """Parse cc-gen-file TSV (file\\tline\\tsymbol) → unique [(fname, line)] for MFC sources, in first-seen order."""
+    if not os.path.isfile(gen_path):
+        return []
+    locs = []
+    seen = set()
+    with open(gen_path) as fh:
+        for raw in fh:
+            parts = raw.rstrip("\n").split("\t")
+            if len(parts) < 2:  # need at least file and line columns
+                continue
+            fname = parts[0].strip()
+            if any(ext in fname for ext in _EXTERNAL_SRCS):  # substring match against the external-source list
+                continue
+            if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")):
+                continue
+            try:
+                lineno = int(parts[1].strip())
+            except ValueError:  # non-numeric line column: skip the row
+                continue
+            key = (fname, lineno)
+            if key not in seen:  # de-duplicate while preserving file order (no sort is applied)
+                seen.add(key)
+                locs.append(key)
+    return locs
+
+
+def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
+    """Extract first MFC-source frame from each Valgrind error matching error_keyword."""
+    if not os.path.isfile(log_path):
+        return []
+    locs = []
+    seen = set()
+    in_error = False
+    with open(log_path) as fh:
+        for raw in fh:
+            line = re.sub(r"^==\d+== ?", "", raw)
+            if error_keyword in line:
+                in_error = True
+                continue
+            if in_error:
+                if "   at " in line or "   by " in line:
+                    m = _VGFRAME_RE.search(line)
+                    if m:
+                        fname = m.group(1)
+                        if any(ext in fname for ext in _EXTERNAL_SRCS):
+                            continue
+                        lineno = int(m.group(2))
+                        key = (fname, lineno)
+                        if key not in seen:
+                            seen.add(key)
+                            locs.append(key)
+                        in_error = False
+                elif line.strip() == "":
+                    in_error = False
+    return locs
+
+
+# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
+# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
+# levels and taking the highest each site survives gives a per-site "bits lost"
+# severity (a lower bound — no false positives). 48 ~ full double mantissa.
+CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
+
+
+def _cancellation_severity(level_sites: list) -> dict:
+    """Given [(threshold, [sites])], return {site: highest threshold it survives}
+    = the per-site bits-lost severity (a lower bound)."""
+    sev = {}
+    for level, sites in level_sites:
+        for site in sites:
+            if level > sev.get(site, 0):
+                sev[site] = level
+    return sev
+
+
+def _digits_left(bits_lost: float) -> float:
+    """Approximate trustworthy decimal digits remaining after losing `bits_lost`
+    bits of a double's 53-bit mantissa (~15.95 digits full)."""
+    return max(0.0, (53 - bits_lost) / math.log2(10))
+
+
+def _parse_rddmin_locs(summary_path: str) -> list:
+    """Extract dd_line locations from an rddmin_summary as
+    [{path, start, end, macro}] dicts (path is repo-relative; macro is the
+    enclosing fypp duplicating block, e.g. '#:for', or None).
+
+    Filters out locations whose source lines are pure control-flow delimiters
+    (loop boundaries, fypp directive closers, blank/comment lines).  These can
+    appear when the responsible arithmetic shares DWARF debug info with an
+    enclosing boundary due to inlining or #:for template expansion.
+    """
+    if not os.path.isfile(summary_path):
+        return []
+    locs = []
+    skipped = []
+    with open(summary_path) as fh:
+        for line in fh:
+            m = _LOC_RE.search(line)
+            if not m:
+                continue
+            path = m.group(1)
+            start = int(m.group(2))
+            end = int(m.group(3)) if m.group(3) else start
+            try:
+                rel = os.path.relpath(path, MFC_ROOT_DIR)
+                if rel.startswith(".."):
+                    rel = path
+            except ValueError:
+                rel = path
+            rel = rel.replace("\\", "/")
+            if _is_arithmetic_loc(path, start, end):
+                locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)})
+            else:
+                skipped.append((rel, start, end))
+    for rel, start, end in skipped:
+        loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}"
+        cons.print(f"  [dim]dd_line: skipped control-flow boundary {loc}[/dim]")
+    return locs
+
+
+def _parse_rddmin_syms(summary_path: str) -> list:
+    """Extract symbol/function names from a dd_sym rddmin_summary.
+
+    rddmin_summary format:
+      ddmin0:\\tFail Ratio: ...\\tFail indexes: ...
+      \\t<funcname>\\t<binary_path>
+      ddmin1:\\t...
+      \\t<funcname>\\t<binary_path>
+
+    Lines starting with 'ddmin' are metadata; function names are on the
+    indented (tab-prefixed) lines as the first tab-delimited field.
+    """
+    if not os.path.isfile(summary_path):
+        return []
+    syms = []
+    with open(summary_path) as fh:
+        for ln in fh:
+            stripped = ln.strip()
+            if not stripped or stripped.startswith("ddmin"):
+                continue
+            sym = stripped.split("\t")[0].strip()
+            if sym:
+                syms.append(sym)
+    return syms
+
+
+def _build_source_filter(gen_lines: list, suspect_locs: list) -> list:
+    """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a
+    suspect dd_line location.
+
+    gen_lines come from a --gen-source run and carry the exact symbol Verrou
+    requires (--source matches on file+line+symbol, not file+line alone).
+    suspect_locs are (path, start, end) tuples whose path may be a repo-relative
+    path while gen-source emits a basename, so matching is by basename + line.
+    """
+    ranges = {}
+    for path, start, end in suspect_locs:
+        ranges.setdefault(os.path.basename(path), []).append((start, end))
+    out = []
+    for raw in gen_lines:
+        parts = raw.rstrip("\n").split("\t")
+        if len(parts) < 2:
+            continue
+        base = os.path.basename(parts[0].strip())
+        try:
+            ln = int(parts[1].strip())
+        except ValueError:
+            continue
+        if any(s <= ln <= e for s, e in ranges.get(base, [])):
+            out.append(raw if raw.endswith("\n") else raw + "\n")
+    return out
+
+
+def _confirm_decision(suspect_dev, dd_threshold: float):
+    """Decide whether perturbing only the suspect lines reproduces the instability.
+
+    Returns True (confirmed), False (suspect lines are inert -> attribution
+    suspect, e.g. macro-collapse misattribution), or None if unmeasured.
+    """
+    if suspect_dev is None:
+        return None
+    return suspect_dev >= dd_threshold
+
+
+def _rank_locs(locs: list, total: float) -> list:
+    """Attach a 'share' (per-line deviation / total) to each loc dict — which
+    must already carry 'share_dev' from a single-line positive control — and
+    return the locs sorted by that deviation, most flagrant first.
+
+    'total' is normally float_proxy, so share is the fraction of the full
+    single-precision deviation that perturbing that one line alone reproduces.
+    A non-positive total yields share=None (cannot normalize).
+    """
+    for loc in locs:
+        dev = loc.get("share_dev")
+        loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None
+    return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True)
+
+
+def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
+    """Set loc['cancellation']=True for each dd_line loc whose line range covers a
+    catastrophic-cancellation site (stage F), matched by basename + line.
+
+    This pins the flagrant operation on a multi-op line to the subtraction that
+    cancels, rather than just naming the line.
+    """
+    by_base = {}
+    for fname, lineno in cancellation_locs:
+        by_base.setdefault(os.path.basename(fname), set()).add(lineno)
+    for loc in dd_line_locs:
+        lines = by_base.get(os.path.basename(loc["path"]), set())
+        loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1))
+    return dd_line_locs
+
+
+def _cancellation_by_file(cancellation_locs: list) -> list:
+    """Aggregate cancellation sites by source file → [(basename, count)] sorted by
+    count (desc), ties by name.
+
+    This is the cancellation-*origin* view (where ill-conditioning concentrates),
+    as opposed to the per-line --source share, which is a *sensitivity* view
+    (where reduced precision most moves the output — typically the time
+    integrator / final accumulation, regardless of where error originates).
+    """
+    counts = {}
+    for fname, _lineno in cancellation_locs:
+        base = os.path.basename(fname)
+        counts[base] = counts.get(base, 0) + 1
+    return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
new file mode 100644
index 0000000000..f0583002f2
--- /dev/null
+++ b/toolchain/mfc/fp_stability_report.py
@@ -0,0 +1,244 @@
+"""GitHub-output emitters for the FP-stability suite (step summary + annotations).
+
+Pure formatting of the result dicts produced by the runners; the metric helpers
+it uses (statement resolution, source context, digit math) live in
+fp_stability_metrics.
+"""
+
+import math
+import os
+
+from .fp_stability_metrics import (
+    MIN_SIG_BITS,
+    VPREC_MANTISSA_BITS,
+    _digits_left,
+    _get_source_context,
+    _statement_at,
+)
+
+
+def _emit_github_annotations(results: list):
+    """Emit GitHub annotations for FP hotspots.
+
+    Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations
+    appear inline on the responsible source lines in the PR diff view.
+
+    Up to 3 dd_line locations are emitted per case (minimal responsible lines
+    from delta-debug).  Confirmed hotspots (suspect-only perturbation reproduced
+    the instability) are ::warning::; unconfirmed ones are downgraded to
+    ::notice:: so a suspect attribution is not presented as fact.  Up to 3
+    cancellation sites per case are emitted as ::notice:: so the diff also
+    highlights subtraction-cancellation hotspots from --check-cancellation.
+    """
+    if not os.environ.get("GITHUB_ACTIONS"):
+        return
+    for r in results:
+        status = "FAIL" if not r["passed"] else "sensitivity"
+        _sb = r.get("sig_bits")
+        _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a"
+        dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}"
+        unconfirmed = r.get("dd_line_confirmed") is False
+
+        for loc in r.get("dd_line_locs", [])[:3]:
+            location = f"file={loc['path']},line={loc['start']}"
+            if loc["end"] != loc["start"]:
+                location += f",endLine={loc['end']}"
+            note = dev_str
+            if loc.get("share") is not None:
+                note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)"
+            if loc.get("cancellation"):
+                note += " — also a catastrophic cancellation site"
+            if loc.get("macro"):
+                note += f" — {loc['macro']}-expanded line, may represent multiple instances"
+            if unconfirmed:
+                title = f"FP candidate (unconfirmed) [{r['name']}]"
+                print(f"::notice {location},title={title}::{note}", flush=True)
+            else:
+                title = f"FP {status} [{r['name']}]"
+                print(f"::warning {location},title={title}::{note}", flush=True)
+        n_dd = len(r.get("dd_line_locs", []))
+        if n_dd > 3:
+            print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True)
+
+        for fname, lineno in r.get("cancellation_locs", [])[:3]:
+            loc = f"file={fname},line={lineno}"
+            title = f"FP cancellation [{r['name']}]"
+            print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True)
+        n_cc = len(r.get("cancellation_locs", []))
+        if n_cc > 3:
+            print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True)
+
+
+def _more_md(total: int, shown: int, noun: str) -> str:
+    """Markdown bullet noting `total - shown` further items elided from a list,
+    or '' when nothing was truncated."""
+    if total <= shown:
+        return ""
+    return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_"
+
+
+def _emit_github_summary(results: list, n_samples: int):
+    """Write a markdown results table to GITHUB_STEP_SUMMARY.
+
+    Visible directly in the Actions run UI without downloading artifacts.
+    Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels),
+    and dd_line source locations for any failing cases.
+    """
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        return
+
+    n_pass = sum(1 for r in results if r["passed"])
+    n_fail = len(results) - n_pass
+
+    md = []
+    md.append("## FP Stability Results\n")
+    md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n")
+    md.append(
+        f"> **Coverage:** {len(results)} one-dimensional case(s) "
+        f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these "
+        "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
+        "they do not reach.\n"
+    )
+
+    # Main results table — pass/fail is scale-free: bits retained vs a single floor
+    md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
+    md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |")
+    md.append("|------|:------:|:------:|--------:|--------:|:------:|")
+    for r in results:
+        status = "✅" if r["passed"] else "❌"
+        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
+        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
+        sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
+        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
+    md.append("")
+
+    # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
+    # most severe (most bits lost). The numerically interesting signal; the
+    # sensitivity list further down is dominated by the (benign) time integrator.
+    cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
+    if cases_with_cancel:
+        md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
+        md.append(
+            "> Subtraction of nearly-equal values loses leading significant digits. A double carries "
+            "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
+            "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only "
+            "single-precision trust. Site *count* is not severity — one site losing many digits outweighs "
+            "many mild ones.\n"
+        )
+        for r in cases_with_cancel:
+            site_bits = r.get("cancellation_bits") or {}
+            # collapse continuation fragments to one entry per logical statement,
+            # keeping the worst bits-lost seen on that statement
+            stmts = {}  # (basename, stmt_start) -> {where, bits, text}
+            for fname, lineno in r["cancellation_locs"]:
+                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
+                key = (os.path.basename(fname), stmt_start)
+                e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text})
+                e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0))
+            ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"]))
+            if ordered:
+                w = ordered[0]
+                md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
+            for e in ordered[:15]:
+                lost = e["bits"] / math.log2(10)
+                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
+            footer = _more_md(len(ordered), 15, "statement(s)")
+            if footer:
+                md.append(footer)
+            md.append("")
+
+    # VPREC sweep — one column per bit level, ❌ where bits retained < floor
+    if any(r["vprec"] for r in results):
+        _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
+        header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
+        sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS)
+        md.append("### VPREC precision sweep\n")
+        md.append(f"| Case | {header} |")
+        md.append(f"|------|{sep}|")
+        for r in results:
+            vmap = {b: d for b, d in r["vprec"]}
+            cols = []
+            for b in VPREC_MANTISSA_BITS:
+                d = vmap.get(b)
+                if d is None:
+                    cols.append("—")
+                elif d == float("inf"):
+                    cols.append("💥 crash")
+                else:
+                    cols.append(f"{d:.2e}")
+            md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
+        md.append("")
+
+    # dd_line — single-precision SENSITIVITY (where precision most affects the
+    # output). This is distinct from cancellation origin (reported separately):
+    # the leader is typically the time integrator / final accumulation, because
+    # perturbing the last write moves the output directly while upstream errors
+    # get re-rounded there. Not a culprit-finder for ill-conditioning.
+    cases_with_locs = [r for r in results if r["dd_line_locs"]]
+    if cases_with_locs:
+        md.append("<details>")
+        md.append("<summary>Single-precision sensitivity (dd_line) — usually the time integrator; expand for details</summary>\n")
+        md.append(
+            "> Where reduced precision most moves the output — **typically the time integrator / "
+            "final accumulation, which is expected and benign**. This is *not* where cancellation "
+            "originates (that's the section above); it shows where precision matters most.\n"
+        )
+        _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
+        for r in cases_with_locs:
+            status = "❌ FAIL" if not r["passed"] else "✅ pass"
+            md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}")
+            md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n")
+            for loc in r["dd_line_locs"][:10]:
+                rel_path, start, end = loc["path"], loc["start"], loc["end"]
+                where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
+                tags = []
+                if loc.get("share") is not None:
+                    tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy")
+                if loc.get("cancellation"):
+                    tags.append("catastrophic cancellation")
+                if loc.get("macro"):
+                    tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
+                suffix = f" — {', '.join(tags)}" if tags else ""
+                md.append(f"- `{where}`{suffix}")
+                for inst in loc.get("instances", [])[:8]:
+                    flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else ""
+                    md.append(f"  - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`")
+                snippet = _get_source_context(rel_path, start)
+                if snippet:
+                    md.append("  ```fortran")
+                    for line in snippet.splitlines():
+                        md.append(f"  {line}")
+                    md.append("  ```")
+            footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)")
+            if footer:
+                md.append(footer)
+            md.append("")
+        md.append("</details>\n")
+
+    # dd_sym function names (collapsed, since less actionable than dd_line)
+    cases_with_syms = [r for r in results if r["dd_sym_syms"]]
+    if cases_with_syms:
+        md.append("<details>")
+        md.append("<summary>Responsible functions (dd_sym)</summary>\n")
+        for r in cases_with_syms:
+            md.append(f"\n**`{r['name']}`**\n")
+            for sym in r["dd_sym_syms"]:
+                md.append(f"- `{sym}`")
+        md.append("\n</details>\n")
+
+    # Float-max overflow sites
+    cases_with_fmax = [r for r in results if r.get("float_max_locs")]
+    if cases_with_fmax:
+        md.append("### Float32 overflow sites (check\\_max\\_float)\n")
+        for r in cases_with_fmax:
+            md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
+            for fname, lineno in r["float_max_locs"][:10]:
+                md.append(f"- `{fname}:{lineno}`")
+            footer = _more_md(len(r["float_max_locs"]), 10, "site(s)")
+            if footer:
+                md.append(footer)
+            md.append("")
+
+    with open(summary_path, "a") as f:
+        f.write("\n".join(md) + "\n")
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
new file mode 100644
index 0000000000..c9b7ee375b
--- /dev/null
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -0,0 +1,530 @@
+"""Verrou subprocess runners for the FP-stability suite.
+
+Each routine drives the verrou/valgrind binary (or the verrou_dd_* delta-debug
+tools) and returns parsed results.  Pure parsing / metric helpers live in
+fp_stability_metrics, which this module imports.
+"""
+
+import glob
+import math
+import os
+import shutil
+import stat
+import subprocess
+import tempfile
+import textwrap
+
+from .common import MFC_ROOT_DIR, MFCException
+from .fp_stability_metrics import (
+    _DD_FALLBACK_THRESHOLD,
+    VPREC_MANTISSA_BITS,
+    _build_source_filter,
+    _confirm_decision,
+    _is_arithmetic_loc,
+    _max_abs_np,
+    _max_diff_np,
+    _parse_cancel_gen,
+    _parse_rddmin_locs,
+    _parse_rddmin_syms,
+    _parse_vg_error_locs,
+    _rank_locs,
+    _read_source_line,
+)
+from .printer import cons
+
+
+def _find_verrou() -> str:
+    verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou"))
+    candidate = os.path.join(verrou_home, "bin", "valgrind")
+    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+        return candidate
+    return shutil.which("valgrind") or ""
+
+
+def _find_binary(name: str) -> str:
+    install_dir = os.path.join(MFC_ROOT_DIR, "build", "install")
+    candidates = glob.glob(os.path.join(install_dir, "*", "bin", name))
+    return max(candidates, key=os.path.getmtime) if candidates else ""
+
+
+def _find_dd_tool(verrou_bin: str, tool: str) -> str:
+    """Path to a verrou_dd_* tool (e.g. 'verrou_dd_sym') next to the verrou binary,
+    or '' if absent."""
+    c = os.path.join(os.path.dirname(verrou_bin), tool)
+    return c if os.path.isfile(c) else ""
+
+
+def _verrou_pythonpath(verrou_bin: str) -> str:
+    """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir)."""
+    verrou_home = os.path.dirname(os.path.dirname(verrou_bin))
+    matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind"))
+    return matches[0] if matches else ""
+
+
+def _write_inp(params: dict, target_name: str, work_dir: str) -> None:
+    """Write a Fortran namelist .inp file from a Python params dict."""
+    from .run import case_dicts
+
+    master_keys = case_dicts.get_input_dict_keys(target_name)
+    lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys]
+    with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh:
+        fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n")
+
+
+def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str):
+    _write_inp(pre_params, "pre_process", work_dir)
+    with open(os.path.join(work_dir, "pre.log"), "w") as f:
+        result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False)
+    if result.returncode != 0:
+        raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log")
+
+
+def _run_simulation_verrou(
+    verrou_bin: str,
+    sim_bin: str,
+    work_dir: str,
+    run_dir: str,
+    rounding_mode: str = None,
+    extra_flags: list = None,
+):
+    """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output.
+
+    rounding_mode is passed as --rounding-mode=<mode> when set (non-empty).
+    extra_flags are appended before the binary (e.g. --backend=vprec ...).
+    Creates run_dir if needed; raises MFCException on a non-zero exit code.
+    """
+    with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir:
+        for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]:
+            src = os.path.join(work_dir, fname)
+            if os.path.exists(src):
+                shutil.copy2(src, tmpdir)
+        shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all"))
+        os.makedirs(os.path.join(tmpdir, "D"))
+        os.makedirs(run_dir, exist_ok=True)  # must exist before sim.out is opened in it
+        log_path = os.path.join(run_dir, "verrou.log")
+        cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"]
+        if rounding_mode:
+            cmd.append(f"--rounding-mode={rounding_mode}")
+        cmd.extend(extra_flags or [])
+        cmd.append(sim_bin)
+
+        with open(os.path.join(run_dir, "sim.out"), "w") as f:
+            result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False)
+
+        if result.returncode != 0:
+            tag = rounding_mode or "vprec"
+            raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out")
+
+        for fn in os.listdir(os.path.join(tmpdir, "D")):
+            shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir)
+
+
+def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
+    """Run --check-cancellation at the given bit threshold; return [(fname, line)]
+    of MFC cancellation sites (subtractions losing >= `threshold` significant bits)."""
+    tag = f"cancellation_{threshold}"
+    run_dir = os.path.join(work_dir, tag)
+    os.makedirs(run_dir, exist_ok=True)
+    gen_path = os.path.join(run_dir, "cancel_gen.txt")
+    flags = [
+        "--check-cancellation=yes",
+        f"--cc-threshold-double={threshold}",
+        f"--cc-gen-file={gen_path}",
+    ]
+    try:
+        _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags)
+    except MFCException:
+        pass
+    raw = _parse_cancel_gen(gen_path)
+    filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
+    skipped = len(raw) - len(filtered)
+    if skipped and threshold == 10:
+        cons.print(f"  [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
+    return filtered
+
+
+def _run_mca_samples(
+    case: dict,
+    verrou_bin: str,
+    sim_bin: str,
+    work_dir: str,
+    ref_dir: str,
+    n_mca: int,
+) -> tuple:
+    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound); sig_bits is 0 when max_dev is inf."""
+    compare = case["compare"]
+    ref_scale = _max_abs_np(ref_dir, compare)
+    max_dev = 0.0
+    flags = ["--backend=mcaquad", "--mca-mode=mca"]
+    for i in range(n_mca):
+        run_dir = os.path.join(work_dir, f"mca_{i:02d}")
+        os.makedirs(run_dir, exist_ok=True)
+        try:
+            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
+            max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
+        except MFCException:
+            pass  # best-effort: a sample whose run fails contributes no deviation
+    sig_bits = None
+    if max_dev > 0.0 and ref_scale > 0.0:
+        sig_bits = 0 if math.isinf(max_dev) else max(0, int(math.floor(-math.log2(max_dev / ref_scale))))  # floor(-inf) raises OverflowError
+    return max_dev, sig_bits
+
+
+def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list:
+    """Run with --check-max-float=yes; return [(fname, line)] of overflow sites."""
+    run_dir = os.path.join(work_dir, "float_max")
+    os.makedirs(run_dir, exist_ok=True)
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            run_dir,
+            rounding_mode="nearest",
+            extra_flags=["--check-max-float=yes"],
+        )
+    except MFCException:
+        pass
+    return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float")
+
+
+def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
+    """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref."""
+    run_dir = os.path.join(work_dir, "float_proxy")
+    os.makedirs(run_dir, exist_ok=True)  # tolerate a reused work_dir, like the other runners
+    _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float")
+    return _max_diff_np(ref_dir, run_dir, case["compare"])
+
+
+def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list:
+    """Run at each mantissa-bit level. Returns [(bits, dev), ...]."""
+    results = []
+    for bits in VPREC_MANTISSA_BITS:
+        run_dir = os.path.join(work_dir, f"vprec_{bits}")
+        os.makedirs(run_dir)
+        flags = [
+            "--backend=vprec",
+            "--vprec-mode=full",
+            f"--vprec-precision-binary64={bits}",
+            "--vprec-range-binary64=11",
+        ]
+        try:
+            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
+            dev = _max_diff_np(ref_dir, run_dir, case["compare"])
+        except MFCException:
+            dev = float("inf")
+        results.append((bits, dev))
+    return results
+
+
+def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str):
+    """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line.
+
+    verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via
+    VERROU_EXCLUDE / VERROU_SOURCE environment variables.  For test runs, we use
+    --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices).
+    For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the
+    environment — we honour that so the reference is a stable nearest-rounding baseline
+    to compare against.  CLI --rounding-mode would override the env var and break the
+    reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead.
+    """
+    content = textwrap.dedent(f"""\
+        #!/usr/bin/env bash
+        # Generated by mfc.sh fp-stability — do not edit by hand.
+        VERROU_BIN={verrou_bin!r}
+        SIM_BIN={sim_bin!r}
+        IC_DIR={ic_dir!r}
+
+        RUNDIR="$1"
+        TMPDIR_RUN=$(mktemp -d)
+        trap 'rm -rf "$TMPDIR_RUN"' EXIT
+
+        cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all"
+        cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp"
+        for fname in indices.dat pre_time_data.dat io_time_data.dat; do
+            [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/"
+        done
+        mkdir -p "$TMPDIR_RUN/D"
+
+        # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and
+        # leaves it unset for test runs.  Defaulting to float gives deterministic
+        # test steps while letting the reference use nearest-rounding.
+        ROUND="${{VERROU_ROUNDING_MODE:-float}}"
+
+        # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation).
+        # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to).
+        # Forward them as valgrind flags when set.
+        EXTRA=""
+        [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE"
+        [ -n "${{VERROU_SOURCE:-}}" ]  && EXTRA="$EXTRA --source=$VERROU_SOURCE"
+
+        cd "$TMPDIR_RUN"
+        "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN"
+        rc=$?
+
+        [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/"
+        exit $rc
+    """)
+    with open(path, "w") as f:
+        f.write(content)
+    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+
+def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
+    """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line.
+
+    verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR
+    Exits 0 (stable) or 1 (unstable) based on threshold.
+    """
+    content = textwrap.dedent(f"""\
+        #!/usr/bin/env python3
+        # Generated by mfc.sh fp-stability — do not edit by hand.
+        import sys, os, numpy as np
+
+        COMPARE_FILES = {compare_files!r}
+        THRESHOLD = {threshold!r}
+
+        ref_dir, run_dir = sys.argv[1], sys.argv[2]
+        max_dev = 0.0
+        for fname in COMPARE_FILES:
+            ref_p = os.path.join(ref_dir, fname)
+            run_p = os.path.join(run_dir, fname)
+            if not os.path.exists(ref_p) or not os.path.exists(run_p):
+                print(f"MISSING: {{fname}}")
+                sys.exit(1)
+            ref = np.loadtxt(ref_p)[:, 1]
+            run = np.loadtxt(run_p)[:, 1]
+            dev = float(np.max(np.abs(ref - run)))
+            max_dev = max(max_dev, dev)
+
+        print(f"max_dev={{max_dev:.3e}}  threshold={{THRESHOLD:.0e}}")
+        sys.exit(0 if max_dev <= THRESHOLD else 1)
+    """)
+    with open(path, "w") as f:
+        f.write(content)
+    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+
+def _dd_env(verrou_bin: str) -> dict:
+    """Environment with PYTHONPATH set for verrou_dd_* imports."""
+    py_pkg = _verrou_pythonpath(verrou_bin)
+    env = os.environ.copy()
+    if py_pkg:
+        existing = env.get("PYTHONPATH", "")
+        env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing]))
+    return env
+
+
+def _run_dd_tool(
+    dd_bin: str,
+    dd_dir: str,
+    dd_run_sh: str,
+    dd_cmp_py: str,
+    env: dict,
+    log_name: str,
+    summary_subdir: str,
+    label: str,
+) -> list:
+    """Generic runner for verrou_dd_sym / verrou_dd_line. Returns raw summary lines."""
+    log_file = os.path.join(dd_dir, log_name)
+    cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py]
+    cons.print(f"  [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]")
+    with open(log_file, "w") as f:
+        result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False)
+    summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary")
+    summary_lines = []
+    if result.returncode == 0:
+        if os.path.isfile(summary_path):
+            with open(summary_path) as f:
+                summary_lines = f.readlines()
+            cons.print(f"  [bold yellow]{label} result[/bold yellow]:")
+            for line in summary_lines:
+                cons.print(f"    {line.rstrip()}")
+        else:
+            cons.print(f"  [dim]{label} done; see {log_file}[/dim]")
+    else:
+        cons.print(f"  [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})")
+    return summary_lines
+
+
+def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float):
+    """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their
+    paths.  The threshold falls back to _DD_FALLBACK_THRESHOLD when unset."""
+    os.makedirs(dd_dir, exist_ok=True)
+    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
+    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
+    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
+    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
+    return dd_run_sh, dd_cmp_py
+
+
+def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list:
+    """Run verrou_dd_sym; return list of responsible symbol names."""
+    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym")
+    if not dd_bin:
+        cons.print("  [dim]verrou_dd_sym not found; skipping delta-debug[/dim]")
+        return []
+
+    dd_dir = os.path.join(log_dir, case["name"])
+    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
+    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
+    cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
+    return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
+
+
+def _run_dd_line(
+    case: dict,
+    verrou_bin: str,
+    sim_bin: str,
+    work_dir: str,
+    log_dir: str,
+    threshold: float = None,
+) -> list:
+    """Run verrou_dd_line; return [{path, start, end, macro}] location dicts."""
+    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line")
+    if not dd_bin:
+        cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
+        return []
+
+    dd_dir = os.path.join(log_dir, case["name"])
+    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
+    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
+    return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
+
+
+def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag):
+    """Perturb only the lines in src_lines (deterministic float mode) and return
+    the L-inf deviation from the nearest-rounding reference, or None on failure."""
+    src_path = os.path.join(conf_dir, f"source_{tag}.txt")
+    with open(src_path, "w") as fh:
+        fh.writelines(src_lines)
+    run_dir = os.path.join(conf_dir, f"perturb_{tag}")
+    os.makedirs(run_dir, exist_ok=True)
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            run_dir,
+            rounding_mode="float",
+            extra_flags=[f"--source={src_path}"],
+        )
+    except MFCException:
+        return None
+    return _max_diff_np(ref_dir, run_dir, compare)
+
+
+def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path):
+    """Run nearest-rounding with --gen-source to capture the symbol-correct
+    executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure."""
+    try:
+        _run_simulation_verrou(
+            verrou_bin,
+            sim_bin,
+            work_dir,
+            run_dir,
+            rounding_mode="nearest",
+            extra_flags=[f"--gen-source={gen_path}"],
+        )
+    except MFCException:
+        return None
+    if not os.path.isfile(gen_path):
+        return None
+    with open(gen_path) as fh:
+        return fh.readlines()
+
+
+def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy):
+    """Positive control for dd_line: perturb ONLY the suspect lines and confirm
+    the instability reproduces, then rank each line by its individual share.
+
+    Verrou's --source matches file+line+symbol (not file+line alone), so we first
+    capture the symbol-correct executed source lines via --gen-source, filter them
+    to the suspect set, then run deterministic float-mode restricted to just those
+    lines.  If the suspect-only deviation reaches dd_threshold the attribution is
+    confirmed; if it stays near zero the reported lines do not actually carry the
+    instability (e.g. a #:for-expanded line blamed for the wrong instance).
+
+    Each line is then perturbed alone so its 'share_dev' (and 'share' of
+    float_proxy) shows which computation dominates.
+
+    Returns (confirmed, suspect_dev, ranked_locs).
+    """
+    if not dd_line_locs:
+        return None, None, dd_line_locs
+    conf_dir = os.path.join(work_dir, "confirm")
+    os.makedirs(conf_dir, exist_ok=True)
+    gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt"))
+    if gen_lines is None:
+        return None, None, dd_line_locs
+    compare = case["compare"]
+
+    # whole-set positive control
+    suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs]
+    set_src = _build_source_filter(gen_lines, suspects)
+    if not set_src:
+        # none of the reported lines performs an instrumented FP op -> not reproduced
+        return False, 0.0, dd_line_locs
+    set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set")
+    confirmed = _confirm_decision(set_dev, dd_threshold)
+
+    # per-line ranking (a single line trivially owns the whole set deviation)
+    if len(dd_line_locs) == 1:
+        dd_line_locs[0]["share_dev"] = set_dev
+    else:
+        for i, loc in enumerate(dd_line_locs):
+            one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])])
+            loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0
+    ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev))
+    return confirmed, set_dev, ranked
+
+
+def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line):
+    """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot.
+
+    Uses a precision binary (built with --fp-precision-lines) in which each
+    expanded instance of hotspot_file:hotspot_line compiles to a distinct
+    physical .f90 line.  The sidecar enumerates those physical lines; each is
+    perturbed alone (float mode, vs the precision binary's own nearest-rounding
+    reference) so the dominant instance is identified.
+
+    Returns a list of {instance, physline, dev, snippet} sorted most-flagrant
+    first (empty if no sidecar / no instrumented instances).
+    """
+    from . import fp_precision_lines as fpl
+
+    sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin)
+    sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file))
+    instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line)
+    if not instances:
+        return []
+
+    prec_dir = os.path.join(work_dir, "precision")
+    ref_dir = os.path.join(prec_dir, "ref")
+    os.makedirs(ref_dir, exist_ok=True)
+    try:
+        _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest")
+    except MFCException:
+        return []
+    gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt"))
+    if gen_lines is None:
+        return []
+
+    f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90")
+    compare = case["compare"]
+    results = []
+    for physline, instance in instances:
+        src = _build_source_filter(gen_lines, [(f90_file, physline, physline)])
+        if not src:
+            continue  # this instance performs no instrumented FP op
+        dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}")
+        results.append(
+            {
+                "instance": instance,
+                "physline": physline,
+                "dev": dev or 0.0,
+                "snippet": _read_source_line(f90_file, physline).strip(),
+            }
+        )
+    results.sort(key=lambda r: r["dev"], reverse=True)
+    return results
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index e89694d19b..4630cd3db6 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -6,7 +6,7 @@
 label results, so they can run without Verrou or built binaries.
 """
 
-from mfc.fp_stability import (
+from mfc.fp_stability_metrics import (
     MIN_SIG_BITS,
     _build_source_filter,
     _cancellation_by_file,

From 982ec890e30b6aca95aa8d09a7e4344f82dc2240 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 09:55:34 -0400
Subject: [PATCH 09/25] fp-stability: remove Tier 2 per-instance disambiguation
 entirely
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per the weight review, the precision-build-based per-instance disambiguation was the heaviest piece (its own module + a build flag + CMake plumbing + tests) for the narrowest trigger (fires only when the most-flagrant hotspot is also inside a #:for/#:def expansion). Removed in full:

- deleted toolchain/mfc/fp_precision_lines.py and its tests; deleted _disambiguate_instances

- reverted CMakeLists.txt and build.py to upstream (no MFC_FP_PRECISION_LINES option, no marker-strip step, no -D flag); dropped the --fp-precision-lines build arg and the --precision-sim-binary fp-stability arg

- removed the E3 disambiguation stage, its docstring section, and the per-instance summary display

Kept: the lightweight '#:for/#:def-expanded — may represent multiple instances' hotspot warning (cheap, honest, separate from the disambiguation machinery). 57 toolchain tests, ruff, and precheck (all 7) are green; CMakeLists.txt and build.py are byte-identical to upstream.
---
 CMakeLists.txt                           |  34 +------
 toolchain/mfc/build.py                   |   1 -
 toolchain/mfc/cli/commands.py            |  14 ---
 toolchain/mfc/fp_precision_lines.py      | 123 -----------------------
 toolchain/mfc/fp_stability.py            |  35 -------
 toolchain/mfc/fp_stability_report.py     |   3 -
 toolchain/mfc/fp_stability_runners.py    |  52 ----------
 toolchain/mfc/test_fp_precision_lines.py | 112 ---------------------
 8 files changed, 2 insertions(+), 372 deletions(-)
 delete mode 100644 toolchain/mfc/fp_precision_lines.py
 delete mode 100644 toolchain/mfc/test_fp_precision_lines.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 532c377702..83bbb8fe0e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,7 +31,6 @@ option(MFC_DOCUMENTATION "Build documentation"                               OFF
 option(MFC_ALL           "Build everything"                                  OFF)
 option(MFC_SINGLE_PRECISION "Build single precision"                         OFF)
 option(MFC_MIXED_PRECISION "Build mixed precision"                           OFF)
-option(MFC_FP_PRECISION_LINES "Strip fypp markers for per-instance fp-stability attribution" OFF)
 
 if (MFC_ALL)
     set(MFC_PRE_PROCESS   ON FORCE)
@@ -434,24 +433,8 @@ macro(HANDLE_SOURCES target useCommon)
         cmake_path(GET fpp FILENAME fpp_filename)
         set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90")
 
-        # In a precision-lines build, Fypp writes a marked intermediate that is
-        # then stripped of its line markers (so each expanded instance compiles
-        # to a distinct physical line) before compilation; the strip step emits a
-        # .linemap.json sidecar.  Otherwise Fypp writes ${f90} directly.  Only the
-        # simulation target is analyzed by fp-stability, so pre/post_process are
-        # always built normally.
-        set(_precision_lines OFF)
-        if (MFC_FP_PRECISION_LINES AND "${target}" STREQUAL "simulation")
-            set(_precision_lines ON)
-        endif()
-        if (_precision_lines)
-            set(f90_out "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.marked.f90")
-        else()
-            set(f90_out "${f90}")
-        endif()
-
         add_custom_command(
-            OUTPUT   ${f90_out}
+            OUTPUT   ${f90}
             COMMAND  ${FYPP_EXE} -m re
                                  -I "${CMAKE_BINARY_DIR}/include/${target}"
                                  -I "${${target}_DIR}/include"
@@ -467,25 +450,12 @@ macro(HANDLE_SOURCES target useCommon)
 								 --line-length=999
 		 						 --line-numbering-mode=nocontlines
                                  ${FYPP_GCOV_OPTS}
-                                 "${fpp}" "${f90_out}"
+                                 "${fpp}" "${f90}"
             DEPENDS  "${fpp};${${target}_incs}"
             COMMENT  "Preprocessing (Fypp) ${fpp_filename}"
             VERBATIM
         )
 
-        if (_precision_lines)
-            add_custom_command(
-                OUTPUT   ${f90}
-                COMMAND  ${Python3_EXECUTABLE}
-                         "${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py"
-                         "${f90_out}" "${f90}"
-                         "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.linemap.json"
-                DEPENDS  "${f90_out};${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py"
-                COMMENT  "Stripping markers (fp-precision-lines) ${fpp_filename}"
-                VERBATIM
-            )
-        endif()
-
         list(APPEND ${target}_SRCs ${f90})
     endforeach()
 endmacro()
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 01a0c8ece3..01efb1a9b1 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -421,7 +421,6 @@ def configure(self, case: Case):
             flags.append(f"-DMFC_GCov={'ON' if ARG('gcov') else 'OFF'}")
             flags.append(f"-DMFC_Unified={'ON' if ARG('unified') else 'OFF'}")
             flags.append(f"-DMFC_Fastmath={'ON' if ARG('fastmath') else 'OFF'}")
-            flags.append(f"-DMFC_FP_PRECISION_LINES={'ON' if ARG('fp_precision_lines') else 'OFF'}")
 
         command = ["cmake"] + flags + ["-S", cmake_dirpath, "-B", build_dirpath]
 
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index cff47c3ecf..54bbff4641 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -141,13 +141,6 @@
             default=False,
             dest="deps_only",
         ),
-        Argument(
-            name="fp-precision-lines",
-            help="(fp-stability) Strip fypp line markers so each expanded instance gets a distinct line; emits sidecars for per-instance attribution.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="fp_precision_lines",
-        ),
     ],
     examples=[
         Example("./mfc.sh build", "Build all default targets (CPU)"),
@@ -945,13 +938,6 @@
             default=None,
             metavar="PATH",
         ),
-        Argument(
-            name="precision-sim-binary",
-            help="Path to a simulation binary built with --fp-precision-lines. When given, macro-ambiguous hotspots are disambiguated to the individual fypp-expanded instance.",
-            default=None,
-            dest="precision_sim_binary",
-            metavar="PATH",
-        ),
         Argument(
             name="samples",
             short="N",
diff --git a/toolchain/mfc/fp_precision_lines.py b/toolchain/mfc/fp_precision_lines.py
deleted file mode 100644
index 6dc1df04c1..0000000000
--- a/toolchain/mfc/fp_precision_lines.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""FP-stability precision-lines transform (Tier 2).
-
-A fypp #:for/#:def expansion emits many generated computations that all carry
-the same cpp line marker (`# N "file.fpp"`), so DWARF — and therefore Verrou —
-collapse every expanded instance onto one .fpp line.  This transform removes the
-fypp line markers from a generated .f90 so the compiler attributes each statement
-to the generated file's own physical line (which *is* distinct per expanded
-instance), and records a sidecar mapping each surviving physical line back to
-(file, original .fpp line, instance index).  Genuine cpp directives
-(#if/#define/#endif/...) are preserved so conditional compilation is unchanged.
-
-When the stripped .f90 is compiled, Verrou attributes — and fp-stability ranks
-and isolates via --source — per expanded instance rather than per source line.
-Used only by a dedicated precision build (MFC_FP_PRECISION_LINES); the normal
-build is unaffected.  The mechanism (stripped markers -> instance-distinct
-physical-line attribution -> per-instance Verrou --source isolation, surviving
-the cpp #if layer) is validated against gfortran + Verrou.
-"""
-
-import json
-import os
-import re
-
-# A fypp line marker: "# <number> "<file>"" possibly with trailing flags.  A cpp
-# conditional/define directive (#if, #define, #endif, ...) has a word, not a
-# number, after the '#', so the two are unambiguous.
-_FYPP_MARKER = re.compile(r'^#\s+(\d+)\s+"([^"]+)"')
-# Any other preprocessor directive line (kept, but it is not a .fpp source line,
-# so it neither consumes a source-line increment nor gets a sidecar entry).
-_CPP_DIRECTIVE = re.compile(r"^\s*#")
-
-
-def strip_markers(lines: list) -> tuple:
-    """Strip fypp line markers; return (output_lines, sidecar).
-
-    sidecar maps each 1-based physical output line number to
-    {"file", "line", "instance"}: the .fpp file, the .fpp line that physical
-    line came from (auto-incremented within a marker region), and how many times
-    that marker's (file, line) had been seen before (0 = first/real occurrence,
-    >=1 = an expanded instance).
-    """
-    seen = {}
-    out = []
-    sidecar = {}
-    cur_file = None
-    cur_line = None
-    cur_instance = None
-    for raw in lines:
-        m = _FYPP_MARKER.match(raw)
-        if m:
-            cur_file = m.group(2)
-            cur_line = int(m.group(1))
-            cur_instance = seen.get((cur_file, cur_line), 0)
-            seen[(cur_file, cur_line)] = cur_instance + 1
-            continue  # drop the marker line
-        out.append(raw)
-        if cur_file is None or _CPP_DIRECTIVE.match(raw):
-            # cpp directives are kept verbatim but are not .fpp source lines
-            continue
-        sidecar[len(out)] = {"file": cur_file, "line": cur_line, "instance": cur_instance}
-        cur_line += 1  # subsequent physical source lines map to the next .fpp line
-    return out, sidecar
-
-
-def transform_file(in_path: str, out_path: str, sidecar_path: str) -> int:
-    """Strip a generated .f90 to its precision-lines variant.
-
-    Reads in_path, writes the marker-stripped source to out_path and the sidecar
-    JSON to sidecar_path.  Returns the number of mapped physical lines.
-    """
-    with open(in_path) as fh:
-        lines = fh.readlines()
-    out, sidecar = strip_markers(lines)
-    with open(out_path, "w") as fh:
-        fh.writelines(out)
-    with open(sidecar_path, "w") as fh:
-        json.dump({str(k): v for k, v in sidecar.items()}, fh)
-    return len(sidecar)
-
-
-# --- consumption side (Tier 2): locating and querying the sidecars ---
-
-
-def sidecar_dir_for_binary(sim_bin: str) -> str:
-    """Map a precision simulation binary path to its sidecar directory.
-
-    .../build/install/<hash>/bin/simulation -> .../build/staging/<hash>/fypp/simulation
-    """
-    bin_dir = os.path.dirname(os.path.abspath(sim_bin))  # .../install/<hash>/bin
-    hash_dir = os.path.dirname(bin_dir)  # .../install/<hash>
-    cfg_hash = os.path.basename(hash_dir)
-    build_root = os.path.dirname(os.path.dirname(hash_dir))  # .../build
-    return os.path.join(build_root, "staging", cfg_hash, "fypp", "simulation")
-
-
-def sidecar_path(sidecar_dir: str, fpp_file: str) -> str:
-    """Sidecar JSON path for a .fpp file: <dir>/<basename>.linemap.json."""
-    return os.path.join(sidecar_dir, os.path.basename(fpp_file) + ".linemap.json")
-
-
-def load_sidecar(path: str) -> dict:
-    """Load a sidecar JSON into {physical_line:int -> {file, line, instance}}."""
-    if not os.path.isfile(path):
-        return {}
-    with open(path) as fh:
-        raw = json.load(fh)
-    return {int(k): v for k, v in raw.items()}
-
-
-def instances_of(sidecar: dict, fpp_file: str, fpp_line: int) -> list:
-    """Return [(physical_line, instance), ...] (sorted by physical line) for every
-    expanded instance of fpp_file:fpp_line, matched by basename."""
-    base = os.path.basename(fpp_file)
-    hits = [(physline, entry["instance"]) for physline, entry in sidecar.items() if os.path.basename(entry["file"]) == base and entry["line"] == fpp_line]
-    return sorted(hits)
-
-
-if __name__ == "__main__":
-    import sys
-
-    if len(sys.argv) != 4:
-        sys.exit("usage: fp_precision_lines.py <in.f90> <out.f90> <sidecar.json>")
-    transform_file(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 0579502910..c84b6e97ba 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -48,15 +48,6 @@
    One run with --check-max-float=yes; reports locations where a
    double→float conversion would overflow to ±Inf.
 
-I. Per-instance disambiguation (--precision-sim-binary PATH; opt-in)
-   A fypp #:for/#:def expansion collapses many generated computations onto one
-   .fpp line, so a macro-ambiguous hotspot cannot be pinned to a single runtime
-   instance.  Given a simulation binary built with `--fp-precision-lines` (markers
-   stripped so each instance is a distinct line, plus .linemap.json sidecars), the
-   most flagrant macro-ambiguous hotspot is disambiguated: each expanded instance
-   is perturbed alone on the precision binary, ranking them to the responsible
-   instance and showing its concrete generated code.
-
 Logs are saved to fp-stability-logs/ and uploaded as CI artifacts.
 On GitHub Actions: a step summary table and ::warning:: file annotations
 are emitted automatically so failing source lines appear in the PR diff.
@@ -95,7 +86,6 @@
     _emit_github_summary,
 )
 from .fp_stability_runners import (
-    _disambiguate_instances,
     _find_binary,
     _find_verrou,
     _run_cancellation_check,
@@ -419,7 +409,6 @@ def _run_case(
     run_cancellation: bool,
     run_mca: bool,
     run_float_max: bool,
-    prec_sim_bin: str = None,
 ) -> dict:
     name = case["name"]
     compare = case["compare"]
@@ -542,24 +531,6 @@ def _run_case(
             except Exception as exc:
                 cons.print(f"  [bold yellow]dd_line confirmation error[/bold yellow]: {exc}")
 
-        # --- E3: per-instance disambiguation of the most flagrant macro-ambiguous hotspot ---
-        if prec_sim_bin and result["dd_line_locs"]:
-            macro_loc = next((loc for loc in result["dd_line_locs"] if loc.get("macro")), None)
-            if macro_loc:
-                cons.print(f"  [dim]disambiguating fypp instances of {macro_loc['path']}:{macro_loc['start']} (precision binary)...[/dim]")
-                try:
-                    insts = _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, macro_loc["path"], macro_loc["start"])
-                    macro_loc["instances"] = insts
-                    if insts and insts[0]["dev"] > 0:
-                        win = insts[0]
-                        cons.print(f"  flagrant instance: #{win['instance']} (.f90:{win['physline']}, dev={win['dev']:.3e})  {win['snippet']}")
-                    elif insts:
-                        cons.print(f"  [dim]{len(insts)} instance(s) enumerated; none perturbed measurably (hotspot inert)[/dim]")
-                    else:
-                        cons.print("  [dim]no sidecar instances found for this hotspot[/dim]")
-                except Exception as exc:
-                    cons.print(f"  [bold yellow]instance disambiguation error[/bold yellow]: {exc}")
-
         # --- F: cancellation detection ---
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
@@ -638,9 +609,6 @@ def fp_stability():
     run_cancellation = not ARG("no_cancellation")
     run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
-    prec_sim_bin = ARG("precision_sim_binary")
-    if prec_sim_bin and not os.path.isfile(prec_sim_bin):
-        raise MFCException(f"precision simulation binary not found: {prec_sim_bin}")
 
     log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs")
     os.makedirs(log_dir, exist_ok=True)
@@ -650,8 +618,6 @@ def fp_stability():
     cons.print(f"  verrou:      {verrou_bin}")
     cons.print(f"  simulation:  {sim_bin}")
     cons.print(f"  pre_process: {pp_bin}")
-    if prec_sim_bin:
-        cons.print(f"  precision:   {prec_sim_bin}  (per-instance disambiguation)")
     cons.print(f"  samples:     {n_samples}")
     features = []
     if run_float:
@@ -690,7 +656,6 @@ def fp_stability():
                 run_cancellation,
                 run_mca,
                 run_float_max,
-                prec_sim_bin,
             )
         except MFCException as exc:
             cons.print(f"  [bold red]ERROR[/bold red]: {exc}")
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index f0583002f2..05d31d0c9d 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -201,9 +201,6 @@ def _emit_github_summary(results: list, n_samples: int):
                     tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
                 suffix = f" — {', '.join(tags)}" if tags else ""
                 md.append(f"- `{where}`{suffix}")
-                for inst in loc.get("instances", [])[:8]:
-                    flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else ""
-                    md.append(f"  - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`")
                 snippet = _get_source_context(rel_path, start)
                 if snippet:
                     md.append("  ```fortran")
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index c9b7ee375b..4146baab11 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -28,7 +28,6 @@
     _parse_rddmin_syms,
     _parse_vg_error_locs,
     _rank_locs,
-    _read_source_line,
 )
 from .printer import cons
 
@@ -477,54 +476,3 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs
             loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0
     ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev))
     return confirmed, set_dev, ranked
-
-
-def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line):
-    """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot.
-
-    Uses a precision binary (built with --fp-precision-lines) in which each
-    expanded instance of hotspot_file:hotspot_line compiles to a distinct
-    physical .f90 line.  The sidecar enumerates those physical lines; each is
-    perturbed alone (float mode, vs the precision binary's own nearest-rounding
-    reference) so the dominant instance is identified.
-
-    Returns a list of {instance, physline, dev, snippet} sorted most-flagrant
-    first (empty if no sidecar / no instrumented instances).
-    """
-    from . import fp_precision_lines as fpl
-
-    sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin)
-    sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file))
-    instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line)
-    if not instances:
-        return []
-
-    prec_dir = os.path.join(work_dir, "precision")
-    ref_dir = os.path.join(prec_dir, "ref")
-    os.makedirs(ref_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest")
-    except MFCException:
-        return []
-    gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt"))
-    if gen_lines is None:
-        return []
-
-    f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90")
-    compare = case["compare"]
-    results = []
-    for physline, instance in instances:
-        src = _build_source_filter(gen_lines, [(f90_file, physline, physline)])
-        if not src:
-            continue  # this instance performs no instrumented FP op
-        dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}")
-        results.append(
-            {
-                "instance": instance,
-                "physline": physline,
-                "dev": dev or 0.0,
-                "snippet": _read_source_line(f90_file, physline).strip(),
-            }
-        )
-    results.sort(key=lambda r: r["dev"], reverse=True)
-    return results
diff --git a/toolchain/mfc/test_fp_precision_lines.py b/toolchain/mfc/test_fp_precision_lines.py
deleted file mode 100644
index ddb139af2d..0000000000
--- a/toolchain/mfc/test_fp_precision_lines.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""Unit tests for the fp-stability precision-lines transform (Tier 2, P1).
-
-A fypp #:for/#:def expansion re-marks many generated computations with the same
-cpp line marker (`# N "file.fpp"`), so DWARF — and Verrou — collapse every
-expanded instance onto one .fpp line.  strip_markers removes the fypp line
-markers so the compiler attributes to the generated .f90's own (instance-
-distinct) physical lines, and emits a sidecar mapping each surviving physical
-line back to (file, original .fpp line, instance index).  Genuine cpp directives
-(#if/#define/...) are kept so conditional compilation still works.
-"""
-
-import os
-
-from mfc.fp_precision_lines import (
-    instances_of,
-    sidecar_dir_for_binary,
-    sidecar_path,
-    strip_markers,
-)
-
-
-def test_strips_fypp_markers_and_keeps_code():
-    out, sidecar = strip_markers(['# 700 "real.fpp"\n', "  x = a - b\n"])
-    assert out == ["  x = a - b\n"]
-    assert sidecar == {1: {"file": "real.fpp", "line": 700, "instance": 0}}
-
-
-def test_keeps_cpp_conditional_directives():
-    lines = ['# 700 "real.fpp"\n', "#if defined(FOO)\n", "  x = 1\n", "#endif\n"]
-    out, _ = strip_markers(lines)
-    assert out == ["#if defined(FOO)\n", "  x = 1\n", "#endif\n"]
-
-
-def test_repeated_marker_increments_instance():
-    lines = ['# 700 "real.fpp"\n', "  s1 = x\n", '# 700 "real.fpp"\n', "  s2 = y\n"]
-    out, sidecar = strip_markers(lines)
-    assert out == ["  s1 = x\n", "  s2 = y\n"]
-    assert sidecar[1] == {"file": "real.fpp", "line": 700, "instance": 0}
-    assert sidecar[2] == {"file": "real.fpp", "line": 700, "instance": 1}
-
-
-def test_distinguishes_fypp_marker_from_cpp_directive():
-    # no fypp line markers here -> nothing stripped, no origin recorded
-    lines = ["#define X 1\n", "#if X\n", "  a = 1\n", "#endif\n"]
-    out, sidecar = strip_markers(lines)
-    assert out == lines
-    assert sidecar == {}
-
-
-def test_source_line_auto_increments_within_a_region():
-    lines = ['# 700 "real.fpp"\n', "  a = 1\n", "  b = 2\n"]
-    _, sidecar = strip_markers(lines)
-    assert sidecar[1]["line"] == 700
-    assert sidecar[2]["line"] == 701
-
-
-# --- Tier 2 consumption: locating + querying sidecars ---
-
-
-def test_instances_of_returns_physical_lines_for_a_source_line():
-    sidecar = {
-        7: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 0},
-        11: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 1},
-        20: {"file": "/abs/src/simulation/m_weno.fpp", "line": 999, "instance": 0},
-    }
-    # matched by basename; the repo-relative path from a dd_line hotspot still matches
-    assert instances_of(sidecar, "src/simulation/m_weno.fpp", 241) == [(7, 0), (11, 1)]
-
-
-def test_instances_of_empty_when_no_match():
-    sidecar = {7: {"file": "m_weno.fpp", "line": 241, "instance": 0}}
-    assert instances_of(sidecar, "m_weno.fpp", 999) == []
-    assert instances_of(sidecar, "m_other.fpp", 241) == []
-
-
-def test_instances_of_sorted_by_physical_line():
-    sidecar = {
-        30: {"file": "f.fpp", "line": 5, "instance": 2},
-        10: {"file": "f.fpp", "line": 5, "instance": 0},
-        20: {"file": "f.fpp", "line": 5, "instance": 1},
-    }
-    assert instances_of(sidecar, "f.fpp", 5) == [(10, 0), (20, 1), (30, 2)]
-
-
-def test_sidecar_dir_for_binary_maps_install_to_staging():
-    got = sidecar_dir_for_binary("/x/build/install/HASH/bin/simulation")
-    assert got == os.path.join("/x/build/staging/HASH/fypp/simulation")
-
-
-def test_sidecar_path_uses_fpp_basename_and_linemap_suffix():
-    got = sidecar_path("/x/staging/HASH/fypp/simulation", "src/simulation/m_weno.fpp")
-    assert got == os.path.join("/x/staging/HASH/fypp/simulation", "m_weno.fpp.linemap.json")
-
-
-def test_cpp_directives_do_not_consume_a_source_line_increment():
-    # the #else line must not advance the .fpp source line nor get a sidecar entry
-    lines = ['# 700 "real.fpp"\n', "  a = 1\n", "#else\n", "  b = 2\n"]
-    out, sidecar = strip_markers(lines)
-    assert out == ["  a = 1\n", "#else\n", "  b = 2\n"]
-    assert sidecar[1]["line"] == 700  # a = 1
-    assert 2 not in sidecar  # #else: kept, but not a source line
-    assert sidecar[3]["line"] == 701  # b = 2 (not 702)
-
-
-def test_sidecar_line_numbers_are_physical_output_lines():
-    # output physical line numbers (1-based, after stripping) are the keys
-    lines = ['# 10 "f"\n', "  a = 1\n", '# 20 "f"\n', "  b = 2\n", "  c = 3\n"]
-    out, sidecar = strip_markers(lines)
-    assert out == ["  a = 1\n", "  b = 2\n", "  c = 3\n"]
-    assert sidecar[1] == {"file": "f", "line": 10, "instance": 0}
-    assert sidecar[2] == {"file": "f", "line": 20, "instance": 0}
-    assert sidecar[3] == {"file": "f", "line": 21, "instance": 0}

From 2276eb1926df04003a787a81829bbcf6e69ca70f Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 10:26:58 -0400
Subject: [PATCH 10/25] fp-stability: accept a user case.py (positional, like
 run), with a feasibility guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Following the native convention (run/validate/viz take the case .py as a positional 'input'), fp-stability now does too — './mfc.sh fp-stability my_case.py' analyzes your case instead of the built-in suite; omitting it runs the suite as before.

It loads the case via the shared loader (run.input.load), runs it as a single case, and auto-detects the files to diff from the reference run (_autodetect_compare: conserved-var .dat at the final step, prim fallback). Output is forced to serial .dat I/O (parallel_io=F) since the no-MPI binary is run as one process and the suite diffs serial files.

Guard (Verrou is ~30x slower and the suite runs the sim many times): the case must be a small, short, single-process proxy — it errors if cells > 100k or work (cells x t_step_stop) > 200k cell-steps, with guidance to coarsen. Validated end-to-end on a real case .py (auto-compare + sig-bits PASS + cancellation digits); guard correctly rejects 1D_sodshocktube (400k cell-steps). 60 toolchain tests, ruff, precheck all 7.
---
 toolchain/mfc/cli/commands.py         | 11 ++++-
 toolchain/mfc/fp_stability.py         | 61 ++++++++++++++++++++++++++-
 toolchain/mfc/fp_stability_metrics.py | 18 ++++++++
 toolchain/mfc/test_fp_stability.py    | 23 ++++++++++
 4 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 54bbff4641..32527ab166 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -919,6 +919,14 @@
         "  float-max      --check-max-float detection of double→float overflow sites\n"
     ),
     include_common=["mfc_config", "verbose", "debug_log"],
+    positionals=[
+        Positional(
+            name="input",
+            help="Optional case .py to analyze instead of the built-in suite (run as a single serial CPU process under Verrou; must be small/short).",
+            nargs="?",
+            completion=Completion(type=CompletionType.FILES_PY),
+        ),
+    ],
     arguments=[
         Argument(
             name="sim-binary",
@@ -997,7 +1005,8 @@
         ),
     ],
     examples=[
-        Example("./mfc.sh fp-stability", "Auto-discover binaries and run all cases"),
+        Example("./mfc.sh fp-stability", "Auto-discover binaries and run the built-in suite"),
+        Example("./mfc.sh fp-stability my_case.py", "Analyze your own case (small/short, serial, CPU)"),
         Example(
             "./mfc.sh fp-stability --sim-binary build/install/abc123/bin/simulation",
             "Specify simulation binary explicitly",
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index c84b6e97ba..1a2188a9ed 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -75,6 +75,7 @@
 from .fp_stability_metrics import (
     CANCEL_BIT_LEVELS,
     MIN_SIG_BITS,
+    _autodetect_compare,
     _cancellation_severity,
     _mark_cancellation,
     _max_abs_np,
@@ -431,6 +432,15 @@ def _run_case(
         cons.print("  [dim]reference run (rounding=nearest)...[/dim]")
         _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest")
 
+        # For a user case with no fixed compare list, diff whatever the reference
+        # run actually wrote (conserved vars at the final step).
+        if not compare:
+            compare = _autodetect_compare(os.listdir(ref_dir))
+            case["compare"] = compare
+            if not compare:
+                raise MFCException("case produced no cons.*/prim.* output to compare (check t_step_save/t_step_stop and parallel_io)")
+            cons.print(f"  [dim]comparing: {', '.join(compare)}[/dim]")
+
         # --- A: random-rounding stability samples ---
         # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale),
         # vs one global floor (no per-case hand-tuned absolute threshold).
@@ -587,6 +597,51 @@ def _run_case(
     return result
 
 
+# Verrou is ~30x slower and the suite runs the simulation many times, so a user
+# case must be a small, short, single-process proxy. Work = cells x time steps;
+# both a huge grid and a long run are rejected (built-in cases are ~1k cell-steps).
+FP_CASE_MAX_CELLS = 100_000
+FP_CASE_MAX_WORK = 200_000  # cells x t_step_stop
+
+
+def _load_user_case(input_path: str) -> dict:
+    """Build a single fp-stability case from a user case .py.
+
+    The case is run as ONE serial CPU process under Verrou (so it must be small
+    and short — a coarsened proxy of a production run, not the real thing); a grid
+    too large to be feasible errors. The output files to compare are auto-detected
+    from the reference run, so 'compare' is left empty here.
+    """
+    from .run import input as run_input  # lazy import: avoids a circular import
+
+    params = run_input.load(input_path, None, {}, do_print=False).params
+    # Force serial .dat I/O: the suite runs the no-MPI binary as one process and
+    # diffs serial cons.*/prim.* files (not the parallel SILO/HDF5 path).
+    params["parallel_io"] = "F"
+    m, n, p = (int(params.get(k, 0) or 0) for k in ("m", "n", "p"))
+    cells = (m + 1) * (n + 1) * (p + 1)
+    t_stop = int(params.get("t_step_stop", 0) or 0)
+    work = cells * max(t_stop, 1)
+    if cells > FP_CASE_MAX_CELLS:
+        raise MFCException(f"case has {cells:,} cells — too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).")
+    if work > FP_CASE_MAX_WORK:
+        raise MFCException(
+            f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) — too slow under "
+            f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)."
+        )
+    stem = os.path.splitext(os.path.basename(input_path))[0]
+    if stem == "case":  # examples/<name>/case.py — the dir name is more telling
+        stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem
+    return {
+        "name": stem,
+        "description": f"user case {input_path} ({cells} cells, run single-rank on CPU)",
+        "compare": [],  # auto-detected from the reference run's output
+        "ill_cond": "",
+        "pre": params,
+        "sim": params,
+    }
+
+
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
     if not verrou_bin or not os.path.isfile(verrou_bin):
@@ -610,6 +665,8 @@ def fp_stability():
     run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
 
+    cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES
+
     log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs")
     os.makedirs(log_dir, exist_ok=True)
 
@@ -618,6 +675,8 @@ def fp_stability():
     cons.print(f"  verrou:      {verrou_bin}")
     cons.print(f"  simulation:  {sim_bin}")
     cons.print(f"  pre_process: {pp_bin}")
+    if ARG("input"):
+        cons.print(f"  case:        {ARG('input')}  (single serial CPU run under Verrou)")
     cons.print(f"  samples:     {n_samples}")
     features = []
     if run_float:
@@ -640,7 +699,7 @@ def fp_stability():
 
     start = time.time()
     results = []
-    for case in CASES:
+    for case in cases_to_run:
         try:
             r = _run_case(
                 case,
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
index 01940618d5..f84f088db3 100644
--- a/toolchain/mfc/fp_stability_metrics.py
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -16,6 +16,24 @@
 # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
 VPREC_MANTISSA_BITS = [52, 23, 16, 10]
 
+_OUTPUT_DAT = re.compile(r"^(cons|prim)\.\d+\.\d+\.(\d+)\.dat$")
+
+
+def _autodetect_compare(filenames: list) -> list:
+    """Pick the D/ output files to diff for a user-supplied case: the conserved-
+    variable files at the latest written time step (falling back to primitive
+    files if none are written). Returns [] if the case produced no field output."""
+    by_step = {}
+    for f in filenames:
+        m = _OUTPUT_DAT.match(os.path.basename(f))
+        if m:
+            by_step.setdefault(int(m.group(2)), {"cons": [], "prim": []})[m.group(1)].append(os.path.basename(f))
+    if not by_step:
+        return []
+    last = by_step[max(by_step)]
+    return sorted(last["cons"] or last["prim"])
+
+
 # Stability pass/fail (stage A) is scale-free: a case must retain at least this
 # many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
 # 24 ~= single precision. One global floor replaces per-case absolute thresholds
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 4630cd3db6..30fb2f0caa 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -8,6 +8,7 @@
 
 from mfc.fp_stability_metrics import (
     MIN_SIG_BITS,
+    _autodetect_compare,
     _build_source_filter,
     _cancellation_by_file,
     _cancellation_severity,
@@ -242,6 +243,28 @@ def test_cancellation_severity_empty():
     assert _cancellation_severity([]) == {}
 
 
+# --- auto-detect which output files to compare (for a user case) ---
+
+
+def test_autodetect_compare_picks_cons_at_latest_step():
+    fns = [
+        "cons.1.00.000000.dat",
+        "cons.1.00.000050.dat",
+        "cons.2.00.000050.dat",
+        "prim.1.00.000050.dat",
+    ]
+    assert _autodetect_compare(fns) == ["cons.1.00.000050.dat", "cons.2.00.000050.dat"]
+
+
+def test_autodetect_compare_falls_back_to_prim_when_no_cons():
+    fns = ["prim.1.00.000010.dat", "prim.3.00.000010.dat"]
+    assert _autodetect_compare(fns) == ["prim.1.00.000010.dat", "prim.3.00.000010.dat"]
+
+
+def test_autodetect_compare_empty_when_no_field_output():
+    assert _autodetect_compare(["indices.dat", "pre_time_data.dat", "foo.txt"]) == []
+
+
 # --- scale-free pass/fail: significant bits retained ---
 
 

From 3b662db38c1bc7e99ee9e6721573a1155a427de0 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 10:34:01 -0400
Subject: [PATCH 11/25] fp-stability: refresh --help description for case.py
 usage + sig-bits/cancellation reframe

The --help prose was stale: it claimed 'PASS/FAIL against per-case thresholds' (now scale-free sig-bits), didn't mention the case.py positional / its serial-CPU constraints / the feasibility guard, and listed an outdated case set. Rewrote the description to cover: running on a built-in suite or a user case .py (with constraints + guard), the >= 24-bit scale-free pass criterion, and the analysis passes (dd confirmation/ranking, cancellation origins by digits lost). Also updated the module-docstring Usage. The positional INPUT and a case.py example were already shown in --help; this fixes the surrounding prose.
---
 toolchain/mfc/cli/commands.py | 26 ++++++++++++++------------
 toolchain/mfc/fp_stability.py |  7 ++++++-
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 32527ab166..a6eae93846 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -898,23 +898,25 @@
     name="fp-stability",
     help="Run floating-point stability tests using Verrou.",
     description=(
-        "Runs each registered test case N times under Verrou's random IEEE-754 "
-        "rounding mode and compares against a nearest-rounding reference run. "
-        "Reports the max L∞ deviation and PASS/FAIL against per-case thresholds.\n\n"
+        "Runs Verrou random-rounding stability analysis on a built-in suite of small "
+        "1-D cases, or — given a case .py (positional INPUT) — on your own case. Each "
+        "case is run N times under Verrou's random IEEE-754 rounding and compared "
+        "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must "
+        "retain at least ~24 significant bits (single precision) under random rounding "
+        "(no per-case thresholds).\n\n"
+        "With a case .py, that case is run as a SINGLE serial CPU process under Verrou "
+        "(~30x slower, and run many times), so it must be a small, short proxy — large "
+        "grids or long runs are rejected with guidance; serial .dat I/O is forced. "
+        "Example: ./mfc.sh fp-stability my_case.py\n\n"
         "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind "
         "(defaults to $HOME/.local/verrou). The simulation and pre_process "
         "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n"
-        "Test cases:\n"
-        "  sod_standard      1-D standard Sod, p_L/p_R=10 (well-conditioned baseline)\n"
-        "  sod_strong        1-D Sod, p_L/p_R=100,000 — HLLC xi-factor cancellation\n"
-        "  water_stiffened   1-D water shock (pi_inf=4046) — pressure-recovery cancellation\n"
-        "  air_water_interface  1-D air/water contact (two-fluid) — mixed-cell cancellation\n\n"
-        "Additional features (skip with --no-* flags):\n"
+        "Analysis passes (skip with --no-* flags):\n"
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
-        "  dd_sym         verrou_dd_sym bisection to responsible functions (on failure)\n"
-        "  dd_line        verrou_dd_line bisection to responsible source lines (on failure)\n"
-        "  cancellation   --check-cancellation detection of catastrophic cancellation sites\n"
+        "  dd_sym/dd_line verrou_dd bisection to responsible functions/lines, then a\n"
+        "                 --source positive control confirms + ranks them by sensitivity\n"
+        "  cancellation   --check-cancellation origins, ranked by significant digits lost\n"
         "  mca-sigbits    Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n"
         "  float-max      --check-max-float detection of double→float overflow sites\n"
     ),
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 1a2188a9ed..7dde83e2e3 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -59,9 +59,14 @@
   - A serial pre_process binary (to generate initial conditions)
 
 Usage:
-  ./mfc.sh fp-stability
+  ./mfc.sh fp-stability                       # built-in 1-D suite
+  ./mfc.sh fp-stability my_case.py            # your own case (small/short, serial, CPU)
   ./mfc.sh fp-stability --no-vprec --no-dd-line
   ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH
+
+A user case .py is run as a single serial CPU process under Verrou, so it must be
+a small, short proxy (a feasibility guard rejects large grids / long runs); output
+is forced to serial .dat I/O and the files to diff are auto-detected.
 """
 
 import math

From c6637a0f46f7a89e8ec52fc8efbee2a34ce847c4 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 11:19:17 -0400
Subject: [PATCH 12/25] =?UTF-8?q?fp-stability:=20address=20PR=20review=20?=
 =?UTF-8?q?=E2=80=94=20silent=20failures,=201-row=20crash,=20dead=20code,?=
 =?UTF-8?q?=20tests,=20comment=20rot?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From a multi-agent PR review:

- Silent failures (critical): a crashed cancellation/float-max/MCA run was reported as a clean result ('none detected' / 'no overflows' / 'dev=0.0'). The run helpers now log the failure and return None (cancellation/float-max) or a completed-sample count (MCA, distinct from measured zero); _run_case reports 'run failed' instead of a false all-clear.

- Crash (important): np.loadtxt(...)[:,1] raised IndexError on a single-row .dat (reachable via a 1-cell user case), aborting the whole suite. Added _dat_column using np.atleast_2d; also fixed the generated dd_cmp.py oracle.

- Dead code: removed _cancellation_by_file (superseded by the digits-lost severity view) and _stability_pass (the orchestrator inlines the comparison), plus their tests.

- Tests: smoke tests for the CI-only report emitters (blank + populated result, and unconfirmed->::notice:: downgrade), _digits_left clamp, and #:block/#:call/unbalanced macro cases.

- Comment rot: stage-E docstring now says confirmation is set-level (not per-line); dropped the non-existent 'per-file density' reference; fixed the '48 ~ full mantissa' note (53-bit). 45 tests, ruff, precheck all 7.
---
 toolchain/mfc/fp_stability.py         |  73 +++++++++++--------
 toolchain/mfc/fp_stability_metrics.py |  39 +++-------
 toolchain/mfc/fp_stability_runners.py |  37 ++++++----
 toolchain/mfc/test_fp_stability.py    | 101 ++++++++++++++++++--------
 4 files changed, 147 insertions(+), 103 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 7dde83e2e3..9047e77bc9 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -24,13 +24,14 @@
    Each reported line is then *confirmed* by a positive control: --gen-source
    captures the symbol-correct executed lines, those are filtered to the suspect
    set, and a float-mode run with --source restricted to just them must
-   reproduce the instability.  Lines that do not reproduce it are reported as
-   unconfirmed (downgraded from ::warning:: to ::notice::).  Each line is then
-   perturbed alone and ranked by the share of the single-precision deviation it
-   reproduces.  NOTE: this is a *sensitivity* measure — where reduced precision
-   most moves the output — and is typically dominated by the time integrator /
-   final accumulation, NOT by where cancellation originates.  Stage F (and its
-   per-file density) is the cancellation-origin view; the two usually differ.
+   reproduce the instability.  If perturbing the suspect set does not reproduce
+   it, the case's hotspots are reported as unconfirmed (downgraded from
+   ::warning:: to ::notice::) — this is a single set-level verdict, not per line.
+   Each line is then perturbed alone and ranked by the share of the single-
+   precision deviation it reproduces.  NOTE: that share is a *sensitivity*
+   measure — where reduced precision most moves the output — typically dominated
+   by the time integrator / final accumulation, NOT by where cancellation
+   originates.  Stage F is the cancellation-origin view; the two usually differ.
    Hotspots are cross-referenced against the stage-F cancellation sites and
    flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def
    expansion.
@@ -550,23 +551,27 @@ def _run_case(
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
             try:
-                # sweep bit thresholds to get per-site severity (bits lost)
+                # sweep bit thresholds to get per-site severity (bits lost); each
+                # run returns None if it failed (distinct from [] = ran, found none)
                 level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
-                locs = level_sites[0][1]  # lowest threshold = full list
-                bits = _cancellation_severity(level_sites)
-                result["cancellation_locs"] = locs
-                result["cancellation_bits"] = bits
-                if locs:
-                    worst = max(bits.values()) if bits else 0
-                    cons.print(f"  cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
+                locs = next((s for lvl, s in level_sites if lvl == CANCEL_BIT_LEVELS[0]), None)
+                if locs is None:
+                    cons.print("  [bold yellow]cancellation: detection run failed (see logs); not reported[/bold yellow]")
                 else:
-                    cons.print("  cancellation: none detected")
-                # cross-reference: label dd_line hotspots that sit on a cancellation site
-                if result["dd_line_locs"] and locs:
-                    _mark_cancellation(result["dd_line_locs"], locs)
-                    n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation"))
-                    if n_xref:
-                        cons.print(f"  {n_xref} hotspot(s) coincide with a catastrophic-cancellation site")
+                    bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None])
+                    result["cancellation_locs"] = locs
+                    result["cancellation_bits"] = bits
+                    if locs:
+                        worst = max(bits.values()) if bits else 0
+                        cons.print(f"  cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
+                    else:
+                        cons.print("  cancellation: none detected")
+                    # cross-reference: label dd_line hotspots that sit on a cancellation site
+                    if result["dd_line_locs"] and locs:
+                        _mark_cancellation(result["dd_line_locs"], locs)
+                        n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation"))
+                        if n_xref:
+                            cons.print(f"  {n_xref} hotspot(s) coincide with a catastrophic-cancellation site")
             except Exception as exc:
                 cons.print(f"  [bold yellow]cancellation check error[/bold yellow]: {exc}")
 
@@ -574,11 +579,14 @@ def _run_case(
         if run_mca:
             cons.print(f"  [dim]MCA significant-bits estimate (N={n_samples})...[/dim]")
             try:
-                mca_dev, mca_sigbits = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples)
-                result["mca_dev"] = mca_dev
-                result["mca_sigbits"] = mca_sigbits
-                bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a"
-                cons.print(f"  MCA: dev={mca_dev:.3e}  ({bits_str})")
+                mca_dev, mca_sigbits, n_ok = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples)
+                if n_ok == 0:
+                    cons.print(f"  [bold yellow]MCA: no samples completed (0/{n_samples}; see logs)[/bold yellow]")
+                else:
+                    result["mca_dev"] = mca_dev
+                    result["mca_sigbits"] = mca_sigbits
+                    bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a"
+                    cons.print(f"  MCA: dev={mca_dev:.3e}  ({bits_str})  [{n_ok}/{n_samples} samples]")
             except Exception as exc:
                 cons.print(f"  [bold yellow]MCA error[/bold yellow]: {exc}")
 
@@ -587,11 +595,14 @@ def _run_case(
             cons.print("  [dim]float-max overflow check...[/dim]")
             try:
                 locs = _run_float_max_check(verrou_bin, sim_bin, work_dir)
-                result["float_max_locs"] = locs
-                if locs:
-                    cons.print(f"  [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)")
+                if locs is None:
+                    cons.print("  [bold yellow]float-max: run failed (see logs); not reported[/bold yellow]")
                 else:
-                    cons.print("  float-max: no overflows")
+                    result["float_max_locs"] = locs
+                    if locs:
+                        cons.print(f"  [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)")
+                    else:
+                        cons.print("  float-max: no overflows")
             except Exception as exc:
                 cons.print(f"  [bold yellow]float-max check error[/bold yellow]: {exc}")
 
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
index f84f088db3..cfb3b2c1fd 100644
--- a/toolchain/mfc/fp_stability_metrics.py
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -58,11 +58,6 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float:
     return -math.log2(max_dev / ref_scale)
 
 
-def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
-    """A case passes when it retains at least `floor` significant bits."""
-    return _sig_bits(max_dev, ref_scale) >= floor
-
-
 # Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
 _LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
 
@@ -232,6 +227,14 @@ def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
     return "\n".join(rows)
 
 
+def _dat_column(path: str):
+    """Load column 1 (the field value) from an MFC .dat file, robust to a
+    single-row file (np.loadtxt returns 1-D then, which [:, 1] would crash on)."""
+    import numpy as np
+
+    return np.atleast_2d(np.loadtxt(path))[:, 1]
+
+
 def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
     import numpy as np
 
@@ -240,9 +243,7 @@ def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
         ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
         if not os.path.exists(ref_p) or not os.path.exists(run_p):
             return float("inf")
-        ref = np.loadtxt(ref_p)[:, 1]
-        run = np.loadtxt(run_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref - run))))
+        total = max(total, float(np.max(np.abs(_dat_column(ref_p) - _dat_column(run_p)))))
     return total
 
 
@@ -255,8 +256,7 @@ def _max_abs_np(ref_dir: str, compare_files: list) -> float:
         ref_p = os.path.join(ref_dir, fname)
         if not os.path.exists(ref_p):
             continue
-        ref = np.loadtxt(ref_p)[:, 1]
-        total = max(total, float(np.max(np.abs(ref))))
+        total = max(total, float(np.max(np.abs(_dat_column(ref_p)))))
     return total
 
 
@@ -321,7 +321,8 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
 # Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
 # filter: a site is reported only if it lost >= the threshold bits. Sweeping these
 # levels and taking the highest each site survives gives a per-site "bits lost"
-# severity (a lower bound — no false positives). 48 ~ full double mantissa.
+# severity (a lower bound — no false positives). 48 is near the full 53-bit
+# double mantissa (the top of the sweep), not the mantissa width itself.
 CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
 
 
@@ -474,19 +475,3 @@ def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
         lines = by_base.get(os.path.basename(loc["path"]), set())
         loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1))
     return dd_line_locs
-
-
-def _cancellation_by_file(cancellation_locs: list) -> list:
-    """Aggregate cancellation sites by source file → [(basename, count)] sorted by
-    count (desc), ties by name.
-
-    This is the cancellation-*origin* view (where ill-conditioning concentrates),
-    as opposed to the per-line --source share, which is a *sensitivity* view
-    (where reduced precision most moves the output — typically the time
-    integrator / final accumulation, regardless of where error originates).
-    """
-    counts = {}
-    for fname, _lineno in cancellation_locs:
-        base = os.path.basename(fname)
-        counts[base] = counts.get(base, 0) + 1
-    return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 4146baab11..39f2ece47b 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -118,9 +118,10 @@ def _run_simulation_verrou(
             shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir)
 
 
-def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
+def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10):
     """Run --check-cancellation at the given bit threshold; return [(fname, line)]
-    of MFC cancellation sites (subtractions losing >= `threshold` significant bits)."""
+    of MFC cancellation sites (subtractions losing >= `threshold` significant bits),
+    or None if the run itself failed (distinct from [] = ran and found none)."""
     tag = f"cancellation_{threshold}"
     run_dir = os.path.join(work_dir, tag)
     os.makedirs(run_dir, exist_ok=True)
@@ -132,8 +133,9 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh
     ]
     try:
         _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags)
-    except MFCException:
-        pass
+    except MFCException as exc:
+        cons.print(f"  [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]")
+        return None
     raw = _parse_cancel_gen(gen_path)
     filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
     skipped = len(raw) - len(filtered)
@@ -150,10 +152,12 @@ def _run_mca_samples(
     ref_dir: str,
     n_mca: int,
 ) -> tuple:
-    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound)."""
+    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound, n_ok) where
+    n_ok is how many samples actually completed (0 => no usable measurement)."""
     compare = case["compare"]
     ref_scale = _max_abs_np(ref_dir, compare)
     max_dev = 0.0
+    n_ok = 0
     flags = ["--backend=mcaquad", "--mca-mode=mca"]
     for i in range(n_mca):
         run_dir = os.path.join(work_dir, f"mca_{i:02d}")
@@ -161,16 +165,18 @@ def _run_mca_samples(
         try:
             _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
             max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
-        except MFCException:
-            pass
+            n_ok += 1
+        except MFCException as exc:
+            cons.print(f"  [dim]MCA sample {i} failed: {exc}[/dim]")
     sig_bits = None
-    if max_dev > 0.0 and ref_scale > 0.0:
+    if n_ok and max_dev > 0.0 and ref_scale > 0.0:
         sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale))))
-    return max_dev, sig_bits
+    return max_dev, sig_bits, n_ok
 
 
-def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list:
-    """Run with --check-max-float=yes; return [(fname, line)] of overflow sites."""
+def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
+    """Run with --check-max-float=yes; return [(fname, line)] of overflow sites,
+    or None if the run failed (distinct from [] = ran and found none)."""
     run_dir = os.path.join(work_dir, "float_max")
     os.makedirs(run_dir, exist_ok=True)
     try:
@@ -182,8 +188,9 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list:
             rounding_mode="nearest",
             extra_flags=["--check-max-float=yes"],
         )
-    except MFCException:
-        pass
+    except MFCException as exc:
+        cons.print(f"  [yellow]float-max run failed: {exc}[/yellow]")
+        return None
     return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float")
 
 
@@ -291,8 +298,8 @@ def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
             if not os.path.exists(ref_p) or not os.path.exists(run_p):
                 print(f"MISSING: {{fname}}")
                 sys.exit(1)
-            ref = np.loadtxt(ref_p)[:, 1]
-            run = np.loadtxt(run_p)[:, 1]
+            ref = np.atleast_2d(np.loadtxt(ref_p))[:, 1]
+            run = np.atleast_2d(np.loadtxt(run_p))[:, 1]
             dev = float(np.max(np.abs(ref - run)))
             max_dev = max(max_dev, dev)
 
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 30fb2f0caa..981b8b3c86 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -10,14 +10,13 @@
     MIN_SIG_BITS,
     _autodetect_compare,
     _build_source_filter,
-    _cancellation_by_file,
     _cancellation_severity,
     _confirm_decision,
+    _digits_left,
     _macro_context_in_lines,
     _mark_cancellation,
     _rank_locs,
     _sig_bits,
-    _stability_pass,
     _statement_bounds_in_lines,
 )
 
@@ -84,6 +83,16 @@ def test_macro_context_def_body_when_no_inner_loop():
     assert _macro_context_in_lines(lines, 2) == "#:def"
 
 
+def test_macro_context_block_and_call_are_duplicating():
+    assert _macro_context_in_lines(["#:block B\n", "  a = b - c\n", "#:endblock\n"], 2) == "#:block"
+    assert _macro_context_in_lines(["#:call M()\n", "  a = b - c\n", "#:endcall\n"], 2) == "#:call"
+
+
+def test_macro_context_unbalanced_close_is_safe():
+    # a stray #:endfor with an empty stack must not crash or misreport
+    assert _macro_context_in_lines(["#:endfor\n", "  a = b - c\n"], 2) is None
+
+
 # --- #1: building the symbol-correct --source filter from --gen-source output ---
 
 
@@ -205,27 +214,6 @@ def test_mark_cancellation_false_for_different_basename():
     assert locs[0]["cancellation"] is False
 
 
-# --- cancellation-origin view: where cancellation concentrates ---
-
-
-def test_cancellation_by_file_counts_and_sorts_by_density():
-    locs = [
-        ("src/simulation/m_weno.fpp", 10),
-        ("m_weno.fpp", 20),
-        ("a/m_riemann_solvers.fpp", 5),
-    ]
-    assert _cancellation_by_file(locs) == [("m_weno.fpp", 2), ("m_riemann_solvers.fpp", 1)]
-
-
-def test_cancellation_by_file_breaks_ties_by_name():
-    locs = [("z.fpp", 1), ("a.fpp", 2)]
-    assert _cancellation_by_file(locs) == [("a.fpp", 1), ("z.fpp", 1)]
-
-
-def test_cancellation_by_file_empty():
-    assert _cancellation_by_file([]) == []
-
-
 # --- per-site cancellation severity (bits lost), from a threshold sweep ---
 
 
@@ -291,17 +279,16 @@ def test_sig_bits_deviation_at_scale_is_unstable():
     assert _sig_bits(1.0, 1.0) <= 0.0
 
 
-def test_stability_pass_uses_global_floor():
-    # well-conditioned: ~46 bits >= floor
-    assert _stability_pass(1e-14, 1.0, MIN_SIG_BITS) is True
-    # catastrophic: deviation at field scale -> fails
-    assert _stability_pass(0.5, 1.0, MIN_SIG_BITS) is False
-
-
 def test_min_sig_bits_is_single_precision_floor():
     assert MIN_SIG_BITS == 24
 
 
+def test_digits_left_full_and_clamped():
+    assert 15.5 < _digits_left(0) < 16.0  # full double ~ 16 sig digits
+    assert _digits_left(53) == 0.0
+    assert _digits_left(60) == 0.0  # clamp: never negative
+
+
 # --- Fortran line-continuation handling (correct-line labeling) ---
 
 
@@ -338,3 +325,57 @@ def test_statement_bounds_with_leading_ampersand_continuation():
     lines = ["  beta = x**2 &\n", "       & + eps\n"]
     assert _statement_bounds_in_lines(lines, 1) == (1, 2)
     assert _statement_bounds_in_lines(lines, 2) == (1, 2)
+
+
+# --- report emitters: must survive blank and populated result dicts (CI-only path) ---
+
+
+def _emit_to_tmp(results, tmp_path, monkeypatch):
+    """Run _emit_github_summary into a temp file under the GitHub-Actions env."""
+    from mfc import fp_stability_report as report
+
+    out = tmp_path / "summary.md"
+    monkeypatch.setenv("GITHUB_STEP_SUMMARY", str(out))
+    monkeypatch.setenv("GITHUB_ACTIONS", "1")
+    report._emit_github_summary(results, 5)
+    return out.read_text()
+
+
+def test_emit_summary_survives_blank_result(tmp_path, monkeypatch):
+    # the dict produced on the per-case error path must not KeyError the emitter
+    from mfc.fp_stability import _blank_result
+
+    text = _emit_to_tmp([_blank_result("x")], tmp_path, monkeypatch)
+    assert "0 passed, 1 failed" in text
+
+
+def test_emit_summary_populated_result(tmp_path, monkeypatch):
+    from mfc.fp_stability import _blank_result
+
+    r = _blank_result("demo")
+    r.update(
+        passed=False,
+        max_dev=1e-9,
+        sig_bits=30.0,
+        float_proxy=1e-6,
+        vprec=[(52, 1e-14), (23, float("inf"))],  # exercises the "crash" branch
+        dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": "#:for", "share": 0.4, "cancellation": True}],
+        dd_line_confirmed=False,
+        cancellation_locs=[("src/x/m_a.fpp", 5)],
+        cancellation_bits={("src/x/m_a.fpp", 5): 40},
+        float_max_locs=[("m_a.fpp", 9)],
+    )
+    text = _emit_to_tmp([r], tmp_path, monkeypatch)
+    assert "💥 crash" in text and "digits lost" in text
+
+
+def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys):
+    from mfc import fp_stability_report as report
+    from mfc.fp_stability import _blank_result
+
+    monkeypatch.setenv("GITHUB_ACTIONS", "1")
+    r = _blank_result("demo")
+    r.update(dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": None, "share": 0.9, "cancellation": False}], dd_line_confirmed=False)
+    report._emit_github_annotations([r])
+    out = capsys.readouterr().out
+    assert "::notice" in out and "::warning" not in out  # unconfirmed -> notice, not warning

From d3919d5cc351a72ab307bfe42f70c50581a98ad6 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 13:17:41 -0400
Subject: [PATCH 13/25] fp-stability: add opt-in Verrou bootstrap script +
 actionable SKIP message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Verrou is a compiled Valgrind fork (not a pip/uv/conda-friendly Python package), so it can't be installed into the venv. Add toolchain/bootstrap/verrou.sh — an explicit, opt-in installer that builds Valgrind+Verrou from source into $VERROU_HOME (default ~/.local/verrou), pinned to the same versions as the fp-stability CI workflow (Valgrind 3.26.0 + edf-hpc/verrou@a58d434).

The installer is deliberately opt-in: it is never auto-built on a bare fp-stability run, since it is a ~20-min source build that needs a C toolchain and autotools. It is idempotent (skips if Verrou is already present; --force rebuilds), requires Linux (Valgrind has no modern-macOS/Apple-Silicon support), warns but proceeds on non-x86_64 (Valgrind builds on aarch64 etc., but Verrou's FP backends are best-validated on x86_64), and checks build dependencies up front, printing install guidance if any are missing. The fp-stability SKIP message now points at it and clarifies that Verrou is not a pip package.
---
 toolchain/bootstrap/verrou.sh | 90 +++++++++++++++++++++++++++++++++++
 toolchain/mfc/fp_stability.py |  4 +-
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100755 toolchain/bootstrap/verrou.sh

diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh
new file mode 100755
index 0000000000..5b22cbca1f
--- /dev/null
+++ b/toolchain/bootstrap/verrou.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by
+# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of
+# Valgrind that must be compiled from source (~20 min), so this is a deliberate,
+# explicit step rather than something `fp-stability` does silently.
+#
+#   bash toolchain/bootstrap/verrou.sh            # build into $HOME/.local/verrou
+#   VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh
+#   bash toolchain/bootstrap/verrou.sh --force    # rebuild even if present
+#
+# Versions are pinned to match the fp-stability CI workflow.
+
+set -euo pipefail
+
+VALGRIND_VERSION="3.26.0"
+VERROU_COMMIT="a58d434"
+PREFIX="${VERROU_HOME:-$HOME/.local/verrou}"
+FORCE="${1:-}"
+
+echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}"
+
+# Idempotent: skip if already installed and working.
+if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1; then
+    echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do."
+    exit 0
+fi
+
+# Platform: Valgrind has no working modern-macOS support; Linux only.
+if [ "$(uname -s)" != "Linux" ]; then
+    echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2
+    exit 1
+fi
+case "$(uname -m)" in
+    x86_64) ;;
+    aarch64|arm64)
+        echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2
+        echo "         best-validated on x86_64 — treat results as experimental on this arch." >&2
+        ;;
+    *)
+        echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." >&2
+        ;;
+esac
+
+# Build dependencies.
+missing=""
+for tool in tar git make patch autoconf automake; do
+    command -v "$tool" >/dev/null 2>&1 || missing="$missing $tool"
+done
+command -v cc >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || missing="$missing gcc"
+command -v wget >/dev/null 2>&1 || command -v curl >/dev/null 2>&1 || missing="$missing wget/curl"
+if [ -n "$missing" ]; then
+    echo "ERROR: missing build dependencies:$missing" >&2
+    echo "       Install them (e.g. apt: build-essential automake autoconf libtool; or load HPC modules) and retry." >&2
+    exit 1
+fi
+
+workdir="$(mktemp -d)"
+trap 'rm -rf "$workdir"' EXIT
+cd "$workdir"
+
+tarball="valgrind-${VALGRIND_VERSION}.tar.bz2"
+url="https://sourceware.org/pub/valgrind/${tarball}"
+echo "==> Downloading ${tarball}"
+if command -v wget >/dev/null 2>&1; then
+    wget -q "$url"
+else
+    curl -fsSL -o "$tarball" "$url"
+fi
+tar xf "$tarball"
+
+echo "==> Cloning Verrou @ ${VERROU_COMMIT}"
+git clone --quiet https://github.com/edf-hpc/verrou.git
+git -C verrou checkout --quiet "$VERROU_COMMIT"
+
+# Merge Verrou into the Valgrind tree and apply its patch.
+cp -r verrou "valgrind-${VALGRIND_VERSION}/verrou"
+cd "valgrind-${VALGRIND_VERSION}"
+cat verrou/valgrind.*diff | patch -p1
+
+echo "==> Building (this takes ~20 min)"
+./autogen.sh
+./configure --enable-only64bit --prefix="$PREFIX"
+make -j"$(nproc)"
+make install
+
+echo "==> Verifying"
+"${PREFIX}/bin/valgrind" --tool=verrou --version
+echo "==> Done. Verrou installed at ${PREFIX}"
+echo "    Run:  ./mfc.sh fp-stability   (or set VERROU_HOME=${PREFIX} if you used a custom prefix)"
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 9047e77bc9..407dd06419 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -661,7 +661,9 @@ def _load_user_case(input_path: str) -> dict:
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
     if not verrou_bin or not os.path.isfile(verrou_bin):
-        cons.print("[bold yellow]SKIP[/bold yellow]: verrou not found. Install at $HOME/.local/verrou or set VERROU_HOME.")
+        cons.print("[bold yellow]SKIP[/bold yellow]: Verrou not found (it is a compiled Valgrind tool, not a pip package).")
+        cons.print("  Install it (Linux; ~20 min source build) with:  [bold]bash toolchain/bootstrap/verrou.sh[/bold]")
+        cons.print("  Or point at an existing build with --verrou-binary PATH or $VERROU_HOME.")
         sys.exit(0)
 
     sim_bin = ARG("sim_binary") or _find_binary("simulation")

From c27b6aea593e78e423b12718177a1f1da8501f37 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 13:29:27 -0400
Subject: [PATCH 14/25] ci(fp-stability): build Verrou via the shared bootstrap
 script (DRY)

Replace the inline Valgrind+Verrou build in the workflow with a call to toolchain/bootstrap/verrou.sh, so the local installer and CI share one pinned recipe (no drift between them). Cache gating and the system-deps step are unchanged; the build step is still skipped on a cache hit. Tightened the verify step to '--tool=verrou --version', and noted that the cache key's pinned versions must track the script.
---
 .github/workflows/fp-stability.yml | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml
index 8a977cfcb3..45ff70d69f 100644
--- a/.github/workflows/fp-stability.yml
+++ b/.github/workflows/fp-stability.yml
@@ -68,6 +68,7 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.local/verrou
+          # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the builder).
           key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }}
 
       - name: Install system dependencies
@@ -79,26 +80,10 @@ jobs:
 
       - name: Build Verrou
         if: steps.cache-verrou.outputs.cache-hit != 'true'
-        run: |
-          cd /tmp
-          wget -q https://sourceware.org/pub/valgrind/valgrind-3.26.0.tar.bz2
-          tar xf valgrind-3.26.0.tar.bz2
-
-          git clone https://github.com/edf-hpc/verrou.git
-          git -C verrou checkout a58d434
-
-          # Merge Verrou into Valgrind source tree and patch
-          cp -r verrou valgrind-3.26.0/verrou
-          cd valgrind-3.26.0
-          cat verrou/valgrind.*diff | patch -p1
-
-          ./autogen.sh
-          ./configure --enable-only64bit --prefix="$HOME/.local/verrou"
-          make -j"$(nproc)"
-          make install
+        run: bash toolchain/bootstrap/verrou.sh
 
       - name: Verify Verrou
-        run: ~/.local/verrou/bin/valgrind --version
+        run: ~/.local/verrou/bin/valgrind --tool=verrou --version
 
       - name: Build MFC (debug, serial)
         # FFLAGS=-fno-inline prevents gfortran from inlining small functions into

From a4cfa79e74554ce388d34dc168edc716ee1c0a94 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 16:50:56 -0400
Subject: [PATCH 15/25] fp-stability: install Verrou from prebuilt artifact
 (verrou-dist), build as fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

verrou.sh now downloads a pinned, hash-verified prebuilt artifact from sbryngelson/verrou-dist@v1 (seconds) and falls back to the ~20-min source build if the prebuilt is unavailable or fails verification. The idempotency check sources env.sh first, so an already-installed relocated prebuilt is recognized as working and isn't re-fetched. fp_stability sets VALGRIND_LIB (via _verrou_env, reused by _dd_env) so that valgrind and verrou_dd_* calls resolve in a relocated tree; this is harmless for source builds. CI installs zstd and sources env.sh before verifying.
---
 .github/workflows/fp-stability.yml    | 12 +++--
 toolchain/bootstrap/verrou.sh         | 77 ++++++++++++++++++++++++---
 toolchain/mfc/fp_stability_runners.py | 20 +++++--
 3 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml
index 45ff70d69f..205e3d711e 100644
--- a/.github/workflows/fp-stability.yml
+++ b/.github/workflows/fp-stability.yml
@@ -68,7 +68,7 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.local/verrou
-          # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the builder).
+          # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the installer).
           key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }}
 
       - name: Install system dependencies
@@ -76,14 +76,18 @@ jobs:
           sudo apt-get update -y
           sudo apt-get install -y \
             build-essential automake python3 python3-numpy libc6-dbg \
-            cmake gfortran
+            cmake gfortran zstd
 
-      - name: Build Verrou
+      - name: Install Verrou (prebuilt artifact, or source build as fallback)
         if: steps.cache-verrou.outputs.cache-hit != 'true'
         run: bash toolchain/bootstrap/verrou.sh
 
       - name: Verify Verrou
-        run: ~/.local/verrou/bin/valgrind --tool=verrou --version
+        # Source env.sh first: a prebuilt (relocated) tree needs VALGRIND_LIB; a
+        # source build works either way. (fp-stability sets this itself at runtime.)
+        run: |
+          [ -f ~/.local/verrou/env.sh ] && . ~/.local/verrou/env.sh
+          ~/.local/verrou/bin/valgrind --tool=verrou --version
 
       - name: Build MFC (debug, serial)
         # FFLAGS=-fno-inline prevents gfortran from inlining small functions into
diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh
index 5b22cbca1f..8ebafdeb2f 100755
--- a/toolchain/bootstrap/verrou.sh
+++ b/toolchain/bootstrap/verrou.sh
@@ -2,12 +2,14 @@
 #
 # Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by
 # `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of
-# Valgrind that must be compiled from source (~20 min), so this is a deliberate,
-# explicit step rather than something `fp-stability` does silently.
+# Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds);
+# if none is available for this tag/arch it falls back to a source build (~20 min).
+# Either way it's a deliberate, explicit step, not something fp-stability does silently.
 #
-#   bash toolchain/bootstrap/verrou.sh            # build into $HOME/.local/verrou
+#   bash toolchain/bootstrap/verrou.sh            # install into $HOME/.local/verrou
 #   VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh
-#   bash toolchain/bootstrap/verrou.sh --force    # rebuild even if present
+#   bash toolchain/bootstrap/verrou.sh --force    # reinstall even if present
+#   VERROU_BUILD_FROM_SOURCE=1 bash toolchain/bootstrap/verrou.sh   # skip the prebuilt
 #
 # Versions are pinned to match the fp-stability CI workflow.
 
@@ -15,13 +17,19 @@ set -euo pipefail
 
 VALGRIND_VERSION="3.26.0"
 VERROU_COMMIT="a58d434"
+# Prebuilt artifacts (built once per arch) live in a small companion repo. The tag
+# pins to the (valgrind, verrou) pair above — bump all three together.
+VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}"
+VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}"
 PREFIX="${VERROU_HOME:-$HOME/.local/verrou}"
 FORCE="${1:-}"
 
 echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}"
 
-# Idempotent: skip if already installed and working.
-if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1; then
+# Idempotent: skip if already installed and working. Source env.sh first if present
+# (a prebuilt tree needs VALGRIND_LIB to run; a source build works either way).
+if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] \
+   && ( [ -f "${PREFIX}/env.sh" ] && . "${PREFIX}/env.sh"; "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
     echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do."
     exit 0
 fi
@@ -31,9 +39,11 @@ if [ "$(uname -s)" != "Linux" ]; then
     echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2
     exit 1
 fi
+arch_tag=""
 case "$(uname -m)" in
-    x86_64) ;;
+    x86_64) arch_tag="x86_64" ;;
     aarch64|arm64)
+        arch_tag="aarch64"
         echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2
         echo "         best-validated on x86_64 — treat results as experimental on this arch." >&2
         ;;
@@ -42,6 +52,59 @@ case "$(uname -m)" in
         ;;
 esac
 
+# Fast path: download a prebuilt, hash-verified artifact and source its relocatable
+# env.sh, instead of building from source. Any failure (no asset for this arch/tag,
+# missing zstd/sha256sum, checksum mismatch, won't run) falls through to the build.
+try_prebuilt() {
+    [ -n "$arch_tag" ] || return 1
+    [ "${VERROU_BUILD_FROM_SOURCE:-}" = "1" ] && return 1
+    command -v sha256sum >/dev/null 2>&1 || return 1
+    tar --zstd --help >/dev/null 2>&1 || command -v zstd >/dev/null 2>&1 || return 1
+    command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || return 1
+
+    local asset base dl
+    asset="verrou-${VERROU_COMMIT}-valgrind-${VALGRIND_VERSION}-linux-${arch_tag}.tar.zst"
+    base="https://github.com/${VERROU_DIST_REPO}/releases/download/${VERROU_DIST_TAG}/${asset}"
+    dl="$(mktemp -d)"
+
+    echo "==> Trying prebuilt ${VERROU_DIST_REPO}@${VERROU_DIST_TAG} (${asset})"
+    _fetch() {  # url dest
+        if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi
+    }
+    if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then
+        echo "==> No prebuilt for this tag/arch — building from source instead."
+        rm -rf "$dl"; return 1
+    fi
+    if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then
+        echo "WARNING: prebuilt checksum mismatch — building from source instead." >&2
+        rm -rf "$dl"; return 1
+    fi
+
+    mkdir -p "$PREFIX"
+    if tar --zstd --help >/dev/null 2>&1; then
+        tar -C "$PREFIX" --zstd -xf "$dl/$asset"
+    else
+        zstd -dc "$dl/$asset" | tar -C "$PREFIX" -xf -
+    fi
+    rm -rf "$dl"
+
+    # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets
+    # VALGRIND_LIB relative to the extracted tree so the relocated install works.
+    if ! ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
+        echo "WARNING: prebuilt did not run — building from source instead." >&2
+        return 1
+    fi
+    return 0
+}
+
+if try_prebuilt; then
+    echo "==> Verifying"
+    ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version )
+    echo "==> Done (prebuilt). Verrou installed at ${PREFIX}"
+    echo "    Run:  ./mfc.sh fp-stability   (or set VERROU_HOME=${PREFIX} if you used a custom prefix)"
+    exit 0
+fi
+
 # Build dependencies.
 missing=""
 for tool in tar git make patch autoconf automake; do
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 39f2ece47b..c16f5540db 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -60,6 +60,19 @@ def _verrou_pythonpath(verrou_bin: str) -> str:
     return matches[0] if matches else ""
 
 
+def _verrou_env(verrou_bin: str) -> dict:
+    """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt
+    artifact extracted to a new prefix) can locate its tool — Valgrind bakes its
+    build prefix into the binary otherwise. Harmless for a source-built tree, where
+    VALGRIND_LIB just equals the compiled-in path. A VALGRIND_LIB already in the
+    environment (user sourced env.sh) is left untouched."""
+    env = os.environ.copy()
+    libdir = os.path.join(os.path.dirname(os.path.dirname(verrou_bin)), "libexec", "valgrind")
+    if "VALGRIND_LIB" not in env and os.path.isdir(libdir):
+        env["VALGRIND_LIB"] = libdir
+    return env
+
+
 def _write_inp(params: dict, target_name: str, work_dir: str) -> None:
     """Write a Fortran namelist .inp file from a Python params dict."""
     from .run import case_dicts
@@ -107,7 +120,7 @@ def _run_simulation_verrou(
         cmd.append(sim_bin)
 
         with open(os.path.join(run_dir, "sim.out"), "w") as f:
-            result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False)
+            result = subprocess.run(cmd, cwd=tmpdir, env=_verrou_env(verrou_bin), stdout=f, stderr=subprocess.STDOUT, check=False)
 
         if result.returncode != 0:
             tag = rounding_mode or "vprec"
@@ -312,9 +325,10 @@ def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
 
 
 def _dd_env(verrou_bin: str) -> dict:
-    """Environment with PYTHONPATH set for verrou_dd_* imports."""
+    """Environment for verrou_dd_*: VALGRIND_LIB (so a relocated tree's inner valgrind
+    calls resolve) plus PYTHONPATH (for the verrou_dd_* imports)."""
     py_pkg = _verrou_pythonpath(verrou_bin)
-    env = os.environ.copy()
+    env = _verrou_env(verrou_bin)
     if py_pkg:
         existing = env.get("PYTHONPATH", "")
         env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing]))

From 0613913080c42e4938d9c6ae3aec71de595d54f3 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 18:01:32 -0400
Subject: [PATCH 16/25] fp-stability: auto-install Verrou on first use
 (download prebuilt), hard-fail if it can't
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Running ./mfc.sh fp-stability with no Verrou present now installs it via the bootstrap (downloads the pinned prebuilt from verrou-dist; source build as fallback) and then proceeds, instead of printing SKIP and exiting 0; a failed install is now a hard error. _find_verrou no longer accepts a bare system valgrind on PATH (it has no 'verrou' tool and would only fail at run time) — that case now reads as 'Verrou absent', so Verrou gets installed. CI drops the separate Install/Verify Verrou steps because the fp-stability run performs the install itself. Tests are added for the discovery logic.
---
 .github/workflows/fp-stability.yml    | 13 ++------
 toolchain/mfc/cli/commands.py         |  8 +++--
 toolchain/mfc/fp_stability.py         | 22 ++++++++++---
 toolchain/mfc/fp_stability_runners.py | 16 +++++++++-
 toolchain/mfc/test_fp_stability.py    | 46 +++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml
index 205e3d711e..203cff3ad4 100644
--- a/.github/workflows/fp-stability.yml
+++ b/.github/workflows/fp-stability.yml
@@ -78,16 +78,9 @@ jobs:
             build-essential automake python3 python3-numpy libc6-dbg \
             cmake gfortran zstd
 
-      - name: Install Verrou (prebuilt artifact, or source build as fallback)
-        if: steps.cache-verrou.outputs.cache-hit != 'true'
-        run: bash toolchain/bootstrap/verrou.sh
-
-      - name: Verify Verrou
-        # Source env.sh first: a prebuilt (relocated) tree needs VALGRIND_LIB; a
-        # source build works either way. (fp-stability sets this itself at runtime.)
-        run: |
-          [ -f ~/.local/verrou/env.sh ] && . ~/.local/verrou/env.sh
-          ~/.local/verrou/bin/valgrind --tool=verrou --version
+      # Verrou is installed by `fp-stability` itself on first use (downloads the
+      # prebuilt artifact; aborts if that fails). The cache above restores it across
+      # runs so the download only happens on a cache miss.
 
       - name: Build MFC (debug, serial)
         # FFLAGS=-fno-inline prevents gfortran from inlining small functions into
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index a6eae93846..4beebd0f34 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -908,9 +908,11 @@
         "(~30x slower, and run many times), so it must be a small, short proxy — large "
         "grids or long runs are rejected with guidance; serial .dat I/O is forced. "
         "Example: ./mfc.sh fp-stability my_case.py\n\n"
-        "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind "
-        "(defaults to $HOME/.local/verrou). The simulation and pre_process "
-        "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n"
+        "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to "
+        "$HOME/.local/verrou); if absent it is installed automatically (a pinned, "
+        "hash-verified prebuilt is downloaded, with a source build as fallback) — "
+        "aborts if that install fails. The simulation and pre_process binaries must "
+        "be serial (no-MPI, no-GPU) debug builds.\n\n"
         "Analysis passes (skip with --no-* flags):\n"
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 407dd06419..867c2fb1a6 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -73,6 +73,7 @@
 import math
 import os
 import shutil
+import subprocess
 import sys
 import tempfile
 import time
@@ -658,13 +659,26 @@ def _load_user_case(input_path: str) -> dict:
     }
 
 
+def _install_verrou() -> str:
+    """Verrou is absent: install it via the bootstrap (downloads a pinned, hash-verified
+    prebuilt; source build as fallback) and return the valgrind path. Aborts on failure —
+    fp-stability cannot run without Verrou, so this is a hard error, not a skip."""
+    script = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh")
+    cons.print("[bold]Verrou not found — installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]")
+    if subprocess.run(["bash", script], check=False).returncode != 0:
+        raise MFCException("Verrou install failed (see output above). Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.")
+    verrou_bin = _find_verrou()
+    if not verrou_bin or not os.path.isfile(verrou_bin):
+        raise MFCException("Verrou install reported success but no valgrind binary was found under $VERROU_HOME.")
+    return verrou_bin
+
+
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
     if not verrou_bin or not os.path.isfile(verrou_bin):
-        cons.print("[bold yellow]SKIP[/bold yellow]: Verrou not found (it is a compiled Valgrind tool, not a pip package).")
-        cons.print("  Install it (Linux; ~20 min source build) with:  [bold]bash toolchain/bootstrap/verrou.sh[/bold]")
-        cons.print("  Or point at an existing build with --verrou-binary PATH or $VERROU_HOME.")
-        sys.exit(0)
+        if ARG("verrou_binary"):
+            raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.")
+        verrou_bin = _install_verrou()
 
     sim_bin = ARG("sim_binary") or _find_binary("simulation")
     if not sim_bin or not os.path.isfile(sim_bin):
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index c16f5540db..3202fbd9de 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -32,12 +32,26 @@
 from .printer import cons
 
 
+def _has_verrou_tool(valgrind_bin: str) -> bool:
+    """True if this valgrind actually provides the 'verrou' tool. A plain system
+    valgrind does not — accepting one would only fail later at run time."""
+    try:
+        return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0
+    except OSError:
+        return False
+
+
 def _find_verrou() -> str:
     verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou"))
     candidate = os.path.join(verrou_home, "bin", "valgrind")
     if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
         return candidate
-    return shutil.which("valgrind") or ""
+    # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system
+    # valgrind must read as "Verrou absent" so it gets installed, not misused.
+    path_vg = shutil.which("valgrind")
+    if path_vg and _has_verrou_tool(path_vg):
+        return path_vg
+    return ""
 
 
 def _find_binary(name: str) -> str:
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 981b8b3c86..da37ac750e 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -379,3 +379,49 @@ def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys):
     report._emit_github_annotations([r])
     out = capsys.readouterr().out
     assert "::notice" in out and "::warning" not in out  # unconfirmed -> notice, not warning
+
+
+# --- Verrou discovery: a bare system valgrind must read as "Verrou absent" ---
+
+
+def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    vbin = tmp_path / "bin" / "valgrind"
+    vbin.parent.mkdir(parents=True)
+    vbin.write_text("#!/bin/sh\n")
+    vbin.chmod(0o755)
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    assert runners._find_verrou() == str(vbin)
+
+
+def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    # VERROU_HOME has no valgrind; a plain valgrind is on PATH but lacks the tool.
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind")
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: False)
+    assert runners._find_verrou() == ""
+
+
+def test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind")
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: True)
+    assert runners._find_verrou() == "/opt/verrou/bin/valgrind"
+
+
+def test_has_verrou_tool_reflects_exit_code(monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    class _R:
+        def __init__(self, rc):
+            self.returncode = rc
+
+    monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(0))
+    assert runners._has_verrou_tool("/any/valgrind") is True
+    monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(1))
+    assert runners._has_verrou_tool("/any/valgrind") is False

From 37b7a216db7f0835caa98f0893887a5c19476138 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 18:16:02 -0400
Subject: [PATCH 17/25] =?UTF-8?q?fp-stability:=20address=20PR=20review=20?=
 =?UTF-8?q?=E2=80=94=20atomic=20prebuilt=20install,=20verify=20VERROU=5FHO?=
 =?UTF-8?q?ME=20tree,=20more=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

verrou.sh try_prebuilt() runs as an if-condition (so set -e is suppressed inside it), which meant a failed extract could fall through and the source build would install over a half-written tree; it now extracts and verifies in a staging dir and swaps the result into PREFIX atomically, with explicit error checks on each step. _find_verrou now verifies that the $VERROU_HOME tree actually runs the verrou tool (with VALGRIND_LIB set for a relocated prebuilt), so a broken/stale tree reads as absent and gets reinstalled instead of being used until it fails on every run. Fix comment rot (fp-stability now auto-installs). Add unit tests for _verrou_env (incl. preserve-user-VALGRIND_LIB), _dd_env PYTHONPATH composition, _install_verrou hard-fail guards, _has_verrou_tool OSError, and the broken-VERROU_HOME case.
---
 toolchain/bootstrap/verrou.sh         | 31 ++++++---
 toolchain/mfc/fp_stability_runners.py | 13 ++--
 toolchain/mfc/test_fp_stability.py    | 98 +++++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 12 deletions(-)

diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh
index 8ebafdeb2f..dfbd462231 100755
--- a/toolchain/bootstrap/verrou.sh
+++ b/toolchain/bootstrap/verrou.sh
@@ -4,7 +4,8 @@
 # `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of
 # Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds);
 # if none is available for this tag/arch it falls back to a source build (~20 min).
-# Either way it's a deliberate, explicit step, not something fp-stability does silently.
+# fp-stability auto-runs this on first use when Verrou is absent (printing what it
+# does); it is also safe to run by hand. A failed install aborts, never a silent skip.
 #
 #   bash toolchain/bootstrap/verrou.sh            # install into $HOME/.local/verrou
 #   VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh
@@ -80,20 +81,34 @@ try_prebuilt() {
         rm -rf "$dl"; return 1
     fi
 
-    mkdir -p "$PREFIX"
+    # Extract + verify in a staging dir, then swap into $PREFIX atomically. set -e
+    # is suppressed inside a function used as an `if` condition, so check each step
+    # explicitly — otherwise a failed extract would fall through and the source
+    # build would install on top of a half-written tree (or a stale one on --force).
+    local stage="$dl/stage"
+    mkdir -p "$stage"
     if tar --zstd --help >/dev/null 2>&1; then
-        tar -C "$PREFIX" --zstd -xf "$dl/$asset"
+        tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; }
     else
-        zstd -dc "$dl/$asset" | tar -C "$PREFIX" -xf -
+        zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; }
     fi
-    rm -rf "$dl"
 
     # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets
-    # VALGRIND_LIB relative to the extracted tree so the relocated install works.
-    if ! ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
+    # VALGRIND_LIB relative to the tree so the relocated install works. Verify the
+    # staged tree runs before committing it.
+    if ! ( . "${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
         echo "WARNING: prebuilt did not run — building from source instead." >&2
-        return 1
+        rm -rf "$dl"; return 1
+    fi
+
+    # Commit only now: replace any existing $PREFIX atomically.
+    mkdir -p "$(dirname "$PREFIX")"
+    rm -rf "$PREFIX"
+    if ! mv "$stage" "$PREFIX"; then
+        echo "WARNING: could not install prebuilt to ${PREFIX} — building from source instead." >&2
+        rm -rf "$dl"; return 1
     fi
+    rm -rf "$dl"
     return 0
 }
 
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 3202fbd9de..1d1c5a7b8f 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -32,11 +32,13 @@
 from .printer import cons
 
 
-def _has_verrou_tool(valgrind_bin: str) -> bool:
+def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool:
     """True if this valgrind actually provides the 'verrou' tool. A plain system
-    valgrind does not — accepting one would only fail later at run time."""
+    valgrind does not — accepting one would only fail later at run time. Pass env
+    (with VALGRIND_LIB) to verify a relocated prebuilt tree, which cannot load its
+    tool without it."""
     try:
-        return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0
+        return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0
     except OSError:
         return False
 
@@ -44,7 +46,10 @@ def _has_verrou_tool(valgrind_bin: str) -> bool:
 def _find_verrou() -> str:
     verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou"))
     candidate = os.path.join(verrou_home, "bin", "valgrind")
-    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+    # Require the $VERROU_HOME tree to actually run the verrou tool (with VALGRIND_LIB
+    # for a relocated prebuilt). A broken/stale/non-Verrou tree there must read as
+    # "absent" so it gets reinstalled, not used until it fails on every run.
+    if os.path.isfile(candidate) and os.access(candidate, os.X_OK) and _has_verrou_tool(candidate, _verrou_env(candidate)):
         return candidate
     # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system
     # valgrind must read as "Verrou absent" so it gets installed, not misused.
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index da37ac750e..b2b43bfc02 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -392,9 +392,27 @@ def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch):
     vbin.write_text("#!/bin/sh\n")
     vbin.chmod(0o755)
     monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    # The candidate must also verify as Verrou-enabled; stub that so the test
+    # exercises precedence, not a real valgrind invocation.
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True)
     assert runners._find_verrou() == str(vbin)
 
 
+def test_find_verrou_rejects_broken_verrou_home_tree(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    # A valgrind exists at $VERROU_HOME but does not actually run the verrou tool
+    # (broken/stale/non-Verrou): it must read as absent, not be returned.
+    vbin = tmp_path / "bin" / "valgrind"
+    vbin.parent.mkdir(parents=True)
+    vbin.write_text("#!/bin/sh\n")
+    vbin.chmod(0o755)
+    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False)
+    monkeypatch.setattr(runners.shutil, "which", lambda _name: None)
+    assert runners._find_verrou() == ""
+
+
 def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch):
     from mfc import fp_stability_runners as runners
 
@@ -425,3 +443,83 @@ def __init__(self, rc):
     assert runners._has_verrou_tool("/any/valgrind") is True
     monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(1))
     assert runners._has_verrou_tool("/any/valgrind") is False
+
+    def _boom(*a, **k):
+        raise OSError("not executable")
+
+    monkeypatch.setattr(runners.subprocess, "run", _boom)
+    assert runners._has_verrou_tool("/stale/valgrind") is False
+
+
+# --- env composition for relocated (prebuilt) Verrou trees ---
+
+
+def test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    (tmp_path / "libexec" / "valgrind").mkdir(parents=True)
+    monkeypatch.delenv("VALGRIND_LIB", raising=False)
+    env = runners._verrou_env(str(tmp_path / "bin" / "valgrind"))
+    assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind")
+
+
+def test_verrou_env_omits_valgrind_lib_when_libexec_absent(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    monkeypatch.delenv("VALGRIND_LIB", raising=False)
+    env = runners._verrou_env(str(tmp_path / "bin" / "valgrind"))
+    assert "VALGRIND_LIB" not in env
+
+
+def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    (tmp_path / "libexec" / "valgrind").mkdir(parents=True)
+    monkeypatch.setenv("VALGRIND_LIB", "/user/chosen/lib")
+    env = runners._verrou_env(str(tmp_path / "bin" / "valgrind"))
+    assert env["VALGRIND_LIB"] == "/user/chosen/lib"  # not clobbered
+
+
+def test_dd_env_prepends_pythonpath_and_inherits_valgrind_lib(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    (tmp_path / "libexec" / "valgrind").mkdir(parents=True)
+    monkeypatch.delenv("VALGRIND_LIB", raising=False)
+    monkeypatch.setenv("PYTHONPATH", "/pre/existing")
+    monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/site-packages/valgrind")
+    env = runners._dd_env(str(tmp_path / "bin" / "valgrind"))
+    assert env["PYTHONPATH"] == "/vg/site-packages/valgrind:/pre/existing"
+    assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind")
+
+
+def test_dd_env_no_leading_colon_when_pythonpath_empty(tmp_path, monkeypatch):
+    from mfc import fp_stability_runners as runners
+
+    monkeypatch.delenv("PYTHONPATH", raising=False)
+    monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/valgrind")
+    env = runners._dd_env(str(tmp_path / "bin" / "valgrind"))
+    assert env["PYTHONPATH"] == "/vg/valgrind"  # no stray leading ':'
+
+
+# --- auto-install hard-fail guards ---
+
+
+def test_install_verrou_raises_when_bootstrap_fails(monkeypatch):
+    import pytest
+
+    from mfc import fp_stability as fps
+
+    monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 1})())
+    with pytest.raises(fps.MFCException, match="Verrou install failed"):
+        fps._install_verrou()
+
+
+def test_install_verrou_raises_when_no_binary_appears(monkeypatch):
+    import pytest
+
+    from mfc import fp_stability as fps
+
+    monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 0})())
+    monkeypatch.setattr(fps, "_find_verrou", lambda: "")
+    with pytest.raises(fps.MFCException, match="no valgrind binary"):
+        fps._install_verrou()

From 1f10f31f7b489f99e0c4ff355ccf15662e803c71 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 19:27:54 -0400
Subject: [PATCH 18/25] fp-stability: drop dd line/sym bisection; keep
 cancellation + move fypp flag onto it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dd delta-debug stack (bisection + confirmation positive-control + sensitivity ranking) tried to pinpoint and rank the single most precision-sensitive source line, but fypp #:for/#:def expansion collapses many generated computations onto one .fpp line, so that attribution is instance-ambiguous by construction. This was the fragile part of the tool, so it is removed (~900 lines). The cancellation pass stays and now carries the fypp instance-ambiguity flag: each cancellation origin is checked with _macro_context and, if its .fpp line sits inside a #:for/#:def, marked 'may represent multiple instances' in console, annotations, and summary. file:line attribution (cancellation origins, ranked by digits lost) is preserved; only the false-precision line-pinpointing is gone. Verified end-to-end: 27 cancellation sites, 23 flagged fypp-ambiguous.
---
 toolchain/mfc/cli/commands.py         |  20 +-
 toolchain/mfc/fp_stability.py         | 132 ++----------
 toolchain/mfc/fp_stability_metrics.py | 260 +---------------------
 toolchain/mfc/fp_stability_report.py  | 130 +++--------
 toolchain/mfc/fp_stability_runners.py | 298 +-------------------------
 toolchain/mfc/test_fp_stability.py    | 204 ++----------------
 6 files changed, 69 insertions(+), 975 deletions(-)

diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 4beebd0f34..6dfbb57c77 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -916,8 +916,6 @@
         "Analysis passes (skip with --no-* flags):\n"
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
-        "  dd_sym/dd_line verrou_dd bisection to responsible functions/lines, then a\n"
-        "                 --source positive control confirms + ranks them by sensitivity\n"
         "  cancellation   --check-cancellation origins, ranked by significant digits lost\n"
         "  mca-sigbits    Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n"
         "  float-max      --check-max-float detection of double→float overflow sites\n"
@@ -972,20 +970,6 @@
             default=False,
             dest="no_vprec",
         ),
-        Argument(
-            name="no-dd-sym",
-            help="Skip verrou_dd_sym function-level delta-debug on failure.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_dd_sym",
-        ),
-        Argument(
-            name="no-dd-line",
-            help="Skip verrou_dd_line source-line delta-debug on failure.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_dd_line",
-        ),
         Argument(
             name="no-cancellation",
             help="Skip --check-cancellation catastrophic-cancellation detection.",
@@ -1016,7 +1000,7 @@
             "Specify simulation binary explicitly",
         ),
         Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"),
-        Example("./mfc.sh fp-stability --no-vprec --no-dd-line", "Skip VPREC sweep and line debug"),
+        Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"),
         Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip new analysis passes"),
     ],
     key_options=[
@@ -1026,8 +1010,6 @@
         ("-N, --samples N", "Random-rounding samples per case (default: 5)"),
         ("--no-float-proxy", "Skip float-rounding proxy run"),
         ("--no-vprec", "Skip VPREC mantissa-bit sweep"),
-        ("--no-dd-sym", "Skip verrou_dd_sym on failure"),
-        ("--no-dd-line", "Skip verrou_dd_line on failure"),
         ("--no-cancellation", "Skip cancellation detection"),
         ("--no-mca", "Skip MCA significant-bits estimate"),
         ("--no-float-max", "Skip float32 overflow detection"),
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 867c2fb1a6..102d512d52 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -15,37 +15,18 @@
    One run per mantissa-bit level [52,23,16,10] with
    --backend=vprec --vprec-mode=full; shows where each case breaks.
 
-D. verrou_dd_sym on failure (--no-dd-sym to skip)
-   Delta-debug bisection isolates the minimal set of *functions* causing
-   instability.
-
-E. verrou_dd_line on failure, after dd_sym (--no-dd-line to skip)
-   Further bisects to exact *source lines* within the responsible functions.
-   Each reported line is then *confirmed* by a positive control: --gen-source
-   captures the symbol-correct executed lines, those are filtered to the suspect
-   set, and a float-mode run with --source restricted to just them must
-   reproduce the instability.  If perturbing the suspect set does not reproduce
-   it, the case's hotspots are reported as unconfirmed (downgraded from
-   ::warning:: to ::notice::) — this is a single set-level verdict, not per line.
-   Each line is then perturbed alone and ranked by the share of the single-
-   precision deviation it reproduces.  NOTE: that share is a *sensitivity*
-   measure — where reduced precision most moves the output — typically dominated
-   by the time integrator / final accumulation, NOT by where cancellation
-   originates.  Stage F is the cancellation-origin view; the two usually differ.
-   Hotspots are cross-referenced against the stage-F cancellation sites and
-   flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def
-   expansion.
-
-F. Cancellation detection (--no-cancellation to skip)
+D. Cancellation detection (--no-cancellation to skip)
    One run with --check-cancellation=yes; reports MFC source lines that
    produce catastrophic cancellation (subtraction of nearly-equal doubles).
-   Uses --cc-gen-file for structured per-line output.
+   Uses --cc-gen-file for structured per-line output.  A cancellation site whose
+   .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous
+   (the line maps to multiple generated instances).
 
-G. MCA significant-bits estimate (--no-mca to skip)
+E. MCA significant-bits estimate (--no-mca to skip)
    N runs with --backend=mcaquad; max deviation vs nearest-rounding
    reference gives a lower bound on significant bits: s = -log2(dev/scale).
 
-H. Float-max overflow detection (--no-float-max to skip)
+F. Float-max overflow detection (--no-float-max to skip)
    One run with --check-max-float=yes; reports locations where a
    double→float conversion would overflow to ±Inf.
 
@@ -62,7 +43,7 @@
 Usage:
   ./mfc.sh fp-stability                       # built-in 1-D suite
   ./mfc.sh fp-stability my_case.py            # your own case (small/short, serial, CPU)
-  ./mfc.sh fp-stability --no-vprec --no-dd-line
+  ./mfc.sh fp-stability --no-vprec --no-cancellation
   ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH
 
 A user case .py is run as a single serial CPU process under Verrou, so it must be
@@ -84,7 +65,7 @@
     MIN_SIG_BITS,
     _autodetect_compare,
     _cancellation_severity,
-    _mark_cancellation,
+    _macro_context,
     _max_abs_np,
     _max_diff_np,
     _sig_bits,
@@ -97,9 +78,6 @@
     _find_binary,
     _find_verrou,
     _run_cancellation_check,
-    _run_confirmation,
-    _run_dd_line,
-    _run_dd_sym,
     _run_float_max_check,
     _run_float_proxy,
     _run_mca_samples,
@@ -391,12 +369,9 @@ def _blank_result(name: str) -> dict:
         "sig_bits": None,
         "float_proxy": None,
         "vprec": [],
-        "dd_sym_syms": [],
-        "dd_line_locs": [],
-        "dd_line_confirmed": None,
-        "dd_line_confirm_dev": None,
         "cancellation_locs": [],
         "cancellation_bits": {},
+        "cancellation_macro": {},
         "mca_dev": None,
         "mca_sigbits": None,
         "float_max_locs": [],
@@ -409,11 +384,8 @@ def _run_case(
     sim_bin: str,
     pp_bin: str,
     n_samples: int,
-    log_dir: str,
     run_float: bool,
     run_vprec: bool,
-    run_dd_sym: bool,
-    run_dd_line: bool,
     run_cancellation: bool,
     run_mca: bool,
     run_float_max: bool,
@@ -493,62 +465,7 @@ def _run_case(
                     marker = "  [red]FAIL[/red]"
                 cons.print(f"    {bits:2d} bits{label_str}: dev={dev:.3e}{marker}")
 
-        # --- D/E: delta-debug with float mode to find FP hotspots.
-        # dd_run.sh uses --rounding-mode=float (deterministic single-precision),
-        # so each bisection step is consistent and --nruns=1 suffices.  Threshold
-        # = float_proxy/10: the full instrumented set produces ~float_proxy
-        # deviation; excluding the responsible function drops it to near zero;
-        # any subset missing the responsible function gives SAME.
-        # Skip when float_proxy is unavailable or too small to localize.
-        float_proxy = result.get("float_proxy")
-        _DD_FLOAT_MIN = 1e-6
-        dd_threshold = float_proxy / 10.0 if float_proxy and float_proxy >= _DD_FLOAT_MIN else 0.0
-        if dd_threshold > 0 and (run_dd_sym or run_dd_line):
-            cons.print(f"  [dim]dd threshold: {dd_threshold:.1e} (float_proxy={float_proxy:.1e})[/dim]")
-        elif run_dd_sym or run_dd_line:
-            cons.print(f"  [dim]skipping dd: float_proxy={float_proxy} < {_DD_FLOAT_MIN:.0e}[/dim]")
-        if dd_threshold > 0 and run_dd_sym:
-            try:
-                result["dd_sym_syms"] = _run_dd_sym(case, verrou_bin, sim_bin, work_dir, log_dir, threshold=dd_threshold)
-            except Exception as exc:
-                cons.print(f"  [bold yellow]dd_sym error[/bold yellow]: {exc}")
-        if dd_threshold > 0 and run_dd_line:
-            try:
-                result["dd_line_locs"] = _run_dd_line(
-                    case,
-                    verrou_bin,
-                    sim_bin,
-                    work_dir,
-                    log_dir,
-                    threshold=dd_threshold,
-                )
-                macro_n = sum(1 for loc in result["dd_line_locs"] if loc["macro"])
-                if macro_n:
-                    cons.print(f"  [dim]dd_line: {macro_n} hotspot(s) inside fypp-expanded code (instance-ambiguous)[/dim]")
-            except Exception as exc:
-                cons.print(f"  [bold yellow]dd_line error[/bold yellow]: {exc}")
-
-        # --- E2: confirm dd_line hotspots and rank each by its individual share ---
-        if dd_threshold > 0 and run_dd_line and result["dd_line_locs"]:
-            cons.print("  [dim]confirming + ranking dd_line hotspots (per-line perturbation)...[/dim]")
-            try:
-                confirmed, cdev, ranked = _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, result["dd_line_locs"], dd_threshold, float_proxy)
-                result["dd_line_locs"] = ranked
-                result["dd_line_confirmed"] = confirmed
-                result["dd_line_confirm_dev"] = cdev
-                if confirmed is True:
-                    cons.print(f"  [bold green]dd_line confirmed[/bold green]: suspect-only dev={cdev:.3e} >= {dd_threshold:.1e}")
-                elif confirmed is False:
-                    cons.print(f"  [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)")
-                top = ranked[0] if ranked else None
-                if top and top.get("share") is not None:
-                    cons.print(f"  highest single-precision sensitivity: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)")
-                    cons.print("  [dim](sensitivity = where reduced precision most moves the output, often the time")
-                    cons.print("  [dim] integrator; not necessarily where cancellation originates — see cancellation sites)[/dim]")
-            except Exception as exc:
-                cons.print(f"  [bold yellow]dd_line confirmation error[/bold yellow]: {exc}")
-
-        # --- F: cancellation detection ---
+        # --- D: cancellation detection ---
         if run_cancellation:
             cons.print("  [dim]cancellation detection...[/dim]")
             try:
@@ -562,21 +479,22 @@ def _run_case(
                     bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None])
                     result["cancellation_locs"] = locs
                     result["cancellation_bits"] = bits
+                    # flag cancellation sites whose .fpp line is inside a #:for/#:def
+                    # expansion: the line maps to multiple generated instances, so the
+                    # report cannot pin it to a unique runtime instance.
+                    result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))}
                     if locs:
                         worst = max(bits.values()) if bits else 0
                         cons.print(f"  cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
+                        n_macro = len(result["cancellation_macro"])
+                        if n_macro:
+                            cons.print(f"  [dim]{n_macro} inside fypp expansions — line maps to multiple instances[/dim]")
                     else:
                         cons.print("  cancellation: none detected")
-                    # cross-reference: label dd_line hotspots that sit on a cancellation site
-                    if result["dd_line_locs"] and locs:
-                        _mark_cancellation(result["dd_line_locs"], locs)
-                        n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation"))
-                        if n_xref:
-                            cons.print(f"  {n_xref} hotspot(s) coincide with a catastrophic-cancellation site")
             except Exception as exc:
                 cons.print(f"  [bold yellow]cancellation check error[/bold yellow]: {exc}")
 
-        # --- G: MCA significant-bits estimate ---
+        # --- E: MCA significant-bits estimate ---
         if run_mca:
             cons.print(f"  [dim]MCA significant-bits estimate (N={n_samples})...[/dim]")
             try:
@@ -591,7 +509,7 @@ def _run_case(
             except Exception as exc:
                 cons.print(f"  [bold yellow]MCA error[/bold yellow]: {exc}")
 
-        # --- H: float-max overflow detection ---
+        # --- F: float-max overflow detection ---
         if run_float_max:
             cons.print("  [dim]float-max overflow check...[/dim]")
             try:
@@ -691,8 +609,6 @@ def fp_stability():
     n_samples = ARG("samples")
     run_float = not ARG("no_float_proxy")
     run_vprec = not ARG("no_vprec")
-    run_dd_sym = not ARG("no_dd_sym")
-    run_dd_line = not ARG("no_dd_line")
     run_cancellation = not ARG("no_cancellation")
     run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
@@ -715,10 +631,6 @@ def fp_stability():
         features.append("float-proxy")
     if run_vprec:
         features.append("vprec-sweep")
-    if run_dd_sym:
-        features.append("dd_sym")
-    if run_dd_line:
-        features.append("dd_line")
     if run_cancellation:
         features.append("cancellation")
     if run_mca:
@@ -739,11 +651,8 @@ def fp_stability():
                 sim_bin,
                 pp_bin,
                 n_samples,
-                log_dir,
                 run_float,
                 run_vprec,
-                run_dd_sym,
-                run_dd_line,
                 run_cancellation,
                 run_mca,
                 run_float_max,
@@ -762,9 +671,6 @@ def fp_stability():
         mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]"
         cons.print(f"  {mark} {r['name']}")
 
-    if n_fail > 0:
-        cons.print(f"\n  dd_sym/dd_line logs in: {log_dir}")
-
     _emit_github_summary(results, n_samples)
     _emit_github_annotations(results)
 
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
index cfb3b2c1fd..a985b363af 100644
--- a/toolchain/mfc/fp_stability_metrics.py
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -1,6 +1,6 @@
 """Pure metrics, source-resolution, and parsing helpers for the FP-stability suite.
 
-Leaf module: imports only stdlib + MFC_ROOT_DIR + cons. No sibling fp_stability*
+Leaf module: imports only stdlib + MFC_ROOT_DIR. No sibling fp_stability*
 imports, so the runners/report/orchestrator modules can all depend on it.
 """
 
@@ -10,7 +10,6 @@
 import re
 
 from .common import MFC_ROOT_DIR
-from .printer import cons
 
 # Mantissa-bit levels for the VPREC sweep (C).
 # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
@@ -41,10 +40,6 @@ def _autodetect_compare(filenames: list) -> list:
 # normalising by the field scale collapses that, so a single number suffices.
 MIN_SIG_BITS = 24
 
-# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy-
-# derived threshold is supplied (callers always pass one, so this is only a guard).
-_DD_FALLBACK_THRESHOLD = 1e-12
-
 
 def _sig_bits(max_dev: float, ref_scale: float) -> float:
     """Significant bits retained = -log2(max_dev / ref_scale).
@@ -58,9 +53,6 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float:
     return -math.log2(max_dev / ref_scale)
 
 
-# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
-_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
-
 # Files to exclude from cancellation / float-max reports (runtime loaders, XALT).
 _EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")
 
@@ -69,47 +61,27 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float:
 
 # Fypp block directives. The duplicating ones (#:for expands to N copies, #:def
 # defines a macro instantiated at multiple call sites) collapse many distinct
-# generated computations onto a single .fpp source line, so a dd_line hit inside
-# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select
-# code but do not duplicate it, so they are tracked for balance but not flagged.
+# generated computations onto a single .fpp source line, so a cancellation site
+# inside one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute
+# select code but do not duplicate it, so they are tracked for balance but not flagged.
 _FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
 _FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
 _FYPP_DUPLICATING = ("for", "def", "block", "call")
 
-# Lines that are clearly control-flow delimiters rather than arithmetic.
-# dd_line sometimes reports these when the responsible arithmetic is on the
-# preceding line but shares DWARF debug info with the delimiter (e.g. loop
-# boundaries in #:for-expanded code, or inlined functions at call sites).
-_CONTROL_FLOW_RE = re.compile(
-    r"^\s*("
-    r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b"
-    r"|do\s+\w+\s*=\s*[\w,\s]+"  # naked do-loop header (no arithmetic)
-    r"|else(\s+if\s*\(.*\)\s*then)?\s*$"  # else / else if (...) then
-    r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+"  # subroutine declaration
-    r"|\$:END_GPU\w+"  # fypp GPU macro closers
-    r"|#:end\w*"  # fypp directive closers (#:endfor, #:enddef, etc.)
-    r"|\s*!\s*$"  # comment-only lines
-    r"|\s*$"  # blank lines
-    r")",
-    re.IGNORECASE,
-)
-
-
-def _resolve_source(fname: str, search_whole_tree: bool = False) -> str:
+
+def _resolve_source(fname: str) -> str:
     """Resolve a (possibly bare) source filename to an existing path, or '' if not
     found.  An absolute existing path is used as-is; otherwise the basename is
-    located recursively under src/ (then the whole tree if `search_whole_tree`)."""
+    located recursively under src/."""
     if os.path.isabs(fname) and os.path.isfile(fname):
         return fname
     candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
-    if not candidates and search_whole_tree:
-        candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True)
     return candidates[0] if candidates else ""
 
 
-def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list:
+def _read_source_lines(fname: str) -> list:
     """Resolve `fname` and return its lines (with newlines), or [] if unreadable."""
-    path = _resolve_source(fname, search_whole_tree)
+    path = _resolve_source(fname)
     if not path:
         return []
     try:
@@ -119,17 +91,11 @@ def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list:
         return []
 
 
-def _read_source_line(fname: str, lineno: int) -> str:
-    """Return the raw source line at lineno (1-based), or '' if unavailable."""
-    lines = _read_source_lines(fname)
-    return lines[lineno - 1] if 0 < lineno <= len(lines) else ""
-
-
 def _macro_context_in_lines(lines: list, lineno: int) -> str:
     """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that
     encloses `lineno` (1-based) in `lines`, or None if none does.
 
-    Used to flag dd_line hotspots whose .fpp line is shared across multiple
+    Used to flag cancellation sites whose .fpp line is shared across multiple
     expanded instances (a #:for body, a #:def macro used in many places), where
     line-level attribution cannot identify which instance is responsible.
     """
@@ -155,78 +121,6 @@ def _macro_context(fname: str, lineno: int) -> str:
     return _macro_context_in_lines(lines, lineno)
 
 
-def _ends_with_continuation(line: str) -> bool:
-    """True if a free-form Fortran line ends with a continuation '&' (the last
-    non-blank token before any trailing comment)."""
-    code = line.split("!", 1)[0].rstrip()  # drop trailing comment (string-'!' is rare; fine here)
-    return code.endswith("&")
-
-
-def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
-    """Return the (start, end) 1-based physical line range of the Fortran logical
-    statement containing lineno, following '&' continuations in both directions.
-
-    A hit reported on a continuation fragment thus resolves to the whole
-    statement, so the labelled location is the full expression rather than a
-    mid-statement piece.
-    """
-    n = len(lines)
-    start = lineno
-    while start > 1 and _ends_with_continuation(lines[start - 2]):
-        start -= 1
-    end = lineno
-    while end < n and _ends_with_continuation(lines[end - 1]):
-        end += 1
-    return start, end
-
-
-def _statement_at(fname: str, lineno: int) -> tuple:
-    """File-backed (start, end, text) for the logical statement at fname:lineno;
-    text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
-    lines = _read_source_lines(fname)
-    if not 0 < lineno <= len(lines):
-        return lineno, lineno, ""
-    start, end = _statement_bounds_in_lines(lines, lineno)
-    # join physical lines, dropping the continuation '&' that may lead or trail each
-    text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end])
-    return start, end, text
-
-
-def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
-    """Return True if any line in [start, end] contains non-trivial arithmetic.
-
-    Filters out loop delimiters and fypp directive lines that dd_line sometimes
-    reports when the responsible arithmetic shares DWARF info with its enclosing
-    control-flow boundary (inlining, #:for template expansion, etc.).
-    Returns True (keep) when uncertain so we never silently drop real hotspots.
-    """
-    for lineno in range(start, end + 1):
-        line = _read_source_line(fname, lineno)
-        if not line:
-            return True  # can't read — keep to be safe
-        if not _CONTROL_FLOW_RE.match(line):
-            return True
-    return False
-
-
-def _get_source_context(fname: str, lineno: int, context: int = 2) -> str:
-    """Return a annotated source snippet around lineno, or '' if file not found.
-
-    fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path.
-    Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree.
-    """
-    lines = _read_source_lines(fname, search_whole_tree=True)
-    if not lines:
-        return ""
-    start = max(0, lineno - context - 1)
-    end = min(len(lines), lineno + context)
-    rows = []
-    for i, line in enumerate(lines[start:end], start=start + 1):
-        marker = ">" if i == lineno else " "
-        rows.append(f"{marker}{i:5d} | {line.rstrip()}")
-    return "\n".join(rows)
-
-
 def _dat_column(path: str):
     """Load column 1 (the field value) from an MFC .dat file, robust to a
     single-row file (np.loadtxt returns 1-D then, which [:, 1] would crash on)."""
@@ -341,137 +235,3 @@ def _digits_left(bits_lost: float) -> float:
     """Approximate trustworthy decimal digits remaining after losing `bits_lost`
     bits of a double's 53-bit mantissa (~15.95 digits full)."""
     return max(0.0, (53 - bits_lost) / math.log2(10))
-
-
-def _parse_rddmin_locs(summary_path: str) -> list:
-    """Extract dd_line locations from an rddmin_summary as
-    [{path, start, end, macro}] dicts (path is repo-relative; macro is the
-    enclosing fypp duplicating block, e.g. '#:for', or None).
-
-    Filters out locations whose source lines are pure control-flow delimiters
-    (loop boundaries, fypp directive closers, blank/comment lines).  These can
-    appear when the responsible arithmetic shares DWARF debug info with an
-    enclosing boundary due to inlining or #:for template expansion.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    locs = []
-    skipped = []
-    with open(summary_path) as fh:
-        for line in fh:
-            m = _LOC_RE.search(line)
-            if not m:
-                continue
-            path = m.group(1)
-            start = int(m.group(2))
-            end = int(m.group(3)) if m.group(3) else start
-            try:
-                rel = os.path.relpath(path, MFC_ROOT_DIR)
-                if rel.startswith(".."):
-                    rel = path
-            except ValueError:
-                rel = path
-            rel = rel.replace("\\", "/")
-            if _is_arithmetic_loc(path, start, end):
-                locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)})
-            else:
-                skipped.append((rel, start, end))
-    for rel, start, end in skipped:
-        loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}"
-        cons.print(f"  [dim]dd_line: skipped control-flow boundary {loc}[/dim]")
-    return locs
-
-
-def _parse_rddmin_syms(summary_path: str) -> list:
-    """Extract symbol/function names from a dd_sym rddmin_summary.
-
-    rddmin_summary format:
-      ddmin0:\\tFail Ratio: ...\\tFail indexes: ...
-      \\t<funcname>\\t<binary_path>
-      ddmin1:\\t...
-      \\t<funcname>\\t<binary_path>
-
-    Lines starting with 'ddmin' are metadata; function names are on the
-    indented (tab-prefixed) lines as the first tab-delimited field.
-    """
-    if not os.path.isfile(summary_path):
-        return []
-    syms = []
-    with open(summary_path) as fh:
-        for ln in fh:
-            stripped = ln.strip()
-            if not stripped or stripped.startswith("ddmin"):
-                continue
-            sym = stripped.split("\t")[0].strip()
-            if sym:
-                syms.append(sym)
-    return syms
-
-
-def _build_source_filter(gen_lines: list, suspect_locs: list) -> list:
-    """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a
-    suspect dd_line location.
-
-    gen_lines come from a --gen-source run and carry the exact symbol Verrou
-    requires (--source matches on file+line+symbol, not file+line alone).
-    suspect_locs are (path, start, end) tuples whose path may be a repo-relative
-    path while gen-source emits a basename, so matching is by basename + line.
-    """
-    ranges = {}
-    for path, start, end in suspect_locs:
-        ranges.setdefault(os.path.basename(path), []).append((start, end))
-    out = []
-    for raw in gen_lines:
-        parts = raw.rstrip("\n").split("\t")
-        if len(parts) < 2:
-            continue
-        base = os.path.basename(parts[0].strip())
-        try:
-            ln = int(parts[1].strip())
-        except ValueError:
-            continue
-        if any(s <= ln <= e for s, e in ranges.get(base, [])):
-            out.append(raw if raw.endswith("\n") else raw + "\n")
-    return out
-
-
-def _confirm_decision(suspect_dev, dd_threshold: float):
-    """Decide whether perturbing only the suspect lines reproduces the instability.
-
-    Returns True (confirmed), False (suspect lines are inert -> attribution
-    suspect, e.g. macro-collapse misattribution), or None if unmeasured.
-    """
-    if suspect_dev is None:
-        return None
-    return suspect_dev >= dd_threshold
-
-
-def _rank_locs(locs: list, total: float) -> list:
-    """Attach a 'share' (per-line deviation / total) to each loc dict — which
-    must already carry 'share_dev' from a single-line positive control — and
-    return the locs sorted by that deviation, most flagrant first.
-
-    'total' is normally float_proxy, so share is the fraction of the full
-    single-precision deviation that perturbing that one line alone reproduces.
-    A non-positive total yields share=None (cannot normalize).
-    """
-    for loc in locs:
-        dev = loc.get("share_dev")
-        loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None
-    return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True)
-
-
-def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list:
-    """Set loc['cancellation']=True for each dd_line loc whose line range covers a
-    catastrophic-cancellation site (stage F), matched by basename + line.
-
-    This pins the flagrant operation on a multi-op line to the subtraction that
-    cancels, rather than just naming the line.
-    """
-    by_base = {}
-    for fname, lineno in cancellation_locs:
-        by_base.setdefault(os.path.basename(fname), set()).add(lineno)
-    for loc in dd_line_locs:
-        lines = by_base.get(os.path.basename(loc["path"]), set())
-        loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1))
-    return dd_line_locs
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index 05d31d0c9d..2e4fe1abb5 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -1,8 +1,7 @@
 """GitHub-output emitters for the FP-stability suite (step summary + annotations).
 
 Pure formatting of the result dicts produced by the runners; the metric helpers
-it uses (statement resolution, source context, digit math) live in
-fp_stability_metrics.
+it uses (digit math) live in fp_stability_metrics.
 """
 
 import math
@@ -12,58 +11,36 @@
     MIN_SIG_BITS,
     VPREC_MANTISSA_BITS,
     _digits_left,
-    _get_source_context,
-    _statement_at,
 )
 
 
 def _emit_github_annotations(results: list):
-    """Emit GitHub annotations for FP hotspots.
+    """Emit GitHub annotations for FP cancellation sites.
 
     Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations
     appear inline on the responsible source lines in the PR diff view.
 
-    Up to 3 dd_line locations are emitted per case (minimal responsible lines
-    from delta-debug).  Confirmed hotspots (suspect-only perturbation reproduced
-    the instability) are ::warning::; unconfirmed ones are downgraded to
-    ::notice:: so a suspect attribution is not presented as fact.  Up to 3
-    cancellation sites per case are emitted as ::notice:: so the diff also
-    highlights subtraction-cancellation hotspots from --check-cancellation.
+    Up to 3 cancellation sites per case are emitted as ::notice:: so the diff
+    highlights subtraction-cancellation hotspots from --check-cancellation. A site
+    whose .fpp line sits inside a #:for/#:def expansion (tracked in
+    cancellation_macro) is noted as possibly representing multiple instances.
     """
     if not os.environ.get("GITHUB_ACTIONS"):
         return
     for r in results:
-        status = "FAIL" if not r["passed"] else "sensitivity"
-        _sb = r.get("sig_bits")
-        _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a"
-        dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}"
-        unconfirmed = r.get("dd_line_confirmed") is False
-
-        for loc in r.get("dd_line_locs", [])[:3]:
-            location = f"file={loc['path']},line={loc['start']}"
-            if loc["end"] != loc["start"]:
-                location += f",endLine={loc['end']}"
-            note = dev_str
-            if loc.get("share") is not None:
-                note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)"
-            if loc.get("cancellation"):
-                note += " — also a catastrophic cancellation site"
-            if loc.get("macro"):
-                note += f" — {loc['macro']}-expanded line, may represent multiple instances"
-            if unconfirmed:
-                title = f"FP candidate (unconfirmed) [{r['name']}]"
-                print(f"::notice {location},title={title}::{note}", flush=True)
-            else:
-                title = f"FP {status} [{r['name']}]"
-                print(f"::warning {location},title={title}::{note}", flush=True)
-        n_dd = len(r.get("dd_line_locs", []))
-        if n_dd > 3:
-            print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True)
-
+        site_bits = r.get("cancellation_bits") or {}
+        macro_sites = r.get("cancellation_macro") or {}
         for fname, lineno in r.get("cancellation_locs", [])[:3]:
             loc = f"file={fname},line={lineno}"
             title = f"FP cancellation [{r['name']}]"
-            print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True)
+            note = "catastrophic cancellation site"
+            bits = site_bits.get((fname, lineno))
+            if bits:
+                note += f" — loses ≥ {bits / math.log2(10):.0f} of ~16 digits"
+            macro = macro_sites.get((fname, lineno))
+            if macro:
+                note += f" — inside a {macro}-expanded line, may represent multiple instances"
+            print(f"::notice {loc},title={title}::{note}", flush=True)
         n_cc = len(r.get("cancellation_locs", []))
         if n_cc > 3:
             print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True)
@@ -82,7 +59,7 @@ def _emit_github_summary(results: list, n_samples: int):
 
     Visible directly in the Actions run UI without downloading artifacts.
     Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels),
-    and dd_line source locations for any failing cases.
+    and catastrophic-cancellation source locations for any failing cases.
     """
     summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
     if not summary_path:
@@ -128,22 +105,17 @@ def _emit_github_summary(results: list, n_samples: int):
         )
         for r in cases_with_cancel:
             site_bits = r.get("cancellation_bits") or {}
-            # collapse continuation fragments to one entry per logical statement,
-            # keeping the worst bits-lost seen on that statement
-            stmts = {}  # (basename, stmt_start) -> {where, bits, text}
-            for fname, lineno in r["cancellation_locs"]:
-                stmt_start, _end, stmt_text = _statement_at(fname, lineno)
-                key = (os.path.basename(fname), stmt_start)
-                e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text})
-                e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0))
-            ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"]))
+            macro_sites = r.get("cancellation_macro") or {}
+            sites = [{"where": f"{fname}:{lineno}", "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"]]
+            ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"]))
             if ordered:
                 w = ordered[0]
-                md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
+                md.append(f"**`{r['name']}`** — {len(ordered)} site(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
             for e in ordered[:15]:
                 lost = e["bits"] / math.log2(10)
-                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
-            footer = _more_md(len(ordered), 15, "statement(s)")
+                ambiguous = f" — _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else ""
+                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`{ambiguous}")
+            footer = _more_md(len(ordered), 15, "site(s)")
             if footer:
                 md.append(footer)
             md.append("")
@@ -170,60 +142,6 @@ def _emit_github_summary(results: list, n_samples: int):
             md.append(f"| `{r['name']}` | {' | '.join(cols)} |")
         md.append("")
 
-    # dd_line — single-precision SENSITIVITY (where precision most affects the
-    # output). This is distinct from cancellation origin (reported separately):
-    # the leader is typically the time integrator / final accumulation, because
-    # perturbing the last write moves the output directly while upstream errors
-    # get re-rounded there. Not a culprit-finder for ill-conditioning.
-    cases_with_locs = [r for r in results if r["dd_line_locs"]]
-    if cases_with_locs:
-        md.append("<details>")
-        md.append("<summary>Single-precision sensitivity (dd_line) — usually the time integrator; expand for details</summary>\n")
-        md.append(
-            "> Where reduced precision most moves the output — **typically the time integrator / "
-            "final accumulation, which is expected and benign**. This is *not* where cancellation "
-            "originates (that's the section above); it shows where precision matters most.\n"
-        )
-        _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
-        for r in cases_with_locs:
-            status = "❌ FAIL" if not r["passed"] else "✅ pass"
-            md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}")
-            md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n")
-            for loc in r["dd_line_locs"][:10]:
-                rel_path, start, end = loc["path"], loc["start"], loc["end"]
-                where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}"
-                tags = []
-                if loc.get("share") is not None:
-                    tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy")
-                if loc.get("cancellation"):
-                    tags.append("catastrophic cancellation")
-                if loc.get("macro"):
-                    tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_")
-                suffix = f" — {', '.join(tags)}" if tags else ""
-                md.append(f"- `{where}`{suffix}")
-                snippet = _get_source_context(rel_path, start)
-                if snippet:
-                    md.append("  ```fortran")
-                    for line in snippet.splitlines():
-                        md.append(f"  {line}")
-                    md.append("  ```")
-            footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)")
-            if footer:
-                md.append(footer)
-            md.append("")
-        md.append("</details>\n")
-
-    # dd_sym function names (collapsed, since less actionable than dd_line)
-    cases_with_syms = [r for r in results if r["dd_sym_syms"]]
-    if cases_with_syms:
-        md.append("<details>")
-        md.append("<summary>Responsible functions (dd_sym)</summary>\n")
-        for r in cases_with_syms:
-            md.append(f"\n**`{r['name']}`**\n")
-            for sym in r["dd_sym_syms"]:
-                md.append(f"- `{sym}`")
-        md.append("\n</details>\n")
-
     # Float-max overflow sites
     cases_with_fmax = [r for r in results if r.get("float_max_locs")]
     if cases_with_fmax:
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 1d1c5a7b8f..8e404098aa 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -1,33 +1,23 @@
 """Verrou subprocess runners for the FP-stability suite.
 
-Each routine drives the verrou/valgrind binary (or the verrou_dd_* delta-debug
-tools) and returns parsed results.  Pure parsing / metric helpers live in
-fp_stability_metrics, which this module imports.
+Each routine drives the verrou/valgrind binary and returns parsed results.  Pure
+parsing / metric helpers live in fp_stability_metrics, which this module imports.
 """
 
 import glob
 import math
 import os
 import shutil
-import stat
 import subprocess
 import tempfile
-import textwrap
 
 from .common import MFC_ROOT_DIR, MFCException
 from .fp_stability_metrics import (
-    _DD_FALLBACK_THRESHOLD,
     VPREC_MANTISSA_BITS,
-    _build_source_filter,
-    _confirm_decision,
-    _is_arithmetic_loc,
     _max_abs_np,
     _max_diff_np,
     _parse_cancel_gen,
-    _parse_rddmin_locs,
-    _parse_rddmin_syms,
     _parse_vg_error_locs,
-    _rank_locs,
 )
 from .printer import cons
 
@@ -65,20 +55,6 @@ def _find_binary(name: str) -> str:
     return max(candidates, key=os.path.getmtime) if candidates else ""
 
 
-def _find_dd_tool(verrou_bin: str, tool: str) -> str:
-    """Path to a verrou_dd_* tool (e.g. 'verrou_dd_sym') next to the verrou binary,
-    or '' if absent."""
-    c = os.path.join(os.path.dirname(verrou_bin), tool)
-    return c if os.path.isfile(c) else ""
-
-
-def _verrou_pythonpath(verrou_bin: str) -> str:
-    """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir)."""
-    verrou_home = os.path.dirname(os.path.dirname(verrou_bin))
-    matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind"))
-    return matches[0] if matches else ""
-
-
 def _verrou_env(verrou_bin: str) -> dict:
     """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt
     artifact extracted to a new prefix) can locate its tool — Valgrind bakes its
@@ -168,12 +144,7 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh
     except MFCException as exc:
         cons.print(f"  [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]")
         return None
-    raw = _parse_cancel_gen(gen_path)
-    filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
-    skipped = len(raw) - len(filtered)
-    if skipped and threshold == 10:
-        cons.print(f"  [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
-    return filtered
+    return _parse_cancel_gen(gen_path)
 
 
 def _run_mca_samples(
@@ -253,266 +224,3 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r
             dev = float("inf")
         results.append((bits, dev))
     return results
-
-
-def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str):
-    """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via
-    VERROU_EXCLUDE / VERROU_SOURCE environment variables.  For test runs, we use
-    --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices).
-    For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the
-    environment — we honour that so the reference is a stable nearest-rounding baseline
-    to compare against.  CLI --rounding-mode would override the env var and break the
-    reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env bash
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        VERROU_BIN={verrou_bin!r}
-        SIM_BIN={sim_bin!r}
-        IC_DIR={ic_dir!r}
-
-        RUNDIR="$1"
-        TMPDIR_RUN=$(mktemp -d)
-        trap 'rm -rf "$TMPDIR_RUN"' EXIT
-
-        cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all"
-        cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp"
-        for fname in indices.dat pre_time_data.dat io_time_data.dat; do
-            [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/"
-        done
-        mkdir -p "$TMPDIR_RUN/D"
-
-        # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and
-        # leaves it unset for test runs.  Defaulting to float gives deterministic
-        # test steps while letting the reference use nearest-rounding.
-        ROUND="${{VERROU_ROUNDING_MODE:-float}}"
-
-        # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation).
-        # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to).
-        # Forward them as valgrind flags when set.
-        EXTRA=""
-        [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE"
-        [ -n "${{VERROU_SOURCE:-}}" ]  && EXTRA="$EXTRA --source=$VERROU_SOURCE"
-
-        cd "$TMPDIR_RUN"
-        "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN"
-        rc=$?
-
-        [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/"
-        exit $rc
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _write_dd_cmp_py(path: str, compare_files: list, threshold: float):
-    """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line.
-
-    verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR
-    Exits 0 (stable) or 1 (unstable) based on threshold.
-    """
-    content = textwrap.dedent(f"""\
-        #!/usr/bin/env python3
-        # Generated by mfc.sh fp-stability — do not edit by hand.
-        import sys, os, numpy as np
-
-        COMPARE_FILES = {compare_files!r}
-        THRESHOLD = {threshold!r}
-
-        ref_dir, run_dir = sys.argv[1], sys.argv[2]
-        max_dev = 0.0
-        for fname in COMPARE_FILES:
-            ref_p = os.path.join(ref_dir, fname)
-            run_p = os.path.join(run_dir, fname)
-            if not os.path.exists(ref_p) or not os.path.exists(run_p):
-                print(f"MISSING: {{fname}}")
-                sys.exit(1)
-            ref = np.atleast_2d(np.loadtxt(ref_p))[:, 1]
-            run = np.atleast_2d(np.loadtxt(run_p))[:, 1]
-            dev = float(np.max(np.abs(ref - run)))
-            max_dev = max(max_dev, dev)
-
-        print(f"max_dev={{max_dev:.3e}}  threshold={{THRESHOLD:.0e}}")
-        sys.exit(0 if max_dev <= THRESHOLD else 1)
-    """)
-    with open(path, "w") as f:
-        f.write(content)
-    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-
-def _dd_env(verrou_bin: str) -> dict:
-    """Environment for verrou_dd_*: VALGRIND_LIB (so a relocated tree's inner valgrind
-    calls resolve) plus PYTHONPATH (for the verrou_dd_* imports)."""
-    py_pkg = _verrou_pythonpath(verrou_bin)
-    env = _verrou_env(verrou_bin)
-    if py_pkg:
-        existing = env.get("PYTHONPATH", "")
-        env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing]))
-    return env
-
-
-def _run_dd_tool(
-    dd_bin: str,
-    dd_dir: str,
-    dd_run_sh: str,
-    dd_cmp_py: str,
-    env: dict,
-    log_name: str,
-    summary_subdir: str,
-    label: str,
-) -> list:
-    """Generic runner for verrou_dd_sym / verrou_dd_line. Returns raw summary lines."""
-    log_file = os.path.join(dd_dir, log_name)
-    cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py]
-    cons.print(f"  [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]")
-    with open(log_file, "w") as f:
-        result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False)
-    summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary")
-    summary_lines = []
-    if result.returncode == 0:
-        if os.path.isfile(summary_path):
-            with open(summary_path) as f:
-                summary_lines = f.readlines()
-            cons.print(f"  [bold yellow]{label} result[/bold yellow]:")
-            for line in summary_lines:
-                cons.print(f"    {line.rstrip()}")
-        else:
-            cons.print(f"  [dim]{label} done; see {log_file}[/dim]")
-    else:
-        cons.print(f"  [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})")
-    return summary_lines
-
-
-def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float):
-    """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their
-    paths.  The threshold falls back to _DD_FALLBACK_THRESHOLD when unset."""
-    os.makedirs(dd_dir, exist_ok=True)
-    dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
-    dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
-    _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
-    _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
-    return dd_run_sh, dd_cmp_py
-
-
-def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list:
-    """Run verrou_dd_sym; return list of responsible symbol names."""
-    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym")
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_sym not found; skipping delta-debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
-    cons.print(f"  [dim]dd_sym logs: {dd_dir}[/dim]")
-    return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
-
-
-def _run_dd_line(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    log_dir: str,
-    threshold: float = None,
-) -> list:
-    """Run verrou_dd_line; return [{path, start, end, macro}] location dicts."""
-    dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line")
-    if not dd_bin:
-        cons.print("  [dim]verrou_dd_line not found; skipping line-level debug[/dim]")
-        return []
-
-    dd_dir = os.path.join(log_dir, case["name"])
-    dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold)
-    _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
-    return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary"))
-
-
-def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag):
-    """Perturb only the lines in src_lines (deterministic float mode) and return
-    the L-inf deviation from the nearest-rounding reference, or None on failure."""
-    src_path = os.path.join(conf_dir, f"source_{tag}.txt")
-    with open(src_path, "w") as fh:
-        fh.writelines(src_lines)
-    run_dir = os.path.join(conf_dir, f"perturb_{tag}")
-    os.makedirs(run_dir, exist_ok=True)
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="float",
-            extra_flags=[f"--source={src_path}"],
-        )
-    except MFCException:
-        return None
-    return _max_diff_np(ref_dir, run_dir, compare)
-
-
-def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path):
-    """Run nearest-rounding with --gen-source to capture the symbol-correct
-    executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure."""
-    try:
-        _run_simulation_verrou(
-            verrou_bin,
-            sim_bin,
-            work_dir,
-            run_dir,
-            rounding_mode="nearest",
-            extra_flags=[f"--gen-source={gen_path}"],
-        )
-    except MFCException:
-        return None
-    if not os.path.isfile(gen_path):
-        return None
-    with open(gen_path) as fh:
-        return fh.readlines()
-
-
-def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy):
-    """Positive control for dd_line: perturb ONLY the suspect lines and confirm
-    the instability reproduces, then rank each line by its individual share.
-
-    Verrou's --source matches file+line+symbol (not file+line alone), so we first
-    capture the symbol-correct executed source lines via --gen-source, filter them
-    to the suspect set, then run deterministic float-mode restricted to just those
-    lines.  If the suspect-only deviation reaches dd_threshold the attribution is
-    confirmed; if it stays near zero the reported lines do not actually carry the
-    instability (e.g. a #:for-expanded line blamed for the wrong instance).
-
-    Each line is then perturbed alone so its 'share_dev' (and 'share' of
-    float_proxy) shows which computation dominates.
-
-    Returns (confirmed, suspect_dev, ranked_locs).
-    """
-    if not dd_line_locs:
-        return None, None, dd_line_locs
-    conf_dir = os.path.join(work_dir, "confirm")
-    os.makedirs(conf_dir, exist_ok=True)
-    gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt"))
-    if gen_lines is None:
-        return None, None, dd_line_locs
-    compare = case["compare"]
-
-    # whole-set positive control
-    suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs]
-    set_src = _build_source_filter(gen_lines, suspects)
-    if not set_src:
-        # none of the reported lines performs an instrumented FP op -> not reproduced
-        return False, 0.0, dd_line_locs
-    set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set")
-    confirmed = _confirm_decision(set_dev, dd_threshold)
-
-    # per-line ranking (a single line trivially owns the whole set deviation)
-    if len(dd_line_locs) == 1:
-        dd_line_locs[0]["share_dev"] = set_dev
-    else:
-        for i, loc in enumerate(dd_line_locs):
-            one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])])
-            loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0
-    ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev))
-    return confirmed, set_dev, ranked
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index b2b43bfc02..38d49d60eb 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -1,5 +1,5 @@
-"""Unit tests for the pure helpers behind the FP-stability dd_line confirmation
-pass (#1) and macro-expansion flagging (#2).
+"""Unit tests for the pure helpers behind the FP-stability cancellation pass and
+its fypp macro-expansion flagging.
 
 The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job;
 here we test only the pure functions that decide what to instrument and how to
@@ -9,15 +9,10 @@
 from mfc.fp_stability_metrics import (
     MIN_SIG_BITS,
     _autodetect_compare,
-    _build_source_filter,
     _cancellation_severity,
-    _confirm_decision,
     _digits_left,
     _macro_context_in_lines,
-    _mark_cancellation,
-    _rank_locs,
     _sig_bits,
-    _statement_bounds_in_lines,
 )
 
 # --- #2: fypp macro-expansion context detection ---
@@ -93,127 +88,6 @@ def test_macro_context_unbalanced_close_is_safe():
     assert _macro_context_in_lines(["#:endfor\n", "  a = b - c\n"], 2) is None
 
 
-# --- #1: building the symbol-correct --source filter from --gen-source output ---
-
-
-def test_build_source_filter_keeps_matching_file_and_line_with_symbol():
-    gen = [
-        "m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n",
-        "m_riemann_solvers.fpp\t999\ts_other\n",
-    ]
-    suspects = [("src/simulation/m_riemann_solvers.fpp", 512, 512)]
-    out = _build_source_filter(gen, suspects)
-    assert out == ["m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n"]
-
-
-def test_build_source_filter_matches_inclusive_range():
-    gen = [
-        "m_foo.fpp\t10\tsym\n",
-        "m_foo.fpp\t11\tsym\n",
-        "m_foo.fpp\t12\tsym\n",
-        "m_foo.fpp\t13\tsym\n",
-    ]
-    suspects = [("m_foo.fpp", 11, 12)]
-    out = _build_source_filter(gen, suspects)
-    assert out == ["m_foo.fpp\t11\tsym\n", "m_foo.fpp\t12\tsym\n"]
-
-
-def test_build_source_filter_excludes_other_basenames():
-    gen = ["m_bar.fpp\t5\tsym\n"]
-    suspects = [("m_foo.fpp", 5, 5)]
-    assert _build_source_filter(gen, suspects) == []
-
-
-def test_build_source_filter_matches_on_basename_not_full_path():
-    # gen-source emits a basename; dd_line locs are repo-relative paths.
-    gen = ["m_foo.fpp\t5\tsym\n"]
-    suspects = [("src/common/m_foo.fpp", 5, 5)]
-    assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"]
-
-
-def test_build_source_filter_skips_malformed_lines():
-    gen = ["garbage-no-tab\n", "m_foo.fpp\tnotanumber\tsym\n", "m_foo.fpp\t5\tsym\n"]
-    suspects = [("m_foo.fpp", 5, 5)]
-    assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"]
-
-
-# --- #1: confirmation decision ---
-
-
-def test_confirm_decision_true_when_suspect_reproduces_deviation():
-    # perturbing only the suspect lines yields >= dd_threshold deviation
-    assert _confirm_decision(suspect_dev=1e-3, dd_threshold=1e-5) is True
-
-
-def test_confirm_decision_false_when_suspect_is_inert():
-    # suspect lines barely move the result -> attribution not reproduced
-    assert _confirm_decision(suspect_dev=1e-9, dd_threshold=1e-5) is False
-
-
-def test_confirm_decision_none_when_measurement_unavailable():
-    assert _confirm_decision(suspect_dev=None, dd_threshold=1e-5) is None
-
-
-# --- Tier 1: per-line confirmation ranking ---
-
-
-def test_rank_locs_sorts_by_share_dev_descending():
-    locs = [
-        {"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.1},
-        {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.9},
-    ]
-    ranked = _rank_locs(locs, total=1.0)
-    assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"]
-
-
-def test_rank_locs_computes_share_as_fraction_of_total():
-    locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}]
-    ranked = _rank_locs(locs, total=0.5)
-    assert ranked[0]["share"] == 0.5
-
-
-def test_rank_locs_share_none_when_total_nonpositive():
-    locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}]
-    ranked = _rank_locs(locs, total=0.0)
-    assert ranked[0]["share"] is None
-
-
-def test_rank_locs_treats_missing_share_dev_as_zero_and_sorts_last():
-    locs = [
-        {"path": "a.fpp", "start": 1, "end": 1, "share_dev": None},
-        {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.3},
-    ]
-    ranked = _rank_locs(locs, total=1.0)
-    assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"]
-
-
-# --- Tier 1b: dd_line x cancellation cross-reference ---
-
-
-def test_mark_cancellation_flags_loc_on_a_cancellation_line():
-    locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}]
-    _mark_cancellation(locs, [("m_foo.fpp", 11)])
-    assert locs[0]["cancellation"] is True
-
-
-def test_mark_cancellation_false_when_no_site_in_range():
-    locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}]
-    _mark_cancellation(locs, [("m_foo.fpp", 99)])
-    assert locs[0]["cancellation"] is False
-
-
-def test_mark_cancellation_matches_on_basename_not_full_path():
-    locs = [{"path": "src/common/m_foo.fpp", "start": 5, "end": 5}]
-    _mark_cancellation(locs, [("/abs/build/m_foo.fpp", 5)])
-    assert locs[0]["cancellation"] is True
-
-
-def test_mark_cancellation_false_for_different_basename():
-    locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}]
-    _mark_cancellation(locs, [("m_bar.fpp", 5)])
-    assert locs[0]["cancellation"] is False
-
-
 # --- per-site cancellation severity (bits lost), from a threshold sweep ---
 
 
@@ -289,44 +163,6 @@ def test_digits_left_full_and_clamped():
     assert _digits_left(60) == 0.0  # clamp: never negative
 
 
-# --- Fortran line-continuation handling (correct-line labeling) ---
-
-
-def test_statement_bounds_single_line():
-    lines = ["  a = b - c\n"]
-    assert _statement_bounds_in_lines(lines, 1) == (1, 1)
-
-
-def test_statement_bounds_spans_continuation_from_first_line():
-    lines = ["  poly = (s_cb(i+3) - s_cb(i+1)) * &\n", "         (s_cb(i+2) - s_cb(i))\n"]
-    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
-
-
-def test_statement_bounds_from_middle_continuation_line():
-    # a hit on the continuation fragment must resolve to the statement start
-    lines = ["  x = a + &\n", "      b + &\n", "      c\n"]
-    assert _statement_bounds_in_lines(lines, 2) == (1, 3)
-    assert _statement_bounds_in_lines(lines, 3) == (1, 3)
-
-
-def test_statement_bounds_ignores_ampersand_in_trailing_comment_logic():
-    # a real continuation '&' before a trailing comment still continues
-    lines = ["  x = a & ! note\n", "      + b\n"]
-    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
-
-
-def test_statement_bounds_non_continuation_neighbors():
-    lines = ["  x = 1\n", "  y = 2\n", "  z = 3\n"]
-    assert _statement_bounds_in_lines(lines, 2) == (2, 2)
-
-
-def test_statement_bounds_with_leading_ampersand_continuation():
-    # the MFC WENO style: line ends with '&' and the next line *starts* with '&'
-    lines = ["  beta = x**2 &\n", "       & + eps\n"]
-    assert _statement_bounds_in_lines(lines, 1) == (1, 2)
-    assert _statement_bounds_in_lines(lines, 2) == (1, 2)
-
-
 # --- report emitters: must survive blank and populated result dicts (CI-only path) ---
 
 
@@ -359,26 +195,31 @@ def test_emit_summary_populated_result(tmp_path, monkeypatch):
         sig_bits=30.0,
         float_proxy=1e-6,
         vprec=[(52, 1e-14), (23, float("inf"))],  # exercises the "crash" branch
-        dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": "#:for", "share": 0.4, "cancellation": True}],
-        dd_line_confirmed=False,
         cancellation_locs=[("src/x/m_a.fpp", 5)],
         cancellation_bits={("src/x/m_a.fpp", 5): 40},
+        cancellation_macro={("src/x/m_a.fpp", 5): "#:for"},
         float_max_locs=[("m_a.fpp", 9)],
     )
     text = _emit_to_tmp([r], tmp_path, monkeypatch)
     assert "💥 crash" in text and "digits lost" in text
+    assert "may represent multiple instances" in text  # fypp-ambiguous marker
 
 
-def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys):
+def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys):
     from mfc import fp_stability_report as report
     from mfc.fp_stability import _blank_result
 
     monkeypatch.setenv("GITHUB_ACTIONS", "1")
     r = _blank_result("demo")
-    r.update(dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": None, "share": 0.9, "cancellation": False}], dd_line_confirmed=False)
+    r.update(
+        cancellation_locs=[("src/x/m_a.fpp", 5)],
+        cancellation_bits={("src/x/m_a.fpp", 5): 40},
+        cancellation_macro={("src/x/m_a.fpp", 5): "#:for"},
+    )
     report._emit_github_annotations([r])
     out = capsys.readouterr().out
-    assert "::notice" in out and "::warning" not in out  # unconfirmed -> notice, not warning
+    assert "::notice" in out
+    assert "multiple instances" in out  # fypp-expanded cancellation site flagged
 
 
 # --- Verrou discovery: a bare system valgrind must read as "Verrou absent" ---
@@ -480,27 +321,6 @@ def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch):
     assert env["VALGRIND_LIB"] == "/user/chosen/lib"  # not clobbered
 
 
-def test_dd_env_prepends_pythonpath_and_inherits_valgrind_lib(tmp_path, monkeypatch):
-    from mfc import fp_stability_runners as runners
-
-    (tmp_path / "libexec" / "valgrind").mkdir(parents=True)
-    monkeypatch.delenv("VALGRIND_LIB", raising=False)
-    monkeypatch.setenv("PYTHONPATH", "/pre/existing")
-    monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/site-packages/valgrind")
-    env = runners._dd_env(str(tmp_path / "bin" / "valgrind"))
-    assert env["PYTHONPATH"] == "/vg/site-packages/valgrind:/pre/existing"
-    assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind")
-
-
-def test_dd_env_no_leading_colon_when_pythonpath_empty(tmp_path, monkeypatch):
-    from mfc import fp_stability_runners as runners
-
-    monkeypatch.delenv("PYTHONPATH", raising=False)
-    monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/valgrind")
-    env = runners._dd_env(str(tmp_path / "bin" / "valgrind"))
-    assert env["PYTHONPATH"] == "/vg/valgrind"  # no stray leading ':'
-
-
 # --- auto-install hard-fail guards ---
 
 

From eea0c8def458b892d95d6acc3f20237652696d50 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 2 Jun 2026 20:17:36 -0400
Subject: [PATCH 19/25] fp-stability: drop the MCA pass (redundant with the
 random-rounding sig-bits gate)

MCA (--backend=mcaquad) reported a significant-bits estimate that duplicated the metric the core random-rounding suite already produces for PASS/FAIL; on well-conditioned cases the two agree, so it added a slower second opinion without a distinct capability. Removed _run_mca_samples, the MCA pass, the --no-mca flag, the mca_dev/mca_sigbits result keys, and the summary column. Cancellation/vprec/float-proxy/float-max and the PASS/FAIL core are unchanged. Net ~70 lines removed (6 insertions, 75 deletions); MCA was also the slowest pass (N extra mcaquad runs per case).
---
 toolchain/mfc/cli/commands.py         | 11 +--------
 toolchain/mfc/fp_stability.py         | 31 ++------------------------
 toolchain/mfc/fp_stability_report.py  |  7 +++---
 toolchain/mfc/fp_stability_runners.py | 32 ---------------------------
 4 files changed, 6 insertions(+), 75 deletions(-)

diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 6dfbb57c77..7d9771f772 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -917,7 +917,6 @@
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
         "  cancellation   --check-cancellation origins, ranked by significant digits lost\n"
-        "  mca-sigbits    Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n"
         "  float-max      --check-max-float detection of double→float overflow sites\n"
     ),
     include_common=["mfc_config", "verbose", "debug_log"],
@@ -977,13 +976,6 @@
             default=False,
             dest="no_cancellation",
         ),
-        Argument(
-            name="no-mca",
-            help="Skip Monte Carlo Arithmetic (mcaquad) significant-bits estimate.",
-            action=ArgAction.STORE_TRUE,
-            default=False,
-            dest="no_mca",
-        ),
         Argument(
             name="no-float-max",
             help="Skip --check-max-float float32 overflow detection.",
@@ -1001,7 +993,7 @@
         ),
         Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"),
         Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"),
-        Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip new analysis passes"),
+        Example("./mfc.sh fp-stability --no-cancellation --no-float-max", "Skip analysis passes"),
     ],
     key_options=[
         ("--sim-binary PATH", "Serial simulation binary (debug, no-MPI)"),
@@ -1011,7 +1003,6 @@
         ("--no-float-proxy", "Skip float-rounding proxy run"),
         ("--no-vprec", "Skip VPREC mantissa-bit sweep"),
         ("--no-cancellation", "Skip cancellation detection"),
-        ("--no-mca", "Skip MCA significant-bits estimate"),
         ("--no-float-max", "Skip float32 overflow detection"),
     ],
 )
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 102d512d52..bf3f557455 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -22,11 +22,7 @@
    .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous
    (the line maps to multiple generated instances).
 
-E. MCA significant-bits estimate (--no-mca to skip)
-   N runs with --backend=mcaquad; max deviation vs nearest-rounding
-   reference gives a lower bound on significant bits: s = -log2(dev/scale).
-
-F. Float-max overflow detection (--no-float-max to skip)
+E. Float-max overflow detection (--no-float-max to skip)
    One run with --check-max-float=yes; reports locations where a
    double→float conversion would overflow to ±Inf.
 
@@ -80,7 +76,6 @@
     _run_cancellation_check,
     _run_float_max_check,
     _run_float_proxy,
-    _run_mca_samples,
     _run_preprocess,
     _run_simulation_verrou,
     _run_vprec_sweep,
@@ -372,8 +367,6 @@ def _blank_result(name: str) -> dict:
         "cancellation_locs": [],
         "cancellation_bits": {},
         "cancellation_macro": {},
-        "mca_dev": None,
-        "mca_sigbits": None,
         "float_max_locs": [],
     }
 
@@ -387,7 +380,6 @@ def _run_case(
     run_float: bool,
     run_vprec: bool,
     run_cancellation: bool,
-    run_mca: bool,
     run_float_max: bool,
 ) -> dict:
     name = case["name"]
@@ -494,22 +486,7 @@ def _run_case(
             except Exception as exc:
                 cons.print(f"  [bold yellow]cancellation check error[/bold yellow]: {exc}")
 
-        # --- E: MCA significant-bits estimate ---
-        if run_mca:
-            cons.print(f"  [dim]MCA significant-bits estimate (N={n_samples})...[/dim]")
-            try:
-                mca_dev, mca_sigbits, n_ok = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples)
-                if n_ok == 0:
-                    cons.print(f"  [bold yellow]MCA: no samples completed (0/{n_samples}; see logs)[/bold yellow]")
-                else:
-                    result["mca_dev"] = mca_dev
-                    result["mca_sigbits"] = mca_sigbits
-                    bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a"
-                    cons.print(f"  MCA: dev={mca_dev:.3e}  ({bits_str})  [{n_ok}/{n_samples} samples]")
-            except Exception as exc:
-                cons.print(f"  [bold yellow]MCA error[/bold yellow]: {exc}")
-
-        # --- F: float-max overflow detection ---
+        # --- E: float-max overflow detection ---
         if run_float_max:
             cons.print("  [dim]float-max overflow check...[/dim]")
             try:
@@ -610,7 +587,6 @@ def fp_stability():
     run_float = not ARG("no_float_proxy")
     run_vprec = not ARG("no_vprec")
     run_cancellation = not ARG("no_cancellation")
-    run_mca = not ARG("no_mca")
     run_float_max = not ARG("no_float_max")
 
     cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES
@@ -633,8 +609,6 @@ def fp_stability():
         features.append("vprec-sweep")
     if run_cancellation:
         features.append("cancellation")
-    if run_mca:
-        features.append("mca-sigbits")
     if run_float_max:
         features.append("float-max")
     cons.print(f"  features:    {', '.join(features) if features else 'stability only'}")
@@ -654,7 +628,6 @@ def fp_stability():
                 run_float,
                 run_vprec,
                 run_cancellation,
-                run_mca,
                 run_float_max,
             )
         except MFCException as exc:
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index 2e4fe1abb5..8e0a2580b8 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -80,14 +80,13 @@ def _emit_github_summary(results: list, n_samples: int):
 
     # Main results table — pass/fail is scale-free: bits retained vs a single floor
     md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
-    md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |")
-    md.append("|------|:------:|:------:|--------:|--------:|:------:|")
+    md.append("| Case | Status | bits retained | max\\_dev | Float proxy |")
+    md.append("|------|:------:|:------:|--------:|--------:|")
     for r in results:
         status = "✅" if r["passed"] else "❌"
         bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
         fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
-        sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
-        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
+        md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |")
     md.append("")
 
     # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 8e404098aa..12af6041c3 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -5,7 +5,6 @@
 """
 
 import glob
-import math
 import os
 import shutil
 import subprocess
@@ -14,7 +13,6 @@
 from .common import MFC_ROOT_DIR, MFCException
 from .fp_stability_metrics import (
     VPREC_MANTISSA_BITS,
-    _max_abs_np,
     _max_diff_np,
     _parse_cancel_gen,
     _parse_vg_error_locs,
@@ -147,36 +145,6 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh
     return _parse_cancel_gen(gen_path)
 
 
-def _run_mca_samples(
-    case: dict,
-    verrou_bin: str,
-    sim_bin: str,
-    work_dir: str,
-    ref_dir: str,
-    n_mca: int,
-) -> tuple:
-    """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound, n_ok) where
-    n_ok is how many samples actually completed (0 => no usable measurement)."""
-    compare = case["compare"]
-    ref_scale = _max_abs_np(ref_dir, compare)
-    max_dev = 0.0
-    n_ok = 0
-    flags = ["--backend=mcaquad", "--mca-mode=mca"]
-    for i in range(n_mca):
-        run_dir = os.path.join(work_dir, f"mca_{i:02d}")
-        os.makedirs(run_dir, exist_ok=True)
-        try:
-            _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags)
-            max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
-            n_ok += 1
-        except MFCException as exc:
-            cons.print(f"  [dim]MCA sample {i} failed: {exc}[/dim]")
-    sig_bits = None
-    if n_ok and max_dev > 0.0 and ref_scale > 0.0:
-        sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale))))
-    return max_dev, sig_bits, n_ok
-
-
 def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
     """Run with --check-max-float=yes; return [(fname, line)] of overflow sites,
     or None if the run failed (distinct from [] = ran and found none)."""

From c4d1ef0f41c10497902ef2f96a5e3541d29c7aa8 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 08:52:46 -0400
Subject: [PATCH 20/25] =?UTF-8?q?fp-stability:=20address=20Copilot=20revie?=
 =?UTF-8?q?w=20=E2=80=94=20verify=20--verrou-binary=20executability,=20env?=
 =?UTF-8?q?=20on=20PATH=20probe,=20comment=20rot?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot PR review (all valid): (1) gate explicit --verrou-binary on os.access(X_OK) so the 'not executable' message is honest; (6) pass VALGRIND_LIB via _verrou_env to the PATH-fallback verrou probe too, so a relocated prebuilt on PATH isn't misjudged absent; (3,5) fix two stale comments left by the dd/MCA removals (a 'sensitivity list' that no longer exists; a VPREC '❌ where bits<floor' that never matched the deviation table); (2) inline-code the fp-stability-logs/ path in the truncation footer. Tests updated for the 2-arg _has_verrou_tool stub.
---
 toolchain/mfc/fp_stability.py         | 2 +-
 toolchain/mfc/fp_stability_report.py  | 8 ++++----
 toolchain/mfc/fp_stability_runners.py | 6 ++++--
 toolchain/mfc/test_fp_stability.py    | 4 ++--
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index bf3f557455..b0a2f5b058 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -570,7 +570,7 @@ def _install_verrou() -> str:
 
 def fp_stability():
     verrou_bin = ARG("verrou_binary") or _find_verrou()
-    if not verrou_bin or not os.path.isfile(verrou_bin):
+    if not verrou_bin or not (os.path.isfile(verrou_bin) and os.access(verrou_bin, os.X_OK)):
         if ARG("verrou_binary"):
             raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.")
         verrou_bin = _install_verrou()
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index 8e0a2580b8..6b8f07079c 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -51,7 +51,7 @@ def _more_md(total: int, shown: int, noun: str) -> str:
     or '' when nothing was truncated."""
     if total <= shown:
         return ""
-    return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_"
+    return f"- …and {total - shown} more {noun}; see `fp-stability-logs/`"
 
 
 def _emit_github_summary(results: list, n_samples: int):
@@ -90,8 +90,7 @@ def _emit_github_summary(results: list, n_samples: int):
     md.append("")
 
     # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
-    # most severe (most bits lost). The numerically interesting signal; the
-    # sensitivity list further down is dominated by the (benign) time integrator.
+    # most severe (most bits lost).
     cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
     if cases_with_cancel:
         md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
@@ -119,7 +118,8 @@ def _emit_github_summary(results: list, n_samples: int):
                 md.append(footer)
             md.append("")
 
-    # VPREC sweep — one column per bit level, ❌ where bits retained < floor
+    # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at
+    # that reduced precision (💥 crash = run diverged/failed, — = not measured).
     if any(r["vprec"] for r in results):
         _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
         header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 12af6041c3..7c2519f018 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -40,9 +40,11 @@ def _find_verrou() -> str:
     if os.path.isfile(candidate) and os.access(candidate, os.X_OK) and _has_verrou_tool(candidate, _verrou_env(candidate)):
         return candidate
     # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system
-    # valgrind must read as "Verrou absent" so it gets installed, not misused.
+    # valgrind must read as "Verrou absent" so it gets installed, not misused. Verify
+    # with VALGRIND_LIB too, so a relocated prebuilt on PATH (env.sh not sourced) isn't
+    # wrongly judged absent.
     path_vg = shutil.which("valgrind")
-    if path_vg and _has_verrou_tool(path_vg):
+    if path_vg and _has_verrou_tool(path_vg, _verrou_env(path_vg)):
         return path_vg
     return ""
 
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index 38d49d60eb..b8b6104403 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -260,7 +260,7 @@ def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch):
     # VERROU_HOME has no valgrind; a plain valgrind is on PATH but lacks the tool.
     monkeypatch.setenv("VERROU_HOME", str(tmp_path))
     monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind")
-    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: False)
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False)
     assert runners._find_verrou() == ""
 
 
@@ -269,7 +269,7 @@ def test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch)
 
     monkeypatch.setenv("VERROU_HOME", str(tmp_path))
     monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind")
-    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: True)
+    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True)
     assert runners._find_verrou() == "/opt/verrou/bin/valgrind"
 
 

From 39a1b0f99a49a6102bb4f396b9aa161a66245319 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 09:01:08 -0400
Subject: [PATCH 21/25] fp-stability: give _run_simulation_verrou sole
 ownership of run_dir creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The makedirs(run_dir) sat after the --log-file path and the sim.out open that both need it, so it was a dead no-op that worked only because every caller pre-created run_dir — misleading, and a future caller would hit FileNotFoundError with no hint. Moved it to the top of _run_simulation_verrou (before those uses) and dropped the now-redundant pre-creates in all callers (_run_cancellation_check, _run_float_max_check, _run_float_proxy, _run_vprec_sweep, and _run_case's ref + sample runs). Single, clear ownership. (Claude Code Review finding.)
---
 toolchain/mfc/fp_stability.py         |  2 --
 toolchain/mfc/fp_stability_runners.py | 10 ++++------
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index b0a2f5b058..7fce9ae6fe 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -399,7 +399,6 @@ def _run_case(
         _run_preprocess(pp_bin, case["pre"], work_dir)
 
         ref_dir = os.path.join(work_dir, "ref")
-        os.makedirs(ref_dir)
         cons.print("  [dim]reference run (rounding=nearest)...[/dim]")
         _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest")
 
@@ -420,7 +419,6 @@ def _run_case(
         cons.print(f"  [dim]random-rounding runs (N={n_samples})...[/dim]")
         for i in range(n_samples):
             run_dir = os.path.join(work_dir, f"run_{i:02d}")
-            os.makedirs(run_dir)
             _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random")
             max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
 
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index 7c2519f018..f282f47e12 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -94,11 +94,14 @@ def _run_simulation_verrou(
     rounding_mode: str = None,
     extra_flags: list = None,
 ):
-    """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output.
+    """Create run_dir, copy ICs into a fresh tmpdir, run simulation under verrou,
+    and collect its D/ output into run_dir. Owns run_dir creation, so callers need
+    not pre-create it.
 
     rounding_mode is passed as --rounding-mode=<mode> when not None.
     extra_flags are appended before the binary (e.g. --backend=vprec ...).
     """
+    os.makedirs(run_dir, exist_ok=True)  # needed before --log-file / sim.out below
     with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir:
         for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]:
             src = os.path.join(work_dir, fname)
@@ -121,7 +124,6 @@ def _run_simulation_verrou(
             tag = rounding_mode or "vprec"
             raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out")
 
-        os.makedirs(run_dir, exist_ok=True)
         for fn in os.listdir(os.path.join(tmpdir, "D")):
             shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir)
 
@@ -132,7 +134,6 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh
     or None if the run itself failed (distinct from [] = ran and found none)."""
     tag = f"cancellation_{threshold}"
     run_dir = os.path.join(work_dir, tag)
-    os.makedirs(run_dir, exist_ok=True)
     gen_path = os.path.join(run_dir, "cancel_gen.txt")
     flags = [
         "--check-cancellation=yes",
@@ -151,7 +152,6 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
     """Run with --check-max-float=yes; return [(fname, line)] of overflow sites,
     or None if the run failed (distinct from [] = ran and found none)."""
     run_dir = os.path.join(work_dir, "float_max")
-    os.makedirs(run_dir, exist_ok=True)
     try:
         _run_simulation_verrou(
             verrou_bin,
@@ -170,7 +170,6 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
 def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
     """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref."""
     run_dir = os.path.join(work_dir, "float_proxy")
-    os.makedirs(run_dir)
     _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float")
     return _max_diff_np(ref_dir, run_dir, case["compare"])
 
@@ -180,7 +179,6 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r
     results = []
     for bits in VPREC_MANTISSA_BITS:
         run_dir = os.path.join(work_dir, f"vprec_{bits}")
-        os.makedirs(run_dir)
         flags = [
             "--backend=vprec",
             "--vprec-mode=full",

From d809997822d17717d7b0d51410563cde4d4de364 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 09:21:00 -0400
Subject: [PATCH 22/25] fp-stability: prune unit tests to the high-value
 contracts (33 -> 17)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kept the behavioral contracts (verrou-absent/broken detection — the case a real bug hit, scale-free pass/fail invariant + zero-scale safety, cancellation severity, output autodetect, blank-result emitter KeyError guard, fypp-ambiguity annotation, VALGRIND_LIB relocation incl. don't-clobber, install no-binary hard-fail) and the subtle edges. Dropped redundant enumerations (5 of 8 macro-context micro-cases, 3 of 5 sig-bits math cases) and trivial-math/empty-input/constant assertions (digits_left, min_sig_bits==24, *_empty, omits-when-absent, the obvious bootstrap-returncode guard). -125 lines; the dropped paths are covered by the kept tests' shared code or the end-to-end CI job.
---
 toolchain/mfc/test_fp_stability.py | 147 +++--------------------------
 1 file changed, 11 insertions(+), 136 deletions(-)

diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py
index b8b6104403..6521705b96 100644
--- a/toolchain/mfc/test_fp_stability.py
+++ b/toolchain/mfc/test_fp_stability.py
@@ -1,30 +1,20 @@
-"""Unit tests for the pure helpers behind the FP-stability cancellation pass and
-its fypp macro-expansion flagging.
+"""Unit tests for the pure helpers behind the FP-stability cancellation pass, its
+fypp macro-expansion flagging, scale-free pass/fail, and Verrou discovery/install.
 
 The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job;
 here we test only the pure functions that decide what to instrument and how to
-label results, so they can run without Verrou or built binaries.
+label results, so they can run without Verrou or built binaries. We keep the tests
+that pin a real behavioral contract or a subtle edge, not every micro-variation.
 """
 
 from mfc.fp_stability_metrics import (
-    MIN_SIG_BITS,
     _autodetect_compare,
     _cancellation_severity,
-    _digits_left,
     _macro_context_in_lines,
     _sig_bits,
 )
 
-# --- #2: fypp macro-expansion context detection ---
-
-
-def test_macro_context_none_outside_any_block():
-    lines = [
-        "subroutine s_foo()\n",
-        "  a = b - c\n",
-        "end subroutine\n",
-    ]
-    assert _macro_context_in_lines(lines, 2) is None
+# --- fypp macro-expansion context detection (a #:for/#:def line maps to N instances) ---
 
 
 def test_macro_context_inside_for_loop_body():
@@ -37,6 +27,7 @@ def test_macro_context_inside_for_loop_body():
 
 
 def test_macro_context_if_block_is_not_duplicating():
+    # #:if selects code but does not duplicate it, so it must NOT be flagged.
     lines = [
         "#:if FOO\n",
         "  a = b - c\n",
@@ -45,50 +36,12 @@ def test_macro_context_if_block_is_not_duplicating():
     assert _macro_context_in_lines(lines, 2) is None
 
 
-def test_macro_context_reports_innermost_duplicating_block():
-    lines = [
-        "#:def MACRO(x)\n",
-        "  #:if cond\n",
-        "    #:for j in range(3)\n",
-        "      y = ${x}$ - z\n",
-        "    #:endfor\n",
-        "  #:endif\n",
-        "#:enddef\n",
-    ]
-    assert _macro_context_in_lines(lines, 4) == "#:for"
-
-
-def test_macro_context_balances_closers():
-    lines = [
-        "#:for i in [1, 2]\n",
-        "  a = b - c\n",
-        "#:endfor\n",
-        "d = e - f\n",
-    ]
-    # line 4 is after the loop closed -> not in any duplicating block
-    assert _macro_context_in_lines(lines, 4) is None
-
-
-def test_macro_context_def_body_when_no_inner_loop():
-    lines = [
-        "#:def GEOM(n)\n",
-        "  r = x - y\n",
-        "#:enddef\n",
-    ]
-    assert _macro_context_in_lines(lines, 2) == "#:def"
-
-
-def test_macro_context_block_and_call_are_duplicating():
-    assert _macro_context_in_lines(["#:block B\n", "  a = b - c\n", "#:endblock\n"], 2) == "#:block"
-    assert _macro_context_in_lines(["#:call M()\n", "  a = b - c\n", "#:endcall\n"], 2) == "#:call"
-
-
 def test_macro_context_unbalanced_close_is_safe():
     # a stray #:endfor with an empty stack must not crash or misreport
     assert _macro_context_in_lines(["#:endfor\n", "  a = b - c\n"], 2) is None
 
 
-# --- per-site cancellation severity (bits lost), from a threshold sweep ---
+# --- per-site cancellation severity (highest bit-threshold a site survives) ---
 
 
 def test_cancellation_severity_takes_highest_surviving_threshold():
@@ -101,10 +54,6 @@ def test_cancellation_severity_takes_highest_surviving_threshold():
     assert _cancellation_severity(level_sites) == {("a.fpp", 1): 30, ("b.fpp", 2): 10}
 
 
-def test_cancellation_severity_empty():
-    assert _cancellation_severity([]) == {}
-
-
 # --- auto-detect which output files to compare (for a user case) ---
 
 
@@ -123,47 +72,20 @@ def test_autodetect_compare_falls_back_to_prim_when_no_cons():
     assert _autodetect_compare(fns) == ["prim.1.00.000010.dat", "prim.3.00.000010.dat"]
 
 
-def test_autodetect_compare_empty_when_no_field_output():
-    assert _autodetect_compare(["indices.dat", "pre_time_data.dat", "foo.txt"]) == []
-
-
 # --- scale-free pass/fail: significant bits retained ---
 
 
-def test_sig_bits_relative_deviation():
-    # max_dev/ref_scale = 1e-14 -> ~46.5 retained bits
-    assert 46 < _sig_bits(1e-14, 1.0) < 47
-
-
 def test_sig_bits_is_scale_free():
     # same relative deviation -> same bits regardless of absolute magnitude
     assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9
 
 
-def test_sig_bits_zero_deviation_is_full_precision():
-    assert _sig_bits(0.0, 1.0) == 53.0
-
-
 def test_sig_bits_zero_scale_is_safe():
+    # a zero/degenerate field scale must not divide-by-zero; report full precision
     assert _sig_bits(1e-12, 0.0) == 53.0
 
 
-def test_sig_bits_deviation_at_scale_is_unstable():
-    # deviation as large as the field -> <= 0 retained bits
-    assert _sig_bits(1.0, 1.0) <= 0.0
-
-
-def test_min_sig_bits_is_single_precision_floor():
-    assert MIN_SIG_BITS == 24
-
-
-def test_digits_left_full_and_clamped():
-    assert 15.5 < _digits_left(0) < 16.0  # full double ~ 16 sig digits
-    assert _digits_left(53) == 0.0
-    assert _digits_left(60) == 0.0  # clamp: never negative
-
-
-# --- report emitters: must survive blank and populated result dicts (CI-only path) ---
+# --- report emitters: must survive the CI-only path without KeyError / regressions ---
 
 
 def _emit_to_tmp(results, tmp_path, monkeypatch):
@@ -185,26 +107,6 @@ def test_emit_summary_survives_blank_result(tmp_path, monkeypatch):
     assert "0 passed, 1 failed" in text
 
 
-def test_emit_summary_populated_result(tmp_path, monkeypatch):
-    from mfc.fp_stability import _blank_result
-
-    r = _blank_result("demo")
-    r.update(
-        passed=False,
-        max_dev=1e-9,
-        sig_bits=30.0,
-        float_proxy=1e-6,
-        vprec=[(52, 1e-14), (23, float("inf"))],  # exercises the "crash" branch
-        cancellation_locs=[("src/x/m_a.fpp", 5)],
-        cancellation_bits={("src/x/m_a.fpp", 5): 40},
-        cancellation_macro={("src/x/m_a.fpp", 5): "#:for"},
-        float_max_locs=[("m_a.fpp", 9)],
-    )
-    text = _emit_to_tmp([r], tmp_path, monkeypatch)
-    assert "💥 crash" in text and "digits lost" in text
-    assert "may represent multiple instances" in text  # fypp-ambiguous marker
-
-
 def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys):
     from mfc import fp_stability_report as report
     from mfc.fp_stability import _blank_result
@@ -222,7 +124,7 @@ def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatc
     assert "multiple instances" in out  # fypp-expanded cancellation site flagged
 
 
-# --- Verrou discovery: a bare system valgrind must read as "Verrou absent" ---
+# --- Verrou discovery: a bare/broken valgrind must read as "Verrou absent" ---
 
 
 def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch):
@@ -264,15 +166,6 @@ def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch):
     assert runners._find_verrou() == ""
 
 
-def test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch):
-    from mfc import fp_stability_runners as runners
-
-    monkeypatch.setenv("VERROU_HOME", str(tmp_path))
-    monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind")
-    monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True)
-    assert runners._find_verrou() == "/opt/verrou/bin/valgrind"
-
-
 def test_has_verrou_tool_reflects_exit_code(monkeypatch):
     from mfc import fp_stability_runners as runners
 
@@ -304,14 +197,6 @@ def test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch
     assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind")
 
 
-def test_verrou_env_omits_valgrind_lib_when_libexec_absent(tmp_path, monkeypatch):
-    from mfc import fp_stability_runners as runners
-
-    monkeypatch.delenv("VALGRIND_LIB", raising=False)
-    env = runners._verrou_env(str(tmp_path / "bin" / "valgrind"))
-    assert "VALGRIND_LIB" not in env
-
-
 def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch):
     from mfc import fp_stability_runners as runners
 
@@ -321,17 +206,7 @@ def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch):
     assert env["VALGRIND_LIB"] == "/user/chosen/lib"  # not clobbered
 
 
-# --- auto-install hard-fail guards ---
-
-
-def test_install_verrou_raises_when_bootstrap_fails(monkeypatch):
-    import pytest
-
-    from mfc import fp_stability as fps
-
-    monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 1})())
-    with pytest.raises(fps.MFCException, match="Verrou install failed"):
-        fps._install_verrou()
+# --- auto-install hard-fail guard (a green bootstrap that produced no binary) ---
 
 
 def test_install_verrou_raises_when_no_binary_appears(monkeypatch):

From a9dbb4237d4c37de8ae9ee63645d38b4a6f5533c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 09:38:55 -0400
Subject: [PATCH 23/25] ci(fp-stability): derive Verrou cache key from
 verrou.sh content (no hand-synced version)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cache key hardcoded verrou-a58d434-valgrind-3.26.0, synced to the installer's pins only by a comment — if verrou.sh bumped but the key didn't, CI would restore the stale cached tree and silently never exercise the new version. Key off hashFiles('toolchain/bootstrap/verrou.sh') so any pin change (or edit) auto-busts the cache. Also dropped the version literals from the workflow's header comment; the pinned versions now live solely in verrou.sh.
---
 .github/workflows/fp-stability.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml
index 203cff3ad4..b3c9a6b5cc 100644
--- a/.github/workflows/fp-stability.yml
+++ b/.github/workflows/fp-stability.yml
@@ -24,8 +24,9 @@ name: FP Stability
 # On FAIL: verrou_dd_sym runs to identify the responsible function symbols.
 # Logs are uploaded as CI artifacts.
 #
-# Verrou (Valgrind 3.26.0 + edf-hpc/verrou@a58d434) is built once and cached.
-# Build takes ~20 min uncached; cached runs restore in ~30 s.
+# Verrou (the pinned Valgrind+Verrou pair; versions live in toolchain/bootstrap/verrou.sh)
+# is installed by fp-stability on first use and cached. The prebuilt download is seconds;
+# a cache miss with no prebuilt falls back to a ~20-min source build.
 
 on:
   push:
@@ -68,8 +69,10 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.local/verrou
-          # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the installer).
-          key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }}
+          # Key off the installer's content so any version bump (or other edit) in
+          # verrou.sh auto-busts the cache and forces a fresh install — no hand-synced
+          # version string to drift out of date.
+          key: verrou-${{ hashFiles('toolchain/bootstrap/verrou.sh') }}-${{ runner.os }}
 
       - name: Install system dependencies
         run: |

From c58d44fd3df2587febf68f2d414fc2ad3b65260c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 09:42:57 -0400
Subject: [PATCH 24/25] fp-stability: remove emoji from console +
 GitHub-summary output (ASCII only)

Replaced the check/cross marks in the results list (PASS/FAIL), the summary table status (PASS/FAIL), and the VPREC 'crash' cell + its comment. No emoji in the toolchain output now.
---
 toolchain/mfc/fp_stability.py        | 4 ++--
 toolchain/mfc/fp_stability_report.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 7fce9ae6fe..8710bb717c 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -639,8 +639,8 @@ def fp_stability():
 
     cons.print(f"[bold]Results[/bold] ({elapsed:.0f}s):  [green]{n_pass} passed[/green]  [red]{n_fail} failed[/red]")
     for r in results:
-        mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]"
-        cons.print(f"  {mark} {r['name']}")
+        mark = "[green]PASS[/green]" if r["passed"] else "[red]FAIL[/red]"
+        cons.print(f"  {mark}  {r['name']}")
 
     _emit_github_summary(results, n_samples)
     _emit_github_annotations(results)
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index 6b8f07079c..2ea90be081 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -83,7 +83,7 @@ def _emit_github_summary(results: list, n_samples: int):
     md.append("| Case | Status | bits retained | max\\_dev | Float proxy |")
     md.append("|------|:------:|:------:|--------:|--------:|")
     for r in results:
-        status = "✅" if r["passed"] else "❌"
+        status = "PASS" if r["passed"] else "FAIL"
         bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
         fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
         md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |")
@@ -119,7 +119,7 @@ def _emit_github_summary(results: list, n_samples: int):
             md.append("")
 
     # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at
-    # that reduced precision (💥 crash = run diverged/failed, — = not measured).
+    # that reduced precision ("crash" = run diverged/failed; dash = not measured).
     if any(r["vprec"] for r in results):
         _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
         header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
@@ -135,7 +135,7 @@ def _emit_github_summary(results: list, n_samples: int):
                 if d is None:
                     cols.append("—")
                 elif d == float("inf"):
-                    cols.append("💥 crash")
+                    cols.append("crash")
                 else:
                     cols.append(f"{d:.2e}")
             md.append(f"| `{r['name']}` | {' | '.join(cols)} |")

From 009967422bc982fd9625159d9df5d92a896e55d1 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 3 Jun 2026 09:50:19 -0400
Subject: [PATCH 25/25] =?UTF-8?q?fp-stability:=20ascii-only=20=E2=80=94=20?=
 =?UTF-8?q?convert=20em-dash/arrow/math=20glyphs=20in=20comments=20+=20out?=
 =?UTF-8?q?put?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaced non-ASCII in the toolchain (em-dash/en-dash -> '-', '->' for arrows, '>=' for the greater-than-or-equal sign, '+/-' for the plus-minus sign, '~' for approx, 'inf'/'Linf' for the infinity glyph, '...' for ellipsis) across fp_stability*.py, verrou.sh, and the fp-stability command help. Display/comment text only; no logic change. The viz command's pre-existing glyphs are left untouched (not part of this PR).
---
 toolchain/bootstrap/verrou.sh         | 20 ++++++++---------
 toolchain/mfc/cli/commands.py         |  8 +++----
 toolchain/mfc/fp_stability.py         | 22 +++++++++---------
 toolchain/mfc/fp_stability_metrics.py |  4 ++--
 toolchain/mfc/fp_stability_report.py  | 32 +++++++++++++--------------
 toolchain/mfc/fp_stability_runners.py |  6 ++---
 6 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh
index dfbd462231..dd2a67c653 100755
--- a/toolchain/bootstrap/verrou.sh
+++ b/toolchain/bootstrap/verrou.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by
-# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of
+# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package - it is a fork of
 # Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds);
 # if none is available for this tag/arch it falls back to a source build (~20 min).
 # fp-stability auto-runs this on first use when Verrou is absent (printing what it
@@ -19,7 +19,7 @@ set -euo pipefail
 VALGRIND_VERSION="3.26.0"
 VERROU_COMMIT="a58d434"
 # Prebuilt artifacts (built once per arch) live in a small companion repo. The tag
-# pins to the (valgrind, verrou) pair above — bump all three together.
+# pins to the (valgrind, verrou) pair above - bump all three together.
 VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}"
 VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}"
 PREFIX="${VERROU_HOME:-$HOME/.local/verrou}"
@@ -46,7 +46,7 @@ case "$(uname -m)" in
     aarch64|arm64)
         arch_tag="aarch64"
         echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2
-        echo "         best-validated on x86_64 — treat results as experimental on this arch." >&2
+        echo "         best-validated on x86_64 - treat results as experimental on this arch." >&2
         ;;
     *)
         echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." >&2
@@ -73,31 +73,31 @@ try_prebuilt() {
         if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi
     }
     if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then
-        echo "==> No prebuilt for this tag/arch — building from source instead."
+        echo "==> No prebuilt for this tag/arch - building from source instead."
         rm -rf "$dl"; return 1
     fi
     if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then
-        echo "WARNING: prebuilt checksum mismatch — building from source instead." >&2
+        echo "WARNING: prebuilt checksum mismatch - building from source instead." >&2
         rm -rf "$dl"; return 1
     fi
 
     # Extract + verify in a staging dir, then swap into $PREFIX atomically. set -e
     # is suppressed inside a function used as an `if` condition, so check each step
-    # explicitly — otherwise a failed extract would fall through and the source
+    # explicitly - otherwise a failed extract would fall through and the source
     # build would install on top of a half-written tree (or a stale one on --force).
     local stage="$dl/stage"
     mkdir -p "$stage"
     if tar --zstd --help >/dev/null 2>&1; then
-        tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; }
+        tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; }
     else
-        zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; }
+        zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; }
     fi
 
     # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets
     # VALGRIND_LIB relative to the tree so the relocated install works. Verify the
     # staged tree runs before committing it.
     if ! ( . "${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then
-        echo "WARNING: prebuilt did not run — building from source instead." >&2
+        echo "WARNING: prebuilt did not run - building from source instead." >&2
         rm -rf "$dl"; return 1
     fi
 
@@ -105,7 +105,7 @@ try_prebuilt() {
     mkdir -p "$(dirname "$PREFIX")"
     rm -rf "$PREFIX"
     if ! mv "$stage" "$PREFIX"; then
-        echo "WARNING: could not install prebuilt to ${PREFIX} — building from source instead." >&2
+        echo "WARNING: could not install prebuilt to ${PREFIX} - building from source instead." >&2
         rm -rf "$dl"; return 1
     fi
     rm -rf "$dl"
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 7d9771f772..b0591fc9a6 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -899,25 +899,25 @@
     help="Run floating-point stability tests using Verrou.",
     description=(
         "Runs Verrou random-rounding stability analysis on a built-in suite of small "
-        "1-D cases, or — given a case .py (positional INPUT) — on your own case. Each "
+        "1-D cases, or - given a case .py (positional INPUT) - on your own case. Each "
         "case is run N times under Verrou's random IEEE-754 rounding and compared "
         "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must "
         "retain at least ~24 significant bits (single precision) under random rounding "
         "(no per-case thresholds).\n\n"
         "With a case .py, that case is run as a SINGLE serial CPU process under Verrou "
-        "(~30x slower, and run many times), so it must be a small, short proxy — large "
+        "(~30x slower, and run many times), so it must be a small, short proxy - large "
         "grids or long runs are rejected with guidance; serial .dat I/O is forced. "
         "Example: ./mfc.sh fp-stability my_case.py\n\n"
         "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to "
         "$HOME/.local/verrou); if absent it is installed automatically (a pinned, "
-        "hash-verified prebuilt is downloaded, with a source build as fallback) — "
+        "hash-verified prebuilt is downloaded, with a source build as fallback) - "
         "aborts if that install fails. The simulation and pre_process binaries must "
         "be serial (no-MPI, no-GPU) debug builds.\n\n"
         "Analysis passes (skip with --no-* flags):\n"
         "  float proxy    One run with --rounding-mode=float (single-precision sensitivity)\n"
         "  vprec sweep    Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n"
         "  cancellation   --check-cancellation origins, ranked by significant digits lost\n"
-        "  float-max      --check-max-float detection of double→float overflow sites\n"
+        "  float-max      --check-max-float detection of double->float overflow sites\n"
     ),
     include_common=["mfc_config", "verbose", "debug_log"],
     positionals=[
diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py
index 8710bb717c..200748203c 100644
--- a/toolchain/mfc/fp_stability.py
+++ b/toolchain/mfc/fp_stability.py
@@ -8,7 +8,7 @@
    (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold).
 
 B. Float proxy (--no-float-proxy to skip)
-   One run with --rounding-mode=float — deterministic proxy for
+   One run with --rounding-mode=float - deterministic proxy for
    single-precision sensitivity without recompiling.
 
 C. VPREC precision sweep (--no-vprec to skip)
@@ -24,7 +24,7 @@
 
 E. Float-max overflow detection (--no-float-max to skip)
    One run with --check-max-float=yes; reports locations where a
-   double→float conversion would overflow to ±Inf.
+   double->float conversion would overflow to +/-Inf.
 
 Logs are saved to fp-stability-logs/ and uploaded as CI artifacts.
 On GitHub Actions: a step summary table and ::warning:: file annotations
@@ -334,7 +334,7 @@ def _merge(*dicts):
         "name": "low_mach",
         "description": "1-D water shock with low_Mach=1 HLLC correction active",
         "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
-        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0",
+        "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M~0",
         "pre": _merge(
             _BASE_PRE,
             _WATER_EOS,
@@ -475,10 +475,10 @@ def _run_case(
                     result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))}
                     if locs:
                         worst = max(bits.values()) if bits else 0
-                        cons.print(f"  cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
+                        cons.print(f"  cancellation: {len(locs)} site(s), worst loses >= {worst / math.log2(10):.0f} of ~16 digits")
                         n_macro = len(result["cancellation_macro"])
                         if n_macro:
-                            cons.print(f"  [dim]{n_macro} inside fypp expansions — line maps to multiple instances[/dim]")
+                            cons.print(f"  [dim]{n_macro} inside fypp expansions - line maps to multiple instances[/dim]")
                     else:
                         cons.print("  cancellation: none detected")
             except Exception as exc:
@@ -518,7 +518,7 @@ def _load_user_case(input_path: str) -> dict:
     """Build a single fp-stability case from a user case .py.
 
     The case is run as ONE serial CPU process under Verrou (so it must be small
-    and short — a coarsened proxy of a production run, not the real thing); a grid
+    and short - a coarsened proxy of a production run, not the real thing); a grid
     too large to be feasible errors. The output files to compare are auto-detected
     from the reference run, so 'compare' is left empty here.
     """
@@ -533,14 +533,14 @@ def _load_user_case(input_path: str) -> dict:
     t_stop = int(params.get("t_step_stop", 0) or 0)
     work = cells * max(t_stop, 1)
     if cells > FP_CASE_MAX_CELLS:
-        raise MFCException(f"case has {cells:,} cells — too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).")
+        raise MFCException(f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).")
     if work > FP_CASE_MAX_WORK:
         raise MFCException(
-            f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) — too slow under "
+            f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under "
             f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)."
         )
     stem = os.path.splitext(os.path.basename(input_path))[0]
-    if stem == "case":  # examples/<name>/case.py — the dir name is more telling
+    if stem == "case":  # examples/<name>/case.py - the dir name is more telling
         stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem
     return {
         "name": stem,
@@ -554,10 +554,10 @@ def _load_user_case(input_path: str) -> dict:
 
 def _install_verrou() -> str:
     """Verrou is absent: install it via the bootstrap (downloads a pinned, hash-verified
-    prebuilt; source build as fallback) and return the valgrind path. Aborts on failure —
+    prebuilt; source build as fallback) and return the valgrind path. Aborts on failure -
     fp-stability cannot run without Verrou, so this is a hard error, not a skip."""
     script = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh")
-    cons.print("[bold]Verrou not found — installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]")
+    cons.print("[bold]Verrou not found - installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]")
     if subprocess.run(["bash", script], check=False).returncode != 0:
         raise MFCException("Verrou install failed (see output above). Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.")
     verrou_bin = _find_verrou()
diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py
index a985b363af..4917e293f5 100644
--- a/toolchain/mfc/fp_stability_metrics.py
+++ b/toolchain/mfc/fp_stability_metrics.py
@@ -155,7 +155,7 @@ def _max_abs_np(ref_dir: str, compare_files: list) -> float:
 
 
 def _parse_cancel_gen(gen_path: str) -> list:
-    """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources."""
+    """Parse cc-gen-file TSV (file\\tline\\tsymbol) -> sorted unique [(fname, line)] for MFC sources."""
     if not os.path.isfile(gen_path):
         return []
     locs = []
@@ -215,7 +215,7 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
 # Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
 # filter: a site is reported only if it lost >= the threshold bits. Sweeping these
 # levels and taking the highest each site survives gives a per-site "bits lost"
-# severity (a lower bound — no false positives). 48 is near the full 53-bit
+# severity (a lower bound - no false positives). 48 is near the full 53-bit
 # double mantissa (the top of the sweep), not the mantissa width itself.
 CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]
 
diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py
index 2ea90be081..2ca469b9e9 100644
--- a/toolchain/mfc/fp_stability_report.py
+++ b/toolchain/mfc/fp_stability_report.py
@@ -36,10 +36,10 @@ def _emit_github_annotations(results: list):
             note = "catastrophic cancellation site"
             bits = site_bits.get((fname, lineno))
             if bits:
-                note += f" — loses ≥ {bits / math.log2(10):.0f} of ~16 digits"
+                note += f" - loses >= {bits / math.log2(10):.0f} of ~16 digits"
             macro = macro_sites.get((fname, lineno))
             if macro:
-                note += f" — inside a {macro}-expanded line, may represent multiple instances"
+                note += f" - inside a {macro}-expanded line, may represent multiple instances"
             print(f"::notice {loc},title={title}::{note}", flush=True)
         n_cc = len(r.get("cancellation_locs", []))
         if n_cc > 3:
@@ -51,7 +51,7 @@ def _more_md(total: int, shown: int, noun: str) -> str:
     or '' when nothing was truncated."""
     if total <= shown:
         return ""
-    return f"- …and {total - shown} more {noun}; see `fp-stability-logs/`"
+    return f"- ...and {total - shown} more {noun}; see `fp-stability-logs/`"
 
 
 def _emit_github_summary(results: list, n_samples: int):
@@ -70,26 +70,26 @@ def _emit_github_summary(results: list, n_samples: int):
 
     md = []
     md.append("## FP Stability Results\n")
-    md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n")
+    md.append(f"**{n_pass} passed, {n_fail} failed** - {n_samples} random-rounding samples per case\n")
     md.append(
         f"> **Coverage:** {len(results)} one-dimensional case(s) "
         f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these "
-        "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
+        "cases exercise - not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths "
         "they do not reach.\n"
     )
 
-    # Main results table — pass/fail is scale-free: bits retained vs a single floor
+    # Main results table - pass/fail is scale-free: bits retained vs a single floor
     md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
     md.append("| Case | Status | bits retained | max\\_dev | Float proxy |")
     md.append("|------|:------:|:------:|--------:|--------:|")
     for r in results:
         status = "PASS" if r["passed"] else "FAIL"
-        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
-        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
+        bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "-"
+        fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "-"
         md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |")
     md.append("")
 
-    # Cancellation ORIGINS — where ill-conditioning actually arises, led with the
+    # Cancellation ORIGINS - where ill-conditioning actually arises, led with the
     # most severe (most bits lost).
     cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
     if cases_with_cancel:
@@ -98,7 +98,7 @@ def _emit_github_summary(results: list, n_samples: int):
             "> Subtraction of nearly-equal values loses leading significant digits. A double carries "
             "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
             "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only "
-            "single-precision trust. Site *count* is not severity — one site losing many digits outweighs "
+            "single-precision trust. Site *count* is not severity - one site losing many digits outweighs "
             "many mild ones.\n"
         )
         for r in cases_with_cancel:
@@ -108,17 +108,17 @@ def _emit_github_summary(results: list, n_samples: int):
             ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"]))
             if ordered:
                 w = ordered[0]
-                md.append(f"**`{r['name']}`** — {len(ordered)} site(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
+                md.append(f"**`{r['name']}`** - {len(ordered)} site(s); worst loses >= {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
             for e in ordered[:15]:
                 lost = e["bits"] / math.log2(10)
-                ambiguous = f" — _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else ""
-                md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`{ambiguous}")
+                ambiguous = f" - _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else ""
+                md.append(f"- **>= {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) - `{e['where']}`{ambiguous}")
             footer = _more_md(len(ordered), 15, "site(s)")
             if footer:
                 md.append(footer)
             md.append("")
 
-    # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at
+    # VPREC sweep - one column per mantissa-bit level showing the Linf deviation at
     # that reduced precision ("crash" = run diverged/failed; dash = not measured).
     if any(r["vprec"] for r in results):
         _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
@@ -133,7 +133,7 @@ def _emit_github_summary(results: list, n_samples: int):
             for b in VPREC_MANTISSA_BITS:
                 d = vmap.get(b)
                 if d is None:
-                    cols.append("—")
+                    cols.append("-")
                 elif d == float("inf"):
                     cols.append("crash")
                 else:
@@ -146,7 +146,7 @@ def _emit_github_summary(results: list, n_samples: int):
     if cases_with_fmax:
         md.append("### Float32 overflow sites (check\\_max\\_float)\n")
         for r in cases_with_fmax:
-            md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n")
+            md.append(f"**`{r['name']}`** - {len(r['float_max_locs'])} site(s)\n")
             for fname, lineno in r["float_max_locs"][:10]:
                 md.append(f"- `{fname}:{lineno}`")
             footer = _more_md(len(r["float_max_locs"]), 10, "site(s)")
diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py
index f282f47e12..3809ee9992 100644
--- a/toolchain/mfc/fp_stability_runners.py
+++ b/toolchain/mfc/fp_stability_runners.py
@@ -22,7 +22,7 @@
 
 def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool:
     """True if this valgrind actually provides the 'verrou' tool. A plain system
-    valgrind does not — accepting one would only fail later at run time. Pass env
+    valgrind does not - accepting one would only fail later at run time. Pass env
     (with VALGRIND_LIB) to verify a relocated prebuilt tree, which cannot load its
     tool without it."""
     try:
@@ -57,7 +57,7 @@ def _find_binary(name: str) -> str:
 
 def _verrou_env(verrou_bin: str) -> dict:
     """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt
-    artifact extracted to a new prefix) can locate its tool — Valgrind bakes its
+    artifact extracted to a new prefix) can locate its tool - Valgrind bakes its
     build prefix into the binary otherwise. Harmless for a source-built tree, where
     VALGRIND_LIB just equals the compiled-in path. A VALGRIND_LIB already in the
     environment (user sourced env.sh) is left untouched."""
@@ -168,7 +168,7 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str):
 
 
 def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float:
-    """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref."""
+    """One run with --rounding-mode=float; returns Linf deviation from nearest-ref."""
     run_dir = os.path.join(work_dir, "float_proxy")
     _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float")
     return _max_diff_np(ref_dir, run_dir, case["compare"])