From ac398abecac8ed76418ac2df10080157e1f6db1c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 1 Jun 2026 19:01:52 -0400 Subject: [PATCH 01/25] fp-stability: confirm, rank, and disambiguate dd_line hotspots dd_line reports a minimal set of source lines, but presented them as a flat, equally-weighted list of confident warnings. Three problems: (1) no check that the reported lines actually reproduce the instability; (2) fypp #:for/#:def expansion collapses many generated computations onto one .fpp line, so a hit can be the wrong instance; (3) a multi-op line did not say which op was at fault. This adds, reusing the verified Verrou --source mechanism (matches file+line+symbol, captured via --gen-source): - Confirmation: perturb only the suspect lines; lines that fail to reproduce the deviation are downgraded from ::warning:: to ::notice:: (unconfirmed). - Per-line ranking: perturb each line alone and rank by the share of float-proxy it reproduces, so the dominant computation is named (e.g. m_time_steppers.fpp:510 = 100%). - Cancellation cross-reference: label dd_line hotspots that coincide with a stage-F catastrophic-cancellation site. - Macro-expansion flag: mark hotspots whose .fpp line sits inside a #:for/#:def expansion as instance-ambiguous. Surfaced in console, the GitHub step summary (ranked, tagged list), and inline annotations. Pure helpers covered by toolchain/mfc/test_fp_stability.py (22 tests, TDD). Verified end-to-end on a serial debug build. --- toolchain/mfc/fp_stability.py | 300 +++++++++++++++++++++++++++-- toolchain/mfc/test_fp_stability.py | 198 +++++++++++++++++++ 2 files changed, 480 insertions(+), 18 deletions(-) create mode 100644 toolchain/mfc/test_fp_stability.py diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index dd848f046c..fde268170c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -20,6 +20,16 @@ E. 
verrou_dd_line on failure, after dd_sym (--no-dd-line to skip) Further bisects to exact *source lines* within the responsible functions. + Each reported line is then *confirmed* by a positive control: --gen-source + captures the symbol-correct executed lines, those are filtered to the suspect + set, and a float-mode run with --source restricted to just them must + reproduce the instability. Lines that do not reproduce it are reported as + unconfirmed (downgraded from ::warning:: to ::notice::). Each line is then + perturbed alone and ranked by the share of the single-precision deviation it + reproduces, so the most flagrant computation is identified rather than a flat + list. Hotspots are additionally cross-referenced against the stage-F + cancellation sites (to name the offending subtraction) and flagged as + instance-ambiguous when the .fpp line sits inside a #:for/#:def expansion. F. Cancellation detection (--no-cancellation to skip) One run with --check-cancellation=yes; reports MFC source lines that @@ -79,6 +89,15 @@ # Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)". _VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)") +# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def +# defines a macro instantiated at multiple call sites) collapse many distinct +# generated computations onto a single .fpp source line, so a dd_line hit inside +# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select +# code but do not duplicate it, so they are tracked for balance but not flagged. +_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE) +_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE) +_FYPP_DUPLICATING = ("for", "def", "block", "call") + # Lines that are clearly control-flow delimiters rather than arithmetic. 
# dd_line sometimes reports these when the responsible arithmetic is on the # preceding line but shares DWARF debug info with the delimiter (e.g. loop @@ -114,6 +133,44 @@ def _read_source_line(fname: str, lineno: int) -> str: return "" +def _macro_context_in_lines(lines: list, lineno: int) -> str: + """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that + encloses `lineno` (1-based) in `lines`, or None if none does. + + Used to flag dd_line hotspots whose .fpp line is shared across multiple + expanded instances (a #:for body, a #:def macro used in many places), where + line-level attribution cannot identify which instance is responsible. + """ + stack = [] + for raw in lines[: max(0, lineno - 1)]: + mo = _FYPP_BLOCK_OPEN.match(raw) + if mo: + stack.append(mo.group(1).lower()) + continue + if _FYPP_BLOCK_CLOSE.match(raw) and stack: + stack.pop() + for kw in reversed(stack): + if kw in _FYPP_DUPLICATING: + return f"#:{kw}" + return None + + +def _macro_context(fname: str, lineno: int) -> str: + """File-backed wrapper around _macro_context_in_lines; '' path safe.""" + if os.path.isabs(fname) and os.path.isfile(fname): + candidates = [fname] + else: + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) + if not candidates: + return None + try: + with open(candidates[0]) as fh: + lines = fh.readlines() + except OSError: + return None + return _macro_context_in_lines(lines, lineno) + + def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: """Return True if any line in [start, end] contains non-trivial arithmetic. @@ -804,7 +861,9 @@ def _dd_env(verrou_bin: str) -> dict: def _parse_rddmin_locs(summary_path: str) -> list: - """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary. + """Extract dd_line locations from an rddmin_summary as + [{path, start, end, macro}] dicts (path is repo-relative; macro is the + enclosing fypp duplicating block, e.g. 
'#:for', or None). Filters out locations whose source lines are pure control-flow delimiters (loop boundaries, fypp directive closers, blank/comment lines). These can @@ -831,7 +890,7 @@ def _parse_rddmin_locs(summary_path: str) -> list: rel = path rel = rel.replace("\\", "/") if _is_arithmetic_loc(path, start, end): - locs.append((rel, start, end)) + locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)}) else: skipped.append((rel, start, end)) for rel, start, end in skipped: @@ -866,6 +925,75 @@ def _parse_rddmin_syms(summary_path: str) -> list: return syms +def _build_source_filter(gen_lines: list, suspect_locs: list) -> list: + """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a + suspect dd_line location. + + gen_lines come from a --gen-source run and carry the exact symbol Verrou + requires (--source matches on file+line+symbol, not file+line alone). + suspect_locs are (path, start, end) tuples whose path may be a repo-relative + path while gen-source emits a basename, so matching is by basename + line. + """ + ranges = {} + for path, start, end in suspect_locs: + ranges.setdefault(os.path.basename(path), []).append((start, end)) + out = [] + for raw in gen_lines: + parts = raw.rstrip("\n").split("\t") + if len(parts) < 2: + continue + base = os.path.basename(parts[0].strip()) + try: + ln = int(parts[1].strip()) + except ValueError: + continue + if any(s <= ln <= e for s, e in ranges.get(base, [])): + out.append(raw if raw.endswith("\n") else raw + "\n") + return out + + +def _confirm_decision(suspect_dev, dd_threshold: float): + """Decide whether perturbing only the suspect lines reproduces the instability. + + Returns True (confirmed), False (suspect lines are inert -> attribution + suspect, e.g. macro-collapse misattribution), or None if unmeasured. 
+ """ + if suspect_dev is None: + return None + return suspect_dev >= dd_threshold + + +def _rank_locs(locs: list, total: float) -> list: + """Attach a 'share' (per-line deviation / total) to each loc dict — which + must already carry 'share_dev' from a single-line positive control — and + return the locs sorted by that deviation, most flagrant first. + + 'total' is normally float_proxy, so share is the fraction of the full + single-precision deviation that perturbing that one line alone reproduces. + A non-positive total yields share=None (cannot normalize). + """ + for loc in locs: + dev = loc.get("share_dev") + loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None + return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True) + + +def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: + """Set loc['cancellation']=True for each dd_line loc whose line range covers a + catastrophic-cancellation site (stage F), matched by basename + line. + + This pins the flagrant operation on a multi-op line to the subtraction that + cancels, rather than just naming the line. 
+ """ + by_base = {} + for fname, lineno in cancellation_locs: + by_base.setdefault(os.path.basename(fname), set()).add(lineno) + for loc in dd_line_locs: + lines = by_base.get(os.path.basename(loc["path"]), set()) + loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1)) + return dd_line_locs + + def _run_dd_tool( dd_bin: str, dd_dir: str, @@ -924,7 +1052,7 @@ def _run_dd_line( log_dir: str, threshold: float = None, ) -> list: - """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples.""" + """Run verrou_dd_line; return [{path, start, end, macro}] location dicts.""" dd_bin = _find_dd_line(verrou_bin) if not dd_bin: cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") @@ -941,6 +1069,86 @@ def _run_dd_line( return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) +def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag): + """Perturb only the lines in src_lines (deterministic float mode) and return + the L-inf deviation from the nearest-rounding reference, or None on failure.""" + src_path = os.path.join(conf_dir, f"source_{tag}.txt") + with open(src_path, "w") as fh: + fh.writelines(src_lines) + run_dir = os.path.join(conf_dir, f"perturb_{tag}") + os.makedirs(run_dir, exist_ok=True) + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="float", + extra_flags=[f"--source={src_path}"], + ) + except MFCException: + return None + return _max_diff_np(ref_dir, run_dir, compare) + + +def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy): + """Positive control for dd_line: perturb ONLY the suspect lines and confirm + the instability reproduces, then rank each line by its individual share. 
+ + Verrou's --source matches file+line+symbol (not file+line alone), so we first + capture the symbol-correct executed source lines via --gen-source, filter them + to the suspect set, then run deterministic float-mode restricted to just those + lines. If the suspect-only deviation reaches dd_threshold the attribution is + confirmed; if it stays near zero the reported lines do not actually carry the + instability (e.g. a #:for-expanded line blamed for the wrong instance). + + Each line is then perturbed alone so its 'share_dev' (and 'share' of + float_proxy) shows which computation dominates. + + Returns (confirmed, suspect_dev, ranked_locs). + """ + if not dd_line_locs: + return None, None, dd_line_locs + conf_dir = os.path.join(work_dir, "confirm") + os.makedirs(conf_dir, exist_ok=True) + gen_path = os.path.join(conf_dir, "gen_source.txt") + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + conf_dir, + rounding_mode="nearest", + extra_flags=[f"--gen-source={gen_path}"], + ) + except MFCException: + return None, None, dd_line_locs + if not os.path.isfile(gen_path): + return None, None, dd_line_locs + with open(gen_path) as fh: + gen_lines = fh.readlines() + compare = case["compare"] + + # whole-set positive control + suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs] + set_src = _build_source_filter(gen_lines, suspects) + if not set_src: + # none of the reported lines performs an instrumented FP op -> not reproduced + return False, 0.0, dd_line_locs + set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set") + confirmed = _confirm_decision(set_dev, dd_threshold) + + # per-line ranking (a single line trivially owns the whole set deviation) + if len(dd_line_locs) == 1: + dd_line_locs[0]["share_dev"] = set_dev + else: + for i, loc in enumerate(dd_line_locs): + one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])]) + loc["share_dev"] = 
_source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0 + ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev)) + return confirmed, set_dev, ranked + + def _run_case( case: dict, verrou_bin: str, @@ -976,6 +1184,8 @@ def _run_case( "vprec": [], "dd_sym_syms": [], "dd_line_locs": [], + "dd_line_confirmed": None, + "dd_line_confirm_dev": None, "cancellation_locs": [], "mca_dev": None, "mca_sigbits": None, @@ -1060,9 +1270,30 @@ def _run_case( log_dir, threshold=dd_threshold, ) + macro_n = sum(1 for loc in result["dd_line_locs"] if loc["macro"]) + if macro_n: + cons.print(f" [dim]dd_line: {macro_n} hotspot(s) inside fypp-expanded code (instance-ambiguous)[/dim]") except Exception as exc: cons.print(f" [bold yellow]dd_line error[/bold yellow]: {exc}") + # --- E2: confirm dd_line hotspots and rank each by its individual share --- + if dd_threshold > 0 and run_dd_line and result["dd_line_locs"]: + cons.print(" [dim]confirming + ranking dd_line hotspots (per-line perturbation)...[/dim]") + try: + confirmed, cdev, ranked = _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, result["dd_line_locs"], dd_threshold, float_proxy) + result["dd_line_locs"] = ranked + result["dd_line_confirmed"] = confirmed + result["dd_line_confirm_dev"] = cdev + if confirmed is True: + cons.print(f" [bold green]dd_line confirmed[/bold green]: suspect-only dev={cdev:.3e} >= {dd_threshold:.1e}") + elif confirmed is False: + cons.print(f" [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)") + top = ranked[0] if ranked else None + if top and top.get("share") is not None: + cons.print(f" most flagrant: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)") + except Exception as exc: + cons.print(f" [bold yellow]dd_line confirmation error[/bold yellow]: {exc}") + # --- F: cancellation detection --- if run_cancellation: cons.print(" 
[dim]cancellation detection...[/dim]") @@ -1073,6 +1304,12 @@ def _run_case( cons.print(f" cancellation: {len(locs)} unique source location(s)") else: cons.print(" cancellation: none detected") + # cross-reference: label dd_line hotspots that sit on a cancellation site + if result["dd_line_locs"] and locs: + _mark_cancellation(result["dd_line_locs"], locs) + n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation")) + if n_xref: + cons.print(f" {n_xref} hotspot(s) coincide with a catastrophic-cancellation site") except Exception as exc: cons.print(f" [bold yellow]cancellation check error[/bold yellow]: {exc}") @@ -1114,23 +1351,37 @@ def _emit_github_annotations(results: list): Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations appear inline on the responsible source lines in the PR diff view. - Up to 3 dd_line locations are emitted as ::warning:: per case (minimal - responsible lines from delta-debug). Up to 3 cancellation sites per case - are emitted as ::notice:: so the diff also highlights subtraction- - cancellation hotspots identified by --check-cancellation. + Up to 3 dd_line locations are emitted per case (minimal responsible lines + from delta-debug). Confirmed hotspots (suspect-only perturbation reproduced + the instability) are ::warning::; unconfirmed ones are downgraded to + ::notice:: so a suspect attribution is not presented as fact. Up to 3 + cancellation sites per case are emitted as ::notice:: so the diff also + highlights subtraction-cancellation hotspots from --check-cancellation. 
""" if not os.environ.get("GITHUB_ACTIONS"): return for r in results: status = "FAIL" if not r["passed"] else "hotspot" dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})" - - for rel_path, start, end in r.get("dd_line_locs", [])[:3]: - loc = f"file={rel_path},line={start}" - if end != start: - loc += f",endLine={end}" - title = f"FP {status} [{r['name']}]" - print(f"::warning {loc},title={title}::{dev_str}", flush=True) + unconfirmed = r.get("dd_line_confirmed") is False + + for loc in r.get("dd_line_locs", [])[:3]: + location = f"file={loc['path']},line={loc['start']}" + if loc["end"] != loc["start"]: + location += f",endLine={loc['end']}" + note = dev_str + if loc.get("share") is not None: + note += f" — reproduces {loc['share'] * 100:.0f}% of float-proxy alone" + if loc.get("cancellation"): + note += " — catastrophic cancellation site" + if loc.get("macro"): + note += f" — {loc['macro']}-expanded line, may represent multiple instances" + if unconfirmed: + title = f"FP candidate (unconfirmed) [{r['name']}]" + print(f"::notice {location},title={title}::{note}", flush=True) + else: + title = f"FP {status} [{r['name']}]" + print(f"::warning {location},title={title}::{note}", flush=True) for fname, lineno in r.get("cancellation_locs", [])[:3]: loc = f"file={fname},line={lineno}" @@ -1192,12 +1443,23 @@ def _emit_github_summary(results: list, n_samples: int): cases_with_locs = [r for r in results if r["dd_line_locs"]] if cases_with_locs: md.append("### Top FP hotspots (dd\\_line)\n") + _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} for r in cases_with_locs: status = "❌ FAIL" if not r["passed"] else "✅ pass" - md.append(f"**`{r['name']}`** ({status})\n") - for rel_path, start, end in r["dd_line_locs"][:10]: - loc = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" - md.append(f"- `{loc}`") + md.append(f"**`{r['name']}`** ({status}) — 
attribution {_confirm_label[r.get('dd_line_confirmed')]}") + md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n") + for loc in r["dd_line_locs"][:10]: + rel_path, start, end = loc["path"], loc["start"], loc["end"] + where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" + tags = [] + if loc.get("share") is not None: + tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy") + if loc.get("cancellation"): + tags.append("catastrophic cancellation") + if loc.get("macro"): + tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") + suffix = f" — {', '.join(tags)}" if tags else "" + md.append(f"- `{where}`{suffix}") snippet = _get_source_context(rel_path, start) if snippet: md.append(" ```fortran") @@ -1328,6 +1590,8 @@ def fp_stability(): "vprec": [], "dd_sym_syms": [], "dd_line_locs": [], + "dd_line_confirmed": None, + "dd_line_confirm_dev": None, "cancellation_locs": [], "mca_dev": None, "mca_sigbits": None, diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py new file mode 100644 index 0000000000..694da7d906 --- /dev/null +++ b/toolchain/mfc/test_fp_stability.py @@ -0,0 +1,198 @@ +"""Unit tests for the pure helpers behind the FP-stability dd_line confirmation +pass (#1) and macro-expansion flagging (#2). + +The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job; +here we test only the pure functions that decide what to instrument and how to +label results, so they can run without Verrou or built binaries. 
+""" + +from mfc.fp_stability import ( + _build_source_filter, + _confirm_decision, + _macro_context_in_lines, + _mark_cancellation, + _rank_locs, +) + +# --- #2: fypp macro-expansion context detection --- + + +def test_macro_context_none_outside_any_block(): + lines = [ + "subroutine s_foo()\n", + " a = b - c\n", + "end subroutine\n", + ] + assert _macro_context_in_lines(lines, 2) is None + + +def test_macro_context_inside_for_loop_body(): + lines = [ + "#:for i in [1, 2, 3]\n", + " q(${i}$) = a - b\n", + "#:endfor\n", + ] + assert _macro_context_in_lines(lines, 2) == "#:for" + + +def test_macro_context_if_block_is_not_duplicating(): + lines = [ + "#:if FOO\n", + " a = b - c\n", + "#:endif\n", + ] + assert _macro_context_in_lines(lines, 2) is None + + +def test_macro_context_reports_innermost_duplicating_block(): + lines = [ + "#:def MACRO(x)\n", + " #:if cond\n", + " #:for j in range(3)\n", + " y = ${x}$ - z\n", + " #:endfor\n", + " #:endif\n", + "#:enddef\n", + ] + assert _macro_context_in_lines(lines, 4) == "#:for" + + +def test_macro_context_balances_closers(): + lines = [ + "#:for i in [1, 2]\n", + " a = b - c\n", + "#:endfor\n", + "d = e - f\n", + ] + # line 4 is after the loop closed -> not in any duplicating block + assert _macro_context_in_lines(lines, 4) is None + + +def test_macro_context_def_body_when_no_inner_loop(): + lines = [ + "#:def GEOM(n)\n", + " r = x - y\n", + "#:enddef\n", + ] + assert _macro_context_in_lines(lines, 2) == "#:def" + + +# --- #1: building the symbol-correct --source filter from --gen-source output --- + + +def test_build_source_filter_keeps_matching_file_and_line_with_symbol(): + gen = [ + "m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n", + "m_riemann_solvers.fpp\t999\ts_other\n", + ] + suspects = [("src/simulation/m_riemann_solvers.fpp", 512, 512)] + out = _build_source_filter(gen, suspects) + assert out == ["m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n"] + + +def 
test_build_source_filter_matches_inclusive_range(): + gen = [ + "m_foo.fpp\t10\tsym\n", + "m_foo.fpp\t11\tsym\n", + "m_foo.fpp\t12\tsym\n", + "m_foo.fpp\t13\tsym\n", + ] + suspects = [("m_foo.fpp", 11, 12)] + out = _build_source_filter(gen, suspects) + assert out == ["m_foo.fpp\t11\tsym\n", "m_foo.fpp\t12\tsym\n"] + + +def test_build_source_filter_excludes_other_basenames(): + gen = ["m_bar.fpp\t5\tsym\n"] + suspects = [("m_foo.fpp", 5, 5)] + assert _build_source_filter(gen, suspects) == [] + + +def test_build_source_filter_matches_on_basename_not_full_path(): + # gen-source emits a basename; dd_line locs are repo-relative paths. + gen = ["m_foo.fpp\t5\tsym\n"] + suspects = [("src/common/m_foo.fpp", 5, 5)] + assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"] + + +def test_build_source_filter_skips_malformed_lines(): + gen = ["garbage-no-tab\n", "m_foo.fpp\tnotanumber\tsym\n", "m_foo.fpp\t5\tsym\n"] + suspects = [("m_foo.fpp", 5, 5)] + assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"] + + +# --- #1: confirmation decision --- + + +def test_confirm_decision_true_when_suspect_reproduces_deviation(): + # perturbing only the suspect lines yields >= dd_threshold deviation + assert _confirm_decision(suspect_dev=1e-3, dd_threshold=1e-5) is True + + +def test_confirm_decision_false_when_suspect_is_inert(): + # suspect lines barely move the result -> attribution not reproduced + assert _confirm_decision(suspect_dev=1e-9, dd_threshold=1e-5) is False + + +def test_confirm_decision_none_when_measurement_unavailable(): + assert _confirm_decision(suspect_dev=None, dd_threshold=1e-5) is None + + +# --- Tier 1: per-line confirmation ranking --- + + +def test_rank_locs_sorts_by_share_dev_descending(): + locs = [ + {"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.1}, + {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.9}, + ] + ranked = _rank_locs(locs, total=1.0) + assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"] + + +def 
test_rank_locs_computes_share_as_fraction_of_total(): + locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}] + ranked = _rank_locs(locs, total=0.5) + assert ranked[0]["share"] == 0.5 + + +def test_rank_locs_share_none_when_total_nonpositive(): + locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}] + ranked = _rank_locs(locs, total=0.0) + assert ranked[0]["share"] is None + + +def test_rank_locs_treats_missing_share_dev_as_zero_and_sorts_last(): + locs = [ + {"path": "a.fpp", "start": 1, "end": 1, "share_dev": None}, + {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.3}, + ] + ranked = _rank_locs(locs, total=1.0) + assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"] + + +# --- Tier 1b: dd_line x cancellation cross-reference --- + + +def test_mark_cancellation_flags_loc_on_a_cancellation_line(): + locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}] + _mark_cancellation(locs, [("m_foo.fpp", 11)]) + assert locs[0]["cancellation"] is True + + +def test_mark_cancellation_false_when_no_site_in_range(): + locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}] + _mark_cancellation(locs, [("m_foo.fpp", 99)]) + assert locs[0]["cancellation"] is False + + +def test_mark_cancellation_matches_on_basename_not_full_path(): + locs = [{"path": "src/common/m_foo.fpp", "start": 5, "end": 5}] + _mark_cancellation(locs, [("/abs/build/m_foo.fpp", 5)]) + assert locs[0]["cancellation"] is True + + +def test_mark_cancellation_false_for_different_basename(): + locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}] + _mark_cancellation(locs, [("m_bar.fpp", 5)]) + assert locs[0]["cancellation"] is False From 196aff5e461e01f8fbdaf15f92b2f93c3229dba6 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 1 Jun 2026 20:12:13 -0400 Subject: [PATCH 02/25] fp-stability: per-instance disambiguation of fypp-expanded hotspots (Tier 2) dd_line attributes to .fpp source lines, but a #:for/#:def expansion collapses many generated 
computations onto one line, so a macro-ambiguous hotspot cannot be pinned to a single runtime instance. This adds an opt-in precision path that resolves it. Mechanism (validated against gfortran+Verrou): a new build flag --fp-precision-lines strips the fypp line markers from each generated .f90 so the compiler attributes every expanded instance to a distinct physical line, emitting a .linemap.json sidecar mapping each line back to (.fpp file, line, instance). Marker renumbering was tried first but hit gfortran's DWARF line-number ceiling (~300k) and 700-line shadow runs; stripping avoids both and survives the cpp #if layer. fp-stability gains --precision-sim-binary: for the most flagrant macro-ambiguous hotspot, each expanded instance is perturbed alone (Verrou --source) on the precision binary and ranked, naming the responsible instance and showing its concrete generated code. The strip is gated to the simulation target only (pre/post run on CPU). Validated end-to-end: m_weno.fpp:238 (3 #:for instances) resolved to instance #0 = s_cb(i+3)-s_cb(i+1). toolchain/mfc/fp_precision_lines.py is pure + TDD'd (12 tests); normal build path is byte-identical and unaffected. 
--- CMakeLists.txt | 34 ++++++- toolchain/mfc/build.py | 1 + toolchain/mfc/cli/commands.py | 14 +++ toolchain/mfc/fp_precision_lines.py | 123 +++++++++++++++++++++++ toolchain/mfc/fp_stability.py | 98 ++++++++++++++++++ toolchain/mfc/test_fp_precision_lines.py | 112 +++++++++++++++++++++ 6 files changed, 380 insertions(+), 2 deletions(-) create mode 100644 toolchain/mfc/fp_precision_lines.py create mode 100644 toolchain/mfc/test_fp_precision_lines.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 83bbb8fe0e..532c377702 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ option(MFC_DOCUMENTATION "Build documentation" OFF option(MFC_ALL "Build everything" OFF) option(MFC_SINGLE_PRECISION "Build single precision" OFF) option(MFC_MIXED_PRECISION "Build mixed precision" OFF) +option(MFC_FP_PRECISION_LINES "Strip fypp markers for per-instance fp-stability attribution" OFF) if (MFC_ALL) set(MFC_PRE_PROCESS ON FORCE) @@ -433,8 +434,24 @@ macro(HANDLE_SOURCES target useCommon) cmake_path(GET fpp FILENAME fpp_filename) set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90") + # In a precision-lines build, Fypp writes a marked intermediate that is + # then stripped of its line markers (so each expanded instance compiles + # to a distinct physical line) before compilation; the strip step emits a + # .linemap.json sidecar. Otherwise Fypp writes ${f90} directly. Only the + # simulation target is analyzed by fp-stability, so pre/post_process are + # always built normally. 
+ set(_precision_lines OFF) + if (MFC_FP_PRECISION_LINES AND "${target}" STREQUAL "simulation") + set(_precision_lines ON) + endif() + if (_precision_lines) + set(f90_out "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.marked.f90") + else() + set(f90_out "${f90}") + endif() + add_custom_command( - OUTPUT ${f90} + OUTPUT ${f90_out} COMMAND ${FYPP_EXE} -m re -I "${CMAKE_BINARY_DIR}/include/${target}" -I "${${target}_DIR}/include" @@ -450,12 +467,25 @@ macro(HANDLE_SOURCES target useCommon) --line-length=999 --line-numbering-mode=nocontlines ${FYPP_GCOV_OPTS} - "${fpp}" "${f90}" + "${fpp}" "${f90_out}" DEPENDS "${fpp};${${target}_incs}" COMMENT "Preprocessing (Fypp) ${fpp_filename}" VERBATIM ) + if (_precision_lines) + add_custom_command( + OUTPUT ${f90} + COMMAND ${Python3_EXECUTABLE} + "${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py" + "${f90_out}" "${f90}" + "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.linemap.json" + DEPENDS "${f90_out};${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py" + COMMENT "Stripping markers (fp-precision-lines) ${fpp_filename}" + VERBATIM + ) + endif() + list(APPEND ${target}_SRCs ${f90}) endforeach() endmacro() diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 01efb1a9b1..01a0c8ece3 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -421,6 +421,7 @@ def configure(self, case: Case): flags.append(f"-DMFC_GCov={'ON' if ARG('gcov') else 'OFF'}") flags.append(f"-DMFC_Unified={'ON' if ARG('unified') else 'OFF'}") flags.append(f"-DMFC_Fastmath={'ON' if ARG('fastmath') else 'OFF'}") + flags.append(f"-DMFC_FP_PRECISION_LINES={'ON' if ARG('fp_precision_lines') else 'OFF'}") command = ["cmake"] + flags + ["-S", cmake_dirpath, "-B", build_dirpath] diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 54bbff4641..cff47c3ecf 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -141,6 +141,13 @@ default=False, dest="deps_only", ), + 
Argument( + name="fp-precision-lines", + help="(fp-stability) Strip fypp line markers so each expanded instance gets a distinct line; emits sidecars for per-instance attribution.", + action=ArgAction.STORE_TRUE, + default=False, + dest="fp_precision_lines", + ), ], examples=[ Example("./mfc.sh build", "Build all default targets (CPU)"), @@ -938,6 +945,13 @@ default=None, metavar="PATH", ), + Argument( + name="precision-sim-binary", + help="Path to a simulation binary built with --fp-precision-lines. When given, macro-ambiguous hotspots are disambiguated to the individual fypp-expanded instance.", + default=None, + dest="precision_sim_binary", + metavar="PATH", + ), Argument( name="samples", short="N", diff --git a/toolchain/mfc/fp_precision_lines.py b/toolchain/mfc/fp_precision_lines.py new file mode 100644 index 0000000000..6dc1df04c1 --- /dev/null +++ b/toolchain/mfc/fp_precision_lines.py @@ -0,0 +1,123 @@ +"""FP-stability precision-lines transform (Tier 2). + +A fypp #:for/#:def expansion emits many generated computations that all carry +the same cpp line marker (`# N "file.fpp"`), so DWARF — and therefore Verrou — +collapse every expanded instance onto one .fpp line. This transform removes the +fypp line markers from a generated .f90 so the compiler attributes each statement +to the generated file's own physical line (which *is* distinct per expanded +instance), and records a sidecar mapping each surviving physical line back to +(file, original .fpp line, instance index). Genuine cpp directives +(#if/#define/#endif/...) are preserved so conditional compilation is unchanged. + +When the stripped .f90 is compiled, Verrou attributes — and fp-stability ranks +and isolates via --source — per expanded instance rather than per source line. +Used only by a dedicated precision build (MFC_FP_PRECISION_LINES); the normal +build is unaffected. 
The mechanism (stripped markers -> instance-distinct +physical-line attribution -> per-instance Verrou --source isolation, surviving +the cpp #if layer) is validated against gfortran + Verrou. +""" + +import json +import os +import re + +# A fypp line marker: "# <N> "<file>"" possibly with trailing flags. A cpp +# conditional/define directive (#if, #define, #endif, ...) has a word, not a +# number, after the '#', so the two are unambiguous. +_FYPP_MARKER = re.compile(r'^#\s+(\d+)\s+"([^"]+)"') +# Any other preprocessor directive line (kept, but it is not a .fpp source line, +# so it neither consumes a source-line increment nor gets a sidecar entry). +_CPP_DIRECTIVE = re.compile(r"^\s*#") + + +def strip_markers(lines: list) -> tuple: + """Strip fypp line markers; return (output_lines, sidecar). + + sidecar maps each 1-based physical output line number to + {"file", "line", "instance"}: the .fpp file, the .fpp line that physical + line came from (auto-incremented within a marker region), and how many times + that marker's (file, line) had been seen before (0 = first/real occurrence, + >=1 = an expanded instance). + """ + seen = {} + out = [] + sidecar = {} + cur_file = None + cur_line = None + cur_instance = None + for raw in lines: + m = _FYPP_MARKER.match(raw) + if m: + cur_file = m.group(2) + cur_line = int(m.group(1)) + cur_instance = seen.get((cur_file, cur_line), 0) + seen[(cur_file, cur_line)] = cur_instance + 1 + continue # drop the marker line + out.append(raw) + if cur_file is None or _CPP_DIRECTIVE.match(raw): + # cpp directives are kept verbatim but are not .fpp source lines + continue + sidecar[len(out)] = {"file": cur_file, "line": cur_line, "instance": cur_instance} + cur_line += 1 # subsequent physical source lines map to the next .fpp line + return out, sidecar + + +def transform_file(in_path: str, out_path: str, sidecar_path: str) -> int: + """Strip a generated .f90 to its precision-lines variant.
+ + Reads in_path, writes the marker-stripped source to out_path and the sidecar + JSON to sidecar_path. Returns the number of mapped physical lines. + """ + with open(in_path) as fh: + lines = fh.readlines() + out, sidecar = strip_markers(lines) + with open(out_path, "w") as fh: + fh.writelines(out) + with open(sidecar_path, "w") as fh: + json.dump({str(k): v for k, v in sidecar.items()}, fh) + return len(sidecar) + + +# --- consumption side (Tier 2): locating and querying the sidecars --- + + +def sidecar_dir_for_binary(sim_bin: str) -> str: + """Map a precision simulation binary path to its sidecar directory. + + .../build/install/<hash>/bin/simulation -> .../build/staging/<hash>/fypp/simulation + """ + bin_dir = os.path.dirname(os.path.abspath(sim_bin)) # .../install/<hash>/bin + hash_dir = os.path.dirname(bin_dir) # .../install/<hash> + cfg_hash = os.path.basename(hash_dir) + build_root = os.path.dirname(os.path.dirname(hash_dir)) # .../build + return os.path.join(build_root, "staging", cfg_hash, "fypp", "simulation") + + +def sidecar_path(sidecar_dir: str, fpp_file: str) -> str: + """Sidecar JSON path for a .fpp file: <sidecar_dir>/<fpp basename>.linemap.json.""" + return os.path.join(sidecar_dir, os.path.basename(fpp_file) + ".linemap.json") + + +def load_sidecar(path: str) -> dict: + """Load a sidecar JSON into {physical_line:int -> {file, line, instance}}.""" + if not os.path.isfile(path): + return {} + with open(path) as fh: + raw = json.load(fh) + return {int(k): v for k, v in raw.items()} + + +def instances_of(sidecar: dict, fpp_file: str, fpp_line: int) -> list: + """Return [(physical_line, instance), ...]
(sorted by physical line) for every + expanded instance of fpp_file:fpp_line, matched by basename.""" + base = os.path.basename(fpp_file) + hits = [(physline, entry["instance"]) for physline, entry in sidecar.items() if os.path.basename(entry["file"]) == base and entry["line"] == fpp_line] + return sorted(hits) + + +if __name__ == "__main__": + import sys + + if len(sys.argv) != 4: + sys.exit("usage: fp_precision_lines.py ") + transform_file(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index fde268170c..8ac03d2b87 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -44,6 +44,15 @@ One run with --check-max-float=yes; reports locations where a double→float conversion would overflow to ±Inf. +I. Per-instance disambiguation (--precision-sim-binary PATH; opt-in) + A fypp #:for/#:def expansion collapses many generated computations onto one + .fpp line, so a macro-ambiguous hotspot cannot be pinned to a single runtime + instance. Given a simulation binary built with `--fp-precision-lines` (markers + stripped so each instance is a distinct line, plus .linemap.json sidecars), the + most flagrant macro-ambiguous hotspot is disambiguated: each expanded instance + is perturbed alone on the precision binary, ranking them to the responsible + instance and showing its concrete generated code. + Logs are saved to fp-stability-logs/ and uploaded as CI artifacts. On GitHub Actions: a step summary table and ::warning:: file annotations are emitted automatically so failing source lines appear in the PR diff. @@ -1149,6 +1158,67 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs return confirmed, set_dev, ranked +def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line): + """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot. 
+ + Uses a precision binary (built with --fp-precision-lines) in which each + expanded instance of hotspot_file:hotspot_line compiles to a distinct + physical .f90 line. The sidecar enumerates those physical lines; each is + perturbed alone (float mode, vs the precision binary's own nearest-rounding + reference) so the dominant instance is identified. + + Returns a list of {instance, physline, dev, snippet} sorted most-flagrant + first (empty if no sidecar / no instrumented instances). + """ + from . import fp_precision_lines as fpl + + sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin) + sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file)) + instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line) + if not instances: + return [] + + prec_dir = os.path.join(work_dir, "precision") + ref_dir = os.path.join(prec_dir, "ref") + os.makedirs(ref_dir, exist_ok=True) + gen_path = os.path.join(prec_dir, "gen_source.txt") + try: + _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest") + _run_simulation_verrou( + verrou_bin, + prec_sim_bin, + work_dir, + prec_dir, + rounding_mode="nearest", + extra_flags=[f"--gen-source={gen_path}"], + ) + except MFCException: + return [] + if not os.path.isfile(gen_path): + return [] + with open(gen_path) as fh: + gen_lines = fh.readlines() + + f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90") + compare = case["compare"] + results = [] + for physline, instance in instances: + src = _build_source_filter(gen_lines, [(f90_file, physline, physline)]) + if not src: + continue # this instance performs no instrumented FP op + dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}") + results.append( + { + "instance": instance, + "physline": physline, + "dev": dev or 0.0, + "snippet": _read_source_line(f90_file, physline).strip(), + } + ) + results.sort(key=lambda r: r["dev"], reverse=True) + 
return results + + def _run_case( case: dict, verrou_bin: str, @@ -1163,6 +1233,7 @@ def _run_case( run_cancellation: bool, run_mca: bool, run_float_max: bool, + prec_sim_bin: str = None, ) -> dict: name = case["name"] threshold = case["threshold"] @@ -1294,6 +1365,24 @@ def _run_case( except Exception as exc: cons.print(f" [bold yellow]dd_line confirmation error[/bold yellow]: {exc}") + # --- E3: per-instance disambiguation of the most flagrant macro-ambiguous hotspot --- + if prec_sim_bin and result["dd_line_locs"]: + macro_loc = next((loc for loc in result["dd_line_locs"] if loc.get("macro")), None) + if macro_loc: + cons.print(f" [dim]disambiguating fypp instances of {macro_loc['path']}:{macro_loc['start']} (precision binary)...[/dim]") + try: + insts = _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, macro_loc["path"], macro_loc["start"]) + macro_loc["instances"] = insts + if insts and insts[0]["dev"] > 0: + win = insts[0] + cons.print(f" flagrant instance: #{win['instance']} (.f90:{win['physline']}, dev={win['dev']:.3e}) {win['snippet']}") + elif insts: + cons.print(f" [dim]{len(insts)} instance(s) enumerated; none perturbed measurably (hotspot inert)[/dim]") + else: + cons.print(" [dim]no sidecar instances found for this hotspot[/dim]") + except Exception as exc: + cons.print(f" [bold yellow]instance disambiguation error[/bold yellow]: {exc}") + # --- F: cancellation detection --- if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") @@ -1460,6 +1549,9 @@ def _emit_github_summary(results: list, n_samples: int): tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") suffix = f" — {', '.join(tags)}" if tags else "" md.append(f"- `{where}`{suffix}") + for inst in loc.get("instances", [])[:8]: + flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else "" + md.append(f" - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`") snippet 
= _get_source_context(rel_path, start) if snippet: md.append(" ```fortran") @@ -1531,6 +1623,9 @@ def fp_stability(): run_cancellation = not ARG("no_cancellation") run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") + prec_sim_bin = ARG("precision_sim_binary") + if prec_sim_bin and not os.path.isfile(prec_sim_bin): + raise MFCException(f"precision simulation binary not found: {prec_sim_bin}") log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs") os.makedirs(log_dir, exist_ok=True) @@ -1540,6 +1635,8 @@ def fp_stability(): cons.print(f" verrou: {verrou_bin}") cons.print(f" simulation: {sim_bin}") cons.print(f" pre_process: {pp_bin}") + if prec_sim_bin: + cons.print(f" precision: {prec_sim_bin} (per-instance disambiguation)") cons.print(f" samples: {n_samples}") features = [] if run_float: @@ -1578,6 +1675,7 @@ def fp_stability(): run_cancellation, run_mca, run_float_max, + prec_sim_bin, ) except MFCException as exc: cons.print(f" [bold red]ERROR[/bold red]: {exc}") diff --git a/toolchain/mfc/test_fp_precision_lines.py b/toolchain/mfc/test_fp_precision_lines.py new file mode 100644 index 0000000000..ddb139af2d --- /dev/null +++ b/toolchain/mfc/test_fp_precision_lines.py @@ -0,0 +1,112 @@ +"""Unit tests for the fp-stability precision-lines transform (Tier 2, P1). + +A fypp #:for/#:def expansion re-marks many generated computations with the same +cpp line marker (`# N "file.fpp"`), so DWARF — and Verrou — collapse every +expanded instance onto one .fpp line. strip_markers removes the fypp line +markers so the compiler attributes to the generated .f90's own (instance- +distinct) physical lines, and emits a sidecar mapping each surviving physical +line back to (file, original .fpp line, instance index). Genuine cpp directives +(#if/#define/...) are kept so conditional compilation still works. 
+""" + +import os + +from mfc.fp_precision_lines import ( + instances_of, + sidecar_dir_for_binary, + sidecar_path, + strip_markers, +) + + +def test_strips_fypp_markers_and_keeps_code(): + out, sidecar = strip_markers(['# 700 "real.fpp"\n', " x = a - b\n"]) + assert out == [" x = a - b\n"] + assert sidecar == {1: {"file": "real.fpp", "line": 700, "instance": 0}} + + +def test_keeps_cpp_conditional_directives(): + lines = ['# 700 "real.fpp"\n', "#if defined(FOO)\n", " x = 1\n", "#endif\n"] + out, _ = strip_markers(lines) + assert out == ["#if defined(FOO)\n", " x = 1\n", "#endif\n"] + + +def test_repeated_marker_increments_instance(): + lines = ['# 700 "real.fpp"\n', " s1 = x\n", '# 700 "real.fpp"\n', " s2 = y\n"] + out, sidecar = strip_markers(lines) + assert out == [" s1 = x\n", " s2 = y\n"] + assert sidecar[1] == {"file": "real.fpp", "line": 700, "instance": 0} + assert sidecar[2] == {"file": "real.fpp", "line": 700, "instance": 1} + + +def test_distinguishes_fypp_marker_from_cpp_directive(): + # no fypp line markers here -> nothing stripped, no origin recorded + lines = ["#define X 1\n", "#if X\n", " a = 1\n", "#endif\n"] + out, sidecar = strip_markers(lines) + assert out == lines + assert sidecar == {} + + +def test_source_line_auto_increments_within_a_region(): + lines = ['# 700 "real.fpp"\n', " a = 1\n", " b = 2\n"] + _, sidecar = strip_markers(lines) + assert sidecar[1]["line"] == 700 + assert sidecar[2]["line"] == 701 + + +# --- Tier 2 consumption: locating + querying sidecars --- + + +def test_instances_of_returns_physical_lines_for_a_source_line(): + sidecar = { + 7: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 0}, + 11: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 1}, + 20: {"file": "/abs/src/simulation/m_weno.fpp", "line": 999, "instance": 0}, + } + # matched by basename; the repo-relative path from a dd_line hotspot still matches + assert instances_of(sidecar, "src/simulation/m_weno.fpp", 241) == [(7, 0), 
(11, 1)] + + +def test_instances_of_empty_when_no_match(): + sidecar = {7: {"file": "m_weno.fpp", "line": 241, "instance": 0}} + assert instances_of(sidecar, "m_weno.fpp", 999) == [] + assert instances_of(sidecar, "m_other.fpp", 241) == [] + + +def test_instances_of_sorted_by_physical_line(): + sidecar = { + 30: {"file": "f.fpp", "line": 5, "instance": 2}, + 10: {"file": "f.fpp", "line": 5, "instance": 0}, + 20: {"file": "f.fpp", "line": 5, "instance": 1}, + } + assert instances_of(sidecar, "f.fpp", 5) == [(10, 0), (20, 1), (30, 2)] + + +def test_sidecar_dir_for_binary_maps_install_to_staging(): + got = sidecar_dir_for_binary("/x/build/install/HASH/bin/simulation") + assert got == os.path.join("/x/build/staging/HASH/fypp/simulation") + + +def test_sidecar_path_uses_fpp_basename_and_linemap_suffix(): + got = sidecar_path("/x/staging/HASH/fypp/simulation", "src/simulation/m_weno.fpp") + assert got == os.path.join("/x/staging/HASH/fypp/simulation", "m_weno.fpp.linemap.json") + + +def test_cpp_directives_do_not_consume_a_source_line_increment(): + # the #else line must not advance the .fpp source line nor get a sidecar entry + lines = ['# 700 "real.fpp"\n', " a = 1\n", "#else\n", " b = 2\n"] + out, sidecar = strip_markers(lines) + assert out == [" a = 1\n", "#else\n", " b = 2\n"] + assert sidecar[1]["line"] == 700 # a = 1 + assert 2 not in sidecar # #else: kept, but not a source line + assert sidecar[3]["line"] == 701 # b = 2 (not 702) + + +def test_sidecar_line_numbers_are_physical_output_lines(): + # output physical line numbers (1-based, after stripping) are the keys + lines = ['# 10 "f"\n', " a = 1\n", '# 20 "f"\n', " b = 2\n", " c = 3\n"] + out, sidecar = strip_markers(lines) + assert out == [" a = 1\n", " b = 2\n", " c = 3\n"] + assert sidecar[1] == {"file": "f", "line": 10, "instance": 0} + assert sidecar[2] == {"file": "f", "line": 20, "instance": 0} + assert sidecar[3] == {"file": "f", "line": 21, "instance": 0} From bc7e516fd2fa065cf87be635019fd30e83abbaeb 
Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 1 Jun 2026 20:52:12 -0400 Subject: [PATCH 03/25] fp-stability: distinguish precision-sensitivity from cancellation-origin; surface caps + coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-line --source ranking measures sensitivity (where reduced precision most moves the output), which is structurally dominated by the time integrator / final accumulation: perturbing the last write to q_cons hits the output 1:1, while upstream errors get re-rounded there. Empirically, sod_standard's cancellation concentrates in m_weno.fpp (14 sites) and m_riemann_solvers.fpp (5), with m_time_steppers.fpp at just 1 — yet the time-stepper led the share ranking at 100%. Presenting it as 'most flagrant' conflated sensitivity with where ill-conditioning originates. Reframe: the dd_line/share view is relabeled 'single-precision sensitivity' with an explicit caveat (typically the time integrator, expected/benign, not a cancellation-origin finder); a new per-file cancellation-density line (_cancellation_by_file) headlines where cancellation actually concentrates; console + GitHub summary + inline annotations updated to keep the two signals distinct. Also: no silent caps (truncated dd_line/cancellation/float-max lists now report '…and N more'; annotations emit a dropped-count notice), and a coverage caveat in the summary header (N 1-D cases; a pass is not a guarantee for unexercised multi-D/viscous/MHD/IGR/bubble paths). _cancellation_by_file is pure + TDD'd. --- toolchain/mfc/fp_stability.py | 79 +++++++++++++++++++++++++----- toolchain/mfc/test_fp_stability.py | 22 +++++++++ 2 files changed, 89 insertions(+), 12 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 8ac03d2b87..440134b04c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -26,10 +26,13 @@ reproduce the instability. 
Lines that do not reproduce it are reported as unconfirmed (downgraded from ::warning:: to ::notice::). Each line is then perturbed alone and ranked by the share of the single-precision deviation it - reproduces, so the most flagrant computation is identified rather than a flat - list. Hotspots are additionally cross-referenced against the stage-F - cancellation sites (to name the offending subtraction) and flagged as - instance-ambiguous when the .fpp line sits inside a #:for/#:def expansion. + reproduces. NOTE: this is a *sensitivity* measure — where reduced precision + most moves the output — and is typically dominated by the time integrator / + final accumulation, NOT by where cancellation originates. Stage F (and its + per-file density) is the cancellation-origin view; the two usually differ. + Hotspots are cross-referenced against the stage-F cancellation sites and + flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def + expansion. F. Cancellation detection (--no-cancellation to skip) One run with --check-cancellation=yes; reports MFC source lines that @@ -1003,6 +1006,22 @@ def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: return dd_line_locs +def _cancellation_by_file(cancellation_locs: list) -> list: + """Aggregate cancellation sites by source file → [(basename, count)] sorted by + count (desc), ties by name. + + This is the cancellation-*origin* view (where ill-conditioning concentrates), + as opposed to the per-line --source share, which is a *sensitivity* view + (where reduced precision most moves the output — typically the time + integrator / final accumulation, regardless of where error originates). 
+ """ + counts = {} + for fname, _lineno in cancellation_locs: + base = os.path.basename(fname) + counts[base] = counts.get(base, 0) + 1 + return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])) + + def _run_dd_tool( dd_bin: str, dd_dir: str, @@ -1361,7 +1380,9 @@ def _run_case( cons.print(f" [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)") top = ranked[0] if ranked else None if top and top.get("share") is not None: - cons.print(f" most flagrant: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)") + cons.print(f" highest single-precision sensitivity: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)") + cons.print(" [dim](sensitivity = where reduced precision most moves the output, often the time") + cons.print(" [dim] integrator; not necessarily where cancellation originates — see cancellation sites)[/dim]") except Exception as exc: cons.print(f" [bold yellow]dd_line confirmation error[/bold yellow]: {exc}") @@ -1450,7 +1471,7 @@ def _emit_github_annotations(results: list): if not os.environ.get("GITHUB_ACTIONS"): return for r in results: - status = "FAIL" if not r["passed"] else "hotspot" + status = "FAIL" if not r["passed"] else "sensitivity" dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})" unconfirmed = r.get("dd_line_confirmed") is False @@ -1460,9 +1481,9 @@ def _emit_github_annotations(results: list): location += f",endLine={loc['end']}" note = dev_str if loc.get("share") is not None: - note += f" — reproduces {loc['share'] * 100:.0f}% of float-proxy alone" + note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)" if loc.get("cancellation"): - note += " — catastrophic cancellation site" + note += " — also a catastrophic cancellation site" if loc.get("macro"): note += f" — {loc['macro']}-expanded line, may represent 
multiple instances" if unconfirmed: @@ -1471,11 +1492,17 @@ def _emit_github_annotations(results: list): else: title = f"FP {status} [{r['name']}]" print(f"::warning {location},title={title}::{note}", flush=True) + n_dd = len(r.get("dd_line_locs", [])) + if n_dd > 3: + print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True) for fname, lineno in r.get("cancellation_locs", [])[:3]: loc = f"file={fname},line={lineno}" title = f"FP cancellation [{r['name']}]" print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True) + n_cc = len(r.get("cancellation_locs", [])) + if n_cc > 3: + print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) def _emit_github_summary(results: list, n_samples: int): @@ -1495,6 +1522,12 @@ def _emit_github_summary(results: list, n_samples: int): md = [] md.append("## FP Stability Results\n") md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n") + md.append( + f"> **Coverage:** {len(results)} one-dimensional case(s) " + f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these " + "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " + "they do not reach.\n" + ) # Main results table md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |") @@ -1528,10 +1561,19 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"| `{r['name']}` | {' | '.join(cols)} |") md.append("") - # dd_line hotspot sources — always shown (top 10 per case) with source context + # dd_line — single-precision SENSITIVITY (where precision most affects the + # output). 
This is distinct from cancellation origin (reported separately): + # the leader is typically the time integrator / final accumulation, because + # perturbing the last write moves the output directly while upstream errors + # get re-rounded there. Not a culprit-finder for ill-conditioning. cases_with_locs = [r for r in results if r["dd_line_locs"]] if cases_with_locs: - md.append("### Top FP hotspots (dd\\_line)\n") + md.append("### Single-precision sensitivity (dd\\_line)\n") + md.append( + "> Where reduced precision most moves the output — **typically the time integrator / " + "final accumulation, which is expected and benign**. This is *not* the same as where " + "cancellation originates; see **Catastrophic cancellation sites** below for that.\n" + ) _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} for r in cases_with_locs: status = "❌ FAIL" if not r["passed"] else "✅ pass" @@ -1558,6 +1600,8 @@ def _emit_github_summary(results: list, n_samples: int): for line in snippet.splitlines(): md.append(f" {line}") md.append(" ```") + if len(r["dd_line_locs"]) > 10: + md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_") md.append("") # dd_sym function names (collapsed, since less actionable than dd_line) @@ -1571,12 +1615,19 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"- `{sym}`") md.append("\n\n") - # Cancellation hotspots + # Cancellation hotspots — the ORIGIN view (where ill-conditioning concentrates). cases_with_cancel = [r for r in results if r.get("cancellation_locs")] if cases_with_cancel: md.append("### Catastrophic cancellation sites\n") + md.append( + "> Where cancellation actually originates (subtraction of nearly-equal values). 
This is " + "the numerically interesting signal — and it usually differs from the sensitivity leader " + "above.\n" + ) for r in cases_with_cancel: - md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s)\n") + by_file = _cancellation_by_file(r["cancellation_locs"]) + density = ", ".join(f"`{f}` ({n})" for f, n in by_file[:6]) + md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s); concentrates in: {density}\n") for fname, lineno in r["cancellation_locs"][:15]: md.append(f"- `{fname}:{lineno}`") snippet = _get_source_context(fname, lineno) @@ -1585,6 +1636,8 @@ def _emit_github_summary(results: list, n_samples: int): for line in snippet.splitlines(): md.append(f" {line}") md.append(" ```") + if len(r["cancellation_locs"]) > 15: + md.append(f"- _…and {len(r['cancellation_locs']) - 15} more site(s); see fp-stability-logs/_") md.append("") # Float-max overflow sites @@ -1595,6 +1648,8 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") for fname, lineno in r["float_max_locs"][:10]: md.append(f"- `{fname}:{lineno}`") + if len(r["float_max_locs"]) > 10: + md.append(f"- _…and {len(r['float_max_locs']) - 10} more site(s); see fp-stability-logs/_") md.append("") with open(summary_path, "a") as f: diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 694da7d906..ae188054f0 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -8,6 +8,7 @@ from mfc.fp_stability import ( _build_source_filter, + _cancellation_by_file, _confirm_decision, _macro_context_in_lines, _mark_cancellation, @@ -196,3 +197,24 @@ def test_mark_cancellation_false_for_different_basename(): locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}] _mark_cancellation(locs, [("m_bar.fpp", 5)]) assert locs[0]["cancellation"] is False + + +# --- cancellation-origin view: where cancellation concentrates --- + + +def 
test_cancellation_by_file_counts_and_sorts_by_density(): + locs = [ + ("src/simulation/m_weno.fpp", 10), + ("m_weno.fpp", 20), + ("a/m_riemann_solvers.fpp", 5), + ] + assert _cancellation_by_file(locs) == [("m_weno.fpp", 2), ("m_riemann_solvers.fpp", 1)] + + +def test_cancellation_by_file_breaks_ties_by_name(): + locs = [("z.fpp", 1), ("a.fpp", 2)] + assert _cancellation_by_file(locs) == [("a.fpp", 1), ("z.fpp", 1)] + + +def test_cancellation_by_file_empty(): + assert _cancellation_by_file([]) == [] From 1825dd9a84cc16a7a37904a550e2c2c97ca2229e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 1 Jun 2026 21:05:26 -0400 Subject: [PATCH 04/25] fp-stability: rank cancellation by severity (bits lost), not count; resolve continuations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes prompted by review: (1) site COUNT is not severity — one catastrophic cancellation outweighs many mild ones; (2) attribution can land on a continuation fragment, so the labelled line was unclear. Severity: Verrou exposes no per-site bit-count, but --cc-threshold-double is itself a severity filter (a site is only reported if it lost >= threshold bits). A second pass at 26 bits identifies SEVERE sites with no false positives (may under-count). Severe sites are listed first and labelled; the count-by-file view is demoted with an explicit 'count != severity' caveat. On sod_standard this surfaces the real origins — flux divergence (m_rhs), divided differences and smoothness indicators (m_weno), HLLC wave speeds (m_riemann) — and correctly omits the time integrator. Continuations: _statement_bounds_in_lines follows free-form '&' continuations (leading or trailing) to the logical-statement start; cancellation sites are de-duplicated and displayed as the full statement at its canonical start line, so a hit on a fragment resolves to the whole expression. Pure helpers TDD'd (60 toolchain tests). 
--- toolchain/mfc/fp_stability.py | 110 +++++++++++++++++++++++------ toolchain/mfc/test_fp_stability.py | 39 ++++++++++ 2 files changed, 127 insertions(+), 22 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 440134b04c..4d8b07b378 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -183,6 +183,53 @@ def _macro_context(fname: str, lineno: int) -> str: return _macro_context_in_lines(lines, lineno) +def _ends_with_continuation(line: str) -> bool: + """True if a free-form Fortran line ends with a continuation '&' (the last + non-blank token before any trailing comment).""" + code = line.split("!", 1)[0].rstrip() # drop trailing comment (string-'!' is rare; fine here) + return code.endswith("&") + + +def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple: + """Return the (start, end) 1-based physical line range of the Fortran logical + statement containing lineno, following '&' continuations in both directions. + + A hit reported on a continuation fragment thus resolves to the whole + statement, so the labelled location is the full expression rather than a + mid-statement piece. + """ + n = len(lines) + start = lineno + while start > 1 and _ends_with_continuation(lines[start - 2]): + start -= 1 + end = lineno + while end < n and _ends_with_continuation(lines[end - 1]): + end += 1 + return start, end + + +def _statement_at(fname: str, lineno: int) -> tuple: + """File-backed (start, end, text) for the logical statement at fname:lineno; + text is the joined statement. 
Returns (lineno, lineno, '') if unreadable.""" + if os.path.isabs(fname) and os.path.isfile(fname): + candidates = [fname] + else: + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) + if not candidates: + return lineno, lineno, "" + try: + with open(candidates[0]) as fh: + lines = fh.readlines() + except OSError: + return lineno, lineno, "" + if not 0 < lineno <= len(lines): + return lineno, lineno, "" + start, end = _statement_bounds_in_lines(lines, lineno) + # join physical lines, dropping the continuation '&' that may lead or trail each + text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end]) + return start, end, text + + def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: """Return True if any line in [start, end] contains non-trivial arithmetic. @@ -678,14 +725,22 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: return locs -def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list: - """Run with --check-cancellation=yes; return [(fname, line)] of MFC cancellation sites.""" - run_dir = os.path.join(work_dir, "cancellation") +# A site reported at this bit threshold has lost at least this many significant +# bits to cancellation — a *severity* floor (Verrou only reports a site when it +# exceeds the threshold, so a high-threshold pass has no false positives). 
+CANCEL_SEVERE_BITS = 26 + + +def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list: + """Run --check-cancellation at the given bit threshold; return [(fname, line)] + of MFC cancellation sites (subtractions losing >= `threshold` significant bits).""" + tag = f"cancellation_{threshold}" + run_dir = os.path.join(work_dir, tag) os.makedirs(run_dir, exist_ok=True) gen_path = os.path.join(run_dir, "cancel_gen.txt") flags = [ "--check-cancellation=yes", - "--cc-threshold-double=10", + f"--cc-threshold-double={threshold}", f"--cc-gen-file={gen_path}", ] try: @@ -695,7 +750,7 @@ def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: raw = _parse_cancel_gen(gen_path) filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] skipped = len(raw) - len(filtered) - if skipped: + if skipped and threshold == 10: cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") return filtered @@ -1277,6 +1332,7 @@ def _run_case( "dd_line_confirmed": None, "dd_line_confirm_dev": None, "cancellation_locs": [], + "cancellation_severe": set(), "mca_dev": None, "mca_sigbits": None, "float_max_locs": [], @@ -1411,7 +1467,10 @@ def _run_case( locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir) result["cancellation_locs"] = locs if locs: - cons.print(f" cancellation: {len(locs)} unique source location(s)") + # severity pass: which sites lose >= CANCEL_SEVERE_BITS bits + severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS)) + result["cancellation_severe"] = severe + cons.print(f" cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)") else: cons.print(" cancellation: none detected") # cross-reference: label dd_line hotspots that sit on a cancellation site @@ -1620,24 +1679,30 @@ def _emit_github_summary(results: list, n_samples: int): if cases_with_cancel: 
md.append("### Catastrophic cancellation sites\n") md.append( - "> Where cancellation actually originates (subtraction of nearly-equal values). This is " - "the numerically interesting signal — and it usually differs from the sensitivity leader " - "above.\n" + "> Where cancellation actually originates (subtraction of nearly-equal values). " + f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is " + "not severity — one severe site outweighs many mild ones, so the severe sites are listed " + "first. (Severe detection has no false positives but may under-count.)\n" ) for r in cases_with_cancel: - by_file = _cancellation_by_file(r["cancellation_locs"]) - density = ", ".join(f"`{f}` ({n})" for f, n in by_file[:6]) - md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s); concentrates in: {density}\n") - for fname, lineno in r["cancellation_locs"][:15]: - md.append(f"- `{fname}:{lineno}`") - snippet = _get_source_context(fname, lineno) - if snippet: - md.append(" ```fortran") - for line in snippet.splitlines(): - md.append(f" {line}") - md.append(" ```") - if len(r["cancellation_locs"]) > 15: - md.append(f"- _…and {len(r['cancellation_locs']) - 15} more site(s); see fp-stability-logs/_") + severe = r.get("cancellation_severe") or set() + # collapse continuation fragments to one entry per logical statement, + # severe statements first (the ones that matter) + stmts = {} # (basename, stmt_start) -> {where, severe, text} + for fname, lineno in sorted(r["cancellation_locs"]): + stmt_start, _end, stmt_text = _statement_at(fname, lineno) + key = (os.path.basename(fname), stmt_start) + entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text}) + if (fname, lineno) in severe: + entry["severe"] = True + ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"])) + n_severe_stmt = sum(1 for e in ordered if e["severe"]) + md.append(f"**`{r['name']}`** — {len(stmts)} 
statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n") + for e in ordered[:15]: + sev = " **severe**" if e["severe"] else "" + md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else "")) + if len(ordered) > 15: + md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_") md.append("") # Float-max overflow sites @@ -1746,6 +1811,7 @@ def fp_stability(): "dd_line_confirmed": None, "dd_line_confirm_dev": None, "cancellation_locs": [], + "cancellation_severe": set(), "mca_dev": None, "mca_sigbits": None, "float_max_locs": [], diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index ae188054f0..41b4502579 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -13,6 +13,7 @@ _macro_context_in_lines, _mark_cancellation, _rank_locs, + _statement_bounds_in_lines, ) # --- #2: fypp macro-expansion context detection --- @@ -218,3 +219,41 @@ def test_cancellation_by_file_breaks_ties_by_name(): def test_cancellation_by_file_empty(): assert _cancellation_by_file([]) == [] + + +# --- Fortran line-continuation handling (correct-line labeling) --- + + +def test_statement_bounds_single_line(): + lines = [" a = b - c\n"] + assert _statement_bounds_in_lines(lines, 1) == (1, 1) + + +def test_statement_bounds_spans_continuation_from_first_line(): + lines = [" poly = (s_cb(i+3) - s_cb(i+1)) * &\n", " (s_cb(i+2) - s_cb(i))\n"] + assert _statement_bounds_in_lines(lines, 1) == (1, 2) + + +def test_statement_bounds_from_middle_continuation_line(): + # a hit on the continuation fragment must resolve to the statement start + lines = [" x = a + &\n", " b + &\n", " c\n"] + assert _statement_bounds_in_lines(lines, 2) == (1, 3) + assert _statement_bounds_in_lines(lines, 3) == (1, 3) + + +def test_statement_bounds_ignores_ampersand_in_trailing_comment_logic(): + # a real continuation '&' before a trailing comment still continues + lines = [" x = a & ! 
note\n", " + b\n"] + assert _statement_bounds_in_lines(lines, 1) == (1, 2) + + +def test_statement_bounds_non_continuation_neighbors(): + lines = [" x = 1\n", " y = 2\n", " z = 3\n"] + assert _statement_bounds_in_lines(lines, 2) == (2, 2) + + +def test_statement_bounds_with_leading_ampersand_continuation(): + # the MFC WENO style: line ends with '&' and the next line *starts* with '&' + lines = [" beta = x**2 &\n", " & + eps\n"] + assert _statement_bounds_in_lines(lines, 1) == (1, 2) + assert _statement_bounds_in_lines(lines, 2) == (1, 2) From b9e790f5bedeeb2d2c36668c29087afedf181f8c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 1 Jun 2026 21:19:26 -0400 Subject: [PATCH 05/25] fp-stability: scale-free pass/fail via significant bits, replacing 6 hand-tuned thresholds Each case had a hand-tuned absolute L-inf threshold spanning 1e-13..2e-7 (six orders), driven by field magnitude and conditioning. Maintaining per-case thresholds is fragile. Normalizing the deviation by the field's peak magnitude removes the scale, so a single global criterion suffices. Pass/fail is now sig_bits = -log2(max_dev / max|ref|) >= MIN_SIG_BITS (24 = single precision retained under random rounding). The per-case 'threshold' field is removed from CASES; pass/fail, the VPREC FAIL marker, console, summary table, and inline annotations all report bits-retained vs the floor. The dd_sym/dd_line oracle keeps its own float-proxy-derived threshold (unchanged). Validated: max_dev spans 1e-14..7e-8 across the 6 cases but sig_bits is a tight 30.3..48.7 band, all >= 24 with margin; classification matches the prior thresholds (6/6 pass). Pure _sig_bits/_stability_pass are TDD'd (67 toolchain tests). A per-case auto-measured baseline + regression delta would add sensitivity for moderate drops; deferred as a heavier change. 
--- toolchain/mfc/fp_stability.py | 83 +++++++++++++++++++++--------- toolchain/mfc/test_fp_stability.py | 40 ++++++++++++++ 2 files changed, 98 insertions(+), 25 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 4d8b07b378..a5cd636562 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -4,7 +4,8 @@ Features -------- A. Stability suite (always) - N random-rounding samples per case, threshold-based PASS/FAIL. + N random-rounding samples per case; PASS/FAIL on significant bits retained + (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold). B. Float proxy (--no-float-proxy to skip) One run with --rounding-mode=float — deterministic proxy for @@ -92,6 +93,35 @@ # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. VPREC_MANTISSA_BITS = [52, 23, 16, 10] +# Stability pass/fail (stage A) is scale-free: a case must retain at least this +# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)). +# 24 ~= single precision. One global floor replaces per-case absolute thresholds +# (which spanned 6 orders of magnitude purely from field scale + conditioning); +# normalising by the field scale collapses that, so a single number suffices. +MIN_SIG_BITS = 24 + +# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy- +# derived threshold is supplied (callers always pass one, so this is only a guard). +_DD_FALLBACK_THRESHOLD = 1e-12 + + +def _sig_bits(max_dev: float, ref_scale: float) -> float: + """Significant bits retained = -log2(max_dev / ref_scale). + + Scale-free: dividing the deviation by the field's peak magnitude removes the + absolute scale, leaving only the conditioning. Zero deviation (or zero + scale) returns 53.0 = full double precision retained. 
+ """ + if not (max_dev > 0) or not (ref_scale > 0): + return 53.0 + return -math.log2(max_dev / ref_scale) + + +def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool: + """A case passes when it retains at least `floor` significant bits.""" + return _sig_bits(max_dev, ref_scale) >= floor + + # Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. _LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) @@ -341,8 +371,9 @@ def _merge(*dicts): # name - unique identifier used in log paths and console output # description - human-readable summary # compare - D/ output files compared between reference and perturbed runs -# threshold - max L∞ deviation allowed before the case is declared FAIL # ill_cond - known source of cancellation (empty string = none expected) +# Pass/fail is scale-free (>= MIN_SIG_BITS significant bits retained), so cases +# need no per-case deviation threshold regardless of field magnitude. 
# pre - parameters for pre_process (generates initial conditions) # sim - parameters for simulation CASES = [ @@ -350,7 +381,6 @@ def _merge(*dicts): "name": "sod_standard", "description": "1-D standard Sod, p_L/p_R=10, ideal gas (well-conditioned baseline)", "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"], - "threshold": 1e-13, "ill_cond": "", "pre": _merge( _BASE_PRE, @@ -373,7 +403,6 @@ def _merge(*dicts): "name": "sod_strong", "description": "1-D Sod, p_L/p_R=100,000, ideal gas", "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"], - "threshold": 1e-10, "ill_cond": "HLLC xi factor: (s_L - vel_L)/(s_L - s_S) cancels near sonic contact", "pre": _merge( _BASE_PRE, @@ -396,8 +425,7 @@ def _merge(*dicts): "name": "water_stiffened", "description": "1-D water shock, stiffened EOS (pi_inf=4046)", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 1e-8, - "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000) [threshold loosened until reduced-energy (Etilde) scheme is merged]", + "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000)", "pre": _merge( _BASE_PRE, _WATER_EOS, @@ -419,7 +447,6 @@ def _merge(*dicts): "name": "air_water_interface", "description": "1-D air/water isobaric contact (two-fluid, pi_inf=4046)", "compare": ["cons.1.00.000050.dat", "cons.4.00.000050.dat", "cons.5.00.000050.dat"], - "threshold": 1e-10, "ill_cond": "Mixed-cell pressure recovery: E-alpha_w*gamma_w*pi_inf cancels when alpha_w<<1", "pre": _merge( _BASE_PRE, @@ -460,7 +487,6 @@ def _merge(*dicts): "name": "bubble_rp", "description": "1-D bubbly water, pressure step 2:1 driving Rayleigh-Plesset oscillations (nb=1, Keller-Miksis)", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 1e-8, "ill_cond": "RP ODE: (p_bub - p_ext) cancels near bubble equilibrium", "pre": _merge( _BASE_PRE, @@ -528,8 +554,7 @@ def _merge(*dicts): "name": "low_mach", 
"description": "1-D water shock with low_Mach=1 HLLC correction active", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 2e-7, - "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0 (threshold loosened to 2e-7 to absorb MCA sampling variance)", + "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0", "pre": _merge( _BASE_PRE, _WATER_EOS, @@ -1121,7 +1146,7 @@ def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_di dd_run_sh = os.path.join(dd_dir, "dd_run.sh") dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else case["threshold"]) + _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) @@ -1145,7 +1170,7 @@ def _run_dd_line( os.makedirs(dd_dir, exist_ok=True) dd_run_sh = os.path.join(dd_dir, "dd_run.sh") dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - effective_threshold = threshold if threshold is not None else case["threshold"] + effective_threshold = threshold if threshold is not None else _DD_FALLBACK_THRESHOLD _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") @@ -1310,21 +1335,20 @@ def _run_case( prec_sim_bin: str = None, ) -> dict: name = case["name"] - threshold = case["threshold"] compare = case["compare"] cons.print(f"[bold]{name}[/bold]: {case['description']}") cons.indent() if case["ill_cond"]: cons.print(f" ill-conditioning: 
{case['ill_cond']}") - cons.print(f" threshold: {threshold:.0e}") + cons.print(f" pass floor: >= {MIN_SIG_BITS} significant bits retained") work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-") result = { "name": name, "passed": False, "max_dev": float("inf"), - "threshold": threshold, + "sig_bits": None, "float_proxy": None, "vprec": [], "dd_sym_syms": [], @@ -1348,6 +1372,9 @@ def _run_case( _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest") # --- A: random-rounding stability samples --- + # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale), + # vs one global floor (no per-case hand-tuned absolute threshold). + ref_scale = _max_abs_np(ref_dir, compare) max_dev = 0.0 cons.print(f" [dim]random-rounding runs (N={n_samples})...[/dim]") for i in range(n_samples): @@ -1356,11 +1383,13 @@ def _run_case( _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random") max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - passed = max_dev <= threshold + sig_bits = _sig_bits(max_dev, ref_scale) + passed = sig_bits >= MIN_SIG_BITS result["passed"] = passed result["max_dev"] = max_dev + result["sig_bits"] = sig_bits tag = "[bold green]PASS[/bold green]" if passed else "[bold red]FAIL[/bold red]" - cons.print(f" {tag} max_dev={max_dev:.3e} threshold={threshold:.0e}") + cons.print(f" {tag} {sig_bits:.1f} bits retained (floor {MIN_SIG_BITS}) max_dev={max_dev:.3e}") # --- B: float proxy --- if run_float: @@ -1383,7 +1412,7 @@ def _run_case( marker = "" if dev == float("inf"): marker = " [red]crashed[/red]" - elif dev > threshold: + elif _sig_bits(dev, ref_scale) < MIN_SIG_BITS: marker = " [red]FAIL[/red]" cons.print(f" {bits:2d} bits{label_str}: dev={dev:.3e}{marker}") @@ -1531,7 +1560,9 @@ def _emit_github_annotations(results: list): return for r in results: status = "FAIL" if not r["passed"] else "sensitivity" - dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})" + 
_sb = r.get("sig_bits") + _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a" + dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}" unconfirmed = r.get("dd_line_confirmed") is False for loc in r.get("dd_line_locs", [])[:3]: @@ -1588,17 +1619,19 @@ def _emit_github_summary(results: list, n_samples: int): "they do not reach.\n" ) - # Main results table - md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |") - md.append("|------|:------:|--------:|--------:|--------:|:------:|") + # Main results table — pass/fail is scale-free: bits retained vs a single floor + md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") + md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |") + md.append("|------|:------:|:------:|--------:|--------:|:------:|") for r in results: status = "✅" if r["passed"] else "❌" + bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—" - md.append(f"| `{r['name']}` | {status} | {r['max_dev']:.2e} | {r['threshold']:.0e} | {fp} | {sb} |") + md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |") md.append("") - # VPREC sweep — one column per bit level, ❌ where dev > threshold + # VPREC sweep — one column per bit level, ❌ where bits retained < floor if any(r["vprec"] for r in results): _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) @@ -1803,7 +1836,7 @@ def fp_stability(): "name": case["name"], "passed": False, "max_dev": float("inf"), - "threshold": case["threshold"], + "sig_bits": None, "float_proxy": None, "vprec": [], "dd_sym_syms": [], diff --git a/toolchain/mfc/test_fp_stability.py 
b/toolchain/mfc/test_fp_stability.py index 41b4502579..056193c519 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -7,12 +7,15 @@ """ from mfc.fp_stability import ( + MIN_SIG_BITS, _build_source_filter, _cancellation_by_file, _confirm_decision, _macro_context_in_lines, _mark_cancellation, _rank_locs, + _sig_bits, + _stability_pass, _statement_bounds_in_lines, ) @@ -221,6 +224,43 @@ def test_cancellation_by_file_empty(): assert _cancellation_by_file([]) == [] +# --- scale-free pass/fail: significant bits retained --- + + +def test_sig_bits_relative_deviation(): + # max_dev/ref_scale = 1e-14 -> ~46.5 retained bits + assert 46 < _sig_bits(1e-14, 1.0) < 47 + + +def test_sig_bits_is_scale_free(): + # same relative deviation -> same bits regardless of absolute magnitude + assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9 + + +def test_sig_bits_zero_deviation_is_full_precision(): + assert _sig_bits(0.0, 1.0) == 53.0 + + +def test_sig_bits_zero_scale_is_safe(): + assert _sig_bits(1e-12, 0.0) == 53.0 + + +def test_sig_bits_deviation_at_scale_is_unstable(): + # deviation as large as the field -> <= 0 retained bits + assert _sig_bits(1.0, 1.0) <= 0.0 + + +def test_stability_pass_uses_global_floor(): + # well-conditioned: ~46 bits >= floor + assert _stability_pass(1e-14, 1.0, MIN_SIG_BITS) is True + # catastrophic: deviation at field scale -> fails + assert _stability_pass(0.5, 1.0, MIN_SIG_BITS) is False + + +def test_min_sig_bits_is_single_precision_floor(): + assert MIN_SIG_BITS == 24 + + # --- Fortran line-continuation handling (correct-line labeling) --- From 84bec6d3b30d1575887e8c415e924557551d7e95 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 07:41:12 -0400 Subject: [PATCH 06/25] fp-stability: lead with cancellation origins, report digits lost (not 'severe'), collapse sensitivity Review feedback on the summary: (1) it buried the interesting cancellation origins below the long, 
mostly-expected sensitivity list; (2) 'severe' is a binary label when an actual magnitude is far more useful; (3) 'bits lost' is not intuitive. Reorder: the cancellation-origins section now leads (right after the results table), ranked worst-first; the single-precision sensitivity list (dominated by the benign time integrator) is collapsed into a <details> block
. Severity as a number: a sweep of --cc-threshold-double levels [10,20,30,40,48] buckets each site by the highest it survives (_cancellation_severity), giving per-site bits lost (a lower bound). Bits are translated to decimal digits (a double carries ~16; _digits_left) so each entry reads e.g. '>= 12 digits lost (~4 of 16 left)' with the full statement. On sod_standard the worst origins (flux divergence, divided differences, HLLC wave speeds) lose ~14 of 16 digits; the sweep discriminates (23 sites >=10 bits, 11 >=48). 69 toolchain tests. --- toolchain/mfc/fp_stability.py | 115 ++++++++++++++++++----------- toolchain/mfc/test_fp_stability.py | 18 +++++ 2 files changed, 88 insertions(+), 45 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index a5cd636562..d826ca56c9 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -750,10 +750,28 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: return locs -# A site reported at this bit threshold has lost at least this many significant -# bits to cancellation — a *severity* floor (Verrou only reports a site when it -# exceeds the threshold, so a high-threshold pass has no false positives). -CANCEL_SEVERE_BITS = 26 +# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity +# filter: a site is reported only if it lost >= the threshold bits. Sweeping these +# levels and taking the highest each site survives gives a per-site "bits lost" +# severity (a lower bound — no false positives). 48 ~ full double mantissa. 
+CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] + + +def _cancellation_severity(level_sites: list) -> dict: + """Given [(threshold, [sites])], return {site: highest threshold it survives} + = the per-site bits-lost severity (a lower bound).""" + sev = {} + for level, sites in level_sites: + for site in sites: + if level > sev.get(site, 0): + sev[site] = level + return sev + + +def _digits_left(bits_lost: float) -> float: + """Approximate trustworthy decimal digits remaining after losing `bits_lost` + bits of a double's 53-bit mantissa (~15.95 digits full).""" + return max(0.0, (53 - bits_lost) / math.log2(10)) def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list: @@ -1356,7 +1374,7 @@ def _run_case( "dd_line_confirmed": None, "dd_line_confirm_dev": None, "cancellation_locs": [], - "cancellation_severe": set(), + "cancellation_bits": {}, "mca_dev": None, "mca_sigbits": None, "float_max_locs": [], @@ -1493,13 +1511,15 @@ def _run_case( if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") try: - locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir) + # sweep bit thresholds to get per-site severity (bits lost) + level_sites = [(level, _run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS] + locs = level_sites[0][1] # lowest threshold = full list + bits = _cancellation_severity(level_sites) result["cancellation_locs"] = locs + result["cancellation_bits"] = bits if locs: - # severity pass: which sites lose >= CANCEL_SEVERE_BITS bits - severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS)) - result["cancellation_severe"] = severe - cons.print(f" cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)") + worst = max(bits.values()) if bits else 0 + cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 
digits") else: cons.print(" cancellation: none detected") # cross-reference: label dd_line hotspots that sit on a cancellation site @@ -1631,6 +1651,40 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |") md.append("") + # Cancellation ORIGINS — where ill-conditioning actually arises, led with the + # most severe (most bits lost). The numerically interesting signal; the + # sensitivity list further down is dominated by the (benign) time integrator. + cases_with_cancel = [r for r in results if r.get("cancellation_locs")] + if cases_with_cancel: + md.append("### Catastrophic cancellation origins (ranked by digits lost)\n") + md.append( + "> Subtraction of nearly-equal values loses leading significant digits. A double carries " + "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away " + "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only " + "single-precision trust. 
Site *count* is not severity — one site losing many digits outweighs " + "many mild ones.\n" + ) + for r in cases_with_cancel: + site_bits = r.get("cancellation_bits") or {} + # collapse continuation fragments to one entry per logical statement, + # keeping the worst bits-lost seen on that statement + stmts = {} # (basename, stmt_start) -> {where, bits, text} + for fname, lineno in r["cancellation_locs"]: + stmt_start, _end, stmt_text = _statement_at(fname, lineno) + key = (os.path.basename(fname), stmt_start) + e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text}) + e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0)) + ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"])) + if ordered: + w = ordered[0] + md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") + for e in ordered[:15]: + lost = e["bits"] / math.log2(10) + md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else "")) + if len(ordered) > 15: + md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_") + md.append("") + # VPREC sweep — one column per bit level, ❌ where bits retained < floor if any(r["vprec"] for r in results): _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} @@ -1660,11 +1714,12 @@ def _emit_github_summary(results: list, n_samples: int): # get re-rounded there. Not a culprit-finder for ill-conditioning. cases_with_locs = [r for r in results if r["dd_line_locs"]] if cases_with_locs: - md.append("### Single-precision sensitivity (dd\\_line)\n") + md.append("
") + md.append("Single-precision sensitivity (dd_line) — usually the time integrator; expand for details\n") md.append( "> Where reduced precision most moves the output — **typically the time integrator / " - "final accumulation, which is expected and benign**. This is *not* the same as where " - "cancellation originates; see **Catastrophic cancellation sites** below for that.\n" + "final accumulation, which is expected and benign**. This is *not* where cancellation " + "originates (that's the section above); it shows where precision matters most.\n" ) _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} for r in cases_with_locs: @@ -1695,6 +1750,7 @@ def _emit_github_summary(results: list, n_samples: int): if len(r["dd_line_locs"]) > 10: md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_") md.append("") + md.append("
\n") # dd_sym function names (collapsed, since less actionable than dd_line) cases_with_syms = [r for r in results if r["dd_sym_syms"]] @@ -1707,37 +1763,6 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"- `{sym}`") md.append("\n
\n") - # Cancellation hotspots — the ORIGIN view (where ill-conditioning concentrates). - cases_with_cancel = [r for r in results if r.get("cancellation_locs")] - if cases_with_cancel: - md.append("### Catastrophic cancellation sites\n") - md.append( - "> Where cancellation actually originates (subtraction of nearly-equal values). " - f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is " - "not severity — one severe site outweighs many mild ones, so the severe sites are listed " - "first. (Severe detection has no false positives but may under-count.)\n" - ) - for r in cases_with_cancel: - severe = r.get("cancellation_severe") or set() - # collapse continuation fragments to one entry per logical statement, - # severe statements first (the ones that matter) - stmts = {} # (basename, stmt_start) -> {where, severe, text} - for fname, lineno in sorted(r["cancellation_locs"]): - stmt_start, _end, stmt_text = _statement_at(fname, lineno) - key = (os.path.basename(fname), stmt_start) - entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text}) - if (fname, lineno) in severe: - entry["severe"] = True - ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"])) - n_severe_stmt = sum(1 for e in ordered if e["severe"]) - md.append(f"**`{r['name']}`** — {len(stmts)} statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n") - for e in ordered[:15]: - sev = " **severe**" if e["severe"] else "" - md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else "")) - if len(ordered) > 15: - md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_") - md.append("") - # Float-max overflow sites cases_with_fmax = [r for r in results if r.get("float_max_locs")] if cases_with_fmax: @@ -1844,7 +1869,7 @@ def fp_stability(): "dd_line_confirmed": None, "dd_line_confirm_dev": None, "cancellation_locs": [], - 
"cancellation_severe": set(), + "cancellation_bits": {}, "mca_dev": None, "mca_sigbits": None, "float_max_locs": [], diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 056193c519..e89694d19b 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -10,6 +10,7 @@ MIN_SIG_BITS, _build_source_filter, _cancellation_by_file, + _cancellation_severity, _confirm_decision, _macro_context_in_lines, _mark_cancellation, @@ -224,6 +225,23 @@ def test_cancellation_by_file_empty(): assert _cancellation_by_file([]) == [] +# --- per-site cancellation severity (bits lost), from a threshold sweep --- + + +def test_cancellation_severity_takes_highest_surviving_threshold(): + level_sites = [ + (10, [("a.fpp", 1), ("b.fpp", 2)]), + (20, [("a.fpp", 1)]), + (30, [("a.fpp", 1)]), + ] + # a.fpp:1 survives to 30 bits; b.fpp:2 only at 10 + assert _cancellation_severity(level_sites) == {("a.fpp", 1): 30, ("b.fpp", 2): 10} + + +def test_cancellation_severity_empty(): + assert _cancellation_severity([]) == {} + + # --- scale-free pass/fail: significant bits retained --- From d45bc5bb8819dadde8414c84cffc26ea4537809b Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 07:58:36 -0400 Subject: [PATCH 07/25] fp-stability: de-duplicate helpers from the review additions (no behavior change) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup pass over the ~680 lines this branch added to fp_stability.py. Extracts shared helpers that had accreted across the six feature commits, with identical behavior (69 toolchain tests + ruff + precheck green; emitted console/summary text unchanged): - _resolve_source / _read_source_lines: the 'abs-path-or-glob-under-src(-then-tree)' + readlines block was repeated in _read_source_line, _macro_context, _statement_at, _get_source_context. 
A search_whole_tree flag preserves the one difference (only _get_source_context fell back to the whole tree). - _blank_result(name): the 15-field result dict was written verbatim twice. _find_dd_tool(verrou_bin, tool): merges _find_dd_sym/_find_dd_line. _setup_dd_run: shared dd_run.sh/dd_cmp.py setup + threshold-default for dd_sym and dd_line. _capture_gen_source: shared nearest --gen-source capture for confirmation and disambiguation. _more_md: the '…and N more' truncation footer used in three summary sections. --- toolchain/mfc/fp_stability.py | 244 ++++++++++++++++------------------ 1 file changed, 113 insertions(+), 131 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index d826ca56c9..961e48d001 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -159,20 +159,34 @@ def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool: ) -def _read_source_line(fname: str, lineno: int) -> str: - """Return the raw source line at lineno (1-based), or '' if unavailable.""" +def _resolve_source(fname: str, search_whole_tree: bool = False) -> str: + """Resolve a (possibly bare) source filename to an existing path, or '' if not + found. 
An absolute existing path is used as-is; otherwise the basename is + located recursively under src/ (then the whole tree if `search_whole_tree`).""" if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - return "" + return fname + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) + if not candidates and search_whole_tree: + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) + return candidates[0] if candidates else "" + + +def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list: + """Resolve `fname` and return its lines (with newlines), or [] if unreadable.""" + path = _resolve_source(fname, search_whole_tree) + if not path: + return [] try: - with open(candidates[0]) as fh: - lines = fh.readlines() - return lines[lineno - 1] if 0 < lineno <= len(lines) else "" + with open(path) as fh: + return fh.readlines() except OSError: - return "" + return [] + + +def _read_source_line(fname: str, lineno: int) -> str: + """Return the raw source line at lineno (1-based), or '' if unavailable.""" + lines = _read_source_lines(fname) + return lines[lineno - 1] if 0 < lineno <= len(lines) else "" def _macro_context_in_lines(lines: list, lineno: int) -> str: @@ -199,16 +213,8 @@ def _macro_context_in_lines(lines: list, lineno: int) -> str: def _macro_context(fname: str, lineno: int) -> str: """File-backed wrapper around _macro_context_in_lines; '' path safe.""" - if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - return None - try: - with open(candidates[0]) as fh: - lines = fh.readlines() - except OSError: + lines = _read_source_lines(fname) + if 
not lines: return None return _macro_context_in_lines(lines, lineno) @@ -241,17 +247,7 @@ def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple: def _statement_at(fname: str, lineno: int) -> tuple: """File-backed (start, end, text) for the logical statement at fname:lineno; text is the joined statement. Returns (lineno, lineno, '') if unreadable.""" - if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - return lineno, lineno, "" - try: - with open(candidates[0]) as fh: - lines = fh.readlines() - except OSError: - return lineno, lineno, "" + lines = _read_source_lines(fname) if not 0 < lineno <= len(lines): return lineno, lineno, "" start, end = _statement_bounds_in_lines(lines, lineno) @@ -283,18 +279,8 @@ def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path. Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree. 
""" - if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) - if not candidates: - return "" - try: - with open(candidates[0]) as fh: - lines = fh.readlines() - except OSError: + lines = _read_source_lines(fname, search_whole_tree=True) + if not lines: return "" start = max(0, lineno - context - 1) end = min(len(lines), lineno + context) @@ -589,13 +575,10 @@ def _find_binary(name: str) -> str: return max(candidates, key=os.path.getmtime) if candidates else "" -def _find_dd_sym(verrou_bin: str) -> str: - c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_sym") - return c if os.path.isfile(c) else "" - - -def _find_dd_line(verrou_bin: str) -> str: - c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_line") +def _find_dd_tool(verrou_bin: str, tool: str) -> str: + """Path to a verrou_dd_* tool (e.g. 'verrou_dd_sym') next to the verrou binary, + or '' if absent.""" + c = os.path.join(os.path.dirname(verrou_bin), tool) return c if os.path.isfile(c) else "" @@ -1152,19 +1135,26 @@ def _run_dd_tool( return summary_lines +def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float): + """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their + paths. 
The threshold falls back to _DD_FALLBACK_THRESHOLD when unset.""" + os.makedirs(dd_dir, exist_ok=True) + dd_run_sh = os.path.join(dd_dir, "dd_run.sh") + dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") + _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) + _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) + return dd_run_sh, dd_cmp_py + + def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: """Run verrou_dd_sym; return list of responsible symbol names.""" - dd_bin = _find_dd_sym(verrou_bin) + dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym") if not dd_bin: cons.print(" [dim]verrou_dd_sym not found; skipping delta-debug[/dim]") return [] dd_dir = os.path.join(log_dir, case["name"]) - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) + dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) @@ -1179,18 +1169,13 @@ def _run_dd_line( threshold: float = None, ) -> list: """Run verrou_dd_line; return [{path, start, end, macro}] location dicts.""" - dd_bin = _find_dd_line(verrou_bin) + dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line") if not dd_bin: cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") return [] dd_dir = os.path.join(log_dir, case["name"]) - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - 
effective_threshold = threshold if threshold is not None else _DD_FALLBACK_THRESHOLD - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) + dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) @@ -1217,6 +1202,26 @@ def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_li return _max_diff_np(ref_dir, run_dir, compare) +def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path): + """Run nearest-rounding with --gen-source to capture the symbol-correct + executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure.""" + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="nearest", + extra_flags=[f"--gen-source={gen_path}"], + ) + except MFCException: + return None + if not os.path.isfile(gen_path): + return None + with open(gen_path) as fh: + return fh.readlines() + + def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy): """Positive control for dd_line: perturb ONLY the suspect lines and confirm the instability reproduces, then rank each line by its individual share. 
@@ -1237,22 +1242,9 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs return None, None, dd_line_locs conf_dir = os.path.join(work_dir, "confirm") os.makedirs(conf_dir, exist_ok=True) - gen_path = os.path.join(conf_dir, "gen_source.txt") - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - conf_dir, - rounding_mode="nearest", - extra_flags=[f"--gen-source={gen_path}"], - ) - except MFCException: + gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt")) + if gen_lines is None: return None, None, dd_line_locs - if not os.path.isfile(gen_path): - return None, None, dd_line_locs - with open(gen_path) as fh: - gen_lines = fh.readlines() compare = case["compare"] # whole-set positive control @@ -1298,23 +1290,13 @@ def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_fi prec_dir = os.path.join(work_dir, "precision") ref_dir = os.path.join(prec_dir, "ref") os.makedirs(ref_dir, exist_ok=True) - gen_path = os.path.join(prec_dir, "gen_source.txt") try: _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest") - _run_simulation_verrou( - verrou_bin, - prec_sim_bin, - work_dir, - prec_dir, - rounding_mode="nearest", - extra_flags=[f"--gen-source={gen_path}"], - ) except MFCException: return [] - if not os.path.isfile(gen_path): + gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt")) + if gen_lines is None: return [] - with open(gen_path) as fh: - gen_lines = fh.readlines() f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90") compare = case["compare"] @@ -1336,6 +1318,27 @@ def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_fi return results +def _blank_result(name: str) -> dict: + """A result dict with every field at its empty/unmeasured default.""" + return { + "name": name, + "passed": 
False, + "max_dev": float("inf"), + "sig_bits": None, + "float_proxy": None, + "vprec": [], + "dd_sym_syms": [], + "dd_line_locs": [], + "dd_line_confirmed": None, + "dd_line_confirm_dev": None, + "cancellation_locs": [], + "cancellation_bits": {}, + "mca_dev": None, + "mca_sigbits": None, + "float_max_locs": [], + } + + def _run_case( case: dict, verrou_bin: str, @@ -1362,23 +1365,7 @@ def _run_case( cons.print(f" pass floor: >= {MIN_SIG_BITS} significant bits retained") work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-") - result = { - "name": name, - "passed": False, - "max_dev": float("inf"), - "sig_bits": None, - "float_proxy": None, - "vprec": [], - "dd_sym_syms": [], - "dd_line_locs": [], - "dd_line_confirmed": None, - "dd_line_confirm_dev": None, - "cancellation_locs": [], - "cancellation_bits": {}, - "mca_dev": None, - "mca_sigbits": None, - "float_max_locs": [], - } + result = _blank_result(name) try: cons.print(" [dim]running pre_process...[/dim]") _write_inp(case["sim"], "simulation", work_dir) @@ -1615,6 +1602,14 @@ def _emit_github_annotations(results: list): print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) +def _more_md(total: int, shown: int, noun: str) -> str: + """Markdown bullet noting `total - shown` further items elided from a list, + or '' when nothing was truncated.""" + if total <= shown: + return "" + return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_" + + def _emit_github_summary(results: list, n_samples: int): """Write a markdown results table to GITHUB_STEP_SUMMARY. 
@@ -1681,8 +1676,9 @@ def _emit_github_summary(results: list, n_samples: int): for e in ordered[:15]: lost = e["bits"] / math.log2(10) md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else "")) - if len(ordered) > 15: - md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_") + footer = _more_md(len(ordered), 15, "statement(s)") + if footer: + md.append(footer) md.append("") # VPREC sweep — one column per bit level, ❌ where bits retained < floor @@ -1747,8 +1743,9 @@ def _emit_github_summary(results: list, n_samples: int): for line in snippet.splitlines(): md.append(f" {line}") md.append(" ```") - if len(r["dd_line_locs"]) > 10: - md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_") + footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)") + if footer: + md.append(footer) md.append("") md.append("\n") @@ -1771,8 +1768,9 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") for fname, lineno in r["float_max_locs"][:10]: md.append(f"- `{fname}:{lineno}`") - if len(r["float_max_locs"]) > 10: - md.append(f"- _…and {len(r['float_max_locs']) - 10} more site(s); see fp-stability-logs/_") + footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") + if footer: + md.append(footer) md.append("") with open(summary_path, "a") as f: @@ -1857,23 +1855,7 @@ def fp_stability(): ) except MFCException as exc: cons.print(f" [bold red]ERROR[/bold red]: {exc}") - r = { - "name": case["name"], - "passed": False, - "max_dev": float("inf"), - "sig_bits": None, - "float_proxy": None, - "vprec": [], - "dd_sym_syms": [], - "dd_line_locs": [], - "dd_line_confirmed": None, - "dd_line_confirm_dev": None, - "cancellation_locs": [], - "cancellation_bits": {}, - "mca_dev": None, - "mca_sigbits": None, - "float_max_locs": [], - } + r = 
_blank_result(case["name"]) results.append(r) elapsed = time.time() - start From 9f868c742679a0301f0b3c6e12ddd31073820dbb Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 09:03:26 -0400 Subject: [PATCH 08/25] fp-stability: split the 1876-line module into metrics/runners/report (no behavior change) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR had grown fp_stability.py well past the 1000-line soft guideline. Pure relocation — no function body, string, constant, or logic changed — into a clean dependency chain: - fp_stability_metrics.py (474, leaf): regexes/constants + pure parsing, source-reading, sig-bits, cancellation, ranking, statement-bounds helpers. Imports no sibling. - fp_stability_runners.py (530): Verrou subprocess orchestration (run/dd/vprec/cancellation/confirmation/disambiguation). Imports metrics. - fp_stability_report.py (244): GitHub summary + annotation emitters. Imports metrics. - fp_stability.py (715): CLI entry, CASES, _run_case, _blank_result; imports explicitly from the three. No import cycles. Also dropped the unused 'case' parameter from _run_cancellation_check/_run_float_max_check. Verified: 69 toolchain tests, ruff (incl. F-rules: no undefined names), precheck all 7, and a live fp-stability run confirming the cross-module orchestration (sig-bits pass/fail + cancellation sweep) is unchanged. Test import repointed to fp_stability_metrics. 
--- toolchain/mfc/fp_stability.py | 1223 +------------------------ toolchain/mfc/fp_stability_metrics.py | 474 ++++++++++ toolchain/mfc/fp_stability_report.py | 244 +++++ toolchain/mfc/fp_stability_runners.py | 530 +++++++++++ toolchain/mfc/test_fp_stability.py | 2 +- 5 files changed, 1280 insertions(+), 1193 deletions(-) create mode 100644 toolchain/mfc/fp_stability_metrics.py create mode 100644 toolchain/mfc/fp_stability_report.py create mode 100644 toolchain/mfc/fp_stability_runners.py diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 961e48d001..0579502910 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -73,223 +73,46 @@ ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH """ -import glob import math import os -import re import shutil -import stat -import subprocess import sys import tempfile -import textwrap import time from .common import MFC_ROOT_DIR, MFCException +from .fp_stability_metrics import ( + CANCEL_BIT_LEVELS, + MIN_SIG_BITS, + _cancellation_severity, + _mark_cancellation, + _max_abs_np, + _max_diff_np, + _sig_bits, +) +from .fp_stability_report import ( + _emit_github_annotations, + _emit_github_summary, +) +from .fp_stability_runners import ( + _disambiguate_instances, + _find_binary, + _find_verrou, + _run_cancellation_check, + _run_confirmation, + _run_dd_line, + _run_dd_sym, + _run_float_max_check, + _run_float_proxy, + _run_mca_samples, + _run_preprocess, + _run_simulation_verrou, + _run_vprec_sweep, + _write_inp, +) from .printer import cons from .state import ARG -# Mantissa-bit levels for the VPREC sweep (C). -# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. -VPREC_MANTISSA_BITS = [52, 23, 16, 10] - -# Stability pass/fail (stage A) is scale-free: a case must retain at least this -# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)). -# 24 ~= single precision. 
One global floor replaces per-case absolute thresholds -# (which spanned 6 orders of magnitude purely from field scale + conditioning); -# normalising by the field scale collapses that, so a single number suffices. -MIN_SIG_BITS = 24 - -# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy- -# derived threshold is supplied (callers always pass one, so this is only a guard). -_DD_FALLBACK_THRESHOLD = 1e-12 - - -def _sig_bits(max_dev: float, ref_scale: float) -> float: - """Significant bits retained = -log2(max_dev / ref_scale). - - Scale-free: dividing the deviation by the field's peak magnitude removes the - absolute scale, leaving only the conditioning. Zero deviation (or zero - scale) returns 53.0 = full double precision retained. - """ - if not (max_dev > 0) or not (ref_scale > 0): - return 53.0 - return -math.log2(max_dev / ref_scale) - - -def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool: - """A case passes when it retains at least `floor` significant bits.""" - return _sig_bits(max_dev, ref_scale) >= floor - - -# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. -_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) - -# Files to exclude from cancellation / float-max reports (runtime loaders, XALT). -_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so") - -# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)". -_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)") - -# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def -# defines a macro instantiated at multiple call sites) collapse many distinct -# generated computations onto a single .fpp source line, so a dd_line hit inside -# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select -# code but do not duplicate it, so they are tracked for balance but not flagged. 
-_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE) -_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE) -_FYPP_DUPLICATING = ("for", "def", "block", "call") - -# Lines that are clearly control-flow delimiters rather than arithmetic. -# dd_line sometimes reports these when the responsible arithmetic is on the -# preceding line but shares DWARF debug info with the delimiter (e.g. loop -# boundaries in #:for-expanded code, or inlined functions at call sites). -_CONTROL_FLOW_RE = re.compile( - r"^\s*(" - r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" - r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) - r"|else(\s+if\s*\(.*\)\s*then)?\s*$" # else / else if (...) then - r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+" # subroutine declaration - r"|\$:END_GPU\w+" # fypp GPU macro closers - r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) - r"|\s*!\s*$" # comment-only lines - r"|\s*$" # blank lines - r")", - re.IGNORECASE, -) - - -def _resolve_source(fname: str, search_whole_tree: bool = False) -> str: - """Resolve a (possibly bare) source filename to an existing path, or '' if not - found. 
An absolute existing path is used as-is; otherwise the basename is - located recursively under src/ (then the whole tree if `search_whole_tree`).""" - if os.path.isabs(fname) and os.path.isfile(fname): - return fname - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates and search_whole_tree: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) - return candidates[0] if candidates else "" - - -def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list: - """Resolve `fname` and return its lines (with newlines), or [] if unreadable.""" - path = _resolve_source(fname, search_whole_tree) - if not path: - return [] - try: - with open(path) as fh: - return fh.readlines() - except OSError: - return [] - - -def _read_source_line(fname: str, lineno: int) -> str: - """Return the raw source line at lineno (1-based), or '' if unavailable.""" - lines = _read_source_lines(fname) - return lines[lineno - 1] if 0 < lineno <= len(lines) else "" - - -def _macro_context_in_lines(lines: list, lineno: int) -> str: - """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that - encloses `lineno` (1-based) in `lines`, or None if none does. - - Used to flag dd_line hotspots whose .fpp line is shared across multiple - expanded instances (a #:for body, a #:def macro used in many places), where - line-level attribution cannot identify which instance is responsible. 
- """ - stack = [] - for raw in lines[: max(0, lineno - 1)]: - mo = _FYPP_BLOCK_OPEN.match(raw) - if mo: - stack.append(mo.group(1).lower()) - continue - if _FYPP_BLOCK_CLOSE.match(raw) and stack: - stack.pop() - for kw in reversed(stack): - if kw in _FYPP_DUPLICATING: - return f"#:{kw}" - return None - - -def _macro_context(fname: str, lineno: int) -> str: - """File-backed wrapper around _macro_context_in_lines; '' path safe.""" - lines = _read_source_lines(fname) - if not lines: - return None - return _macro_context_in_lines(lines, lineno) - - -def _ends_with_continuation(line: str) -> bool: - """True if a free-form Fortran line ends with a continuation '&' (the last - non-blank token before any trailing comment).""" - code = line.split("!", 1)[0].rstrip() # drop trailing comment (string-'!' is rare; fine here) - return code.endswith("&") - - -def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple: - """Return the (start, end) 1-based physical line range of the Fortran logical - statement containing lineno, following '&' continuations in both directions. - - A hit reported on a continuation fragment thus resolves to the whole - statement, so the labelled location is the full expression rather than a - mid-statement piece. - """ - n = len(lines) - start = lineno - while start > 1 and _ends_with_continuation(lines[start - 2]): - start -= 1 - end = lineno - while end < n and _ends_with_continuation(lines[end - 1]): - end += 1 - return start, end - - -def _statement_at(fname: str, lineno: int) -> tuple: - """File-backed (start, end, text) for the logical statement at fname:lineno; - text is the joined statement. 
Returns (lineno, lineno, '') if unreadable.""" - lines = _read_source_lines(fname) - if not 0 < lineno <= len(lines): - return lineno, lineno, "" - start, end = _statement_bounds_in_lines(lines, lineno) - # join physical lines, dropping the continuation '&' that may lead or trail each - text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end]) - return start, end, text - - -def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: - """Return True if any line in [start, end] contains non-trivial arithmetic. - - Filters out loop delimiters and fypp directive lines that dd_line sometimes - reports when the responsible arithmetic shares DWARF info with its enclosing - control-flow boundary (inlining, #:for template expansion, etc.). - Returns True (keep) when uncertain so we never silently drop real hotspots. - """ - for lineno in range(start, end + 1): - line = _read_source_line(fname, lineno) - if not line: - return True # can't read — keep to be safe - if not _CONTROL_FLOW_RE.match(line): - return True - return False - - -def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: - """Return a annotated source snippet around lineno, or '' if file not found. - - fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path. - Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree. 
- """ - lines = _read_source_lines(fname, search_whole_tree=True) - if not lines: - return "" - start = max(0, lineno - context - 1) - end = min(len(lines), lineno + context) - rows = [] - for i, line in enumerate(lines[start:end], start=start + 1): - marker = ">" if i == lineno else " " - rows.append(f"{marker}{i:5d} | {line.rstrip()}") - return "\n".join(rows) - def _merge(*dicts): """Merge dicts left-to-right; later entries override earlier ones.""" @@ -561,763 +384,6 @@ def _merge(*dicts): ] -def _find_verrou() -> str: - verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) - candidate = os.path.join(verrou_home, "bin", "valgrind") - if os.path.isfile(candidate) and os.access(candidate, os.X_OK): - return candidate - return shutil.which("valgrind") or "" - - -def _find_binary(name: str) -> str: - install_dir = os.path.join(MFC_ROOT_DIR, "build", "install") - candidates = glob.glob(os.path.join(install_dir, "*", "bin", name)) - return max(candidates, key=os.path.getmtime) if candidates else "" - - -def _find_dd_tool(verrou_bin: str, tool: str) -> str: - """Path to a verrou_dd_* tool (e.g. 
'verrou_dd_sym') next to the verrou binary, - or '' if absent.""" - c = os.path.join(os.path.dirname(verrou_bin), tool) - return c if os.path.isfile(c) else "" - - -def _verrou_pythonpath(verrou_bin: str) -> str: - """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir).""" - verrou_home = os.path.dirname(os.path.dirname(verrou_bin)) - matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind")) - return matches[0] if matches else "" - - -def _write_inp(params: dict, target_name: str, work_dir: str) -> None: - """Write a Fortran namelist .inp file from a Python params dict.""" - from .run import case_dicts - - master_keys = case_dicts.get_input_dict_keys(target_name) - lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys] - with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh: - fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n") - - -def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str): - _write_inp(pre_params, "pre_process", work_dir) - with open(os.path.join(work_dir, "pre.log"), "w") as f: - result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False) - if result.returncode != 0: - raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log") - - -def _run_simulation_verrou( - verrou_bin: str, - sim_bin: str, - work_dir: str, - run_dir: str, - rounding_mode: str = None, - extra_flags: list = None, -): - """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output. - - rounding_mode is passed as --rounding-mode= when not None. - extra_flags are appended before the binary (e.g. --backend=vprec ...). 
- """ - with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir: - for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]: - src = os.path.join(work_dir, fname) - if os.path.exists(src): - shutil.copy2(src, tmpdir) - shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all")) - os.makedirs(os.path.join(tmpdir, "D")) - - log_path = os.path.join(run_dir, "verrou.log") - cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"] - if rounding_mode: - cmd.append(f"--rounding-mode={rounding_mode}") - cmd.extend(extra_flags or []) - cmd.append(sim_bin) - - with open(os.path.join(run_dir, "sim.out"), "w") as f: - result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False) - - if result.returncode != 0: - tag = rounding_mode or "vprec" - raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out") - - os.makedirs(run_dir, exist_ok=True) - for fn in os.listdir(os.path.join(tmpdir, "D")): - shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) - - -def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float: - import numpy as np - - total = 0.0 - for fname in compare_files: - ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname) - if not os.path.exists(ref_p) or not os.path.exists(run_p): - return float("inf") - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] - total = max(total, float(np.max(np.abs(ref - run)))) - return total - - -def _max_abs_np(ref_dir: str, compare_files: list) -> float: - """Return the maximum absolute value across all reference output files.""" - import numpy as np - - total = 0.0 - for fname in compare_files: - ref_p = os.path.join(ref_dir, fname) - if not os.path.exists(ref_p): - continue - ref = np.loadtxt(ref_p)[:, 1] - total = max(total, float(np.max(np.abs(ref)))) - return total - - -def _parse_cancel_gen(gen_path: str) -> list: - """Parse cc-gen-file TSV 
(file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources.""" - if not os.path.isfile(gen_path): - return [] - locs = [] - seen = set() - with open(gen_path) as fh: - for raw in fh: - parts = raw.rstrip("\n").split("\t") - if len(parts) < 2: - continue - fname = parts[0].strip() - if any(ext in fname for ext in _EXTERNAL_SRCS): - continue - if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")): - continue - try: - lineno = int(parts[1].strip()) - except ValueError: - continue - key = (fname, lineno) - if key not in seen: - seen.add(key) - locs.append(key) - return locs - - -def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: - """Extract first MFC-source frame from each Valgrind error matching error_keyword.""" - if not os.path.isfile(log_path): - return [] - locs = [] - seen = set() - in_error = False - with open(log_path) as fh: - for raw in fh: - line = re.sub(r"^==\d+== ?", "", raw) - if error_keyword in line: - in_error = True - continue - if in_error: - if " at " in line or " by " in line: - m = _VGFRAME_RE.search(line) - if m: - fname = m.group(1) - if any(ext in fname for ext in _EXTERNAL_SRCS): - continue - lineno = int(m.group(2)) - key = (fname, lineno) - if key not in seen: - seen.add(key) - locs.append(key) - in_error = False - elif line.strip() == "": - in_error = False - return locs - - -# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity -# filter: a site is reported only if it lost >= the threshold bits. Sweeping these -# levels and taking the highest each site survives gives a per-site "bits lost" -# severity (a lower bound — no false positives). 48 ~ full double mantissa. 
-CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] - - -def _cancellation_severity(level_sites: list) -> dict: - """Given [(threshold, [sites])], return {site: highest threshold it survives} - = the per-site bits-lost severity (a lower bound).""" - sev = {} - for level, sites in level_sites: - for site in sites: - if level > sev.get(site, 0): - sev[site] = level - return sev - - -def _digits_left(bits_lost: float) -> float: - """Approximate trustworthy decimal digits remaining after losing `bits_lost` - bits of a double's 53-bit mantissa (~15.95 digits full).""" - return max(0.0, (53 - bits_lost) / math.log2(10)) - - -def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list: - """Run --check-cancellation at the given bit threshold; return [(fname, line)] - of MFC cancellation sites (subtractions losing >= `threshold` significant bits).""" - tag = f"cancellation_{threshold}" - run_dir = os.path.join(work_dir, tag) - os.makedirs(run_dir, exist_ok=True) - gen_path = os.path.join(run_dir, "cancel_gen.txt") - flags = [ - "--check-cancellation=yes", - f"--cc-threshold-double={threshold}", - f"--cc-gen-file={gen_path}", - ] - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) - except MFCException: - pass - raw = _parse_cancel_gen(gen_path) - filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] - skipped = len(raw) - len(filtered) - if skipped and threshold == 10: - cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") - return filtered - - -def _run_mca_samples( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - ref_dir: str, - n_mca: int, -) -> tuple: - """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound).""" - compare = case["compare"] - ref_scale = _max_abs_np(ref_dir, compare) - max_dev = 0.0 - flags = ["--backend=mcaquad", "--mca-mode=mca"] - for i in range(n_mca): - run_dir 
= os.path.join(work_dir, f"mca_{i:02d}") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) - max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - except MFCException: - pass - sig_bits = None - if max_dev > 0.0 and ref_scale > 0.0: - sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale)))) - return max_dev, sig_bits - - -def _run_float_max_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list: - """Run with --check-max-float=yes; return [(fname, line)] of overflow sites.""" - run_dir = os.path.join(work_dir, "float_max") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="nearest", - extra_flags=["--check-max-float=yes"], - ) - except MFCException: - pass - return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float") - - -def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: - """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref.""" - run_dir = os.path.join(work_dir, "float_proxy") - os.makedirs(run_dir) - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") - return _max_diff_np(ref_dir, run_dir, case["compare"]) - - -def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list: - """Run at each mantissa-bit level. 
Returns [(bits, dev), ...].""" - results = [] - for bits in VPREC_MANTISSA_BITS: - run_dir = os.path.join(work_dir, f"vprec_{bits}") - os.makedirs(run_dir) - flags = [ - "--backend=vprec", - "--vprec-mode=full", - f"--vprec-precision-binary64={bits}", - "--vprec-range-binary64=11", - ] - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) - dev = _max_diff_np(ref_dir, run_dir, case["compare"]) - except MFCException: - dev = float("inf") - results.append((bits, dev)) - return results - - -def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): - """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. - - verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via - VERROU_EXCLUDE / VERROU_SOURCE environment variables. For test runs, we use - --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices). - For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the - environment — we honour that so the reference is a stable nearest-rounding baseline - to compare against. CLI --rounding-mode would override the env var and break the - reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env bash - # Generated by mfc.sh fp-stability — do not edit by hand. - VERROU_BIN={verrou_bin!r} - SIM_BIN={sim_bin!r} - IC_DIR={ic_dir!r} - - RUNDIR="$1" - TMPDIR_RUN=$(mktemp -d) - trap 'rm -rf "$TMPDIR_RUN"' EXIT - - cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all" - cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp" - for fname in indices.dat pre_time_data.dat io_time_data.dat; do - [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/" - done - mkdir -p "$TMPDIR_RUN/D" - - # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and - # leaves it unset for test runs. Defaulting to float gives deterministic - # test steps while letting the reference use nearest-rounding. 
- ROUND="${{VERROU_ROUNDING_MODE:-float}}" - - # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation). - # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to). - # Forward them as valgrind flags when set. - EXTRA="" - [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE" - [ -n "${{VERROU_SOURCE:-}}" ] && EXTRA="$EXTRA --source=$VERROU_SOURCE" - - cd "$TMPDIR_RUN" - "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN" - rc=$? - - [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/" - exit $rc - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): - """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line. - - verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR - Exits 0 (stable) or 1 (unstable) based on threshold. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env python3 - # Generated by mfc.sh fp-stability — do not edit by hand. 
- import sys, os, numpy as np - - COMPARE_FILES = {compare_files!r} - THRESHOLD = {threshold!r} - - ref_dir, run_dir = sys.argv[1], sys.argv[2] - max_dev = 0.0 - for fname in COMPARE_FILES: - ref_p = os.path.join(ref_dir, fname) - run_p = os.path.join(run_dir, fname) - if not os.path.exists(ref_p) or not os.path.exists(run_p): - print(f"MISSING: {{fname}}") - sys.exit(1) - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] - dev = float(np.max(np.abs(ref - run))) - max_dev = max(max_dev, dev) - - print(f"max_dev={{max_dev:.3e}} threshold={{THRESHOLD:.0e}}") - sys.exit(0 if max_dev <= THRESHOLD else 1) - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _dd_env(verrou_bin: str) -> dict: - """Environment with PYTHONPATH set for verrou_dd_* imports.""" - py_pkg = _verrou_pythonpath(verrou_bin) - env = os.environ.copy() - if py_pkg: - existing = env.get("PYTHONPATH", "") - env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing])) - return env - - -def _parse_rddmin_locs(summary_path: str) -> list: - """Extract dd_line locations from an rddmin_summary as - [{path, start, end, macro}] dicts (path is repo-relative; macro is the - enclosing fypp duplicating block, e.g. '#:for', or None). - - Filters out locations whose source lines are pure control-flow delimiters - (loop boundaries, fypp directive closers, blank/comment lines). These can - appear when the responsible arithmetic shares DWARF debug info with an - enclosing boundary due to inlining or #:for template expansion. 
- """ - if not os.path.isfile(summary_path): - return [] - locs = [] - skipped = [] - with open(summary_path) as fh: - for line in fh: - m = _LOC_RE.search(line) - if not m: - continue - path = m.group(1) - start = int(m.group(2)) - end = int(m.group(3)) if m.group(3) else start - try: - rel = os.path.relpath(path, MFC_ROOT_DIR) - if rel.startswith(".."): - rel = path - except ValueError: - rel = path - rel = rel.replace("\\", "/") - if _is_arithmetic_loc(path, start, end): - locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)}) - else: - skipped.append((rel, start, end)) - for rel, start, end in skipped: - loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}" - cons.print(f" [dim]dd_line: skipped control-flow boundary {loc}[/dim]") - return locs - - -def _parse_rddmin_syms(summary_path: str) -> list: - """Extract symbol/function names from a dd_sym rddmin_summary. - - rddmin_summary format: - ddmin0:\\tFail Ratio: ...\\tFail indexes: ... - \\t\\t - ddmin1:\\t... - \\t\\t - - Lines starting with 'ddmin' are metadata; function names are on the - indented (tab-prefixed) lines as the first tab-delimited field. - """ - if not os.path.isfile(summary_path): - return [] - syms = [] - with open(summary_path) as fh: - for ln in fh: - stripped = ln.strip() - if not stripped or stripped.startswith("ddmin"): - continue - sym = stripped.split("\t")[0].strip() - if sym: - syms.append(sym) - return syms - - -def _build_source_filter(gen_lines: list, suspect_locs: list) -> list: - """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a - suspect dd_line location. - - gen_lines come from a --gen-source run and carry the exact symbol Verrou - requires (--source matches on file+line+symbol, not file+line alone). - suspect_locs are (path, start, end) tuples whose path may be a repo-relative - path while gen-source emits a basename, so matching is by basename + line. 
- """ - ranges = {} - for path, start, end in suspect_locs: - ranges.setdefault(os.path.basename(path), []).append((start, end)) - out = [] - for raw in gen_lines: - parts = raw.rstrip("\n").split("\t") - if len(parts) < 2: - continue - base = os.path.basename(parts[0].strip()) - try: - ln = int(parts[1].strip()) - except ValueError: - continue - if any(s <= ln <= e for s, e in ranges.get(base, [])): - out.append(raw if raw.endswith("\n") else raw + "\n") - return out - - -def _confirm_decision(suspect_dev, dd_threshold: float): - """Decide whether perturbing only the suspect lines reproduces the instability. - - Returns True (confirmed), False (suspect lines are inert -> attribution - suspect, e.g. macro-collapse misattribution), or None if unmeasured. - """ - if suspect_dev is None: - return None - return suspect_dev >= dd_threshold - - -def _rank_locs(locs: list, total: float) -> list: - """Attach a 'share' (per-line deviation / total) to each loc dict — which - must already carry 'share_dev' from a single-line positive control — and - return the locs sorted by that deviation, most flagrant first. - - 'total' is normally float_proxy, so share is the fraction of the full - single-precision deviation that perturbing that one line alone reproduces. - A non-positive total yields share=None (cannot normalize). - """ - for loc in locs: - dev = loc.get("share_dev") - loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None - return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True) - - -def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: - """Set loc['cancellation']=True for each dd_line loc whose line range covers a - catastrophic-cancellation site (stage F), matched by basename + line. - - This pins the flagrant operation on a multi-op line to the subtraction that - cancels, rather than just naming the line. 
- """ - by_base = {} - for fname, lineno in cancellation_locs: - by_base.setdefault(os.path.basename(fname), set()).add(lineno) - for loc in dd_line_locs: - lines = by_base.get(os.path.basename(loc["path"]), set()) - loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1)) - return dd_line_locs - - -def _cancellation_by_file(cancellation_locs: list) -> list: - """Aggregate cancellation sites by source file → [(basename, count)] sorted by - count (desc), ties by name. - - This is the cancellation-*origin* view (where ill-conditioning concentrates), - as opposed to the per-line --source share, which is a *sensitivity* view - (where reduced precision most moves the output — typically the time - integrator / final accumulation, regardless of where error originates). - """ - counts = {} - for fname, _lineno in cancellation_locs: - base = os.path.basename(fname) - counts[base] = counts.get(base, 0) + 1 - return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])) - - -def _run_dd_tool( - dd_bin: str, - dd_dir: str, - dd_run_sh: str, - dd_cmp_py: str, - env: dict, - log_name: str, - summary_subdir: str, - label: str, -) -> list: - """Generic runner for verrou_dd_sym / verrou_dd_line. 
Returns raw summary lines.""" - log_file = os.path.join(dd_dir, log_name) - cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py] - cons.print(f" [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]") - with open(log_file, "w") as f: - result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False) - summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary") - summary_lines = [] - if result.returncode == 0: - if os.path.isfile(summary_path): - with open(summary_path) as f: - summary_lines = f.readlines() - cons.print(f" [bold yellow]{label} result[/bold yellow]:") - for line in summary_lines: - cons.print(f" {line.rstrip()}") - else: - cons.print(f" [dim]{label} done; see {log_file}[/dim]") - else: - cons.print(f" [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})") - return summary_lines - - -def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float): - """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their - paths. 
The threshold falls back to _DD_FALLBACK_THRESHOLD when unset.""" - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) - return dd_run_sh, dd_cmp_py - - -def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: - """Run verrou_dd_sym; return list of responsible symbol names.""" - dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym") - if not dd_bin: - cons.print(" [dim]verrou_dd_sym not found; skipping delta-debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") - cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") - return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) - - -def _run_dd_line( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - log_dir: str, - threshold: float = None, -) -> list: - """Run verrou_dd_line; return [{path, start, end, macro}] location dicts.""" - dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line") - if not dd_bin: - cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") - return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) - - -def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag): - """Perturb only the 
lines in src_lines (deterministic float mode) and return - the L-inf deviation from the nearest-rounding reference, or None on failure.""" - src_path = os.path.join(conf_dir, f"source_{tag}.txt") - with open(src_path, "w") as fh: - fh.writelines(src_lines) - run_dir = os.path.join(conf_dir, f"perturb_{tag}") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="float", - extra_flags=[f"--source={src_path}"], - ) - except MFCException: - return None - return _max_diff_np(ref_dir, run_dir, compare) - - -def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path): - """Run nearest-rounding with --gen-source to capture the symbol-correct - executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure.""" - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="nearest", - extra_flags=[f"--gen-source={gen_path}"], - ) - except MFCException: - return None - if not os.path.isfile(gen_path): - return None - with open(gen_path) as fh: - return fh.readlines() - - -def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy): - """Positive control for dd_line: perturb ONLY the suspect lines and confirm - the instability reproduces, then rank each line by its individual share. - - Verrou's --source matches file+line+symbol (not file+line alone), so we first - capture the symbol-correct executed source lines via --gen-source, filter them - to the suspect set, then run deterministic float-mode restricted to just those - lines. If the suspect-only deviation reaches dd_threshold the attribution is - confirmed; if it stays near zero the reported lines do not actually carry the - instability (e.g. a #:for-expanded line blamed for the wrong instance). - - Each line is then perturbed alone so its 'share_dev' (and 'share' of - float_proxy) shows which computation dominates. 
- - Returns (confirmed, suspect_dev, ranked_locs). - """ - if not dd_line_locs: - return None, None, dd_line_locs - conf_dir = os.path.join(work_dir, "confirm") - os.makedirs(conf_dir, exist_ok=True) - gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt")) - if gen_lines is None: - return None, None, dd_line_locs - compare = case["compare"] - - # whole-set positive control - suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs] - set_src = _build_source_filter(gen_lines, suspects) - if not set_src: - # none of the reported lines performs an instrumented FP op -> not reproduced - return False, 0.0, dd_line_locs - set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set") - confirmed = _confirm_decision(set_dev, dd_threshold) - - # per-line ranking (a single line trivially owns the whole set deviation) - if len(dd_line_locs) == 1: - dd_line_locs[0]["share_dev"] = set_dev - else: - for i, loc in enumerate(dd_line_locs): - one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])]) - loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0 - ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev)) - return confirmed, set_dev, ranked - - -def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line): - """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot. - - Uses a precision binary (built with --fp-precision-lines) in which each - expanded instance of hotspot_file:hotspot_line compiles to a distinct - physical .f90 line. The sidecar enumerates those physical lines; each is - perturbed alone (float mode, vs the precision binary's own nearest-rounding - reference) so the dominant instance is identified. 
- - Returns a list of {instance, physline, dev, snippet} sorted most-flagrant - first (empty if no sidecar / no instrumented instances). - """ - from . import fp_precision_lines as fpl - - sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin) - sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file)) - instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line) - if not instances: - return [] - - prec_dir = os.path.join(work_dir, "precision") - ref_dir = os.path.join(prec_dir, "ref") - os.makedirs(ref_dir, exist_ok=True) - try: - _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest") - except MFCException: - return [] - gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt")) - if gen_lines is None: - return [] - - f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90") - compare = case["compare"] - results = [] - for physline, instance in instances: - src = _build_source_filter(gen_lines, [(f90_file, physline, physline)]) - if not src: - continue # this instance performs no instrumented FP op - dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}") - results.append( - { - "instance": instance, - "physline": physline, - "dev": dev or 0.0, - "snippet": _read_source_line(f90_file, physline).strip(), - } - ) - results.sort(key=lambda r: r["dev"], reverse=True) - return results - - def _blank_result(name: str) -> dict: """A result dict with every field at its empty/unmeasured default.""" return { @@ -1499,7 +565,7 @@ def _run_case( cons.print(" [dim]cancellation detection...[/dim]") try: # sweep bit thresholds to get per-site severity (bits lost) - level_sites = [(level, _run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS] + level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, 
threshold=level)) for level in CANCEL_BIT_LEVELS] locs = level_sites[0][1] # lowest threshold = full list bits = _cancellation_severity(level_sites) result["cancellation_locs"] = locs @@ -1534,7 +600,7 @@ def _run_case( if run_float_max: cons.print(" [dim]float-max overflow check...[/dim]") try: - locs = _run_float_max_check(case, verrou_bin, sim_bin, work_dir) + locs = _run_float_max_check(verrou_bin, sim_bin, work_dir) result["float_max_locs"] = locs if locs: cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)") @@ -1550,233 +616,6 @@ def _run_case( return result -def _emit_github_annotations(results: list): - """Emit GitHub annotations for FP hotspots. - - Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations - appear inline on the responsible source lines in the PR diff view. - - Up to 3 dd_line locations are emitted per case (minimal responsible lines - from delta-debug). Confirmed hotspots (suspect-only perturbation reproduced - the instability) are ::warning::; unconfirmed ones are downgraded to - ::notice:: so a suspect attribution is not presented as fact. Up to 3 - cancellation sites per case are emitted as ::notice:: so the diff also - highlights subtraction-cancellation hotspots from --check-cancellation. 
- """ - if not os.environ.get("GITHUB_ACTIONS"): - return - for r in results: - status = "FAIL" if not r["passed"] else "sensitivity" - _sb = r.get("sig_bits") - _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a" - dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}" - unconfirmed = r.get("dd_line_confirmed") is False - - for loc in r.get("dd_line_locs", [])[:3]: - location = f"file={loc['path']},line={loc['start']}" - if loc["end"] != loc["start"]: - location += f",endLine={loc['end']}" - note = dev_str - if loc.get("share") is not None: - note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)" - if loc.get("cancellation"): - note += " — also a catastrophic cancellation site" - if loc.get("macro"): - note += f" — {loc['macro']}-expanded line, may represent multiple instances" - if unconfirmed: - title = f"FP candidate (unconfirmed) [{r['name']}]" - print(f"::notice {location},title={title}::{note}", flush=True) - else: - title = f"FP {status} [{r['name']}]" - print(f"::warning {location},title={title}::{note}", flush=True) - n_dd = len(r.get("dd_line_locs", [])) - if n_dd > 3: - print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True) - - for fname, lineno in r.get("cancellation_locs", [])[:3]: - loc = f"file={fname},line={lineno}" - title = f"FP cancellation [{r['name']}]" - print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True) - n_cc = len(r.get("cancellation_locs", [])) - if n_cc > 3: - print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) - - -def _more_md(total: int, shown: int, noun: str) -> str: - """Markdown bullet noting `total - shown` further items elided from a list, - or '' when nothing was truncated.""" - if 
total <= shown: - return "" - return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_" - - -def _emit_github_summary(results: list, n_samples: int): - """Write a markdown results table to GITHUB_STEP_SUMMARY. - - Visible directly in the Actions run UI without downloading artifacts. - Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), - and dd_line source locations for any failing cases. - """ - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if not summary_path: - return - - n_pass = sum(1 for r in results if r["passed"]) - n_fail = len(results) - n_pass - - md = [] - md.append("## FP Stability Results\n") - md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n") - md.append( - f"> **Coverage:** {len(results)} one-dimensional case(s) " - f"({', '.join(r['name'] for r in results)}). A pass means stable in the code paths these " - "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " - "they do not reach.\n" - ) - - # Main results table — pass/fail is scale-free: bits retained vs a single floor - md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") - md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |") - md.append("|------|:------:|:------:|--------:|--------:|:------:|") - for r in results: - status = "✅" if r["passed"] else "❌" - bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" - fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" - sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—" - md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |") - md.append("") - - # Cancellation ORIGINS — where ill-conditioning actually arises, led with the - # most severe (most bits lost). 
The numerically interesting signal; the - # sensitivity list further down is dominated by the (benign) time integrator. - cases_with_cancel = [r for r in results if r.get("cancellation_locs")] - if cases_with_cancel: - md.append("### Catastrophic cancellation origins (ranked by digits lost)\n") - md.append( - "> Subtraction of nearly-equal values loses leading significant digits. A double carries " - "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away " - "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only " - "single-precision trust. Site *count* is not severity — one site losing many digits outweighs " - "many mild ones.\n" - ) - for r in cases_with_cancel: - site_bits = r.get("cancellation_bits") or {} - # collapse continuation fragments to one entry per logical statement, - # keeping the worst bits-lost seen on that statement - stmts = {} # (basename, stmt_start) -> {where, bits, text} - for fname, lineno in r["cancellation_locs"]: - stmt_start, _end, stmt_text = _statement_at(fname, lineno) - key = (os.path.basename(fname), stmt_start) - e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text}) - e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0)) - ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"])) - if ordered: - w = ordered[0] - md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") - for e in ordered[:15]: - lost = e["bits"] / math.log2(10) - md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else "")) - footer = _more_md(len(ordered), 15, "statement(s)") - if footer: - md.append(footer) - md.append("") - - # VPREC sweep — one column per bit level, ❌ where bits retained < floor - if any(r["vprec"] for r in results): - _labels = {52: "52b", 23: "23b", 16: 
"16b", 10: "10b"} - header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) - sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS) - md.append("### VPREC precision sweep\n") - md.append(f"| Case | {header} |") - md.append(f"|------|{sep}|") - for r in results: - vmap = {b: d for b, d in r["vprec"]} - cols = [] - for b in VPREC_MANTISSA_BITS: - d = vmap.get(b) - if d is None: - cols.append("—") - elif d == float("inf"): - cols.append("💥 crash") - else: - cols.append(f"{d:.2e}") - md.append(f"| `{r['name']}` | {' | '.join(cols)} |") - md.append("") - - # dd_line — single-precision SENSITIVITY (where precision most affects the - # output). This is distinct from cancellation origin (reported separately): - # the leader is typically the time integrator / final accumulation, because - # perturbing the last write moves the output directly while upstream errors - # get re-rounded there. Not a culprit-finder for ill-conditioning. - cases_with_locs = [r for r in results if r["dd_line_locs"]] - if cases_with_locs: - md.append("
") - md.append("Single-precision sensitivity (dd_line) — usually the time integrator; expand for details\n") - md.append( - "> Where reduced precision most moves the output — **typically the time integrator / " - "final accumulation, which is expected and benign**. This is *not* where cancellation " - "originates (that's the section above); it shows where precision matters most.\n" - ) - _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} - for r in cases_with_locs: - status = "❌ FAIL" if not r["passed"] else "✅ pass" - md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}") - md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n") - for loc in r["dd_line_locs"][:10]: - rel_path, start, end = loc["path"], loc["start"], loc["end"] - where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" - tags = [] - if loc.get("share") is not None: - tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy") - if loc.get("cancellation"): - tags.append("catastrophic cancellation") - if loc.get("macro"): - tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") - suffix = f" — {', '.join(tags)}" if tags else "" - md.append(f"- `{where}`{suffix}") - for inst in loc.get("instances", [])[:8]: - flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else "" - md.append(f" - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`") - snippet = _get_source_context(rel_path, start) - if snippet: - md.append(" ```fortran") - for line in snippet.splitlines(): - md.append(f" {line}") - md.append(" ```") - footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)") - if footer: - md.append(footer) - md.append("") - md.append("
\n") - - # dd_sym function names (collapsed, since less actionable than dd_line) - cases_with_syms = [r for r in results if r["dd_sym_syms"]] - if cases_with_syms: - md.append("
") - md.append("Responsible functions (dd_sym)\n") - for r in cases_with_syms: - md.append(f"\n**`{r['name']}`**\n") - for sym in r["dd_sym_syms"]: - md.append(f"- `{sym}`") - md.append("\n
\n") - - # Float-max overflow sites - cases_with_fmax = [r for r in results if r.get("float_max_locs")] - if cases_with_fmax: - md.append("### Float32 overflow sites (check\\_max\\_float)\n") - for r in cases_with_fmax: - md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") - for fname, lineno in r["float_max_locs"][:10]: - md.append(f"- `{fname}:{lineno}`") - footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") - if footer: - md.append(footer) - md.append("") - - with open(summary_path, "a") as f: - f.write("\n".join(md) + "\n") - - def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() if not verrou_bin or not os.path.isfile(verrou_bin): diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py new file mode 100644 index 0000000000..01940618d5 --- /dev/null +++ b/toolchain/mfc/fp_stability_metrics.py @@ -0,0 +1,474 @@ +"""Pure metrics, source-resolution, and parsing helpers for the FP-stability suite. + +Leaf module: imports only stdlib + MFC_ROOT_DIR + cons. No sibling fp_stability* +imports, so the runners/report/orchestrator modules can all depend on it. +""" + +import glob +import math +import os +import re + +from .common import MFC_ROOT_DIR +from .printer import cons + +# Mantissa-bit levels for the VPREC sweep (C). +# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. +VPREC_MANTISSA_BITS = [52, 23, 16, 10] + +# Stability pass/fail (stage A) is scale-free: a case must retain at least this +# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)). +# 24 ~= single precision. One global floor replaces per-case absolute thresholds +# (which spanned 6 orders of magnitude purely from field scale + conditioning); +# normalising by the field scale collapses that, so a single number suffices. 
+MIN_SIG_BITS = 24 + +# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy- +# derived threshold is supplied (callers always pass one, so this is only a guard). +_DD_FALLBACK_THRESHOLD = 1e-12 + + +def _sig_bits(max_dev: float, ref_scale: float) -> float: + """Significant bits retained = -log2(max_dev / ref_scale). + + Scale-free: dividing the deviation by the field's peak magnitude removes the + absolute scale, leaving only the conditioning. Zero deviation (or zero + scale) returns 53.0 = full double precision retained. + """ + if not (max_dev > 0) or not (ref_scale > 0): + return 53.0 + return -math.log2(max_dev / ref_scale) + + +def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool: + """A case passes when it retains at least `floor` significant bits.""" + return _sig_bits(max_dev, ref_scale) >= floor + + +# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. +_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) + +# Files to exclude from cancellation / float-max reports (runtime loaders, XALT). +_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so") + +# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)". +_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)") + +# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def +# defines a macro instantiated at multiple call sites) collapse many distinct +# generated computations onto a single .fpp source line, so a dd_line hit inside +# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select +# code but do not duplicate it, so they are tracked for balance but not flagged. 
+_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE) +_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE) +_FYPP_DUPLICATING = ("for", "def", "block", "call") + +# Lines that are clearly control-flow delimiters rather than arithmetic. +# dd_line sometimes reports these when the responsible arithmetic is on the +# preceding line but shares DWARF debug info with the delimiter (e.g. loop +# boundaries in #:for-expanded code, or inlined functions at call sites). +_CONTROL_FLOW_RE = re.compile( + r"^\s*(" + r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" + r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) + r"|else(\s+if\s*\(.*\)\s*then)?\s*$" # else / else if (...) then + r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+" # subroutine declaration + r"|\$:END_GPU\w+" # fypp GPU macro closers + r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) + r"|\s*!\s*$" # comment-only lines + r"|\s*$" # blank lines + r")", + re.IGNORECASE, +) + + +def _resolve_source(fname: str, search_whole_tree: bool = False) -> str: + """Resolve a (possibly bare) source filename to an existing path, or '' if not + found. 
An absolute existing path is used as-is; otherwise the basename is + located recursively under src/ (then the whole tree if `search_whole_tree`).""" + if os.path.isabs(fname) and os.path.isfile(fname): + return fname + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) + if not candidates and search_whole_tree: + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) + return candidates[0] if candidates else "" + + +def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list: + """Resolve `fname` and return its lines (with newlines), or [] if unreadable.""" + path = _resolve_source(fname, search_whole_tree) + if not path: + return [] + try: + with open(path) as fh: + return fh.readlines() + except OSError: + return [] + + +def _read_source_line(fname: str, lineno: int) -> str: + """Return the raw source line at lineno (1-based), or '' if unavailable.""" + lines = _read_source_lines(fname) + return lines[lineno - 1] if 0 < lineno <= len(lines) else "" + + +def _macro_context_in_lines(lines: list, lineno: int) -> str: + """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that + encloses `lineno` (1-based) in `lines`, or None if none does. + + Used to flag dd_line hotspots whose .fpp line is shared across multiple + expanded instances (a #:for body, a #:def macro used in many places), where + line-level attribution cannot identify which instance is responsible. 
+ """ + stack = [] + for raw in lines[: max(0, lineno - 1)]: + mo = _FYPP_BLOCK_OPEN.match(raw) + if mo: + stack.append(mo.group(1).lower()) + continue + if _FYPP_BLOCK_CLOSE.match(raw) and stack: + stack.pop() + for kw in reversed(stack): + if kw in _FYPP_DUPLICATING: + return f"#:{kw}" + return None + + +def _macro_context(fname: str, lineno: int) -> str: + """File-backed wrapper around _macro_context_in_lines; '' path safe.""" + lines = _read_source_lines(fname) + if not lines: + return None + return _macro_context_in_lines(lines, lineno) + + +def _ends_with_continuation(line: str) -> bool: + """True if a free-form Fortran line ends with a continuation '&' (the last + non-blank token before any trailing comment).""" + code = line.split("!", 1)[0].rstrip() # drop trailing comment (string-'!' is rare; fine here) + return code.endswith("&") + + +def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple: + """Return the (start, end) 1-based physical line range of the Fortran logical + statement containing lineno, following '&' continuations in both directions. + + A hit reported on a continuation fragment thus resolves to the whole + statement, so the labelled location is the full expression rather than a + mid-statement piece. + """ + n = len(lines) + start = lineno + while start > 1 and _ends_with_continuation(lines[start - 2]): + start -= 1 + end = lineno + while end < n and _ends_with_continuation(lines[end - 1]): + end += 1 + return start, end + + +def _statement_at(fname: str, lineno: int) -> tuple: + """File-backed (start, end, text) for the logical statement at fname:lineno; + text is the joined statement. 
Returns (lineno, lineno, '') if unreadable.""" + lines = _read_source_lines(fname) + if not 0 < lineno <= len(lines): + return lineno, lineno, "" + start, end = _statement_bounds_in_lines(lines, lineno) + # join physical lines, dropping the continuation '&' that may lead or trail each + text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end]) + return start, end, text + + +def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: + """Return True if any line in [start, end] contains non-trivial arithmetic. + + Filters out loop delimiters and fypp directive lines that dd_line sometimes + reports when the responsible arithmetic shares DWARF info with its enclosing + control-flow boundary (inlining, #:for template expansion, etc.). + Returns True (keep) when uncertain so we never silently drop real hotspots. + """ + for lineno in range(start, end + 1): + line = _read_source_line(fname, lineno) + if not line: + return True # can't read — keep to be safe + if not _CONTROL_FLOW_RE.match(line): + return True + return False + + +def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: + """Return a annotated source snippet around lineno, or '' if file not found. + + fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path. + Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree. 
+ """ + lines = _read_source_lines(fname, search_whole_tree=True) + if not lines: + return "" + start = max(0, lineno - context - 1) + end = min(len(lines), lineno + context) + rows = [] + for i, line in enumerate(lines[start:end], start=start + 1): + marker = ">" if i == lineno else " " + rows.append(f"{marker}{i:5d} | {line.rstrip()}") + return "\n".join(rows) + + +def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float: + import numpy as np + + total = 0.0 + for fname in compare_files: + ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname) + if not os.path.exists(ref_p) or not os.path.exists(run_p): + return float("inf") + ref = np.loadtxt(ref_p)[:, 1] + run = np.loadtxt(run_p)[:, 1] + total = max(total, float(np.max(np.abs(ref - run)))) + return total + + +def _max_abs_np(ref_dir: str, compare_files: list) -> float: + """Return the maximum absolute value across all reference output files.""" + import numpy as np + + total = 0.0 + for fname in compare_files: + ref_p = os.path.join(ref_dir, fname) + if not os.path.exists(ref_p): + continue + ref = np.loadtxt(ref_p)[:, 1] + total = max(total, float(np.max(np.abs(ref)))) + return total + + +def _parse_cancel_gen(gen_path: str) -> list: + """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources.""" + if not os.path.isfile(gen_path): + return [] + locs = [] + seen = set() + with open(gen_path) as fh: + for raw in fh: + parts = raw.rstrip("\n").split("\t") + if len(parts) < 2: + continue + fname = parts[0].strip() + if any(ext in fname for ext in _EXTERNAL_SRCS): + continue + if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")): + continue + try: + lineno = int(parts[1].strip()) + except ValueError: + continue + key = (fname, lineno) + if key not in seen: + seen.add(key) + locs.append(key) + return locs + + +def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: + """Extract first MFC-source frame from each Valgrind 
error matching error_keyword.""" + if not os.path.isfile(log_path): + return [] + locs = [] + seen = set() + in_error = False + with open(log_path) as fh: + for raw in fh: + line = re.sub(r"^==\d+== ?", "", raw) + if error_keyword in line: + in_error = True + continue + if in_error: + if " at " in line or " by " in line: + m = _VGFRAME_RE.search(line) + if m: + fname = m.group(1) + if any(ext in fname for ext in _EXTERNAL_SRCS): + continue + lineno = int(m.group(2)) + key = (fname, lineno) + if key not in seen: + seen.add(key) + locs.append(key) + in_error = False + elif line.strip() == "": + in_error = False + return locs + + +# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity +# filter: a site is reported only if it lost >= the threshold bits. Sweeping these +# levels and taking the highest each site survives gives a per-site "bits lost" +# severity (a lower bound — no false positives). 48 ~ full double mantissa. +CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] + + +def _cancellation_severity(level_sites: list) -> dict: + """Given [(threshold, [sites])], return {site: highest threshold it survives} + = the per-site bits-lost severity (a lower bound).""" + sev = {} + for level, sites in level_sites: + for site in sites: + if level > sev.get(site, 0): + sev[site] = level + return sev + + +def _digits_left(bits_lost: float) -> float: + """Approximate trustworthy decimal digits remaining after losing `bits_lost` + bits of a double's 53-bit mantissa (~15.95 digits full).""" + return max(0.0, (53 - bits_lost) / math.log2(10)) + + +def _parse_rddmin_locs(summary_path: str) -> list: + """Extract dd_line locations from an rddmin_summary as + [{path, start, end, macro}] dicts (path is repo-relative; macro is the + enclosing fypp duplicating block, e.g. '#:for', or None). + + Filters out locations whose source lines are pure control-flow delimiters + (loop boundaries, fypp directive closers, blank/comment lines). 
These can + appear when the responsible arithmetic shares DWARF debug info with an + enclosing boundary due to inlining or #:for template expansion. + """ + if not os.path.isfile(summary_path): + return [] + locs = [] + skipped = [] + with open(summary_path) as fh: + for line in fh: + m = _LOC_RE.search(line) + if not m: + continue + path = m.group(1) + start = int(m.group(2)) + end = int(m.group(3)) if m.group(3) else start + try: + rel = os.path.relpath(path, MFC_ROOT_DIR) + if rel.startswith(".."): + rel = path + except ValueError: + rel = path + rel = rel.replace("\\", "/") + if _is_arithmetic_loc(path, start, end): + locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)}) + else: + skipped.append((rel, start, end)) + for rel, start, end in skipped: + loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}" + cons.print(f" [dim]dd_line: skipped control-flow boundary {loc}[/dim]") + return locs + + +def _parse_rddmin_syms(summary_path: str) -> list: + """Extract symbol/function names from a dd_sym rddmin_summary. + + rddmin_summary format: + ddmin0:\\tFail Ratio: ...\\tFail indexes: ... + \\t\\t + ddmin1:\\t... + \\t\\t + + Lines starting with 'ddmin' are metadata; function names are on the + indented (tab-prefixed) lines as the first tab-delimited field. + """ + if not os.path.isfile(summary_path): + return [] + syms = [] + with open(summary_path) as fh: + for ln in fh: + stripped = ln.strip() + if not stripped or stripped.startswith("ddmin"): + continue + sym = stripped.split("\t")[0].strip() + if sym: + syms.append(sym) + return syms + + +def _build_source_filter(gen_lines: list, suspect_locs: list) -> list: + """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a + suspect dd_line location. + + gen_lines come from a --gen-source run and carry the exact symbol Verrou + requires (--source matches on file+line+symbol, not file+line alone). 
+ suspect_locs are (path, start, end) tuples whose path may be a repo-relative + path while gen-source emits a basename, so matching is by basename + line. + """ + ranges = {} + for path, start, end in suspect_locs: + ranges.setdefault(os.path.basename(path), []).append((start, end)) + out = [] + for raw in gen_lines: + parts = raw.rstrip("\n").split("\t") + if len(parts) < 2: + continue + base = os.path.basename(parts[0].strip()) + try: + ln = int(parts[1].strip()) + except ValueError: + continue + if any(s <= ln <= e for s, e in ranges.get(base, [])): + out.append(raw if raw.endswith("\n") else raw + "\n") + return out + + +def _confirm_decision(suspect_dev, dd_threshold: float): + """Decide whether perturbing only the suspect lines reproduces the instability. + + Returns True (confirmed), False (suspect lines are inert -> attribution + suspect, e.g. macro-collapse misattribution), or None if unmeasured. + """ + if suspect_dev is None: + return None + return suspect_dev >= dd_threshold + + +def _rank_locs(locs: list, total: float) -> list: + """Attach a 'share' (per-line deviation / total) to each loc dict — which + must already carry 'share_dev' from a single-line positive control — and + return the locs sorted by that deviation, most flagrant first. + + 'total' is normally float_proxy, so share is the fraction of the full + single-precision deviation that perturbing that one line alone reproduces. + A non-positive total yields share=None (cannot normalize). + """ + for loc in locs: + dev = loc.get("share_dev") + loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None + return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True) + + +def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: + """Set loc['cancellation']=True for each dd_line loc whose line range covers a + catastrophic-cancellation site (stage F), matched by basename + line. 
+ + This pins the flagrant operation on a multi-op line to the subtraction that + cancels, rather than just naming the line. + """ + by_base = {} + for fname, lineno in cancellation_locs: + by_base.setdefault(os.path.basename(fname), set()).add(lineno) + for loc in dd_line_locs: + lines = by_base.get(os.path.basename(loc["path"]), set()) + loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1)) + return dd_line_locs + + +def _cancellation_by_file(cancellation_locs: list) -> list: + """Aggregate cancellation sites by source file → [(basename, count)] sorted by + count (desc), ties by name. + + This is the cancellation-*origin* view (where ill-conditioning concentrates), + as opposed to the per-line --source share, which is a *sensitivity* view + (where reduced precision most moves the output — typically the time + integrator / final accumulation, regardless of where error originates). + """ + counts = {} + for fname, _lineno in cancellation_locs: + base = os.path.basename(fname) + counts[base] = counts.get(base, 0) + 1 + return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])) diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py new file mode 100644 index 0000000000..f0583002f2 --- /dev/null +++ b/toolchain/mfc/fp_stability_report.py @@ -0,0 +1,244 @@ +"""GitHub-output emitters for the FP-stability suite (step summary + annotations). + +Pure formatting of the result dicts produced by the runners; the metric helpers +it uses (statement resolution, source context, digit math) live in +fp_stability_metrics. +""" + +import math +import os + +from .fp_stability_metrics import ( + MIN_SIG_BITS, + VPREC_MANTISSA_BITS, + _digits_left, + _get_source_context, + _statement_at, +) + + +def _emit_github_annotations(results: list): + """Emit GitHub annotations for FP hotspots. + + Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). 
Annotations + appear inline on the responsible source lines in the PR diff view. + + Up to 3 dd_line locations are emitted per case (minimal responsible lines + from delta-debug). Confirmed hotspots (suspect-only perturbation reproduced + the instability) are ::warning::; unconfirmed ones are downgraded to + ::notice:: so a suspect attribution is not presented as fact. Up to 3 + cancellation sites per case are emitted as ::notice:: so the diff also + highlights subtraction-cancellation hotspots from --check-cancellation. + """ + if not os.environ.get("GITHUB_ACTIONS"): + return + for r in results: + status = "FAIL" if not r["passed"] else "sensitivity" + _sb = r.get("sig_bits") + _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a" + dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}" + unconfirmed = r.get("dd_line_confirmed") is False + + for loc in r.get("dd_line_locs", [])[:3]: + location = f"file={loc['path']},line={loc['start']}" + if loc["end"] != loc["start"]: + location += f",endLine={loc['end']}" + note = dev_str + if loc.get("share") is not None: + note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)" + if loc.get("cancellation"): + note += " — also a catastrophic cancellation site" + if loc.get("macro"): + note += f" — {loc['macro']}-expanded line, may represent multiple instances" + if unconfirmed: + title = f"FP candidate (unconfirmed) [{r['name']}]" + print(f"::notice {location},title={title}::{note}", flush=True) + else: + title = f"FP {status} [{r['name']}]" + print(f"::warning {location},title={title}::{note}", flush=True) + n_dd = len(r.get("dd_line_locs", [])) + if n_dd > 3: + print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True) + + for fname, lineno in r.get("cancellation_locs", [])[:3]: + loc = 
f"file={fname},line={lineno}" + title = f"FP cancellation [{r['name']}]" + print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True) + n_cc = len(r.get("cancellation_locs", [])) + if n_cc > 3: + print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) + + +def _more_md(total: int, shown: int, noun: str) -> str: + """Markdown bullet noting `total - shown` further items elided from a list, + or '' when nothing was truncated.""" + if total <= shown: + return "" + return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_" + + +def _emit_github_summary(results: list, n_samples: int): + """Write a markdown results table to GITHUB_STEP_SUMMARY. + + Visible directly in the Actions run UI without downloading artifacts. + Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), + and dd_line source locations for any failing cases. + """ + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + + n_pass = sum(1 for r in results if r["passed"]) + n_fail = len(results) - n_pass + + md = [] + md.append("## FP Stability Results\n") + md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n") + md.append( + f"> **Coverage:** {len(results)} one-dimensional case(s) " + f"({', '.join(r['name'] for r in results)}). 
A pass means stable in the code paths these " + "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " + "they do not reach.\n" + ) + + # Main results table — pass/fail is scale-free: bits retained vs a single floor + md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") + md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |") + md.append("|------|:------:|:------:|--------:|--------:|:------:|") + for r in results: + status = "✅" if r["passed"] else "❌" + bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" + fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" + sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—" + md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |") + md.append("") + + # Cancellation ORIGINS — where ill-conditioning actually arises, led with the + # most severe (most bits lost). The numerically interesting signal; the + # sensitivity list further down is dominated by the (benign) time integrator. + cases_with_cancel = [r for r in results if r.get("cancellation_locs")] + if cases_with_cancel: + md.append("### Catastrophic cancellation origins (ranked by digits lost)\n") + md.append( + "> Subtraction of nearly-equal values loses leading significant digits. A double carries " + "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away " + "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only " + "single-precision trust. 
Site *count* is not severity — one site losing many digits outweighs " + "many mild ones.\n" + ) + for r in cases_with_cancel: + site_bits = r.get("cancellation_bits") or {} + # collapse continuation fragments to one entry per logical statement, + # keeping the worst bits-lost seen on that statement + stmts = {} # (basename, stmt_start) -> {where, bits, text} + for fname, lineno in r["cancellation_locs"]: + stmt_start, _end, stmt_text = _statement_at(fname, lineno) + key = (os.path.basename(fname), stmt_start) + e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text}) + e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0)) + ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"])) + if ordered: + w = ordered[0] + md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") + for e in ordered[:15]: + lost = e["bits"] / math.log2(10) + md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else "")) + footer = _more_md(len(ordered), 15, "statement(s)") + if footer: + md.append(footer) + md.append("") + + # VPREC sweep — one column per bit level, ❌ where bits retained < floor + if any(r["vprec"] for r in results): + _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} + header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) + sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS) + md.append("### VPREC precision sweep\n") + md.append(f"| Case | {header} |") + md.append(f"|------|{sep}|") + for r in results: + vmap = {b: d for b, d in r["vprec"]} + cols = [] + for b in VPREC_MANTISSA_BITS: + d = vmap.get(b) + if d is None: + cols.append("—") + elif d == float("inf"): + cols.append("💥 crash") + else: + cols.append(f"{d:.2e}") + md.append(f"| `{r['name']}` | {' | '.join(cols)} |") + md.append("") + + # dd_line — single-precision SENSITIVITY (where precision 
most affects the + # output). This is distinct from cancellation origin (reported separately): + # the leader is typically the time integrator / final accumulation, because + # perturbing the last write moves the output directly while upstream errors + # get re-rounded there. Not a culprit-finder for ill-conditioning. + cases_with_locs = [r for r in results if r["dd_line_locs"]] + if cases_with_locs: + md.append("
") + md.append("Single-precision sensitivity (dd_line) — usually the time integrator; expand for details\n") + md.append( + "> Where reduced precision most moves the output — **typically the time integrator / " + "final accumulation, which is expected and benign**. This is *not* where cancellation " + "originates (that's the section above); it shows where precision matters most.\n" + ) + _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} + for r in cases_with_locs: + status = "❌ FAIL" if not r["passed"] else "✅ pass" + md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}") + md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n") + for loc in r["dd_line_locs"][:10]: + rel_path, start, end = loc["path"], loc["start"], loc["end"] + where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" + tags = [] + if loc.get("share") is not None: + tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy") + if loc.get("cancellation"): + tags.append("catastrophic cancellation") + if loc.get("macro"): + tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") + suffix = f" — {', '.join(tags)}" if tags else "" + md.append(f"- `{where}`{suffix}") + for inst in loc.get("instances", [])[:8]: + flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else "" + md.append(f" - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`") + snippet = _get_source_context(rel_path, start) + if snippet: + md.append(" ```fortran") + for line in snippet.splitlines(): + md.append(f" {line}") + md.append(" ```") + footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)") + if footer: + md.append(footer) + md.append("") + md.append("
\n") + + # dd_sym function names (collapsed, since less actionable than dd_line) + cases_with_syms = [r for r in results if r["dd_sym_syms"]] + if cases_with_syms: + md.append("
") + md.append("Responsible functions (dd_sym)\n") + for r in cases_with_syms: + md.append(f"\n**`{r['name']}`**\n") + for sym in r["dd_sym_syms"]: + md.append(f"- `{sym}`") + md.append("\n
\n") + + # Float-max overflow sites + cases_with_fmax = [r for r in results if r.get("float_max_locs")] + if cases_with_fmax: + md.append("### Float32 overflow sites (check\\_max\\_float)\n") + for r in cases_with_fmax: + md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") + for fname, lineno in r["float_max_locs"][:10]: + md.append(f"- `{fname}:{lineno}`") + footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") + if footer: + md.append(footer) + md.append("") + + with open(summary_path, "a") as f: + f.write("\n".join(md) + "\n") diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py new file mode 100644 index 0000000000..c9b7ee375b --- /dev/null +++ b/toolchain/mfc/fp_stability_runners.py @@ -0,0 +1,530 @@ +"""Verrou subprocess runners for the FP-stability suite. + +Each routine drives the verrou/valgrind binary (or the verrou_dd_* delta-debug +tools) and returns parsed results. Pure parsing / metric helpers live in +fp_stability_metrics, which this module imports. 
+""" + +import glob +import math +import os +import shutil +import stat +import subprocess +import tempfile +import textwrap + +from .common import MFC_ROOT_DIR, MFCException +from .fp_stability_metrics import ( + _DD_FALLBACK_THRESHOLD, + VPREC_MANTISSA_BITS, + _build_source_filter, + _confirm_decision, + _is_arithmetic_loc, + _max_abs_np, + _max_diff_np, + _parse_cancel_gen, + _parse_rddmin_locs, + _parse_rddmin_syms, + _parse_vg_error_locs, + _rank_locs, + _read_source_line, +) +from .printer import cons + + +def _find_verrou() -> str: + verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) + candidate = os.path.join(verrou_home, "bin", "valgrind") + if os.path.isfile(candidate) and os.access(candidate, os.X_OK): + return candidate + return shutil.which("valgrind") or "" + + +def _find_binary(name: str) -> str: + install_dir = os.path.join(MFC_ROOT_DIR, "build", "install") + candidates = glob.glob(os.path.join(install_dir, "*", "bin", name)) + return max(candidates, key=os.path.getmtime) if candidates else "" + + +def _find_dd_tool(verrou_bin: str, tool: str) -> str: + """Path to a verrou_dd_* tool (e.g. 
'verrou_dd_sym') next to the verrou binary, + or '' if absent.""" + c = os.path.join(os.path.dirname(verrou_bin), tool) + return c if os.path.isfile(c) else "" + + +def _verrou_pythonpath(verrou_bin: str) -> str: + """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir).""" + verrou_home = os.path.dirname(os.path.dirname(verrou_bin)) + matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind")) + return matches[0] if matches else "" + + +def _write_inp(params: dict, target_name: str, work_dir: str) -> None: + """Write a Fortran namelist .inp file from a Python params dict.""" + from .run import case_dicts + + master_keys = case_dicts.get_input_dict_keys(target_name) + lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys] + with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh: + fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n") + + +def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str): + _write_inp(pre_params, "pre_process", work_dir) + with open(os.path.join(work_dir, "pre.log"), "w") as f: + result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False) + if result.returncode != 0: + raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log") + + +def _run_simulation_verrou( + verrou_bin: str, + sim_bin: str, + work_dir: str, + run_dir: str, + rounding_mode: str = None, + extra_flags: list = None, +): + """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output. + + rounding_mode is passed as --rounding-mode= when not None. + extra_flags are appended before the binary (e.g. --backend=vprec ...). 
+ """ + with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir: + for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]: + src = os.path.join(work_dir, fname) + if os.path.exists(src): + shutil.copy2(src, tmpdir) + shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all")) + os.makedirs(os.path.join(tmpdir, "D")) + + log_path = os.path.join(run_dir, "verrou.log") + cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"] + if rounding_mode: + cmd.append(f"--rounding-mode={rounding_mode}") + cmd.extend(extra_flags or []) + cmd.append(sim_bin) + + with open(os.path.join(run_dir, "sim.out"), "w") as f: + result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False) + + if result.returncode != 0: + tag = rounding_mode or "vprec" + raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out") + + os.makedirs(run_dir, exist_ok=True) + for fn in os.listdir(os.path.join(tmpdir, "D")): + shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) + + +def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list: + """Run --check-cancellation at the given bit threshold; return [(fname, line)] + of MFC cancellation sites (subtractions losing >= `threshold` significant bits).""" + tag = f"cancellation_{threshold}" + run_dir = os.path.join(work_dir, tag) + os.makedirs(run_dir, exist_ok=True) + gen_path = os.path.join(run_dir, "cancel_gen.txt") + flags = [ + "--check-cancellation=yes", + f"--cc-threshold-double={threshold}", + f"--cc-gen-file={gen_path}", + ] + try: + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) + except MFCException: + pass + raw = _parse_cancel_gen(gen_path) + filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] + skipped = len(raw) - len(filtered) + if skipped and threshold == 10: + cons.print(f" 
[dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") + return filtered + + +def _run_mca_samples( + case: dict, + verrou_bin: str, + sim_bin: str, + work_dir: str, + ref_dir: str, + n_mca: int, +) -> tuple: + """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound).""" + compare = case["compare"] + ref_scale = _max_abs_np(ref_dir, compare) + max_dev = 0.0 + flags = ["--backend=mcaquad", "--mca-mode=mca"] + for i in range(n_mca): + run_dir = os.path.join(work_dir, f"mca_{i:02d}") + os.makedirs(run_dir, exist_ok=True) + try: + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) + max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) + except MFCException: + pass + sig_bits = None + if max_dev > 0.0 and ref_scale > 0.0: + sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale)))) + return max_dev, sig_bits + + +def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list: + """Run with --check-max-float=yes; return [(fname, line)] of overflow sites.""" + run_dir = os.path.join(work_dir, "float_max") + os.makedirs(run_dir, exist_ok=True) + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="nearest", + extra_flags=["--check-max-float=yes"], + ) + except MFCException: + pass + return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float") + + +def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: + """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref.""" + run_dir = os.path.join(work_dir, "float_proxy") + os.makedirs(run_dir) + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") + return _max_diff_np(ref_dir, run_dir, case["compare"]) + + +def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list: + """Run at each mantissa-bit level. 
Returns [(bits, dev), ...].""" + results = [] + for bits in VPREC_MANTISSA_BITS: + run_dir = os.path.join(work_dir, f"vprec_{bits}") + os.makedirs(run_dir) + flags = [ + "--backend=vprec", + "--vprec-mode=full", + f"--vprec-precision-binary64={bits}", + "--vprec-range-binary64=11", + ] + try: + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) + dev = _max_diff_np(ref_dir, run_dir, case["compare"]) + except MFCException: + dev = float("inf") + results.append((bits, dev)) + return results + + +def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): + """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. + + verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via + VERROU_EXCLUDE / VERROU_SOURCE environment variables. For test runs, we use + --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices). + For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the + environment — we honour that so the reference is a stable nearest-rounding baseline + to compare against. CLI --rounding-mode would override the env var and break the + reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. + """ + content = textwrap.dedent(f"""\ + #!/usr/bin/env bash + # Generated by mfc.sh fp-stability — do not edit by hand. + VERROU_BIN={verrou_bin!r} + SIM_BIN={sim_bin!r} + IC_DIR={ic_dir!r} + + RUNDIR="$1" + TMPDIR_RUN=$(mktemp -d) + trap 'rm -rf "$TMPDIR_RUN"' EXIT + + cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all" + cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp" + for fname in indices.dat pre_time_data.dat io_time_data.dat; do + [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/" + done + mkdir -p "$TMPDIR_RUN/D" + + # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and + # leaves it unset for test runs. Defaulting to float gives deterministic + # test steps while letting the reference use nearest-rounding. 
+ ROUND="${{VERROU_ROUNDING_MODE:-float}}" + + # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation). + # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to). + # Forward them as valgrind flags when set. + EXTRA="" + [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE" + [ -n "${{VERROU_SOURCE:-}}" ] && EXTRA="$EXTRA --source=$VERROU_SOURCE" + + cd "$TMPDIR_RUN" + "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN" + rc=$? + + [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/" + exit $rc + """) + with open(path, "w") as f: + f.write(content) + os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + +def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): + """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line. + + verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR + Exits 0 (stable) or 1 (unstable) based on threshold. + """ + content = textwrap.dedent(f"""\ + #!/usr/bin/env python3 + # Generated by mfc.sh fp-stability — do not edit by hand. 
+ import sys, os, numpy as np + + COMPARE_FILES = {compare_files!r} + THRESHOLD = {threshold!r} + + ref_dir, run_dir = sys.argv[1], sys.argv[2] + max_dev = 0.0 + for fname in COMPARE_FILES: + ref_p = os.path.join(ref_dir, fname) + run_p = os.path.join(run_dir, fname) + if not os.path.exists(ref_p) or not os.path.exists(run_p): + print(f"MISSING: {{fname}}") + sys.exit(1) + ref = np.loadtxt(ref_p)[:, 1] + run = np.loadtxt(run_p)[:, 1] + dev = float(np.max(np.abs(ref - run))) + max_dev = max(max_dev, dev) + + print(f"max_dev={{max_dev:.3e}} threshold={{THRESHOLD:.0e}}") + sys.exit(0 if max_dev <= THRESHOLD else 1) + """) + with open(path, "w") as f: + f.write(content) + os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + +def _dd_env(verrou_bin: str) -> dict: + """Environment with PYTHONPATH set for verrou_dd_* imports.""" + py_pkg = _verrou_pythonpath(verrou_bin) + env = os.environ.copy() + if py_pkg: + existing = env.get("PYTHONPATH", "") + env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing])) + return env + + +def _run_dd_tool( + dd_bin: str, + dd_dir: str, + dd_run_sh: str, + dd_cmp_py: str, + env: dict, + log_name: str, + summary_subdir: str, + label: str, +) -> list: + """Generic runner for verrou_dd_sym / verrou_dd_line. 
Returns raw summary lines.""" + log_file = os.path.join(dd_dir, log_name) + cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py] + cons.print(f" [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]") + with open(log_file, "w") as f: + result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False) + summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary") + summary_lines = [] + if result.returncode == 0: + if os.path.isfile(summary_path): + with open(summary_path) as f: + summary_lines = f.readlines() + cons.print(f" [bold yellow]{label} result[/bold yellow]:") + for line in summary_lines: + cons.print(f" {line.rstrip()}") + else: + cons.print(f" [dim]{label} done; see {log_file}[/dim]") + else: + cons.print(f" [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})") + return summary_lines + + +def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float): + """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their + paths. 
The threshold falls back to _DD_FALLBACK_THRESHOLD when unset.""" + os.makedirs(dd_dir, exist_ok=True) + dd_run_sh = os.path.join(dd_dir, "dd_run.sh") + dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") + _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) + _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) + return dd_run_sh, dd_cmp_py + + +def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: + """Run verrou_dd_sym; return list of responsible symbol names.""" + dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym") + if not dd_bin: + cons.print(" [dim]verrou_dd_sym not found; skipping delta-debug[/dim]") + return [] + + dd_dir = os.path.join(log_dir, case["name"]) + dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) + _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") + cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") + return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) + + +def _run_dd_line( + case: dict, + verrou_bin: str, + sim_bin: str, + work_dir: str, + log_dir: str, + threshold: float = None, +) -> list: + """Run verrou_dd_line; return [{path, start, end, macro}] location dicts.""" + dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line") + if not dd_bin: + cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") + return [] + + dd_dir = os.path.join(log_dir, case["name"]) + dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) + _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") + return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) + + +def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag): + """Perturb only the 
lines in src_lines (deterministic float mode) and return + the L-inf deviation from the nearest-rounding reference, or None on failure.""" + src_path = os.path.join(conf_dir, f"source_{tag}.txt") + with open(src_path, "w") as fh: + fh.writelines(src_lines) + run_dir = os.path.join(conf_dir, f"perturb_{tag}") + os.makedirs(run_dir, exist_ok=True) + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="float", + extra_flags=[f"--source={src_path}"], + ) + except MFCException: + return None + return _max_diff_np(ref_dir, run_dir, compare) + + +def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path): + """Run nearest-rounding with --gen-source to capture the symbol-correct + executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure.""" + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="nearest", + extra_flags=[f"--gen-source={gen_path}"], + ) + except MFCException: + return None + if not os.path.isfile(gen_path): + return None + with open(gen_path) as fh: + return fh.readlines() + + +def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy): + """Positive control for dd_line: perturb ONLY the suspect lines and confirm + the instability reproduces, then rank each line by its individual share. + + Verrou's --source matches file+line+symbol (not file+line alone), so we first + capture the symbol-correct executed source lines via --gen-source, filter them + to the suspect set, then run deterministic float-mode restricted to just those + lines. If the suspect-only deviation reaches dd_threshold the attribution is + confirmed; if it stays near zero the reported lines do not actually carry the + instability (e.g. a #:for-expanded line blamed for the wrong instance). + + Each line is then perturbed alone so its 'share_dev' (and 'share' of + float_proxy) shows which computation dominates. 
+ + Returns (confirmed, suspect_dev, ranked_locs). + """ + if not dd_line_locs: + return None, None, dd_line_locs + conf_dir = os.path.join(work_dir, "confirm") + os.makedirs(conf_dir, exist_ok=True) + gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt")) + if gen_lines is None: + return None, None, dd_line_locs + compare = case["compare"] + + # whole-set positive control + suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs] + set_src = _build_source_filter(gen_lines, suspects) + if not set_src: + # none of the reported lines performs an instrumented FP op -> not reproduced + return False, 0.0, dd_line_locs + set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set") + confirmed = _confirm_decision(set_dev, dd_threshold) + + # per-line ranking (a single line trivially owns the whole set deviation) + if len(dd_line_locs) == 1: + dd_line_locs[0]["share_dev"] = set_dev + else: + for i, loc in enumerate(dd_line_locs): + one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])]) + loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0 + ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev)) + return confirmed, set_dev, ranked + + +def _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line): + """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot. + + Uses a precision binary (built with --fp-precision-lines) in which each + expanded instance of hotspot_file:hotspot_line compiles to a distinct + physical .f90 line. The sidecar enumerates those physical lines; each is + perturbed alone (float mode, vs the precision binary's own nearest-rounding + reference) so the dominant instance is identified. 
+ + Returns a list of {instance, physline, dev, snippet} sorted most-flagrant + first (empty if no sidecar / no instrumented instances). + """ + from . import fp_precision_lines as fpl + + sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin) + sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file)) + instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line) + if not instances: + return [] + + prec_dir = os.path.join(work_dir, "precision") + ref_dir = os.path.join(prec_dir, "ref") + os.makedirs(ref_dir, exist_ok=True) + try: + _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest") + except MFCException: + return [] + gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt")) + if gen_lines is None: + return [] + + f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90") + compare = case["compare"] + results = [] + for physline, instance in instances: + src = _build_source_filter(gen_lines, [(f90_file, physline, physline)]) + if not src: + continue # this instance performs no instrumented FP op + dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}") + results.append( + { + "instance": instance, + "physline": physline, + "dev": dev or 0.0, + "snippet": _read_source_line(f90_file, physline).strip(), + } + ) + results.sort(key=lambda r: r["dev"], reverse=True) + return results diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index e89694d19b..4630cd3db6 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -6,7 +6,7 @@ label results, so they can run without Verrou or built binaries. 
""" -from mfc.fp_stability import ( +from mfc.fp_stability_metrics import ( MIN_SIG_BITS, _build_source_filter, _cancellation_by_file, From 982ec890e30b6aca95aa8d09a7e4344f82dc2240 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 09:55:34 -0400 Subject: [PATCH 09/25] fp-stability: remove Tier 2 per-instance disambiguation entirely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per the weight review, the precision-build-based per-instance disambiguation was the heaviest piece (its own module + a build flag + CMake plumbing + tests) for the narrowest trigger (fires only when the most-flagrant hotspot is also inside a #:for/#:def expansion). Removed in full: - deleted toolchain/mfc/fp_precision_lines.py and its tests; deleted _disambiguate_instances - reverted CMakeLists.txt and build.py to upstream (no MFC_FP_PRECISION_LINES option, no marker-strip step, no -D flag); dropped the --fp-precision-lines build arg and the --precision-sim-binary fp-stability arg - removed the E3 disambiguation stage, its docstring section, and the per-instance summary display Kept: the lightweight '#:for/#:def-expanded — may represent multiple instances' hotspot warning (cheap, honest, separate from the disambiguation machinery). 57 toolchain tests, ruff, precheck all 7 green; CMakeLists.txt and build.py are byte-identical to upstream. 
--- CMakeLists.txt | 34 +------ toolchain/mfc/build.py | 1 - toolchain/mfc/cli/commands.py | 14 --- toolchain/mfc/fp_precision_lines.py | 123 ----------------------- toolchain/mfc/fp_stability.py | 35 ------- toolchain/mfc/fp_stability_report.py | 3 - toolchain/mfc/fp_stability_runners.py | 52 ---------- toolchain/mfc/test_fp_precision_lines.py | 112 --------------------- 8 files changed, 2 insertions(+), 372 deletions(-) delete mode 100644 toolchain/mfc/fp_precision_lines.py delete mode 100644 toolchain/mfc/test_fp_precision_lines.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 532c377702..83bbb8fe0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,6 @@ option(MFC_DOCUMENTATION "Build documentation" OFF option(MFC_ALL "Build everything" OFF) option(MFC_SINGLE_PRECISION "Build single precision" OFF) option(MFC_MIXED_PRECISION "Build mixed precision" OFF) -option(MFC_FP_PRECISION_LINES "Strip fypp markers for per-instance fp-stability attribution" OFF) if (MFC_ALL) set(MFC_PRE_PROCESS ON FORCE) @@ -434,24 +433,8 @@ macro(HANDLE_SOURCES target useCommon) cmake_path(GET fpp FILENAME fpp_filename) set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90") - # In a precision-lines build, Fypp writes a marked intermediate that is - # then stripped of its line markers (so each expanded instance compiles - # to a distinct physical line) before compilation; the strip step emits a - # .linemap.json sidecar. Otherwise Fypp writes ${f90} directly. Only the - # simulation target is analyzed by fp-stability, so pre/post_process are - # always built normally. 
- set(_precision_lines OFF) - if (MFC_FP_PRECISION_LINES AND "${target}" STREQUAL "simulation") - set(_precision_lines ON) - endif() - if (_precision_lines) - set(f90_out "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.marked.f90") - else() - set(f90_out "${f90}") - endif() - add_custom_command( - OUTPUT ${f90_out} + OUTPUT ${f90} COMMAND ${FYPP_EXE} -m re -I "${CMAKE_BINARY_DIR}/include/${target}" -I "${${target}_DIR}/include" @@ -467,25 +450,12 @@ macro(HANDLE_SOURCES target useCommon) --line-length=999 --line-numbering-mode=nocontlines ${FYPP_GCOV_OPTS} - "${fpp}" "${f90_out}" + "${fpp}" "${f90}" DEPENDS "${fpp};${${target}_incs}" COMMENT "Preprocessing (Fypp) ${fpp_filename}" VERBATIM ) - if (_precision_lines) - add_custom_command( - OUTPUT ${f90} - COMMAND ${Python3_EXECUTABLE} - "${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py" - "${f90_out}" "${f90}" - "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.linemap.json" - DEPENDS "${f90_out};${CMAKE_SOURCE_DIR}/toolchain/mfc/fp_precision_lines.py" - COMMENT "Stripping markers (fp-precision-lines) ${fpp_filename}" - VERBATIM - ) - endif() - list(APPEND ${target}_SRCs ${f90}) endforeach() endmacro() diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 01a0c8ece3..01efb1a9b1 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -421,7 +421,6 @@ def configure(self, case: Case): flags.append(f"-DMFC_GCov={'ON' if ARG('gcov') else 'OFF'}") flags.append(f"-DMFC_Unified={'ON' if ARG('unified') else 'OFF'}") flags.append(f"-DMFC_Fastmath={'ON' if ARG('fastmath') else 'OFF'}") - flags.append(f"-DMFC_FP_PRECISION_LINES={'ON' if ARG('fp_precision_lines') else 'OFF'}") command = ["cmake"] + flags + ["-S", cmake_dirpath, "-B", build_dirpath] diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index cff47c3ecf..54bbff4641 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -141,13 +141,6 @@ default=False, dest="deps_only", ), - 
Argument( - name="fp-precision-lines", - help="(fp-stability) Strip fypp line markers so each expanded instance gets a distinct line; emits sidecars for per-instance attribution.", - action=ArgAction.STORE_TRUE, - default=False, - dest="fp_precision_lines", - ), ], examples=[ Example("./mfc.sh build", "Build all default targets (CPU)"), @@ -945,13 +938,6 @@ default=None, metavar="PATH", ), - Argument( - name="precision-sim-binary", - help="Path to a simulation binary built with --fp-precision-lines. When given, macro-ambiguous hotspots are disambiguated to the individual fypp-expanded instance.", - default=None, - dest="precision_sim_binary", - metavar="PATH", - ), Argument( name="samples", short="N", diff --git a/toolchain/mfc/fp_precision_lines.py b/toolchain/mfc/fp_precision_lines.py deleted file mode 100644 index 6dc1df04c1..0000000000 --- a/toolchain/mfc/fp_precision_lines.py +++ /dev/null @@ -1,123 +0,0 @@ -"""FP-stability precision-lines transform (Tier 2). - -A fypp #:for/#:def expansion emits many generated computations that all carry -the same cpp line marker (`# N "file.fpp"`), so DWARF — and therefore Verrou — -collapse every expanded instance onto one .fpp line. This transform removes the -fypp line markers from a generated .f90 so the compiler attributes each statement -to the generated file's own physical line (which *is* distinct per expanded -instance), and records a sidecar mapping each surviving physical line back to -(file, original .fpp line, instance index). Genuine cpp directives -(#if/#define/#endif/...) are preserved so conditional compilation is unchanged. - -When the stripped .f90 is compiled, Verrou attributes — and fp-stability ranks -and isolates via --source — per expanded instance rather than per source line. -Used only by a dedicated precision build (MFC_FP_PRECISION_LINES); the normal -build is unaffected. 
The mechanism (stripped markers -> instance-distinct -physical-line attribution -> per-instance Verrou --source isolation, surviving -the cpp #if layer) is validated against gfortran + Verrou. -""" - -import json -import os -import re - -# A fypp line marker: "# <N> "<file>"" possibly with trailing flags. A cpp -# conditional/define directive (#if, #define, #endif, ...) has a word, not a -# number, after the '#', so the two are unambiguous. -_FYPP_MARKER = re.compile(r'^#\s+(\d+)\s+"([^"]+)"') -# Any other preprocessor directive line (kept, but it is not a .fpp source line, -# so it neither consumes a source-line increment nor gets a sidecar entry). -_CPP_DIRECTIVE = re.compile(r"^\s*#") - - -def strip_markers(lines: list) -> tuple: - """Strip fypp line markers; return (output_lines, sidecar). - - sidecar maps each 1-based physical output line number to - {"file", "line", "instance"}: the .fpp file, the .fpp line that physical - line came from (auto-incremented within a marker region), and how many times - that marker's (file, line) had been seen before (0 = first/real occurrence, - >=1 = an expanded instance). - """ - seen = {} - out = [] - sidecar = {} - cur_file = None - cur_line = None - cur_instance = None - for raw in lines: - m = _FYPP_MARKER.match(raw) - if m: - cur_file = m.group(2) - cur_line = int(m.group(1)) - cur_instance = seen.get((cur_file, cur_line), 0) - seen[(cur_file, cur_line)] = cur_instance + 1 - continue # drop the marker line - out.append(raw) - if cur_file is None or _CPP_DIRECTIVE.match(raw): - # cpp directives are kept verbatim but are not .fpp source lines - continue - sidecar[len(out)] = {"file": cur_file, "line": cur_line, "instance": cur_instance} - cur_line += 1 # subsequent physical source lines map to the next .fpp line - return out, sidecar - - -def transform_file(in_path: str, out_path: str, sidecar_path: str) -> int: - """Strip a generated .f90 to its precision-lines variant.
- - Reads in_path, writes the marker-stripped source to out_path and the sidecar - JSON to sidecar_path. Returns the number of mapped physical lines. - """ - with open(in_path) as fh: - lines = fh.readlines() - out, sidecar = strip_markers(lines) - with open(out_path, "w") as fh: - fh.writelines(out) - with open(sidecar_path, "w") as fh: - json.dump({str(k): v for k, v in sidecar.items()}, fh) - return len(sidecar) - - -# --- consumption side (Tier 2): locating and querying the sidecars --- - - -def sidecar_dir_for_binary(sim_bin: str) -> str: - """Map a precision simulation binary path to its sidecar directory. - - .../build/install/<hash>/bin/simulation -> .../build/staging/<hash>/fypp/simulation - """ - bin_dir = os.path.dirname(os.path.abspath(sim_bin)) # .../install/<hash>/bin - hash_dir = os.path.dirname(bin_dir) # .../install/<hash> - cfg_hash = os.path.basename(hash_dir) - build_root = os.path.dirname(os.path.dirname(hash_dir)) # .../build - return os.path.join(build_root, "staging", cfg_hash, "fypp", "simulation") - - -def sidecar_path(sidecar_dir: str, fpp_file: str) -> str: - """Sidecar JSON path for a .fpp file: <sidecar_dir>/<basename>.linemap.json.""" - return os.path.join(sidecar_dir, os.path.basename(fpp_file) + ".linemap.json") - - -def load_sidecar(path: str) -> dict: - """Load a sidecar JSON into {physical_line:int -> {file, line, instance}}.""" - if not os.path.isfile(path): - return {} - with open(path) as fh: - raw = json.load(fh) - return {int(k): v for k, v in raw.items()} - - -def instances_of(sidecar: dict, fpp_file: str, fpp_line: int) -> list: - """Return [(physical_line, instance), ...]
(sorted by physical line) for every - expanded instance of fpp_file:fpp_line, matched by basename.""" - base = os.path.basename(fpp_file) - hits = [(physline, entry["instance"]) for physline, entry in sidecar.items() if os.path.basename(entry["file"]) == base and entry["line"] == fpp_line] - return sorted(hits) - - -if __name__ == "__main__": - import sys - - if len(sys.argv) != 4: - sys.exit("usage: fp_precision_lines.py <in_path> <out_path> <sidecar_path>") - transform_file(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 0579502910..c84b6e97ba 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -48,15 +48,6 @@ One run with --check-max-float=yes; reports locations where a double→float conversion would overflow to ±Inf. -I. Per-instance disambiguation (--precision-sim-binary PATH; opt-in) - A fypp #:for/#:def expansion collapses many generated computations onto one - .fpp line, so a macro-ambiguous hotspot cannot be pinned to a single runtime - instance. Given a simulation binary built with `--fp-precision-lines` (markers - stripped so each instance is a distinct line, plus .linemap.json sidecars), the - most flagrant macro-ambiguous hotspot is disambiguated: each expanded instance - is perturbed alone on the precision binary, ranking them to the responsible - instance and showing its concrete generated code. - Logs are saved to fp-stability-logs/ and uploaded as CI artifacts. On GitHub Actions: a step summary table and ::warning:: file annotations are emitted automatically so failing source lines appear in the PR diff.
@@ -95,7 +86,6 @@ _emit_github_summary, ) from .fp_stability_runners import ( - _disambiguate_instances, _find_binary, _find_verrou, _run_cancellation_check, @@ -419,7 +409,6 @@ def _run_case( run_cancellation: bool, run_mca: bool, run_float_max: bool, - prec_sim_bin: str = None, ) -> dict: name = case["name"] compare = case["compare"] @@ -542,24 +531,6 @@ def _run_case( except Exception as exc: cons.print(f" [bold yellow]dd_line confirmation error[/bold yellow]: {exc}") - # --- E3: per-instance disambiguation of the most flagrant macro-ambiguous hotspot --- - if prec_sim_bin and result["dd_line_locs"]: - macro_loc = next((loc for loc in result["dd_line_locs"] if loc.get("macro")), None) - if macro_loc: - cons.print(f" [dim]disambiguating fypp instances of {macro_loc['path']}:{macro_loc['start']} (precision binary)...[/dim]") - try: - insts = _disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, macro_loc["path"], macro_loc["start"]) - macro_loc["instances"] = insts - if insts and insts[0]["dev"] > 0: - win = insts[0] - cons.print(f" flagrant instance: #{win['instance']} (.f90:{win['physline']}, dev={win['dev']:.3e}) {win['snippet']}") - elif insts: - cons.print(f" [dim]{len(insts)} instance(s) enumerated; none perturbed measurably (hotspot inert)[/dim]") - else: - cons.print(" [dim]no sidecar instances found for this hotspot[/dim]") - except Exception as exc: - cons.print(f" [bold yellow]instance disambiguation error[/bold yellow]: {exc}") - # --- F: cancellation detection --- if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") @@ -638,9 +609,6 @@ def fp_stability(): run_cancellation = not ARG("no_cancellation") run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") - prec_sim_bin = ARG("precision_sim_binary") - if prec_sim_bin and not os.path.isfile(prec_sim_bin): - raise MFCException(f"precision simulation binary not found: {prec_sim_bin}") log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs") 
os.makedirs(log_dir, exist_ok=True) @@ -650,8 +618,6 @@ def fp_stability(): cons.print(f" verrou: {verrou_bin}") cons.print(f" simulation: {sim_bin}") cons.print(f" pre_process: {pp_bin}") - if prec_sim_bin: - cons.print(f" precision: {prec_sim_bin} (per-instance disambiguation)") cons.print(f" samples: {n_samples}") features = [] if run_float: @@ -690,7 +656,6 @@ def fp_stability(): run_cancellation, run_mca, run_float_max, - prec_sim_bin, ) except MFCException as exc: cons.print(f" [bold red]ERROR[/bold red]: {exc}") diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index f0583002f2..05d31d0c9d 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -201,9 +201,6 @@ def _emit_github_summary(results: list, n_samples: int): tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") suffix = f" — {', '.join(tags)}" if tags else "" md.append(f"- `{where}`{suffix}") - for inst in loc.get("instances", [])[:8]: - flag = " ⟵ flagrant" if inst is loc["instances"][0] and inst["dev"] > 0 else "" - md.append(f" - instance #{inst['instance']} (`.f90:{inst['physline']}`, dev={inst['dev']:.2e}){flag}: `{inst['snippet']}`") snippet = _get_source_context(rel_path, start) if snippet: md.append(" ```fortran") diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index c9b7ee375b..4146baab11 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -28,7 +28,6 @@ _parse_rddmin_syms, _parse_vg_error_locs, _rank_locs, - _read_source_line, ) from .printer import cons @@ -477,54 +476,3 @@ def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0 ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev)) return confirmed, set_dev, ranked - - -def 
_disambiguate_instances(case, prec_sim_bin, verrou_bin, work_dir, hotspot_file, hotspot_line): - """Rank the individual fypp-expanded instances of a macro-ambiguous hotspot. - - Uses a precision binary (built with --fp-precision-lines) in which each - expanded instance of hotspot_file:hotspot_line compiles to a distinct - physical .f90 line. The sidecar enumerates those physical lines; each is - perturbed alone (float mode, vs the precision binary's own nearest-rounding - reference) so the dominant instance is identified. - - Returns a list of {instance, physline, dev, snippet} sorted most-flagrant - first (empty if no sidecar / no instrumented instances). - """ - from . import fp_precision_lines as fpl - - sidecar_dir = fpl.sidecar_dir_for_binary(prec_sim_bin) - sidecar = fpl.load_sidecar(fpl.sidecar_path(sidecar_dir, hotspot_file)) - instances = fpl.instances_of(sidecar, hotspot_file, hotspot_line) - if not instances: - return [] - - prec_dir = os.path.join(work_dir, "precision") - ref_dir = os.path.join(prec_dir, "ref") - os.makedirs(ref_dir, exist_ok=True) - try: - _run_simulation_verrou(verrou_bin, prec_sim_bin, work_dir, ref_dir, rounding_mode="nearest") - except MFCException: - return [] - gen_lines = _capture_gen_source(verrou_bin, prec_sim_bin, work_dir, prec_dir, os.path.join(prec_dir, "gen_source.txt")) - if gen_lines is None: - return [] - - f90_file = os.path.join(sidecar_dir, os.path.basename(hotspot_file) + ".f90") - compare = case["compare"] - results = [] - for physline, instance in instances: - src = _build_source_filter(gen_lines, [(f90_file, physline, physline)]) - if not src: - continue # this instance performs no instrumented FP op - dev = _source_perturb_dev(verrou_bin, prec_sim_bin, work_dir, ref_dir, prec_dir, src, compare, f"inst{instance:02d}") - results.append( - { - "instance": instance, - "physline": physline, - "dev": dev or 0.0, - "snippet": _read_source_line(f90_file, physline).strip(), - } - ) - results.sort(key=lambda r: r["dev"], 
reverse=True) - return results diff --git a/toolchain/mfc/test_fp_precision_lines.py b/toolchain/mfc/test_fp_precision_lines.py deleted file mode 100644 index ddb139af2d..0000000000 --- a/toolchain/mfc/test_fp_precision_lines.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Unit tests for the fp-stability precision-lines transform (Tier 2, P1). - -A fypp #:for/#:def expansion re-marks many generated computations with the same -cpp line marker (`# N "file.fpp"`), so DWARF — and Verrou — collapse every -expanded instance onto one .fpp line. strip_markers removes the fypp line -markers so the compiler attributes to the generated .f90's own (instance- -distinct) physical lines, and emits a sidecar mapping each surviving physical -line back to (file, original .fpp line, instance index). Genuine cpp directives -(#if/#define/...) are kept so conditional compilation still works. -""" - -import os - -from mfc.fp_precision_lines import ( - instances_of, - sidecar_dir_for_binary, - sidecar_path, - strip_markers, -) - - -def test_strips_fypp_markers_and_keeps_code(): - out, sidecar = strip_markers(['# 700 "real.fpp"\n', " x = a - b\n"]) - assert out == [" x = a - b\n"] - assert sidecar == {1: {"file": "real.fpp", "line": 700, "instance": 0}} - - -def test_keeps_cpp_conditional_directives(): - lines = ['# 700 "real.fpp"\n', "#if defined(FOO)\n", " x = 1\n", "#endif\n"] - out, _ = strip_markers(lines) - assert out == ["#if defined(FOO)\n", " x = 1\n", "#endif\n"] - - -def test_repeated_marker_increments_instance(): - lines = ['# 700 "real.fpp"\n', " s1 = x\n", '# 700 "real.fpp"\n', " s2 = y\n"] - out, sidecar = strip_markers(lines) - assert out == [" s1 = x\n", " s2 = y\n"] - assert sidecar[1] == {"file": "real.fpp", "line": 700, "instance": 0} - assert sidecar[2] == {"file": "real.fpp", "line": 700, "instance": 1} - - -def test_distinguishes_fypp_marker_from_cpp_directive(): - # no fypp line markers here -> nothing stripped, no origin recorded - lines = ["#define X 1\n", "#if X\n", " a = 
1\n", "#endif\n"] - out, sidecar = strip_markers(lines) - assert out == lines - assert sidecar == {} - - -def test_source_line_auto_increments_within_a_region(): - lines = ['# 700 "real.fpp"\n', " a = 1\n", " b = 2\n"] - _, sidecar = strip_markers(lines) - assert sidecar[1]["line"] == 700 - assert sidecar[2]["line"] == 701 - - -# --- Tier 2 consumption: locating + querying sidecars --- - - -def test_instances_of_returns_physical_lines_for_a_source_line(): - sidecar = { - 7: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 0}, - 11: {"file": "/abs/src/simulation/m_weno.fpp", "line": 241, "instance": 1}, - 20: {"file": "/abs/src/simulation/m_weno.fpp", "line": 999, "instance": 0}, - } - # matched by basename; the repo-relative path from a dd_line hotspot still matches - assert instances_of(sidecar, "src/simulation/m_weno.fpp", 241) == [(7, 0), (11, 1)] - - -def test_instances_of_empty_when_no_match(): - sidecar = {7: {"file": "m_weno.fpp", "line": 241, "instance": 0}} - assert instances_of(sidecar, "m_weno.fpp", 999) == [] - assert instances_of(sidecar, "m_other.fpp", 241) == [] - - -def test_instances_of_sorted_by_physical_line(): - sidecar = { - 30: {"file": "f.fpp", "line": 5, "instance": 2}, - 10: {"file": "f.fpp", "line": 5, "instance": 0}, - 20: {"file": "f.fpp", "line": 5, "instance": 1}, - } - assert instances_of(sidecar, "f.fpp", 5) == [(10, 0), (20, 1), (30, 2)] - - -def test_sidecar_dir_for_binary_maps_install_to_staging(): - got = sidecar_dir_for_binary("/x/build/install/HASH/bin/simulation") - assert got == os.path.join("/x/build/staging/HASH/fypp/simulation") - - -def test_sidecar_path_uses_fpp_basename_and_linemap_suffix(): - got = sidecar_path("/x/staging/HASH/fypp/simulation", "src/simulation/m_weno.fpp") - assert got == os.path.join("/x/staging/HASH/fypp/simulation", "m_weno.fpp.linemap.json") - - -def test_cpp_directives_do_not_consume_a_source_line_increment(): - # the #else line must not advance the .fpp source line nor get a 
sidecar entry - lines = ['# 700 "real.fpp"\n', " a = 1\n", "#else\n", " b = 2\n"] - out, sidecar = strip_markers(lines) - assert out == [" a = 1\n", "#else\n", " b = 2\n"] - assert sidecar[1]["line"] == 700 # a = 1 - assert 2 not in sidecar # #else: kept, but not a source line - assert sidecar[3]["line"] == 701 # b = 2 (not 702) - - -def test_sidecar_line_numbers_are_physical_output_lines(): - # output physical line numbers (1-based, after stripping) are the keys - lines = ['# 10 "f"\n', " a = 1\n", '# 20 "f"\n', " b = 2\n", " c = 3\n"] - out, sidecar = strip_markers(lines) - assert out == [" a = 1\n", " b = 2\n", " c = 3\n"] - assert sidecar[1] == {"file": "f", "line": 10, "instance": 0} - assert sidecar[2] == {"file": "f", "line": 20, "instance": 0} - assert sidecar[3] == {"file": "f", "line": 21, "instance": 0} From 2276eb1926df04003a787a81829bbcf6e69ca70f Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 10:26:58 -0400 Subject: [PATCH 10/25] fp-stability: accept a user case.py (positional, like run), with a feasibility guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following the native convention (run/validate/viz take the case .py as a positional 'input'), fp-stability now does too — './mfc.sh fp-stability my_case.py' analyzes your case instead of the built-in suite; omitting it runs the suite as before. It loads the case via the shared loader (run.input.load), runs it as a single case, and auto-detects the files to diff from the reference run (_autodetect_compare: conserved-var .dat at the final step, prim fallback). Output is forced to serial .dat I/O (parallel_io=F) since the no-MPI binary is run as one process and the suite diffs serial files. Guard (Verrou is ~30x and the suite runs the sim many times): the case must be a small, short, single-process proxy — errors if cells > 100k or work (cells x t_step_stop) > 200k cell-steps, with guidance to coarsen. 
Validated end-to-end on a real case .py (auto-compare + sig-bits PASS + cancellation digits); guard correctly rejects 1D_sodshocktube (400k cell-steps). 60 toolchain tests, ruff, precheck all 7. --- toolchain/mfc/cli/commands.py | 11 ++++- toolchain/mfc/fp_stability.py | 61 ++++++++++++++++++++++++++- toolchain/mfc/fp_stability_metrics.py | 18 ++++++++ toolchain/mfc/test_fp_stability.py | 23 ++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 54bbff4641..32527ab166 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -919,6 +919,14 @@ " float-max --check-max-float detection of double→float overflow sites\n" ), include_common=["mfc_config", "verbose", "debug_log"], + positionals=[ + Positional( + name="input", + help="Optional case .py to analyze instead of the built-in suite (run as a single serial CPU process under Verrou; must be small/short).", + nargs="?", + completion=Completion(type=CompletionType.FILES_PY), + ), + ], arguments=[ Argument( name="sim-binary", @@ -997,7 +1005,8 @@ ), ], examples=[ - Example("./mfc.sh fp-stability", "Auto-discover binaries and run all cases"), + Example("./mfc.sh fp-stability", "Auto-discover binaries and run the built-in suite"), + Example("./mfc.sh fp-stability my_case.py", "Analyze your own case (small/short, serial, CPU)"), Example( "./mfc.sh fp-stability --sim-binary build/install/abc123/bin/simulation", "Specify simulation binary explicitly", diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index c84b6e97ba..1a2188a9ed 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -75,6 +75,7 @@ from .fp_stability_metrics import ( CANCEL_BIT_LEVELS, MIN_SIG_BITS, + _autodetect_compare, _cancellation_severity, _mark_cancellation, _max_abs_np, @@ -431,6 +432,15 @@ def _run_case( cons.print(" [dim]reference run (rounding=nearest)...[/dim]") 
_run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest") + # For a user case with no fixed compare list, diff whatever the reference + # run actually wrote (conserved vars at the final step). + if not compare: + compare = _autodetect_compare(os.listdir(ref_dir)) + case["compare"] = compare + if not compare: + raise MFCException("case produced no cons.*/prim.* output to compare (check t_step_save/t_step_stop and parallel_io)") + cons.print(f" [dim]comparing: {', '.join(compare)}[/dim]") + # --- A: random-rounding stability samples --- # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale), # vs one global floor (no per-case hand-tuned absolute threshold). @@ -587,6 +597,51 @@ def _run_case( return result +# Verrou is ~30x slower and the suite runs the simulation many times, so a user +# case must be a small, short, single-process proxy. Work = cells x time steps; +# both a huge grid and a long run are rejected (built-in cases are ~1k cell-steps). +FP_CASE_MAX_CELLS = 100_000 +FP_CASE_MAX_WORK = 200_000 # cells x t_step_stop + + +def _load_user_case(input_path: str) -> dict: + """Build a single fp-stability case from a user case .py. + + The case is run as ONE serial CPU process under Verrou (so it must be small + and short — a coarsened proxy of a production run, not the real thing); a grid + too large to be feasible errors. The output files to compare are auto-detected + from the reference run, so 'compare' is left empty here. + """ + from .run import input as run_input # lazy import: avoids a circular import + + params = run_input.load(input_path, None, {}, do_print=False).params + # Force serial .dat I/O: the suite runs the no-MPI binary as one process and + # diffs serial cons.*/prim.* files (not the parallel SILO/HDF5 path). 
+ params["parallel_io"] = "F" + m, n, p = (int(params.get(k, 0) or 0) for k in ("m", "n", "p")) + cells = (m + 1) * (n + 1) * (p + 1) + t_stop = int(params.get("t_step_stop", 0) or 0) + work = cells * max(t_stop, 1) + if cells > FP_CASE_MAX_CELLS: + raise MFCException(f"case has {cells:,} cells — too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).") + if work > FP_CASE_MAX_WORK: + raise MFCException( + f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) — too slow under " + f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)." + ) + stem = os.path.splitext(os.path.basename(input_path))[0] + if stem == "case": # examples/<name>/case.py — the dir name is more telling + stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem + return { + "name": stem, + "description": f"user case {input_path} ({cells} cells, run single-rank on CPU)", + "compare": [], # auto-detected from the reference run's output + "ill_cond": "", + "pre": params, + "sim": params, + } + + def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() if not verrou_bin or not os.path.isfile(verrou_bin): @@ -610,6 +665,8 @@ def fp_stability(): run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") + cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES + log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs") os.makedirs(log_dir, exist_ok=True) @@ -618,6 +675,8 @@ def fp_stability(): cons.print(f" verrou: {verrou_bin}") cons.print(f" simulation: {sim_bin}") cons.print(f" pre_process: {pp_bin}") + if ARG("input"): + cons.print(f" case: {ARG('input')} (single serial CPU run under Verrou)") cons.print(f" samples: {n_samples}") features = [] if run_float: @@ -640,7 +699,7 @@ def fp_stability(): start = time.time() results = [] - for case in CASES: + for case in cases_to_run: try: r = _run_case( case, diff --git
a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py index 01940618d5..f84f088db3 100644 --- a/toolchain/mfc/fp_stability_metrics.py +++ b/toolchain/mfc/fp_stability_metrics.py @@ -16,6 +16,24 @@ # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. VPREC_MANTISSA_BITS = [52, 23, 16, 10] +_OUTPUT_DAT = re.compile(r"^(cons|prim)\.\d+\.\d+\.(\d+)\.dat$") + + +def _autodetect_compare(filenames: list) -> list: + """Pick the D/ output files to diff for a user-supplied case: the conserved- + variable files at the latest written time step (falling back to primitive + files if none are written). Returns [] if the case produced no field output.""" + by_step = {} + for f in filenames: + m = _OUTPUT_DAT.match(os.path.basename(f)) + if m: + by_step.setdefault(int(m.group(2)), {"cons": [], "prim": []})[m.group(1)].append(os.path.basename(f)) + if not by_step: + return [] + last = by_step[max(by_step)] + return sorted(last["cons"] or last["prim"]) + + # Stability pass/fail (stage A) is scale-free: a case must retain at least this # many significant bits under random rounding (sig_bits = -log2(max_dev/scale)). # 24 ~= single precision. 
One global floor replaces per-case absolute thresholds diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 4630cd3db6..30fb2f0caa 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -8,6 +8,7 @@ from mfc.fp_stability_metrics import ( MIN_SIG_BITS, + _autodetect_compare, _build_source_filter, _cancellation_by_file, _cancellation_severity, @@ -242,6 +243,28 @@ def test_cancellation_severity_empty(): assert _cancellation_severity([]) == {} +# --- auto-detect which output files to compare (for a user case) --- + + +def test_autodetect_compare_picks_cons_at_latest_step(): + fns = [ + "cons.1.00.000000.dat", + "cons.1.00.000050.dat", + "cons.2.00.000050.dat", + "prim.1.00.000050.dat", + ] + assert _autodetect_compare(fns) == ["cons.1.00.000050.dat", "cons.2.00.000050.dat"] + + +def test_autodetect_compare_falls_back_to_prim_when_no_cons(): + fns = ["prim.1.00.000010.dat", "prim.3.00.000010.dat"] + assert _autodetect_compare(fns) == ["prim.1.00.000010.dat", "prim.3.00.000010.dat"] + + +def test_autodetect_compare_empty_when_no_field_output(): + assert _autodetect_compare(["indices.dat", "pre_time_data.dat", "foo.txt"]) == [] + + # --- scale-free pass/fail: significant bits retained --- From 3b662db38c1bc7e99ee9e6721573a1155a427de0 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 10:34:01 -0400 Subject: [PATCH 11/25] fp-stability: refresh --help description for case.py usage + sig-bits/cancellation reframe The --help prose was stale: it claimed 'PASS/FAIL against per-case thresholds' (now scale-free sig-bits), didn't mention the case.py positional / its serial-CPU constraints / the feasibility guard, and listed an outdated case set. Rewrote the description to cover: running on a built-in suite or a user case .py (with constraints + guard), the >= 24-bit scale-free pass criterion, and the analysis passes (dd confirmation/ranking, cancellation origins by digits lost). 
Also updated the module-docstring Usage. The positional INPUT and a case.py example were already shown in --help; this fixes the surrounding prose. --- toolchain/mfc/cli/commands.py | 26 ++++++++++++++------------ toolchain/mfc/fp_stability.py | 7 ++++++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 32527ab166..a6eae93846 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -898,23 +898,25 @@ name="fp-stability", help="Run floating-point stability tests using Verrou.", description=( - "Runs each registered test case N times under Verrou's random IEEE-754 " - "rounding mode and compares against a nearest-rounding reference run. " - "Reports the max L∞ deviation and PASS/FAIL against per-case thresholds.\n\n" + "Runs Verrou random-rounding stability analysis on a built-in suite of small " + "1-D cases, or — given a case .py (positional INPUT) — on your own case. Each " + "case is run N times under Verrou's random IEEE-754 rounding and compared " + "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must " + "retain at least ~24 significant bits (single precision) under random rounding " + "(no per-case thresholds).\n\n" + "With a case .py, that case is run as a SINGLE serial CPU process under Verrou " + "(~30x slower, and run many times), so it must be a small, short proxy — large " + "grids or long runs are rejected with guidance; serial .dat I/O is forced. " + "Example: ./mfc.sh fp-stability my_case.py\n\n" "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind " "(defaults to $HOME/.local/verrou). 
The simulation and pre_process " "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n" - "Test cases:\n" - " sod_standard 1-D standard Sod, p_L/p_R=10 (well-conditioned baseline)\n" - " sod_strong 1-D Sod, p_L/p_R=100,000 — HLLC xi-factor cancellation\n" - " water_stiffened 1-D water shock (pi_inf=4046) — pressure-recovery cancellation\n" - " air_water_interface 1-D air/water contact (two-fluid) — mixed-cell cancellation\n\n" - "Additional features (skip with --no-* flags):\n" + "Analysis passes (skip with --no-* flags):\n" " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" - " dd_sym verrou_dd_sym bisection to responsible functions (on failure)\n" - " dd_line verrou_dd_line bisection to responsible source lines (on failure)\n" - " cancellation --check-cancellation detection of catastrophic cancellation sites\n" + " dd_sym/dd_line verrou_dd bisection to responsible functions/lines, then a\n" + " --source positive control confirms + ranks them by sensitivity\n" + " cancellation --check-cancellation origins, ranked by significant digits lost\n" " mca-sigbits Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n" " float-max --check-max-float detection of double→float overflow sites\n" ), diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 1a2188a9ed..7dde83e2e3 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -59,9 +59,14 @@ - A serial pre_process binary (to generate initial conditions) Usage: - ./mfc.sh fp-stability + ./mfc.sh fp-stability # built-in 1-D suite + ./mfc.sh fp-stability my_case.py # your own case (small/short, serial, CPU) ./mfc.sh fp-stability --no-vprec --no-dd-line ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH + +A user case .py is run as a single serial CPU process under Verrou, so it must be +a small, short proxy (a feasibility guard rejects large 
grids / long runs); output +is forced to serial .dat I/O and the files to diff are auto-detected. """ import math From c6637a0f46f7a89e8ec52fc8efbee2a34ce847c4 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 11:19:17 -0400 Subject: [PATCH 12/25] =?UTF-8?q?fp-stability:=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20silent=20failures,=201-row=20crash,=20dead=20code,?= =?UTF-8?q?=20tests,=20comment=20rot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From a multi-agent PR review: - Silent failures (critical): a crashed cancellation/float-max/MCA run was reported as a clean result ('none detected' / 'no overflows' / 'dev=0.0'). The run helpers now log the failure and return None (cancellation/float-max) or a completed-sample count (MCA, distinct from measured zero); _run_case reports 'run failed' instead of a false all-clear. - Crash (important): np.loadtxt(...)[:,1] raised IndexError on a single-row .dat (reachable via a 1-cell user case), aborting the whole suite. Added _dat_column using np.atleast_2d; also fixed the generated dd_cmp.py oracle. - Dead code: removed _cancellation_by_file (superseded by the digits-lost severity view) and _stability_pass (the orchestrator inlines the comparison), plus their tests. - Tests: smoke tests for the CI-only report emitters (blank + populated result, and unconfirmed->::notice:: downgrade), _digits_left clamp, and #:block/#:call/unbalanced macro cases. - Comment rot: stage-E docstring now says confirmation is set-level (not per-line); dropped the non-existent 'per-file density' reference; fixed the '48 ~ full mantissa' note (53-bit). 45 tests, ruff, precheck all 7. 
--- toolchain/mfc/fp_stability.py | 73 +++++++++++-------- toolchain/mfc/fp_stability_metrics.py | 39 +++------- toolchain/mfc/fp_stability_runners.py | 37 ++++++---- toolchain/mfc/test_fp_stability.py | 101 ++++++++++++++++++-------- 4 files changed, 147 insertions(+), 103 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 7dde83e2e3..9047e77bc9 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -24,13 +24,14 @@ Each reported line is then *confirmed* by a positive control: --gen-source captures the symbol-correct executed lines, those are filtered to the suspect set, and a float-mode run with --source restricted to just them must - reproduce the instability. Lines that do not reproduce it are reported as - unconfirmed (downgraded from ::warning:: to ::notice::). Each line is then - perturbed alone and ranked by the share of the single-precision deviation it - reproduces. NOTE: this is a *sensitivity* measure — where reduced precision - most moves the output — and is typically dominated by the time integrator / - final accumulation, NOT by where cancellation originates. Stage F (and its - per-file density) is the cancellation-origin view; the two usually differ. + reproduce the instability. If perturbing the suspect set does not reproduce + it, the case's hotspots are reported as unconfirmed (downgraded from + ::warning:: to ::notice::) — this is a single set-level verdict, not per line. + Each line is then perturbed alone and ranked by the share of the single- + precision deviation it reproduces. NOTE: that share is a *sensitivity* + measure — where reduced precision most moves the output — typically dominated + by the time integrator / final accumulation, NOT by where cancellation + originates. Stage F is the cancellation-origin view; the two usually differ. 
Hotspots are cross-referenced against the stage-F cancellation sites and flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def expansion. @@ -550,23 +551,27 @@ def _run_case( if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") try: - # sweep bit thresholds to get per-site severity (bits lost) + # sweep bit thresholds to get per-site severity (bits lost); each + # run returns None if it failed (distinct from [] = ran, found none) level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS] - locs = level_sites[0][1] # lowest threshold = full list - bits = _cancellation_severity(level_sites) - result["cancellation_locs"] = locs - result["cancellation_bits"] = bits - if locs: - worst = max(bits.values()) if bits else 0 - cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits") + locs = next((s for lvl, s in level_sites if lvl == CANCEL_BIT_LEVELS[0]), None) + if locs is None: + cons.print(" [bold yellow]cancellation: detection run failed (see logs); not reported[/bold yellow]") else: - cons.print(" cancellation: none detected") - # cross-reference: label dd_line hotspots that sit on a cancellation site - if result["dd_line_locs"] and locs: - _mark_cancellation(result["dd_line_locs"], locs) - n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation")) - if n_xref: - cons.print(f" {n_xref} hotspot(s) coincide with a catastrophic-cancellation site") + bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None]) + result["cancellation_locs"] = locs + result["cancellation_bits"] = bits + if locs: + worst = max(bits.values()) if bits else 0 + cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits") + else: + cons.print(" cancellation: none detected") + # cross-reference: label dd_line hotspots that sit on a cancellation site + 
if result["dd_line_locs"] and locs: + _mark_cancellation(result["dd_line_locs"], locs) + n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation")) + if n_xref: + cons.print(f" {n_xref} hotspot(s) coincide with a catastrophic-cancellation site") except Exception as exc: cons.print(f" [bold yellow]cancellation check error[/bold yellow]: {exc}") @@ -574,11 +579,14 @@ def _run_case( if run_mca: cons.print(f" [dim]MCA significant-bits estimate (N={n_samples})...[/dim]") try: - mca_dev, mca_sigbits = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples) - result["mca_dev"] = mca_dev - result["mca_sigbits"] = mca_sigbits - bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a" - cons.print(f" MCA: dev={mca_dev:.3e} ({bits_str})") + mca_dev, mca_sigbits, n_ok = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples) + if n_ok == 0: + cons.print(f" [bold yellow]MCA: no samples completed (0/{n_samples}; see logs)[/bold yellow]") + else: + result["mca_dev"] = mca_dev + result["mca_sigbits"] = mca_sigbits + bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a" + cons.print(f" MCA: dev={mca_dev:.3e} ({bits_str}) [{n_ok}/{n_samples} samples]") except Exception as exc: cons.print(f" [bold yellow]MCA error[/bold yellow]: {exc}") @@ -587,11 +595,14 @@ def _run_case( cons.print(" [dim]float-max overflow check...[/dim]") try: locs = _run_float_max_check(verrou_bin, sim_bin, work_dir) - result["float_max_locs"] = locs - if locs: - cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)") + if locs is None: + cons.print(" [bold yellow]float-max: run failed (see logs); not reported[/bold yellow]") else: - cons.print(" float-max: no overflows") + result["float_max_locs"] = locs + if locs: + cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)") + else: + cons.print(" float-max: no overflows") except Exception as exc: cons.print(f" [bold 
yellow]float-max check error[/bold yellow]: {exc}") diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py index f84f088db3..cfb3b2c1fd 100644 --- a/toolchain/mfc/fp_stability_metrics.py +++ b/toolchain/mfc/fp_stability_metrics.py @@ -58,11 +58,6 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float: return -math.log2(max_dev / ref_scale) -def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool: - """A case passes when it retains at least `floor` significant bits.""" - return _sig_bits(max_dev, ref_scale) >= floor - - # Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. _LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) @@ -232,6 +227,14 @@ def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: return "\n".join(rows) +def _dat_column(path: str): + """Load column 1 (the field value) from an MFC .dat file, robust to a + single-row file (np.loadtxt returns 1-D then, which [:, 1] would crash on).""" + import numpy as np + + return np.atleast_2d(np.loadtxt(path))[:, 1] + + def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float: import numpy as np @@ -240,9 +243,7 @@ def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float: ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname) if not os.path.exists(ref_p) or not os.path.exists(run_p): return float("inf") - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] - total = max(total, float(np.max(np.abs(ref - run)))) + total = max(total, float(np.max(np.abs(_dat_column(ref_p) - _dat_column(run_p))))) return total @@ -255,8 +256,7 @@ def _max_abs_np(ref_dir: str, compare_files: list) -> float: ref_p = os.path.join(ref_dir, fname) if not os.path.exists(ref_p): continue - ref = np.loadtxt(ref_p)[:, 1] - total = max(total, float(np.max(np.abs(ref)))) + total = max(total, float(np.max(np.abs(_dat_column(ref_p))))) 
return total @@ -321,7 +321,8 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: # Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity # filter: a site is reported only if it lost >= the threshold bits. Sweeping these # levels and taking the highest each site survives gives a per-site "bits lost" -# severity (a lower bound — no false positives). 48 ~ full double mantissa. +# severity (a lower bound — no false positives). 48 is near the full 53-bit +# double mantissa (the top of the sweep), not the mantissa width itself. CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] @@ -474,19 +475,3 @@ def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: lines = by_base.get(os.path.basename(loc["path"]), set()) loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1)) return dd_line_locs - - -def _cancellation_by_file(cancellation_locs: list) -> list: - """Aggregate cancellation sites by source file → [(basename, count)] sorted by - count (desc), ties by name. - - This is the cancellation-*origin* view (where ill-conditioning concentrates), - as opposed to the per-line --source share, which is a *sensitivity* view - (where reduced precision most moves the output — typically the time - integrator / final accumulation, regardless of where error originates). 
- """ - counts = {} - for fname, _lineno in cancellation_locs: - base = os.path.basename(fname) - counts[base] = counts.get(base, 0) + 1 - return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])) diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 4146baab11..39f2ece47b 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -118,9 +118,10 @@ def _run_simulation_verrou( shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) -def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list: +def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10): """Run --check-cancellation at the given bit threshold; return [(fname, line)] - of MFC cancellation sites (subtractions losing >= `threshold` significant bits).""" + of MFC cancellation sites (subtractions losing >= `threshold` significant bits), + or None if the run itself failed (distinct from [] = ran and found none).""" tag = f"cancellation_{threshold}" run_dir = os.path.join(work_dir, tag) os.makedirs(run_dir, exist_ok=True) @@ -132,8 +133,9 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh ] try: _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) - except MFCException: - pass + except MFCException as exc: + cons.print(f" [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]") + return None raw = _parse_cancel_gen(gen_path) filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] skipped = len(raw) - len(filtered) @@ -150,10 +152,12 @@ def _run_mca_samples( ref_dir: str, n_mca: int, ) -> tuple: - """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound).""" + """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound, n_ok) where + n_ok is how many samples actually completed (0 => no usable measurement).""" compare = 
case["compare"] ref_scale = _max_abs_np(ref_dir, compare) max_dev = 0.0 + n_ok = 0 flags = ["--backend=mcaquad", "--mca-mode=mca"] for i in range(n_mca): run_dir = os.path.join(work_dir, f"mca_{i:02d}") @@ -161,16 +165,18 @@ def _run_mca_samples( try: _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - except MFCException: - pass + n_ok += 1 + except MFCException as exc: + cons.print(f" [dim]MCA sample {i} failed: {exc}[/dim]") sig_bits = None - if max_dev > 0.0 and ref_scale > 0.0: + if n_ok and max_dev > 0.0 and ref_scale > 0.0: sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale)))) - return max_dev, sig_bits + return max_dev, sig_bits, n_ok -def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list: - """Run with --check-max-float=yes; return [(fname, line)] of overflow sites.""" +def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): + """Run with --check-max-float=yes; return [(fname, line)] of overflow sites, + or None if the run failed (distinct from [] = ran and found none).""" run_dir = os.path.join(work_dir, "float_max") os.makedirs(run_dir, exist_ok=True) try: @@ -182,8 +188,9 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str) -> list: rounding_mode="nearest", extra_flags=["--check-max-float=yes"], ) - except MFCException: - pass + except MFCException as exc: + cons.print(f" [yellow]float-max run failed: {exc}[/yellow]") + return None return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float") @@ -291,8 +298,8 @@ def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): if not os.path.exists(ref_p) or not os.path.exists(run_p): print(f"MISSING: {{fname}}") sys.exit(1) - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] + ref = np.atleast_2d(np.loadtxt(ref_p))[:, 1] + run = np.atleast_2d(np.loadtxt(run_p))[:, 1] dev = float(np.max(np.abs(ref - 
run))) max_dev = max(max_dev, dev) diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 30fb2f0caa..981b8b3c86 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -10,14 +10,13 @@ MIN_SIG_BITS, _autodetect_compare, _build_source_filter, - _cancellation_by_file, _cancellation_severity, _confirm_decision, + _digits_left, _macro_context_in_lines, _mark_cancellation, _rank_locs, _sig_bits, - _stability_pass, _statement_bounds_in_lines, ) @@ -84,6 +83,16 @@ def test_macro_context_def_body_when_no_inner_loop(): assert _macro_context_in_lines(lines, 2) == "#:def" +def test_macro_context_block_and_call_are_duplicating(): + assert _macro_context_in_lines(["#:block B\n", " a = b - c\n", "#:endblock\n"], 2) == "#:block" + assert _macro_context_in_lines(["#:call M()\n", " a = b - c\n", "#:endcall\n"], 2) == "#:call" + + +def test_macro_context_unbalanced_close_is_safe(): + # a stray #:endfor with an empty stack must not crash or misreport + assert _macro_context_in_lines(["#:endfor\n", " a = b - c\n"], 2) is None + + # --- #1: building the symbol-correct --source filter from --gen-source output --- @@ -205,27 +214,6 @@ def test_mark_cancellation_false_for_different_basename(): assert locs[0]["cancellation"] is False -# --- cancellation-origin view: where cancellation concentrates --- - - -def test_cancellation_by_file_counts_and_sorts_by_density(): - locs = [ - ("src/simulation/m_weno.fpp", 10), - ("m_weno.fpp", 20), - ("a/m_riemann_solvers.fpp", 5), - ] - assert _cancellation_by_file(locs) == [("m_weno.fpp", 2), ("m_riemann_solvers.fpp", 1)] - - -def test_cancellation_by_file_breaks_ties_by_name(): - locs = [("z.fpp", 1), ("a.fpp", 2)] - assert _cancellation_by_file(locs) == [("a.fpp", 1), ("z.fpp", 1)] - - -def test_cancellation_by_file_empty(): - assert _cancellation_by_file([]) == [] - - # --- per-site cancellation severity (bits lost), from a threshold sweep --- @@ -291,17 +279,16 @@ def 
test_sig_bits_deviation_at_scale_is_unstable(): assert _sig_bits(1.0, 1.0) <= 0.0 -def test_stability_pass_uses_global_floor(): - # well-conditioned: ~46 bits >= floor - assert _stability_pass(1e-14, 1.0, MIN_SIG_BITS) is True - # catastrophic: deviation at field scale -> fails - assert _stability_pass(0.5, 1.0, MIN_SIG_BITS) is False - - def test_min_sig_bits_is_single_precision_floor(): assert MIN_SIG_BITS == 24 +def test_digits_left_full_and_clamped(): + assert 15.5 < _digits_left(0) < 16.0 # full double ~ 16 sig digits + assert _digits_left(53) == 0.0 + assert _digits_left(60) == 0.0 # clamp: never negative + + # --- Fortran line-continuation handling (correct-line labeling) --- @@ -338,3 +325,57 @@ def test_statement_bounds_with_leading_ampersand_continuation(): lines = [" beta = x**2 &\n", " & + eps\n"] assert _statement_bounds_in_lines(lines, 1) == (1, 2) assert _statement_bounds_in_lines(lines, 2) == (1, 2) + + +# --- report emitters: must survive blank and populated result dicts (CI-only path) --- + + +def _emit_to_tmp(results, tmp_path, monkeypatch): + """Run _emit_github_summary into a temp file under the GitHub-Actions env.""" + from mfc import fp_stability_report as report + + out = tmp_path / "summary.md" + monkeypatch.setenv("GITHUB_STEP_SUMMARY", str(out)) + monkeypatch.setenv("GITHUB_ACTIONS", "1") + report._emit_github_summary(results, 5) + return out.read_text() + + +def test_emit_summary_survives_blank_result(tmp_path, monkeypatch): + # the dict produced on the per-case error path must not KeyError the emitter + from mfc.fp_stability import _blank_result + + text = _emit_to_tmp([_blank_result("x")], tmp_path, monkeypatch) + assert "0 passed, 1 failed" in text + + +def test_emit_summary_populated_result(tmp_path, monkeypatch): + from mfc.fp_stability import _blank_result + + r = _blank_result("demo") + r.update( + passed=False, + max_dev=1e-9, + sig_bits=30.0, + float_proxy=1e-6, + vprec=[(52, 1e-14), (23, float("inf"))], # exercises the "crash" 
branch + dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": "#:for", "share": 0.4, "cancellation": True}], + dd_line_confirmed=False, + cancellation_locs=[("src/x/m_a.fpp", 5)], + cancellation_bits={("src/x/m_a.fpp", 5): 40}, + float_max_locs=[("m_a.fpp", 9)], + ) + text = _emit_to_tmp([r], tmp_path, monkeypatch) + assert "💥 crash" in text and "digits lost" in text + + +def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys): + from mfc import fp_stability_report as report + from mfc.fp_stability import _blank_result + + monkeypatch.setenv("GITHUB_ACTIONS", "1") + r = _blank_result("demo") + r.update(dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": None, "share": 0.9, "cancellation": False}], dd_line_confirmed=False) + report._emit_github_annotations([r]) + out = capsys.readouterr().out + assert "::notice" in out and "::warning" not in out # unconfirmed -> notice, not warning From d3919d5cc351a72ab307bfe42f70c50581a98ad6 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 13:17:41 -0400 Subject: [PATCH 13/25] fp-stability: add opt-in Verrou bootstrap script + actionable SKIP message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verrou is a compiled Valgrind fork (not a pip/uv/conda-friendly Python package), so it can't be installed into the venv. Add toolchain/bootstrap/verrou.sh — an explicit, opt-in installer that builds Valgrind+Verrou from source into $VERROU_HOME (default ~/.local/verrou), pinned to the same versions as the fp-stability CI workflow (Valgrind 3.26.0 + edf-hpc/verrou@a58d434). 
It is deliberately opt-in (never auto-built on a bare fp-stability run, since it's a ~20-min source build needing a C toolchain + autotools): idempotent (skips if already present, --force to rebuild), requires Linux (Valgrind has no modern-macOS/Apple-Silicon support), warns but proceeds on non-x86_64 (Valgrind builds on aarch64 etc., but Verrou's FP backends are best-validated on x86_64), and checks build deps up front with guidance. The fp-stability SKIP message now points at it and clarifies Verrou is not a pip package. --- toolchain/bootstrap/verrou.sh | 90 +++++++++++++++++++++++++++++++++++ toolchain/mfc/fp_stability.py | 4 +- 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100755 toolchain/bootstrap/verrou.sh diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh new file mode 100755 index 0000000000..5b22cbca1f --- /dev/null +++ b/toolchain/bootstrap/verrou.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# +# Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by +# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of +# Valgrind that must be compiled from source (~20 min), so this is a deliberate, +# explicit step rather than something `fp-stability` does silently. +# +# bash toolchain/bootstrap/verrou.sh # build into $HOME/.local/verrou +# VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh +# bash toolchain/bootstrap/verrou.sh --force # rebuild even if present +# +# Versions are pinned to match the fp-stability CI workflow. + +set -euo pipefail + +VALGRIND_VERSION="3.26.0" +VERROU_COMMIT="a58d434" +PREFIX="${VERROU_HOME:-$HOME/.local/verrou}" +FORCE="${1:-}" + +echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}" + +# Idempotent: skip if already installed and working. 
+if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1; then + echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do." + exit 0 +fi + +# Platform: Valgrind has no working modern-macOS support; Linux only. +if [ "$(uname -s)" != "Linux" ]; then + echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2 + exit 1 +fi +case "$(uname -m)" in + x86_64) ;; + aarch64|arm64) + echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2 + echo " best-validated on x86_64 — treat results as experimental on this arch." >&2 + ;; + *) + echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." >&2 + ;; +esac + +# Build dependencies. +missing="" +for tool in tar git make patch autoconf automake; do + command -v "$tool" >/dev/null 2>&1 || missing="$missing $tool" +done +command -v cc >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || missing="$missing gcc" +command -v wget >/dev/null 2>&1 || command -v curl >/dev/null 2>&1 || missing="$missing wget/curl" +if [ -n "$missing" ]; then + echo "ERROR: missing build dependencies:$missing" >&2 + echo " Install them (e.g. apt: build-essential automake autoconf libtool; or load HPC modules) and retry." >&2 + exit 1 +fi + +workdir="$(mktemp -d)" +trap 'rm -rf "$workdir"' EXIT +cd "$workdir" + +tarball="valgrind-${VALGRIND_VERSION}.tar.bz2" +url="https://sourceware.org/pub/valgrind/${tarball}" +echo "==> Downloading ${tarball}" +if command -v wget >/dev/null 2>&1; then + wget -q "$url" +else + curl -fsSL -o "$tarball" "$url" +fi +tar xf "$tarball" + +echo "==> Cloning Verrou @ ${VERROU_COMMIT}" +git clone --quiet https://github.com/edf-hpc/verrou.git +git -C verrou checkout --quiet "$VERROU_COMMIT" + +# Merge Verrou into the Valgrind tree and apply its patch. 
+cp -r verrou "valgrind-${VALGRIND_VERSION}/verrou" +cd "valgrind-${VALGRIND_VERSION}" +cat verrou/valgrind.*diff | patch -p1 + +echo "==> Building (this takes ~20 min)" +./autogen.sh +./configure --enable-only64bit --prefix="$PREFIX" +make -j"$(nproc)" +make install + +echo "==> Verifying" +"${PREFIX}/bin/valgrind" --tool=verrou --version +echo "==> Done. Verrou installed at ${PREFIX}" +echo " Run: ./mfc.sh fp-stability (or set VERROU_HOME=${PREFIX} if you used a custom prefix)" diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 9047e77bc9..407dd06419 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -661,7 +661,9 @@ def _load_user_case(input_path: str) -> dict: def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() if not verrou_bin or not os.path.isfile(verrou_bin): - cons.print("[bold yellow]SKIP[/bold yellow]: verrou not found. Install at $HOME/.local/verrou or set VERROU_HOME.") + cons.print("[bold yellow]SKIP[/bold yellow]: Verrou not found (it is a compiled Valgrind tool, not a pip package).") + cons.print(" Install it (Linux; ~20 min source build) with: [bold]bash toolchain/bootstrap/verrou.sh[/bold]") + cons.print(" Or point at an existing build with --verrou-binary PATH or $VERROU_HOME.") sys.exit(0) sim_bin = ARG("sim_binary") or _find_binary("simulation") From c27b6aea593e78e423b12718177a1f1da8501f37 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 13:29:27 -0400 Subject: [PATCH 14/25] ci(fp-stability): build Verrou via the shared bootstrap script (DRY) Replace the inline Valgrind+Verrou build in the workflow with a call to toolchain/bootstrap/verrou.sh, so the local installer and CI share one pinned recipe (no drift between them). Cache gating and the system-deps step are unchanged; the build step is still skipped on a cache hit. 
Tightened the verify step to '--tool=verrou --version', and noted that the cache key's pinned versions must track the script. --- .github/workflows/fp-stability.yml | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 8a977cfcb3..45ff70d69f 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -68,6 +68,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.local/verrou + # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the builder). key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }} - name: Install system dependencies @@ -79,26 +80,10 @@ jobs: - name: Build Verrou if: steps.cache-verrou.outputs.cache-hit != 'true' - run: | - cd /tmp - wget -q https://sourceware.org/pub/valgrind/valgrind-3.26.0.tar.bz2 - tar xf valgrind-3.26.0.tar.bz2 - - git clone https://github.com/edf-hpc/verrou.git - git -C verrou checkout a58d434 - - # Merge Verrou into Valgrind source tree and patch - cp -r verrou valgrind-3.26.0/verrou - cd valgrind-3.26.0 - cat verrou/valgrind.*diff | patch -p1 - - ./autogen.sh - ./configure --enable-only64bit --prefix="$HOME/.local/verrou" - make -j"$(nproc)" - make install + run: bash toolchain/bootstrap/verrou.sh - name: Verify Verrou - run: ~/.local/verrou/bin/valgrind --version + run: ~/.local/verrou/bin/valgrind --tool=verrou --version - name: Build MFC (debug, serial) # FFLAGS=-fno-inline prevents gfortran from inlining small functions into From a4cfa79e74554ce388d34dc168edc716ee1c0a94 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 16:50:56 -0400 Subject: [PATCH 15/25] fp-stability: install Verrou from prebuilt artifact (verrou-dist), build as fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verrou.sh now downloads a pinned, hash-verified prebuilt from sbryngelson/verrou-dist@v1 (seconds) and falls back to the 
~20-min source build; idempotency check sources env.sh so a relocated prebuilt isn't re-fetched. fp_stability sets VALGRIND_LIB (via _verrou_env, reused by _dd_env) so a relocated tree's valgrind/verrou_dd_* calls resolve — harmless for source builds. CI installs zstd and sources env.sh before verifying. --- .github/workflows/fp-stability.yml | 12 +++-- toolchain/bootstrap/verrou.sh | 77 ++++++++++++++++++++++++--- toolchain/mfc/fp_stability_runners.py | 20 +++++-- 3 files changed, 95 insertions(+), 14 deletions(-) diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 45ff70d69f..205e3d711e 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -68,7 +68,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.local/verrou - # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the builder). + # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the installer). key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }} - name: Install system dependencies @@ -76,14 +76,18 @@ jobs: sudo apt-get update -y sudo apt-get install -y \ build-essential automake python3 python3-numpy libc6-dbg \ - cmake gfortran + cmake gfortran zstd - - name: Build Verrou + - name: Install Verrou (prebuilt artifact, or source build as fallback) if: steps.cache-verrou.outputs.cache-hit != 'true' run: bash toolchain/bootstrap/verrou.sh - name: Verify Verrou - run: ~/.local/verrou/bin/valgrind --tool=verrou --version + # Source env.sh first: a prebuilt (relocated) tree needs VALGRIND_LIB; a + # source build works either way. (fp-stability sets this itself at runtime.) + run: | + [ -f ~/.local/verrou/env.sh ] && . 
~/.local/verrou/env.sh + ~/.local/verrou/bin/valgrind --tool=verrou --version - name: Build MFC (debug, serial) # FFLAGS=-fno-inline prevents gfortran from inlining small functions into diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh index 5b22cbca1f..8ebafdeb2f 100755 --- a/toolchain/bootstrap/verrou.sh +++ b/toolchain/bootstrap/verrou.sh @@ -2,12 +2,14 @@ # # Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by # `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of -# Valgrind that must be compiled from source (~20 min), so this is a deliberate, -# explicit step rather than something `fp-stability` does silently. +# Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds); +# if none is available for this tag/arch it falls back to a source build (~20 min). +# Either way it's a deliberate, explicit step, not something fp-stability does silently. # -# bash toolchain/bootstrap/verrou.sh # build into $HOME/.local/verrou +# bash toolchain/bootstrap/verrou.sh # install into $HOME/.local/verrou # VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh -# bash toolchain/bootstrap/verrou.sh --force # rebuild even if present +# bash toolchain/bootstrap/verrou.sh --force # reinstall even if present +# VERROU_BUILD_FROM_SOURCE=1 bash toolchain/bootstrap/verrou.sh # skip the prebuilt # # Versions are pinned to match the fp-stability CI workflow. @@ -15,13 +17,19 @@ set -euo pipefail VALGRIND_VERSION="3.26.0" VERROU_COMMIT="a58d434" +# Prebuilt artifacts (built once per arch) live in a small companion repo. The tag +# pins to the (valgrind, verrou) pair above — bump all three together. 
+VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}" +VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}" PREFIX="${VERROU_HOME:-$HOME/.local/verrou}" FORCE="${1:-}" echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}" -# Idempotent: skip if already installed and working. -if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1; then +# Idempotent: skip if already installed and working. Source env.sh first if present +# (a prebuilt tree needs VALGRIND_LIB to run; a source build works either way). +if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] \ + && ( [ -f "${PREFIX}/env.sh" ] && . "${PREFIX}/env.sh"; "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do." exit 0 fi @@ -31,9 +39,11 @@ if [ "$(uname -s)" != "Linux" ]; then echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2 exit 1 fi +arch_tag="" case "$(uname -m)" in - x86_64) ;; + x86_64) arch_tag="x86_64" ;; aarch64|arm64) + arch_tag="aarch64" echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2 echo " best-validated on x86_64 — treat results as experimental on this arch." >&2 ;; @@ -42,6 +52,59 @@ case "$(uname -m)" in ;; esac +# Fast path: download a prebuilt, hash-verified artifact and source its relocatable +# env.sh, instead of building from source. Any failure (no asset for this arch/tag, +# missing zstd/sha256sum, checksum mismatch, won't run) falls through to the build. 
+try_prebuilt() { + [ -n "$arch_tag" ] || return 1 + [ "${VERROU_BUILD_FROM_SOURCE:-}" = "1" ] && return 1 + command -v sha256sum >/dev/null 2>&1 || return 1 + tar --zstd --help >/dev/null 2>&1 || command -v zstd >/dev/null 2>&1 || return 1 + command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || return 1 + + local asset base dl + asset="verrou-${VERROU_COMMIT}-valgrind-${VALGRIND_VERSION}-linux-${arch_tag}.tar.zst" + base="https://github.com/${VERROU_DIST_REPO}/releases/download/${VERROU_DIST_TAG}/${asset}" + dl="$(mktemp -d)" + + echo "==> Trying prebuilt ${VERROU_DIST_REPO}@${VERROU_DIST_TAG} (${asset})" + _fetch() { # url dest + if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi + } + if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then + echo "==> No prebuilt for this tag/arch — building from source instead." + rm -rf "$dl"; return 1 + fi + if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then + echo "WARNING: prebuilt checksum mismatch — building from source instead." >&2 + rm -rf "$dl"; return 1 + fi + + mkdir -p "$PREFIX" + if tar --zstd --help >/dev/null 2>&1; then + tar -C "$PREFIX" --zstd -xf "$dl/$asset" + else + zstd -dc "$dl/$asset" | tar -C "$PREFIX" -xf - + fi + rm -rf "$dl" + + # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets + # VALGRIND_LIB relative to the extracted tree so the relocated install works. + if ! ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then + echo "WARNING: prebuilt did not run — building from source instead." >&2 + return 1 + fi + return 0 +} + +if try_prebuilt; then + echo "==> Verifying" + ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version ) + echo "==> Done (prebuilt). 
Verrou installed at ${PREFIX}" + echo " Run: ./mfc.sh fp-stability (or set VERROU_HOME=${PREFIX} if you used a custom prefix)" + exit 0 +fi + # Build dependencies. missing="" for tool in tar git make patch autoconf automake; do diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 39f2ece47b..c16f5540db 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -60,6 +60,19 @@ def _verrou_pythonpath(verrou_bin: str) -> str: return matches[0] if matches else "" +def _verrou_env(verrou_bin: str) -> dict: + """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt + artifact extracted to a new prefix) can locate its tool — Valgrind bakes its + build prefix into the binary otherwise. Harmless for a source-built tree, where + VALGRIND_LIB just equals the compiled-in path. A VALGRIND_LIB already in the + environment (user sourced env.sh) is left untouched.""" + env = os.environ.copy() + libdir = os.path.join(os.path.dirname(os.path.dirname(verrou_bin)), "libexec", "valgrind") + if "VALGRIND_LIB" not in env and os.path.isdir(libdir): + env["VALGRIND_LIB"] = libdir + return env + + def _write_inp(params: dict, target_name: str, work_dir: str) -> None: """Write a Fortran namelist .inp file from a Python params dict.""" from .run import case_dicts @@ -107,7 +120,7 @@ def _run_simulation_verrou( cmd.append(sim_bin) with open(os.path.join(run_dir, "sim.out"), "w") as f: - result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False) + result = subprocess.run(cmd, cwd=tmpdir, env=_verrou_env(verrou_bin), stdout=f, stderr=subprocess.STDOUT, check=False) if result.returncode != 0: tag = rounding_mode or "vprec" @@ -312,9 +325,10 @@ def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): def _dd_env(verrou_bin: str) -> dict: - """Environment with PYTHONPATH set for verrou_dd_* imports.""" + """Environment for verrou_dd_*: VALGRIND_LIB 
(so a relocated tree's inner valgrind + calls resolve) plus PYTHONPATH (for the verrou_dd_* imports).""" py_pkg = _verrou_pythonpath(verrou_bin) - env = os.environ.copy() + env = _verrou_env(verrou_bin) if py_pkg: existing = env.get("PYTHONPATH", "") env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing])) From 0613913080c42e4938d9c6ae3aec71de595d54f3 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 18:01:32 -0400 Subject: [PATCH 16/25] fp-stability: auto-install Verrou on first use (download prebuilt), hard-fail if it can't MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running ./mfc.sh fp-stability with no Verrou present now installs it via the bootstrap (downloads the pinned prebuilt from verrou-dist; source build as fallback) and proceeds, instead of SKIP+exit-0; a failed install is now a hard error. _find_verrou no longer accepts a bare system valgrind on PATH (it has no 'verrou' tool and would only fail at run time) — that case reads as 'Verrou absent' so it gets installed. CI drops the separate Install/Verify Verrou steps; the run does it. Tests added for the discovery logic. 
--- .github/workflows/fp-stability.yml | 13 ++------ toolchain/mfc/cli/commands.py | 8 +++-- toolchain/mfc/fp_stability.py | 22 ++++++++++--- toolchain/mfc/fp_stability_runners.py | 16 +++++++++- toolchain/mfc/test_fp_stability.py | 46 +++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 18 deletions(-) diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 205e3d711e..203cff3ad4 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -78,16 +78,9 @@ jobs: build-essential automake python3 python3-numpy libc6-dbg \ cmake gfortran zstd - - name: Install Verrou (prebuilt artifact, or source build as fallback) - if: steps.cache-verrou.outputs.cache-hit != 'true' - run: bash toolchain/bootstrap/verrou.sh - - - name: Verify Verrou - # Source env.sh first: a prebuilt (relocated) tree needs VALGRIND_LIB; a - # source build works either way. (fp-stability sets this itself at runtime.) - run: | - [ -f ~/.local/verrou/env.sh ] && . ~/.local/verrou/env.sh - ~/.local/verrou/bin/valgrind --tool=verrou --version + # Verrou is installed by `fp-stability` itself on first use (downloads the + # prebuilt artifact; aborts if that fails). The cache above restores it across + # runs so the download only happens on a cache miss. - name: Build MFC (debug, serial) # FFLAGS=-fno-inline prevents gfortran from inlining small functions into diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index a6eae93846..4beebd0f34 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -908,9 +908,11 @@ "(~30x slower, and run many times), so it must be a small, short proxy — large " "grids or long runs are rejected with guidance; serial .dat I/O is forced. " "Example: ./mfc.sh fp-stability my_case.py\n\n" - "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind " - "(defaults to $HOME/.local/verrou). 
The simulation and pre_process " - "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n" + "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to " + "$HOME/.local/verrou); if absent it is installed automatically (a pinned, " + "hash-verified prebuilt is downloaded, with a source build as fallback) — " + "aborts if that install fails. The simulation and pre_process binaries must " + "be serial (no-MPI, no-GPU) debug builds.\n\n" "Analysis passes (skip with --no-* flags):\n" " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 407dd06419..867c2fb1a6 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -73,6 +73,7 @@ import math import os import shutil +import subprocess import sys import tempfile import time @@ -658,13 +659,26 @@ def _load_user_case(input_path: str) -> dict: } +def _install_verrou() -> str: + """Verrou is absent: install it via the bootstrap (downloads a pinned, hash-verified + prebuilt; source build as fallback) and return the valgrind path. Aborts on failure — + fp-stability cannot run without Verrou, so this is a hard error, not a skip.""" + script = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh") + cons.print("[bold]Verrou not found — installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]") + if subprocess.run(["bash", script], check=False).returncode != 0: + raise MFCException("Verrou install failed (see output above). 
Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.") + verrou_bin = _find_verrou() + if not verrou_bin or not os.path.isfile(verrou_bin): + raise MFCException("Verrou install reported success but no valgrind binary was found under $VERROU_HOME.") + return verrou_bin + + def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() if not verrou_bin or not os.path.isfile(verrou_bin): - cons.print("[bold yellow]SKIP[/bold yellow]: Verrou not found (it is a compiled Valgrind tool, not a pip package).") - cons.print(" Install it (Linux; ~20 min source build) with: [bold]bash toolchain/bootstrap/verrou.sh[/bold]") - cons.print(" Or point at an existing build with --verrou-binary PATH or $VERROU_HOME.") - sys.exit(0) + if ARG("verrou_binary"): + raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.") + verrou_bin = _install_verrou() sim_bin = ARG("sim_binary") or _find_binary("simulation") if not sim_bin or not os.path.isfile(sim_bin): diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index c16f5540db..3202fbd9de 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -32,12 +32,26 @@ from .printer import cons +def _has_verrou_tool(valgrind_bin: str) -> bool: + """True if this valgrind actually provides the 'verrou' tool. 
A plain system + valgrind does not — accepting one would only fail later at run time.""" + try: + return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0 + except OSError: + return False + + def _find_verrou() -> str: verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) candidate = os.path.join(verrou_home, "bin", "valgrind") if os.path.isfile(candidate) and os.access(candidate, os.X_OK): return candidate - return shutil.which("valgrind") or "" + # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system + # valgrind must read as "Verrou absent" so it gets installed, not misused. + path_vg = shutil.which("valgrind") + if path_vg and _has_verrou_tool(path_vg): + return path_vg + return "" def _find_binary(name: str) -> str: diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 981b8b3c86..da37ac750e 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -379,3 +379,49 @@ def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys): report._emit_github_annotations([r]) out = capsys.readouterr().out assert "::notice" in out and "::warning" not in out # unconfirmed -> notice, not warning + + +# --- Verrou discovery: a bare system valgrind must read as "Verrou absent" --- + + +def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + vbin = tmp_path / "bin" / "valgrind" + vbin.parent.mkdir(parents=True) + vbin.write_text("#!/bin/sh\n") + vbin.chmod(0o755) + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + assert runners._find_verrou() == str(vbin) + + +def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + # VERROU_HOME has no valgrind; a plain valgrind is on PATH but 
lacks the tool. + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind") + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: False) + assert runners._find_verrou() == "" + + +def test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind") + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: True) + assert runners._find_verrou() == "/opt/verrou/bin/valgrind" + + +def test_has_verrou_tool_reflects_exit_code(monkeypatch): + from mfc import fp_stability_runners as runners + + class _R: + def __init__(self, rc): + self.returncode = rc + + monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(0)) + assert runners._has_verrou_tool("/any/valgrind") is True + monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(1)) + assert runners._has_verrou_tool("/any/valgrind") is False From 37b7a216db7f0835caa98f0893887a5c19476138 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 18:16:02 -0400 Subject: [PATCH 17/25] =?UTF-8?q?fp-stability:=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20atomic=20prebuilt=20install,=20verify=20VERROU=5FHO?= =?UTF-8?q?ME=20tree,=20more=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verrou.sh try_prebuilt() runs as an if-condition (set -e suppressed), so a failed extract could fall through and the source build would install over a half-written tree; now extract+verify in a staging dir and swap into PREFIX atomically with explicit error checks. 
_find_verrou now verifies the $VERROU_HOME tree actually runs the verrou tool (with VALGRIND_LIB for a relocated prebuilt) so a broken/stale tree reads as absent and gets reinstalled, not used until it fails per-run. Fix comment rot (fp-stability now auto-installs). Add unit tests for _verrou_env (incl. preserve-user-VALGRIND_LIB), _dd_env PYTHONPATH composition, _install_verrou hard-fail guards, _has_verrou_tool OSError, and the broken-VERROU_HOME case. --- toolchain/bootstrap/verrou.sh | 31 ++++++--- toolchain/mfc/fp_stability_runners.py | 13 ++-- toolchain/mfc/test_fp_stability.py | 98 +++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 12 deletions(-) diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh index 8ebafdeb2f..dfbd462231 100755 --- a/toolchain/bootstrap/verrou.sh +++ b/toolchain/bootstrap/verrou.sh @@ -4,7 +4,8 @@ # `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of # Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds); # if none is available for this tag/arch it falls back to a source build (~20 min). -# Either way it's a deliberate, explicit step, not something fp-stability does silently. +# fp-stability auto-runs this on first use when Verrou is absent (printing what it +# does); it is also safe to run by hand. A failed install aborts, never a silent skip. # # bash toolchain/bootstrap/verrou.sh # install into $HOME/.local/verrou # VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh @@ -80,20 +81,34 @@ try_prebuilt() { rm -rf "$dl"; return 1 fi - mkdir -p "$PREFIX" + # Extract + verify in a staging dir, then swap into $PREFIX atomically. set -e + # is suppressed inside a function used as an `if` condition, so check each step + # explicitly — otherwise a failed extract would fall through and the source + # build would install on top of a half-written tree (or a stale one on --force). 
+ local stage="$dl/stage" + mkdir -p "$stage" if tar --zstd --help >/dev/null 2>&1; then - tar -C "$PREFIX" --zstd -xf "$dl/$asset" + tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; } else - zstd -dc "$dl/$asset" | tar -C "$PREFIX" -xf - + zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; } fi - rm -rf "$dl" # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets - # VALGRIND_LIB relative to the extracted tree so the relocated install works. - if ! ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then + # VALGRIND_LIB relative to the tree so the relocated install works. Verify the + # staged tree runs before committing it. + if ! ( . "${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then echo "WARNING: prebuilt did not run — building from source instead." >&2 - return 1 + rm -rf "$dl"; return 1 + fi + + # Commit only now: replace any existing $PREFIX atomically. + mkdir -p "$(dirname "$PREFIX")" + rm -rf "$PREFIX" + if ! mv "$stage" "$PREFIX"; then + echo "WARNING: could not install prebuilt to ${PREFIX} — building from source instead." >&2 + rm -rf "$dl"; return 1 fi + rm -rf "$dl" return 0 } diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 3202fbd9de..1d1c5a7b8f 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -32,11 +32,13 @@ from .printer import cons -def _has_verrou_tool(valgrind_bin: str) -> bool: +def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool: """True if this valgrind actually provides the 'verrou' tool. 
A plain system - valgrind does not — accepting one would only fail later at run time.""" + valgrind does not — accepting one would only fail later at run time. Pass env + (with VALGRIND_LIB) to verify a relocated prebuilt tree, which cannot load its + tool without it.""" try: - return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0 + return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0 except OSError: return False @@ -44,7 +46,10 @@ def _has_verrou_tool(valgrind_bin: str) -> bool: def _find_verrou() -> str: verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) candidate = os.path.join(verrou_home, "bin", "valgrind") - if os.path.isfile(candidate) and os.access(candidate, os.X_OK): + # Require the $VERROU_HOME tree to actually run the verrou tool (with VALGRIND_LIB + # for a relocated prebuilt). A broken/stale/non-Verrou tree there must read as + # "absent" so it gets reinstalled, not used until it fails on every run. + if os.path.isfile(candidate) and os.access(candidate, os.X_OK) and _has_verrou_tool(candidate, _verrou_env(candidate)): return candidate # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system # valgrind must read as "Verrou absent" so it gets installed, not misused. 
diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index da37ac750e..b2b43bfc02 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -392,9 +392,27 @@ def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch): vbin.write_text("#!/bin/sh\n") vbin.chmod(0o755) monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + # The candidate must also verify as Verrou-enabled; stub that so the test + # exercises precedence, not a real valgrind invocation. + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True) assert runners._find_verrou() == str(vbin) +def test_find_verrou_rejects_broken_verrou_home_tree(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + # A valgrind exists at $VERROU_HOME but does not actually run the verrou tool + # (broken/stale/non-Verrou): it must read as absent, not be returned. + vbin = tmp_path / "bin" / "valgrind" + vbin.parent.mkdir(parents=True) + vbin.write_text("#!/bin/sh\n") + vbin.chmod(0o755) + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False) + monkeypatch.setattr(runners.shutil, "which", lambda _name: None) + assert runners._find_verrou() == "" + + def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch): from mfc import fp_stability_runners as runners @@ -425,3 +443,83 @@ def __init__(self, rc): assert runners._has_verrou_tool("/any/valgrind") is True monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(1)) assert runners._has_verrou_tool("/any/valgrind") is False + + def _boom(*a, **k): + raise OSError("not executable") + + monkeypatch.setattr(runners.subprocess, "run", _boom) + assert runners._has_verrou_tool("/stale/valgrind") is False + + +# --- env composition for relocated (prebuilt) Verrou trees --- + + +def test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch): + 
from mfc import fp_stability_runners as runners + + (tmp_path / "libexec" / "valgrind").mkdir(parents=True) + monkeypatch.delenv("VALGRIND_LIB", raising=False) + env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) + assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind") + + +def test_verrou_env_omits_valgrind_lib_when_libexec_absent(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + monkeypatch.delenv("VALGRIND_LIB", raising=False) + env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) + assert "VALGRIND_LIB" not in env + + +def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + (tmp_path / "libexec" / "valgrind").mkdir(parents=True) + monkeypatch.setenv("VALGRIND_LIB", "/user/chosen/lib") + env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) + assert env["VALGRIND_LIB"] == "/user/chosen/lib" # not clobbered + + +def test_dd_env_prepends_pythonpath_and_inherits_valgrind_lib(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + (tmp_path / "libexec" / "valgrind").mkdir(parents=True) + monkeypatch.delenv("VALGRIND_LIB", raising=False) + monkeypatch.setenv("PYTHONPATH", "/pre/existing") + monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/site-packages/valgrind") + env = runners._dd_env(str(tmp_path / "bin" / "valgrind")) + assert env["PYTHONPATH"] == "/vg/site-packages/valgrind:/pre/existing" + assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind") + + +def test_dd_env_no_leading_colon_when_pythonpath_empty(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + monkeypatch.delenv("PYTHONPATH", raising=False) + monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/valgrind") + env = runners._dd_env(str(tmp_path / "bin" / "valgrind")) + assert env["PYTHONPATH"] == "/vg/valgrind" # no stray leading ':' + + +# --- auto-install hard-fail guards 
--- + + +def test_install_verrou_raises_when_bootstrap_fails(monkeypatch): + import pytest + + from mfc import fp_stability as fps + + monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 1})()) + with pytest.raises(fps.MFCException, match="Verrou install failed"): + fps._install_verrou() + + +def test_install_verrou_raises_when_no_binary_appears(monkeypatch): + import pytest + + from mfc import fp_stability as fps + + monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 0})()) + monkeypatch.setattr(fps, "_find_verrou", lambda: "") + with pytest.raises(fps.MFCException, match="no valgrind binary"): + fps._install_verrou() From 1f10f31f7b489f99e0c4ff355ccf15662e803c71 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 19:27:54 -0400 Subject: [PATCH 18/25] fp-stability: drop dd line/sym bisection; keep cancellation + move fypp flag onto it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dd delta-debug stack (bisection + confirmation positive-control + sensitivity ranking) tried to pinpoint and rank the single most precision-sensitive source line, but fypp #:for/#:def expansion collapses many generated computations onto one .fpp line, so that attribution is instance-ambiguous by construction — the fragile part. Removed it (~900 lines). The cancellation pass stays and now carries the fypp instance-ambiguity flag: each cancellation origin is checked with _macro_context and, if its .fpp line sits inside a #:for/#:def, marked 'may represent multiple instances' in console, annotations, and summary. file:line attribution (cancellation origins, ranked by digits lost) is preserved; only the false-precision line-pinpointing is gone. Verified end-to-end: 27 cancellation sites, 23 flagged fypp-ambiguous. 
--- toolchain/mfc/cli/commands.py | 20 +- toolchain/mfc/fp_stability.py | 132 ++---------- toolchain/mfc/fp_stability_metrics.py | 260 +--------------------- toolchain/mfc/fp_stability_report.py | 130 +++-------- toolchain/mfc/fp_stability_runners.py | 298 +------------------------- toolchain/mfc/test_fp_stability.py | 204 ++---------------- 6 files changed, 69 insertions(+), 975 deletions(-) diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 4beebd0f34..6dfbb57c77 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -916,8 +916,6 @@ "Analysis passes (skip with --no-* flags):\n" " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" - " dd_sym/dd_line verrou_dd bisection to responsible functions/lines, then a\n" - " --source positive control confirms + ranks them by sensitivity\n" " cancellation --check-cancellation origins, ranked by significant digits lost\n" " mca-sigbits Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n" " float-max --check-max-float detection of double→float overflow sites\n" @@ -972,20 +970,6 @@ default=False, dest="no_vprec", ), - Argument( - name="no-dd-sym", - help="Skip verrou_dd_sym function-level delta-debug on failure.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_dd_sym", - ), - Argument( - name="no-dd-line", - help="Skip verrou_dd_line source-line delta-debug on failure.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_dd_line", - ), Argument( name="no-cancellation", help="Skip --check-cancellation catastrophic-cancellation detection.", @@ -1016,7 +1000,7 @@ "Specify simulation binary explicitly", ), Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"), - Example("./mfc.sh fp-stability --no-vprec --no-dd-line", "Skip VPREC sweep and line debug"), + Example("./mfc.sh fp-stability --no-vprec 
--no-cancellation", "Skip VPREC sweep and cancellation detection"), Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip new analysis passes"), ], key_options=[ @@ -1026,8 +1010,6 @@ ("-N, --samples N", "Random-rounding samples per case (default: 5)"), ("--no-float-proxy", "Skip float-rounding proxy run"), ("--no-vprec", "Skip VPREC mantissa-bit sweep"), - ("--no-dd-sym", "Skip verrou_dd_sym on failure"), - ("--no-dd-line", "Skip verrou_dd_line on failure"), ("--no-cancellation", "Skip cancellation detection"), ("--no-mca", "Skip MCA significant-bits estimate"), ("--no-float-max", "Skip float32 overflow detection"), diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 867c2fb1a6..102d512d52 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -15,37 +15,18 @@ One run per mantissa-bit level [52,23,16,10] with --backend=vprec --vprec-mode=full; shows where each case breaks. -D. verrou_dd_sym on failure (--no-dd-sym to skip) - Delta-debug bisection isolates the minimal set of *functions* causing - instability. - -E. verrou_dd_line on failure, after dd_sym (--no-dd-line to skip) - Further bisects to exact *source lines* within the responsible functions. - Each reported line is then *confirmed* by a positive control: --gen-source - captures the symbol-correct executed lines, those are filtered to the suspect - set, and a float-mode run with --source restricted to just them must - reproduce the instability. If perturbing the suspect set does not reproduce - it, the case's hotspots are reported as unconfirmed (downgraded from - ::warning:: to ::notice::) — this is a single set-level verdict, not per line. - Each line is then perturbed alone and ranked by the share of the single- - precision deviation it reproduces. 
NOTE: that share is a *sensitivity* - measure — where reduced precision most moves the output — typically dominated - by the time integrator / final accumulation, NOT by where cancellation - originates. Stage F is the cancellation-origin view; the two usually differ. - Hotspots are cross-referenced against the stage-F cancellation sites and - flagged as instance-ambiguous when the .fpp line sits inside a #:for/#:def - expansion. - -F. Cancellation detection (--no-cancellation to skip) +D. Cancellation detection (--no-cancellation to skip) One run with --check-cancellation=yes; reports MFC source lines that produce catastrophic cancellation (subtraction of nearly-equal doubles). - Uses --cc-gen-file for structured per-line output. + Uses --cc-gen-file for structured per-line output. A cancellation site whose + .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous + (the line maps to multiple generated instances). -G. MCA significant-bits estimate (--no-mca to skip) +E. MCA significant-bits estimate (--no-mca to skip) N runs with --backend=mcaquad; max deviation vs nearest-rounding reference gives a lower bound on significant bits: s = -log2(dev/scale). -H. Float-max overflow detection (--no-float-max to skip) +F. Float-max overflow detection (--no-float-max to skip) One run with --check-max-float=yes; reports locations where a double→float conversion would overflow to ±Inf. 
@@ -62,7 +43,7 @@ Usage: ./mfc.sh fp-stability # built-in 1-D suite ./mfc.sh fp-stability my_case.py # your own case (small/short, serial, CPU) - ./mfc.sh fp-stability --no-vprec --no-dd-line + ./mfc.sh fp-stability --no-vprec --no-cancellation ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH A user case .py is run as a single serial CPU process under Verrou, so it must be @@ -84,7 +65,7 @@ MIN_SIG_BITS, _autodetect_compare, _cancellation_severity, - _mark_cancellation, + _macro_context, _max_abs_np, _max_diff_np, _sig_bits, @@ -97,9 +78,6 @@ _find_binary, _find_verrou, _run_cancellation_check, - _run_confirmation, - _run_dd_line, - _run_dd_sym, _run_float_max_check, _run_float_proxy, _run_mca_samples, @@ -391,12 +369,9 @@ def _blank_result(name: str) -> dict: "sig_bits": None, "float_proxy": None, "vprec": [], - "dd_sym_syms": [], - "dd_line_locs": [], - "dd_line_confirmed": None, - "dd_line_confirm_dev": None, "cancellation_locs": [], "cancellation_bits": {}, + "cancellation_macro": {}, "mca_dev": None, "mca_sigbits": None, "float_max_locs": [], @@ -409,11 +384,8 @@ def _run_case( sim_bin: str, pp_bin: str, n_samples: int, - log_dir: str, run_float: bool, run_vprec: bool, - run_dd_sym: bool, - run_dd_line: bool, run_cancellation: bool, run_mca: bool, run_float_max: bool, @@ -493,62 +465,7 @@ def _run_case( marker = " [red]FAIL[/red]" cons.print(f" {bits:2d} bits{label_str}: dev={dev:.3e}{marker}") - # --- D/E: delta-debug with float mode to find FP hotspots. - # dd_run.sh uses --rounding-mode=float (deterministic single-precision), - # so each bisection step is consistent and --nruns=1 suffices. Threshold - # = float_proxy/10: the full instrumented set produces ~float_proxy - # deviation; excluding the responsible function drops it to near zero; - # any subset missing the responsible function gives SAME. - # Skip when float_proxy is unavailable or too small to localize. 
- float_proxy = result.get("float_proxy") - _DD_FLOAT_MIN = 1e-6 - dd_threshold = float_proxy / 10.0 if float_proxy and float_proxy >= _DD_FLOAT_MIN else 0.0 - if dd_threshold > 0 and (run_dd_sym or run_dd_line): - cons.print(f" [dim]dd threshold: {dd_threshold:.1e} (float_proxy={float_proxy:.1e})[/dim]") - elif run_dd_sym or run_dd_line: - cons.print(f" [dim]skipping dd: float_proxy={float_proxy} < {_DD_FLOAT_MIN:.0e}[/dim]") - if dd_threshold > 0 and run_dd_sym: - try: - result["dd_sym_syms"] = _run_dd_sym(case, verrou_bin, sim_bin, work_dir, log_dir, threshold=dd_threshold) - except Exception as exc: - cons.print(f" [bold yellow]dd_sym error[/bold yellow]: {exc}") - if dd_threshold > 0 and run_dd_line: - try: - result["dd_line_locs"] = _run_dd_line( - case, - verrou_bin, - sim_bin, - work_dir, - log_dir, - threshold=dd_threshold, - ) - macro_n = sum(1 for loc in result["dd_line_locs"] if loc["macro"]) - if macro_n: - cons.print(f" [dim]dd_line: {macro_n} hotspot(s) inside fypp-expanded code (instance-ambiguous)[/dim]") - except Exception as exc: - cons.print(f" [bold yellow]dd_line error[/bold yellow]: {exc}") - - # --- E2: confirm dd_line hotspots and rank each by its individual share --- - if dd_threshold > 0 and run_dd_line and result["dd_line_locs"]: - cons.print(" [dim]confirming + ranking dd_line hotspots (per-line perturbation)...[/dim]") - try: - confirmed, cdev, ranked = _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, result["dd_line_locs"], dd_threshold, float_proxy) - result["dd_line_locs"] = ranked - result["dd_line_confirmed"] = confirmed - result["dd_line_confirm_dev"] = cdev - if confirmed is True: - cons.print(f" [bold green]dd_line confirmed[/bold green]: suspect-only dev={cdev:.3e} >= {dd_threshold:.1e}") - elif confirmed is False: - cons.print(f" [bold yellow]dd_line UNCONFIRMED[/bold yellow]: suspect-only dev={cdev:.3e} < {dd_threshold:.1e} (attribution suspect)") - top = ranked[0] if ranked else None - if top and 
top.get("share") is not None: - cons.print(f" highest single-precision sensitivity: {top['path']}:{top['start']} ({top['share'] * 100:.0f}% of float-proxy)") - cons.print(" [dim](sensitivity = where reduced precision most moves the output, often the time") - cons.print(" [dim] integrator; not necessarily where cancellation originates — see cancellation sites)[/dim]") - except Exception as exc: - cons.print(f" [bold yellow]dd_line confirmation error[/bold yellow]: {exc}") - - # --- F: cancellation detection --- + # --- D: cancellation detection --- if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") try: @@ -562,21 +479,22 @@ def _run_case( bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None]) result["cancellation_locs"] = locs result["cancellation_bits"] = bits + # flag cancellation sites whose .fpp line is inside a #:for/#:def + # expansion: the line maps to multiple generated instances, so the + # report cannot pin it to a unique runtime instance. 
+ result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))} if locs: worst = max(bits.values()) if bits else 0 cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits") + n_macro = len(result["cancellation_macro"]) + if n_macro: + cons.print(f" [dim]{n_macro} inside fypp expansions — line maps to multiple instances[/dim]") else: cons.print(" cancellation: none detected") - # cross-reference: label dd_line hotspots that sit on a cancellation site - if result["dd_line_locs"] and locs: - _mark_cancellation(result["dd_line_locs"], locs) - n_xref = sum(1 for loc in result["dd_line_locs"] if loc.get("cancellation")) - if n_xref: - cons.print(f" {n_xref} hotspot(s) coincide with a catastrophic-cancellation site") except Exception as exc: cons.print(f" [bold yellow]cancellation check error[/bold yellow]: {exc}") - # --- G: MCA significant-bits estimate --- + # --- E: MCA significant-bits estimate --- if run_mca: cons.print(f" [dim]MCA significant-bits estimate (N={n_samples})...[/dim]") try: @@ -591,7 +509,7 @@ def _run_case( except Exception as exc: cons.print(f" [bold yellow]MCA error[/bold yellow]: {exc}") - # --- H: float-max overflow detection --- + # --- F: float-max overflow detection --- if run_float_max: cons.print(" [dim]float-max overflow check...[/dim]") try: @@ -691,8 +609,6 @@ def fp_stability(): n_samples = ARG("samples") run_float = not ARG("no_float_proxy") run_vprec = not ARG("no_vprec") - run_dd_sym = not ARG("no_dd_sym") - run_dd_line = not ARG("no_dd_line") run_cancellation = not ARG("no_cancellation") run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") @@ -715,10 +631,6 @@ def fp_stability(): features.append("float-proxy") if run_vprec: features.append("vprec-sweep") - if run_dd_sym: - features.append("dd_sym") - if run_dd_line: - features.append("dd_line") if run_cancellation: features.append("cancellation") if run_mca: @@ 
-739,11 +651,8 @@ def fp_stability(): sim_bin, pp_bin, n_samples, - log_dir, run_float, run_vprec, - run_dd_sym, - run_dd_line, run_cancellation, run_mca, run_float_max, @@ -762,9 +671,6 @@ def fp_stability(): mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]" cons.print(f" {mark} {r['name']}") - if n_fail > 0: - cons.print(f"\n dd_sym/dd_line logs in: {log_dir}") - _emit_github_summary(results, n_samples) _emit_github_annotations(results) diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py index cfb3b2c1fd..a985b363af 100644 --- a/toolchain/mfc/fp_stability_metrics.py +++ b/toolchain/mfc/fp_stability_metrics.py @@ -1,6 +1,6 @@ """Pure metrics, source-resolution, and parsing helpers for the FP-stability suite. -Leaf module: imports only stdlib + MFC_ROOT_DIR + cons. No sibling fp_stability* +Leaf module: imports only stdlib + MFC_ROOT_DIR. No sibling fp_stability* imports, so the runners/report/orchestrator modules can all depend on it. """ @@ -10,7 +10,6 @@ import re from .common import MFC_ROOT_DIR -from .printer import cons # Mantissa-bit levels for the VPREC sweep (C). # 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. @@ -41,10 +40,6 @@ def _autodetect_compare(filenames: list) -> list: # normalising by the field scale collapses that, so a single number suffices. MIN_SIG_BITS = 24 -# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy- -# derived threshold is supplied (callers always pass one, so this is only a guard). -_DD_FALLBACK_THRESHOLD = 1e-12 - def _sig_bits(max_dev: float, ref_scale: float) -> float: """Significant bits retained = -log2(max_dev / ref_scale). @@ -58,9 +53,6 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float: return -math.log2(max_dev / ref_scale) -# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. 
-_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) - # Files to exclude from cancellation / float-max reports (runtime loaders, XALT). _EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so") @@ -69,47 +61,27 @@ def _sig_bits(max_dev: float, ref_scale: float) -> float: # Fypp block directives. The duplicating ones (#:for expands to N copies, #:def # defines a macro instantiated at multiple call sites) collapse many distinct -# generated computations onto a single .fpp source line, so a dd_line hit inside -# one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute select -# code but do not duplicate it, so they are tracked for balance but not flagged. +# generated computations onto a single .fpp source line, so a cancellation site +# inside one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute +# select code but do not duplicate it, so they are tracked for balance but not flagged. _FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE) _FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE) _FYPP_DUPLICATING = ("for", "def", "block", "call") -# Lines that are clearly control-flow delimiters rather than arithmetic. -# dd_line sometimes reports these when the responsible arithmetic is on the -# preceding line but shares DWARF debug info with the delimiter (e.g. loop -# boundaries in #:for-expanded code, or inlined functions at call sites). -_CONTROL_FLOW_RE = re.compile( - r"^\s*(" - r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" - r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) - r"|else(\s+if\s*\(.*\)\s*then)?\s*$" # else / else if (...) then - r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+" # subroutine declaration - r"|\$:END_GPU\w+" # fypp GPU macro closers - r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) 
- r"|\s*!\s*$" # comment-only lines - r"|\s*$" # blank lines - r")", - re.IGNORECASE, -) - - -def _resolve_source(fname: str, search_whole_tree: bool = False) -> str: + +def _resolve_source(fname: str) -> str: """Resolve a (possibly bare) source filename to an existing path, or '' if not found. An absolute existing path is used as-is; otherwise the basename is - located recursively under src/ (then the whole tree if `search_whole_tree`).""" + located recursively under src/.""" if os.path.isabs(fname) and os.path.isfile(fname): return fname candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates and search_whole_tree: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) return candidates[0] if candidates else "" -def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list: +def _read_source_lines(fname: str) -> list: """Resolve `fname` and return its lines (with newlines), or [] if unreadable.""" - path = _resolve_source(fname, search_whole_tree) + path = _resolve_source(fname) if not path: return [] try: @@ -119,17 +91,11 @@ def _read_source_lines(fname: str, search_whole_tree: bool = False) -> list: return [] -def _read_source_line(fname: str, lineno: int) -> str: - """Return the raw source line at lineno (1-based), or '' if unavailable.""" - lines = _read_source_lines(fname) - return lines[lineno - 1] if 0 < lineno <= len(lines) else "" - - def _macro_context_in_lines(lines: list, lineno: int) -> str: """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that encloses `lineno` (1-based) in `lines`, or None if none does. - Used to flag dd_line hotspots whose .fpp line is shared across multiple + Used to flag cancellation sites whose .fpp line is shared across multiple expanded instances (a #:for body, a #:def macro used in many places), where line-level attribution cannot identify which instance is responsible. 
""" @@ -155,78 +121,6 @@ def _macro_context(fname: str, lineno: int) -> str: return _macro_context_in_lines(lines, lineno) -def _ends_with_continuation(line: str) -> bool: - """True if a free-form Fortran line ends with a continuation '&' (the last - non-blank token before any trailing comment).""" - code = line.split("!", 1)[0].rstrip() # drop trailing comment (string-'!' is rare; fine here) - return code.endswith("&") - - -def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple: - """Return the (start, end) 1-based physical line range of the Fortran logical - statement containing lineno, following '&' continuations in both directions. - - A hit reported on a continuation fragment thus resolves to the whole - statement, so the labelled location is the full expression rather than a - mid-statement piece. - """ - n = len(lines) - start = lineno - while start > 1 and _ends_with_continuation(lines[start - 2]): - start -= 1 - end = lineno - while end < n and _ends_with_continuation(lines[end - 1]): - end += 1 - return start, end - - -def _statement_at(fname: str, lineno: int) -> tuple: - """File-backed (start, end, text) for the logical statement at fname:lineno; - text is the joined statement. Returns (lineno, lineno, '') if unreadable.""" - lines = _read_source_lines(fname) - if not 0 < lineno <= len(lines): - return lineno, lineno, "" - start, end = _statement_bounds_in_lines(lines, lineno) - # join physical lines, dropping the continuation '&' that may lead or trail each - text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end]) - return start, end, text - - -def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: - """Return True if any line in [start, end] contains non-trivial arithmetic. - - Filters out loop delimiters and fypp directive lines that dd_line sometimes - reports when the responsible arithmetic shares DWARF info with its enclosing - control-flow boundary (inlining, #:for template expansion, etc.). 
- Returns True (keep) when uncertain so we never silently drop real hotspots. - """ - for lineno in range(start, end + 1): - line = _read_source_line(fname, lineno) - if not line: - return True # can't read — keep to be safe - if not _CONTROL_FLOW_RE.match(line): - return True - return False - - -def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: - """Return a annotated source snippet around lineno, or '' if file not found. - - fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path. - Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree. - """ - lines = _read_source_lines(fname, search_whole_tree=True) - if not lines: - return "" - start = max(0, lineno - context - 1) - end = min(len(lines), lineno + context) - rows = [] - for i, line in enumerate(lines[start:end], start=start + 1): - marker = ">" if i == lineno else " " - rows.append(f"{marker}{i:5d} | {line.rstrip()}") - return "\n".join(rows) - - def _dat_column(path: str): """Load column 1 (the field value) from an MFC .dat file, robust to a single-row file (np.loadtxt returns 1-D then, which [:, 1] would crash on).""" @@ -341,137 +235,3 @@ def _digits_left(bits_lost: float) -> float: """Approximate trustworthy decimal digits remaining after losing `bits_lost` bits of a double's 53-bit mantissa (~15.95 digits full).""" return max(0.0, (53 - bits_lost) / math.log2(10)) - - -def _parse_rddmin_locs(summary_path: str) -> list: - """Extract dd_line locations from an rddmin_summary as - [{path, start, end, macro}] dicts (path is repo-relative; macro is the - enclosing fypp duplicating block, e.g. '#:for', or None). - - Filters out locations whose source lines are pure control-flow delimiters - (loop boundaries, fypp directive closers, blank/comment lines). These can - appear when the responsible arithmetic shares DWARF debug info with an - enclosing boundary due to inlining or #:for template expansion. 
- """ - if not os.path.isfile(summary_path): - return [] - locs = [] - skipped = [] - with open(summary_path) as fh: - for line in fh: - m = _LOC_RE.search(line) - if not m: - continue - path = m.group(1) - start = int(m.group(2)) - end = int(m.group(3)) if m.group(3) else start - try: - rel = os.path.relpath(path, MFC_ROOT_DIR) - if rel.startswith(".."): - rel = path - except ValueError: - rel = path - rel = rel.replace("\\", "/") - if _is_arithmetic_loc(path, start, end): - locs.append({"path": rel, "start": start, "end": end, "macro": _macro_context(path, start)}) - else: - skipped.append((rel, start, end)) - for rel, start, end in skipped: - loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}" - cons.print(f" [dim]dd_line: skipped control-flow boundary {loc}[/dim]") - return locs - - -def _parse_rddmin_syms(summary_path: str) -> list: - """Extract symbol/function names from a dd_sym rddmin_summary. - - rddmin_summary format: - ddmin0:\\tFail Ratio: ...\\tFail indexes: ... - \\t\\t - ddmin1:\\t... - \\t\\t - - Lines starting with 'ddmin' are metadata; function names are on the - indented (tab-prefixed) lines as the first tab-delimited field. - """ - if not os.path.isfile(summary_path): - return [] - syms = [] - with open(summary_path) as fh: - for ln in fh: - stripped = ln.strip() - if not stripped or stripped.startswith("ddmin"): - continue - sym = stripped.split("\t")[0].strip() - if sym: - syms.append(sym) - return syms - - -def _build_source_filter(gen_lines: list, suspect_locs: list) -> list: - """Select the Verrou --source lines (FILE\\tLINE\\tSYMBOL) that fall on a - suspect dd_line location. - - gen_lines come from a --gen-source run and carry the exact symbol Verrou - requires (--source matches on file+line+symbol, not file+line alone). - suspect_locs are (path, start, end) tuples whose path may be a repo-relative - path while gen-source emits a basename, so matching is by basename + line. 
- """ - ranges = {} - for path, start, end in suspect_locs: - ranges.setdefault(os.path.basename(path), []).append((start, end)) - out = [] - for raw in gen_lines: - parts = raw.rstrip("\n").split("\t") - if len(parts) < 2: - continue - base = os.path.basename(parts[0].strip()) - try: - ln = int(parts[1].strip()) - except ValueError: - continue - if any(s <= ln <= e for s, e in ranges.get(base, [])): - out.append(raw if raw.endswith("\n") else raw + "\n") - return out - - -def _confirm_decision(suspect_dev, dd_threshold: float): - """Decide whether perturbing only the suspect lines reproduces the instability. - - Returns True (confirmed), False (suspect lines are inert -> attribution - suspect, e.g. macro-collapse misattribution), or None if unmeasured. - """ - if suspect_dev is None: - return None - return suspect_dev >= dd_threshold - - -def _rank_locs(locs: list, total: float) -> list: - """Attach a 'share' (per-line deviation / total) to each loc dict — which - must already carry 'share_dev' from a single-line positive control — and - return the locs sorted by that deviation, most flagrant first. - - 'total' is normally float_proxy, so share is the fraction of the full - single-precision deviation that perturbing that one line alone reproduces. - A non-positive total yields share=None (cannot normalize). - """ - for loc in locs: - dev = loc.get("share_dev") - loc["share"] = (dev / total) if (dev is not None and total and total > 0) else None - return sorted(locs, key=lambda loc: (loc.get("share_dev") or 0.0), reverse=True) - - -def _mark_cancellation(dd_line_locs: list, cancellation_locs: list) -> list: - """Set loc['cancellation']=True for each dd_line loc whose line range covers a - catastrophic-cancellation site (stage F), matched by basename + line. - - This pins the flagrant operation on a multi-op line to the subtraction that - cancels, rather than just naming the line. 
- """ - by_base = {} - for fname, lineno in cancellation_locs: - by_base.setdefault(os.path.basename(fname), set()).add(lineno) - for loc in dd_line_locs: - lines = by_base.get(os.path.basename(loc["path"]), set()) - loc["cancellation"] = any(ln in lines for ln in range(loc["start"], loc["end"] + 1)) - return dd_line_locs diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 05d31d0c9d..2e4fe1abb5 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -1,8 +1,7 @@ """GitHub-output emitters for the FP-stability suite (step summary + annotations). Pure formatting of the result dicts produced by the runners; the metric helpers -it uses (statement resolution, source context, digit math) live in -fp_stability_metrics. +it uses (digit math) live in fp_stability_metrics. """ import math @@ -12,58 +11,36 @@ MIN_SIG_BITS, VPREC_MANTISSA_BITS, _digits_left, - _get_source_context, - _statement_at, ) def _emit_github_annotations(results: list): - """Emit GitHub annotations for FP hotspots. + """Emit GitHub annotations for FP cancellation sites. Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations appear inline on the responsible source lines in the PR diff view. - Up to 3 dd_line locations are emitted per case (minimal responsible lines - from delta-debug). Confirmed hotspots (suspect-only perturbation reproduced - the instability) are ::warning::; unconfirmed ones are downgraded to - ::notice:: so a suspect attribution is not presented as fact. Up to 3 - cancellation sites per case are emitted as ::notice:: so the diff also - highlights subtraction-cancellation hotspots from --check-cancellation. + Up to 3 cancellation sites per case are emitted as ::notice:: so the diff + highlights subtraction-cancellation hotspots from --check-cancellation. 
A site + whose .fpp line sits inside a #:for/#:def expansion (tracked in + cancellation_macro) is noted as possibly representing multiple instances. """ if not os.environ.get("GITHUB_ACTIONS"): return for r in results: - status = "FAIL" if not r["passed"] else "sensitivity" - _sb = r.get("sig_bits") - _sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a" - dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}" - unconfirmed = r.get("dd_line_confirmed") is False - - for loc in r.get("dd_line_locs", [])[:3]: - location = f"file={loc['path']},line={loc['start']}" - if loc["end"] != loc["start"]: - location += f",endLine={loc['end']}" - note = dev_str - if loc.get("share") is not None: - note += f" — single-precision sensitivity: {loc['share'] * 100:.0f}% of float-proxy (where precision matters, not necessarily where cancellation originates)" - if loc.get("cancellation"): - note += " — also a catastrophic cancellation site" - if loc.get("macro"): - note += f" — {loc['macro']}-expanded line, may represent multiple instances" - if unconfirmed: - title = f"FP candidate (unconfirmed) [{r['name']}]" - print(f"::notice {location},title={title}::{note}", flush=True) - else: - title = f"FP {status} [{r['name']}]" - print(f"::warning {location},title={title}::{note}", flush=True) - n_dd = len(r.get("dd_line_locs", [])) - if n_dd > 3: - print(f"::notice title=FP hotspots [{r['name']}]::{n_dd - 3} more dd_line hotspot(s) not annotated inline; see the step summary", flush=True) - + site_bits = r.get("cancellation_bits") or {} + macro_sites = r.get("cancellation_macro") or {} for fname, lineno in r.get("cancellation_locs", [])[:3]: loc = f"file={fname},line={lineno}" title = f"FP cancellation [{r['name']}]" - print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True) + note = "catastrophic cancellation site" + bits = site_bits.get((fname, lineno)) + if bits: + note += f" — loses ≥ {bits / math.log2(10):.0f} of ~16 digits" + 
macro = macro_sites.get((fname, lineno)) + if macro: + note += f" — inside a {macro}-expanded line, may represent multiple instances" + print(f"::notice {loc},title={title}::{note}", flush=True) n_cc = len(r.get("cancellation_locs", [])) if n_cc > 3: print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) @@ -82,7 +59,7 @@ def _emit_github_summary(results: list, n_samples: int): Visible directly in the Actions run UI without downloading artifacts. Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), - and dd_line source locations for any failing cases. + and catastrophic-cancellation source locations for any failing cases. """ summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if not summary_path: @@ -128,22 +105,17 @@ def _emit_github_summary(results: list, n_samples: int): ) for r in cases_with_cancel: site_bits = r.get("cancellation_bits") or {} - # collapse continuation fragments to one entry per logical statement, - # keeping the worst bits-lost seen on that statement - stmts = {} # (basename, stmt_start) -> {where, bits, text} - for fname, lineno in r["cancellation_locs"]: - stmt_start, _end, stmt_text = _statement_at(fname, lineno) - key = (os.path.basename(fname), stmt_start) - e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text}) - e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0)) - ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"])) + macro_sites = r.get("cancellation_macro") or {} + sites = [{"where": f"{fname}:{lineno}", "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"]] + ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"])) if ordered: w = ordered[0] - md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") + 
md.append(f"**`{r['name']}`** — {len(ordered)} site(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") for e in ordered[:15]: lost = e["bits"] / math.log2(10) - md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else "")) - footer = _more_md(len(ordered), 15, "statement(s)") + ambiguous = f" — _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else "" + md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`{ambiguous}") + footer = _more_md(len(ordered), 15, "site(s)") if footer: md.append(footer) md.append("") @@ -170,60 +142,6 @@ def _emit_github_summary(results: list, n_samples: int): md.append(f"| `{r['name']}` | {' | '.join(cols)} |") md.append("") - # dd_line — single-precision SENSITIVITY (where precision most affects the - # output). This is distinct from cancellation origin (reported separately): - # the leader is typically the time integrator / final accumulation, because - # perturbing the last write moves the output directly while upstream errors - # get re-rounded there. Not a culprit-finder for ill-conditioning. - cases_with_locs = [r for r in results if r["dd_line_locs"]] - if cases_with_locs: - md.append("
") - md.append("Single-precision sensitivity (dd_line) — usually the time integrator; expand for details\n") - md.append( - "> Where reduced precision most moves the output — **typically the time integrator / " - "final accumulation, which is expected and benign**. This is *not* where cancellation " - "originates (that's the section above); it shows where precision matters most.\n" - ) - _confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"} - for r in cases_with_locs: - status = "❌ FAIL" if not r["passed"] else "✅ pass" - md.append(f"**`{r['name']}`** ({status}) — attribution {_confirm_label[r.get('dd_line_confirmed')]}") - md.append("_Ranked by the share of the single-precision deviation each line reproduces alone._\n") - for loc in r["dd_line_locs"][:10]: - rel_path, start, end = loc["path"], loc["start"], loc["end"] - where = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" - tags = [] - if loc.get("share") is not None: - tags.append(f"**{loc['share'] * 100:.0f}%** of float-proxy") - if loc.get("cancellation"): - tags.append("catastrophic cancellation") - if loc.get("macro"): - tags.append(f"_{loc['macro']}-expanded, may represent multiple instances_") - suffix = f" — {', '.join(tags)}" if tags else "" - md.append(f"- `{where}`{suffix}") - snippet = _get_source_context(rel_path, start) - if snippet: - md.append(" ```fortran") - for line in snippet.splitlines(): - md.append(f" {line}") - md.append(" ```") - footer = _more_md(len(r["dd_line_locs"]), 10, "hotspot(s)") - if footer: - md.append(footer) - md.append("") - md.append("
\n") - - # dd_sym function names (collapsed, since less actionable than dd_line) - cases_with_syms = [r for r in results if r["dd_sym_syms"]] - if cases_with_syms: - md.append("
") - md.append("Responsible functions (dd_sym)\n") - for r in cases_with_syms: - md.append(f"\n**`{r['name']}`**\n") - for sym in r["dd_sym_syms"]: - md.append(f"- `{sym}`") - md.append("\n
\n") - # Float-max overflow sites cases_with_fmax = [r for r in results if r.get("float_max_locs")] if cases_with_fmax: diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 1d1c5a7b8f..8e404098aa 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -1,33 +1,23 @@ """Verrou subprocess runners for the FP-stability suite. -Each routine drives the verrou/valgrind binary (or the verrou_dd_* delta-debug -tools) and returns parsed results. Pure parsing / metric helpers live in -fp_stability_metrics, which this module imports. +Each routine drives the verrou/valgrind binary and returns parsed results. Pure +parsing / metric helpers live in fp_stability_metrics, which this module imports. """ import glob import math import os import shutil -import stat import subprocess import tempfile -import textwrap from .common import MFC_ROOT_DIR, MFCException from .fp_stability_metrics import ( - _DD_FALLBACK_THRESHOLD, VPREC_MANTISSA_BITS, - _build_source_filter, - _confirm_decision, - _is_arithmetic_loc, _max_abs_np, _max_diff_np, _parse_cancel_gen, - _parse_rddmin_locs, - _parse_rddmin_syms, _parse_vg_error_locs, - _rank_locs, ) from .printer import cons @@ -65,20 +55,6 @@ def _find_binary(name: str) -> str: return max(candidates, key=os.path.getmtime) if candidates else "" -def _find_dd_tool(verrou_bin: str, tool: str) -> str: - """Path to a verrou_dd_* tool (e.g. 
'verrou_dd_sym') next to the verrou binary, - or '' if absent.""" - c = os.path.join(os.path.dirname(verrou_bin), tool) - return c if os.path.isfile(c) else "" - - -def _verrou_pythonpath(verrou_bin: str) -> str: - """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir).""" - verrou_home = os.path.dirname(os.path.dirname(verrou_bin)) - matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind")) - return matches[0] if matches else "" - - def _verrou_env(verrou_bin: str) -> dict: """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt artifact extracted to a new prefix) can locate its tool — Valgrind bakes its @@ -168,12 +144,7 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh except MFCException as exc: cons.print(f" [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]") return None - raw = _parse_cancel_gen(gen_path) - filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] - skipped = len(raw) - len(filtered) - if skipped and threshold == 10: - cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") - return filtered + return _parse_cancel_gen(gen_path) def _run_mca_samples( @@ -253,266 +224,3 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r dev = float("inf") results.append((bits, dev)) return results - - -def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): - """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. - - verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via - VERROU_EXCLUDE / VERROU_SOURCE environment variables. For test runs, we use - --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices). 
- For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the - environment — we honour that so the reference is a stable nearest-rounding baseline - to compare against. CLI --rounding-mode would override the env var and break the - reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env bash - # Generated by mfc.sh fp-stability — do not edit by hand. - VERROU_BIN={verrou_bin!r} - SIM_BIN={sim_bin!r} - IC_DIR={ic_dir!r} - - RUNDIR="$1" - TMPDIR_RUN=$(mktemp -d) - trap 'rm -rf "$TMPDIR_RUN"' EXIT - - cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all" - cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp" - for fname in indices.dat pre_time_data.dat io_time_data.dat; do - [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/" - done - mkdir -p "$TMPDIR_RUN/D" - - # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and - # leaves it unset for test runs. Defaulting to float gives deterministic - # test steps while letting the reference use nearest-rounding. - ROUND="${{VERROU_ROUNDING_MODE:-float}}" - - # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation). - # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to). - # Forward them as valgrind flags when set. - EXTRA="" - [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE" - [ -n "${{VERROU_SOURCE:-}}" ] && EXTRA="$EXTRA --source=$VERROU_SOURCE" - - cd "$TMPDIR_RUN" - "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN" - rc=$? - - [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/" - exit $rc - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): - """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line. 
- - verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR - Exits 0 (stable) or 1 (unstable) based on threshold. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env python3 - # Generated by mfc.sh fp-stability — do not edit by hand. - import sys, os, numpy as np - - COMPARE_FILES = {compare_files!r} - THRESHOLD = {threshold!r} - - ref_dir, run_dir = sys.argv[1], sys.argv[2] - max_dev = 0.0 - for fname in COMPARE_FILES: - ref_p = os.path.join(ref_dir, fname) - run_p = os.path.join(run_dir, fname) - if not os.path.exists(ref_p) or not os.path.exists(run_p): - print(f"MISSING: {{fname}}") - sys.exit(1) - ref = np.atleast_2d(np.loadtxt(ref_p))[:, 1] - run = np.atleast_2d(np.loadtxt(run_p))[:, 1] - dev = float(np.max(np.abs(ref - run))) - max_dev = max(max_dev, dev) - - print(f"max_dev={{max_dev:.3e}} threshold={{THRESHOLD:.0e}}") - sys.exit(0 if max_dev <= THRESHOLD else 1) - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _dd_env(verrou_bin: str) -> dict: - """Environment for verrou_dd_*: VALGRIND_LIB (so a relocated tree's inner valgrind - calls resolve) plus PYTHONPATH (for the verrou_dd_* imports).""" - py_pkg = _verrou_pythonpath(verrou_bin) - env = _verrou_env(verrou_bin) - if py_pkg: - existing = env.get("PYTHONPATH", "") - env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing])) - return env - - -def _run_dd_tool( - dd_bin: str, - dd_dir: str, - dd_run_sh: str, - dd_cmp_py: str, - env: dict, - log_name: str, - summary_subdir: str, - label: str, -) -> list: - """Generic runner for verrou_dd_sym / verrou_dd_line. 
Returns raw summary lines.""" - log_file = os.path.join(dd_dir, log_name) - cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py] - cons.print(f" [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]") - with open(log_file, "w") as f: - result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False) - summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary") - summary_lines = [] - if result.returncode == 0: - if os.path.isfile(summary_path): - with open(summary_path) as f: - summary_lines = f.readlines() - cons.print(f" [bold yellow]{label} result[/bold yellow]:") - for line in summary_lines: - cons.print(f" {line.rstrip()}") - else: - cons.print(f" [dim]{label} done; see {log_file}[/dim]") - else: - cons.print(f" [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})") - return summary_lines - - -def _setup_dd_run(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, dd_dir: str, threshold: float): - """Write dd_run.sh and dd_cmp.py for a verrou_dd_* run into dd_dir; return their - paths. 
The threshold falls back to _DD_FALLBACK_THRESHOLD when unset.""" - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD) - return dd_run_sh, dd_cmp_py - - -def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: - """Run verrou_dd_sym; return list of responsible symbol names.""" - dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_sym") - if not dd_bin: - cons.print(" [dim]verrou_dd_sym not found; skipping delta-debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") - cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") - return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) - - -def _run_dd_line( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - log_dir: str, - threshold: float = None, -) -> list: - """Run verrou_dd_line; return [{path, start, end, macro}] location dicts.""" - dd_bin = _find_dd_tool(verrou_bin, "verrou_dd_line") - if not dd_bin: - cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - dd_run_sh, dd_cmp_py = _setup_dd_run(case, verrou_bin, sim_bin, work_dir, dd_dir, threshold) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") - return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) - - -def _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, src_lines, compare, tag): - """Perturb only the 
lines in src_lines (deterministic float mode) and return - the L-inf deviation from the nearest-rounding reference, or None on failure.""" - src_path = os.path.join(conf_dir, f"source_{tag}.txt") - with open(src_path, "w") as fh: - fh.writelines(src_lines) - run_dir = os.path.join(conf_dir, f"perturb_{tag}") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="float", - extra_flags=[f"--source={src_path}"], - ) - except MFCException: - return None - return _max_diff_np(ref_dir, run_dir, compare) - - -def _capture_gen_source(verrou_bin, sim_bin, work_dir, run_dir, gen_path): - """Run nearest-rounding with --gen-source to capture the symbol-correct - executed source lines (FILE\\tLINE\\tSYMBOL); return them, or None on failure.""" - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="nearest", - extra_flags=[f"--gen-source={gen_path}"], - ) - except MFCException: - return None - if not os.path.isfile(gen_path): - return None - with open(gen_path) as fh: - return fh.readlines() - - -def _run_confirmation(case, verrou_bin, sim_bin, work_dir, ref_dir, dd_line_locs, dd_threshold, float_proxy): - """Positive control for dd_line: perturb ONLY the suspect lines and confirm - the instability reproduces, then rank each line by its individual share. - - Verrou's --source matches file+line+symbol (not file+line alone), so we first - capture the symbol-correct executed source lines via --gen-source, filter them - to the suspect set, then run deterministic float-mode restricted to just those - lines. If the suspect-only deviation reaches dd_threshold the attribution is - confirmed; if it stays near zero the reported lines do not actually carry the - instability (e.g. a #:for-expanded line blamed for the wrong instance). - - Each line is then perturbed alone so its 'share_dev' (and 'share' of - float_proxy) shows which computation dominates. 
- - Returns (confirmed, suspect_dev, ranked_locs). - """ - if not dd_line_locs: - return None, None, dd_line_locs - conf_dir = os.path.join(work_dir, "confirm") - os.makedirs(conf_dir, exist_ok=True) - gen_lines = _capture_gen_source(verrou_bin, sim_bin, work_dir, conf_dir, os.path.join(conf_dir, "gen_source.txt")) - if gen_lines is None: - return None, None, dd_line_locs - compare = case["compare"] - - # whole-set positive control - suspects = [(loc["path"], loc["start"], loc["end"]) for loc in dd_line_locs] - set_src = _build_source_filter(gen_lines, suspects) - if not set_src: - # none of the reported lines performs an instrumented FP op -> not reproduced - return False, 0.0, dd_line_locs - set_dev = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, set_src, compare, "set") - confirmed = _confirm_decision(set_dev, dd_threshold) - - # per-line ranking (a single line trivially owns the whole set deviation) - if len(dd_line_locs) == 1: - dd_line_locs[0]["share_dev"] = set_dev - else: - for i, loc in enumerate(dd_line_locs): - one = _build_source_filter(gen_lines, [(loc["path"], loc["start"], loc["end"])]) - loc["share_dev"] = _source_perturb_dev(verrou_bin, sim_bin, work_dir, ref_dir, conf_dir, one, compare, f"line{i:02d}") if one else 0.0 - ranked = _rank_locs(dd_line_locs, total=(float_proxy or set_dev)) - return confirmed, set_dev, ranked diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index b2b43bfc02..38d49d60eb 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -1,5 +1,5 @@ -"""Unit tests for the pure helpers behind the FP-stability dd_line confirmation -pass (#1) and macro-expansion flagging (#2). +"""Unit tests for the pure helpers behind the FP-stability cancellation pass and +its fypp macro-expansion flagging. 
The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job; here we test only the pure functions that decide what to instrument and how to @@ -9,15 +9,10 @@ from mfc.fp_stability_metrics import ( MIN_SIG_BITS, _autodetect_compare, - _build_source_filter, _cancellation_severity, - _confirm_decision, _digits_left, _macro_context_in_lines, - _mark_cancellation, - _rank_locs, _sig_bits, - _statement_bounds_in_lines, ) # --- #2: fypp macro-expansion context detection --- @@ -93,127 +88,6 @@ def test_macro_context_unbalanced_close_is_safe(): assert _macro_context_in_lines(["#:endfor\n", " a = b - c\n"], 2) is None -# --- #1: building the symbol-correct --source filter from --gen-source output --- - - -def test_build_source_filter_keeps_matching_file_and_line_with_symbol(): - gen = [ - "m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n", - "m_riemann_solvers.fpp\t999\ts_other\n", - ] - suspects = [("src/simulation/m_riemann_solvers.fpp", 512, 512)] - out = _build_source_filter(gen, suspects) - assert out == ["m_riemann_solvers.fpp\t512\ts_hllc_riemann_solver\n"] - - -def test_build_source_filter_matches_inclusive_range(): - gen = [ - "m_foo.fpp\t10\tsym\n", - "m_foo.fpp\t11\tsym\n", - "m_foo.fpp\t12\tsym\n", - "m_foo.fpp\t13\tsym\n", - ] - suspects = [("m_foo.fpp", 11, 12)] - out = _build_source_filter(gen, suspects) - assert out == ["m_foo.fpp\t11\tsym\n", "m_foo.fpp\t12\tsym\n"] - - -def test_build_source_filter_excludes_other_basenames(): - gen = ["m_bar.fpp\t5\tsym\n"] - suspects = [("m_foo.fpp", 5, 5)] - assert _build_source_filter(gen, suspects) == [] - - -def test_build_source_filter_matches_on_basename_not_full_path(): - # gen-source emits a basename; dd_line locs are repo-relative paths. 
- gen = ["m_foo.fpp\t5\tsym\n"] - suspects = [("src/common/m_foo.fpp", 5, 5)] - assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"] - - -def test_build_source_filter_skips_malformed_lines(): - gen = ["garbage-no-tab\n", "m_foo.fpp\tnotanumber\tsym\n", "m_foo.fpp\t5\tsym\n"] - suspects = [("m_foo.fpp", 5, 5)] - assert _build_source_filter(gen, suspects) == ["m_foo.fpp\t5\tsym\n"] - - -# --- #1: confirmation decision --- - - -def test_confirm_decision_true_when_suspect_reproduces_deviation(): - # perturbing only the suspect lines yields >= dd_threshold deviation - assert _confirm_decision(suspect_dev=1e-3, dd_threshold=1e-5) is True - - -def test_confirm_decision_false_when_suspect_is_inert(): - # suspect lines barely move the result -> attribution not reproduced - assert _confirm_decision(suspect_dev=1e-9, dd_threshold=1e-5) is False - - -def test_confirm_decision_none_when_measurement_unavailable(): - assert _confirm_decision(suspect_dev=None, dd_threshold=1e-5) is None - - -# --- Tier 1: per-line confirmation ranking --- - - -def test_rank_locs_sorts_by_share_dev_descending(): - locs = [ - {"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.1}, - {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.9}, - ] - ranked = _rank_locs(locs, total=1.0) - assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"] - - -def test_rank_locs_computes_share_as_fraction_of_total(): - locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}] - ranked = _rank_locs(locs, total=0.5) - assert ranked[0]["share"] == 0.5 - - -def test_rank_locs_share_none_when_total_nonpositive(): - locs = [{"path": "a.fpp", "start": 1, "end": 1, "share_dev": 0.25}] - ranked = _rank_locs(locs, total=0.0) - assert ranked[0]["share"] is None - - -def test_rank_locs_treats_missing_share_dev_as_zero_and_sorts_last(): - locs = [ - {"path": "a.fpp", "start": 1, "end": 1, "share_dev": None}, - {"path": "b.fpp", "start": 2, "end": 2, "share_dev": 0.3}, - ] - ranked = 
_rank_locs(locs, total=1.0) - assert [loc["path"] for loc in ranked] == ["b.fpp", "a.fpp"] - - -# --- Tier 1b: dd_line x cancellation cross-reference --- - - -def test_mark_cancellation_flags_loc_on_a_cancellation_line(): - locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}] - _mark_cancellation(locs, [("m_foo.fpp", 11)]) - assert locs[0]["cancellation"] is True - - -def test_mark_cancellation_false_when_no_site_in_range(): - locs = [{"path": "src/common/m_foo.fpp", "start": 10, "end": 12}] - _mark_cancellation(locs, [("m_foo.fpp", 99)]) - assert locs[0]["cancellation"] is False - - -def test_mark_cancellation_matches_on_basename_not_full_path(): - locs = [{"path": "src/common/m_foo.fpp", "start": 5, "end": 5}] - _mark_cancellation(locs, [("/abs/build/m_foo.fpp", 5)]) - assert locs[0]["cancellation"] is True - - -def test_mark_cancellation_false_for_different_basename(): - locs = [{"path": "m_foo.fpp", "start": 5, "end": 5}] - _mark_cancellation(locs, [("m_bar.fpp", 5)]) - assert locs[0]["cancellation"] is False - - # --- per-site cancellation severity (bits lost), from a threshold sweep --- @@ -289,44 +163,6 @@ def test_digits_left_full_and_clamped(): assert _digits_left(60) == 0.0 # clamp: never negative -# --- Fortran line-continuation handling (correct-line labeling) --- - - -def test_statement_bounds_single_line(): - lines = [" a = b - c\n"] - assert _statement_bounds_in_lines(lines, 1) == (1, 1) - - -def test_statement_bounds_spans_continuation_from_first_line(): - lines = [" poly = (s_cb(i+3) - s_cb(i+1)) * &\n", " (s_cb(i+2) - s_cb(i))\n"] - assert _statement_bounds_in_lines(lines, 1) == (1, 2) - - -def test_statement_bounds_from_middle_continuation_line(): - # a hit on the continuation fragment must resolve to the statement start - lines = [" x = a + &\n", " b + &\n", " c\n"] - assert _statement_bounds_in_lines(lines, 2) == (1, 3) - assert _statement_bounds_in_lines(lines, 3) == (1, 3) - - -def 
test_statement_bounds_ignores_ampersand_in_trailing_comment_logic(): - # a real continuation '&' before a trailing comment still continues - lines = [" x = a & ! note\n", " + b\n"] - assert _statement_bounds_in_lines(lines, 1) == (1, 2) - - -def test_statement_bounds_non_continuation_neighbors(): - lines = [" x = 1\n", " y = 2\n", " z = 3\n"] - assert _statement_bounds_in_lines(lines, 2) == (2, 2) - - -def test_statement_bounds_with_leading_ampersand_continuation(): - # the MFC WENO style: line ends with '&' and the next line *starts* with '&' - lines = [" beta = x**2 &\n", " & + eps\n"] - assert _statement_bounds_in_lines(lines, 1) == (1, 2) - assert _statement_bounds_in_lines(lines, 2) == (1, 2) - - # --- report emitters: must survive blank and populated result dicts (CI-only path) --- @@ -359,26 +195,31 @@ def test_emit_summary_populated_result(tmp_path, monkeypatch): sig_bits=30.0, float_proxy=1e-6, vprec=[(52, 1e-14), (23, float("inf"))], # exercises the "crash" branch - dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": "#:for", "share": 0.4, "cancellation": True}], - dd_line_confirmed=False, cancellation_locs=[("src/x/m_a.fpp", 5)], cancellation_bits={("src/x/m_a.fpp", 5): 40}, + cancellation_macro={("src/x/m_a.fpp", 5): "#:for"}, float_max_locs=[("m_a.fpp", 9)], ) text = _emit_to_tmp([r], tmp_path, monkeypatch) assert "💥 crash" in text and "digits lost" in text + assert "may represent multiple instances" in text # fypp-ambiguous marker -def test_emit_annotations_downgrade_unconfirmed(tmp_path, monkeypatch, capsys): +def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys): from mfc import fp_stability_report as report from mfc.fp_stability import _blank_result monkeypatch.setenv("GITHUB_ACTIONS", "1") r = _blank_result("demo") - r.update(dd_line_locs=[{"path": "src/x/m_a.fpp", "start": 5, "end": 5, "macro": None, "share": 0.9, "cancellation": False}], dd_line_confirmed=False) + r.update( + 
cancellation_locs=[("src/x/m_a.fpp", 5)], + cancellation_bits={("src/x/m_a.fpp", 5): 40}, + cancellation_macro={("src/x/m_a.fpp", 5): "#:for"}, + ) report._emit_github_annotations([r]) out = capsys.readouterr().out - assert "::notice" in out and "::warning" not in out # unconfirmed -> notice, not warning + assert "::notice" in out + assert "multiple instances" in out # fypp-expanded cancellation site flagged # --- Verrou discovery: a bare system valgrind must read as "Verrou absent" --- @@ -480,27 +321,6 @@ def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch): assert env["VALGRIND_LIB"] == "/user/chosen/lib" # not clobbered -def test_dd_env_prepends_pythonpath_and_inherits_valgrind_lib(tmp_path, monkeypatch): - from mfc import fp_stability_runners as runners - - (tmp_path / "libexec" / "valgrind").mkdir(parents=True) - monkeypatch.delenv("VALGRIND_LIB", raising=False) - monkeypatch.setenv("PYTHONPATH", "/pre/existing") - monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/site-packages/valgrind") - env = runners._dd_env(str(tmp_path / "bin" / "valgrind")) - assert env["PYTHONPATH"] == "/vg/site-packages/valgrind:/pre/existing" - assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind") - - -def test_dd_env_no_leading_colon_when_pythonpath_empty(tmp_path, monkeypatch): - from mfc import fp_stability_runners as runners - - monkeypatch.delenv("PYTHONPATH", raising=False) - monkeypatch.setattr(runners, "_verrou_pythonpath", lambda _b: "/vg/valgrind") - env = runners._dd_env(str(tmp_path / "bin" / "valgrind")) - assert env["PYTHONPATH"] == "/vg/valgrind" # no stray leading ':' - - # --- auto-install hard-fail guards --- From eea0c8def458b892d95d6acc3f20237652696d50 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 2 Jun 2026 20:17:36 -0400 Subject: [PATCH 19/25] fp-stability: drop the MCA pass (redundant with the random-rounding sig-bits gate) MCA (--backend=mcaquad) reported a significant-bits estimate that 
duplicated the metric the core random-rounding suite already produces for PASS/FAIL; on well-conditioned cases the two agree, so it added a slower second opinion without a distinct capability. Removed _run_mca_samples, the MCA pass + --no-mca flag + result keys + summary column. Cancellation/vprec/float-proxy/float-max and the PASS/FAIL core are unchanged. ~50 lines off; it was also the slowest pass (N extra mcaquad runs/case). --- toolchain/mfc/cli/commands.py | 11 +-------- toolchain/mfc/fp_stability.py | 31 ++------------------------ toolchain/mfc/fp_stability_report.py | 7 +++--- toolchain/mfc/fp_stability_runners.py | 32 --------------------------- 4 files changed, 6 insertions(+), 75 deletions(-) diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 6dfbb57c77..7d9771f772 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -917,7 +917,6 @@ " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" " cancellation --check-cancellation origins, ranked by significant digits lost\n" - " mca-sigbits Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n" " float-max --check-max-float detection of double→float overflow sites\n" ), include_common=["mfc_config", "verbose", "debug_log"], @@ -977,13 +976,6 @@ default=False, dest="no_cancellation", ), - Argument( - name="no-mca", - help="Skip Monte Carlo Arithmetic (mcaquad) significant-bits estimate.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_mca", - ), Argument( name="no-float-max", help="Skip --check-max-float float32 overflow detection.", @@ -1001,7 +993,7 @@ ), Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"), Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"), - Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip 
new analysis passes"), + Example("./mfc.sh fp-stability --no-cancellation --no-float-max", "Skip analysis passes"), ], key_options=[ ("--sim-binary PATH", "Serial simulation binary (debug, no-MPI)"), @@ -1011,7 +1003,6 @@ ("--no-float-proxy", "Skip float-rounding proxy run"), ("--no-vprec", "Skip VPREC mantissa-bit sweep"), ("--no-cancellation", "Skip cancellation detection"), - ("--no-mca", "Skip MCA significant-bits estimate"), ("--no-float-max", "Skip float32 overflow detection"), ], ) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 102d512d52..bf3f557455 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -22,11 +22,7 @@ .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous (the line maps to multiple generated instances). -E. MCA significant-bits estimate (--no-mca to skip) - N runs with --backend=mcaquad; max deviation vs nearest-rounding - reference gives a lower bound on significant bits: s = -log2(dev/scale). - -F. Float-max overflow detection (--no-float-max to skip) +E. Float-max overflow detection (--no-float-max to skip) One run with --check-max-float=yes; reports locations where a double→float conversion would overflow to ±Inf. 
@@ -80,7 +76,6 @@ _run_cancellation_check, _run_float_max_check, _run_float_proxy, - _run_mca_samples, _run_preprocess, _run_simulation_verrou, _run_vprec_sweep, @@ -372,8 +367,6 @@ def _blank_result(name: str) -> dict: "cancellation_locs": [], "cancellation_bits": {}, "cancellation_macro": {}, - "mca_dev": None, - "mca_sigbits": None, "float_max_locs": [], } @@ -387,7 +380,6 @@ def _run_case( run_float: bool, run_vprec: bool, run_cancellation: bool, - run_mca: bool, run_float_max: bool, ) -> dict: name = case["name"] @@ -494,22 +486,7 @@ def _run_case( except Exception as exc: cons.print(f" [bold yellow]cancellation check error[/bold yellow]: {exc}") - # --- E: MCA significant-bits estimate --- - if run_mca: - cons.print(f" [dim]MCA significant-bits estimate (N={n_samples})...[/dim]") - try: - mca_dev, mca_sigbits, n_ok = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples) - if n_ok == 0: - cons.print(f" [bold yellow]MCA: no samples completed (0/{n_samples}; see logs)[/bold yellow]") - else: - result["mca_dev"] = mca_dev - result["mca_sigbits"] = mca_sigbits - bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a" - cons.print(f" MCA: dev={mca_dev:.3e} ({bits_str}) [{n_ok}/{n_samples} samples]") - except Exception as exc: - cons.print(f" [bold yellow]MCA error[/bold yellow]: {exc}") - - # --- F: float-max overflow detection --- + # --- E: float-max overflow detection --- if run_float_max: cons.print(" [dim]float-max overflow check...[/dim]") try: @@ -610,7 +587,6 @@ def fp_stability(): run_float = not ARG("no_float_proxy") run_vprec = not ARG("no_vprec") run_cancellation = not ARG("no_cancellation") - run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES @@ -633,8 +609,6 @@ def fp_stability(): features.append("vprec-sweep") if run_cancellation: features.append("cancellation") - if run_mca: - features.append("mca-sigbits") if 
run_float_max: features.append("float-max") cons.print(f" features: {', '.join(features) if features else 'stability only'}") @@ -654,7 +628,6 @@ def fp_stability(): run_float, run_vprec, run_cancellation, - run_mca, run_float_max, ) except MFCException as exc: diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 2e4fe1abb5..8e0a2580b8 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -80,14 +80,13 @@ def _emit_github_summary(results: list, n_samples: int): # Main results table — pass/fail is scale-free: bits retained vs a single floor md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") - md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |") - md.append("|------|:------:|:------:|--------:|--------:|:------:|") + md.append("| Case | Status | bits retained | max\\_dev | Float proxy |") + md.append("|------|:------:|:------:|--------:|--------:|") for r in results: status = "✅" if r["passed"] else "❌" bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" - sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—" - md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |") + md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |") md.append("") # Cancellation ORIGINS — where ill-conditioning actually arises, led with the diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 8e404098aa..12af6041c3 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -5,7 +5,6 @@ """ import glob -import math import os import shutil import subprocess @@ -14,7 +13,6 @@ from .common import MFC_ROOT_DIR, 
MFCException from .fp_stability_metrics import ( VPREC_MANTISSA_BITS, - _max_abs_np, _max_diff_np, _parse_cancel_gen, _parse_vg_error_locs, @@ -147,36 +145,6 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh return _parse_cancel_gen(gen_path) -def _run_mca_samples( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - ref_dir: str, - n_mca: int, -) -> tuple: - """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound, n_ok) where - n_ok is how many samples actually completed (0 => no usable measurement).""" - compare = case["compare"] - ref_scale = _max_abs_np(ref_dir, compare) - max_dev = 0.0 - n_ok = 0 - flags = ["--backend=mcaquad", "--mca-mode=mca"] - for i in range(n_mca): - run_dir = os.path.join(work_dir, f"mca_{i:02d}") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) - max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - n_ok += 1 - except MFCException as exc: - cons.print(f" [dim]MCA sample {i} failed: {exc}[/dim]") - sig_bits = None - if n_ok and max_dev > 0.0 and ref_scale > 0.0: - sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale)))) - return max_dev, sig_bits, n_ok - - def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): """Run with --check-max-float=yes; return [(fname, line)] of overflow sites, or None if the run failed (distinct from [] = ran and found none).""" From c4d1ef0f41c10497902ef2f96a5e3541d29c7aa8 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 08:52:46 -0400 Subject: [PATCH 20/25] =?UTF-8?q?fp-stability:=20address=20Copilot=20revie?= =?UTF-8?q?w=20=E2=80=94=20verify=20--verrou-binary=20executability,=20env?= =?UTF-8?q?=20on=20PATH=20probe,=20comment=20rot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot PR review (all valid): (1) gate explicit --verrou-binary on os.access(X_OK) so the 
'not executable' message is honest; (6) pass VALGRIND_LIB via _verrou_env to the PATH-fallback verrou probe too, so a relocated prebuilt on PATH isn't misjudged absent; (3,5) fix two stale comments left by the dd/MCA removals (a 'sensitivity list' that no longer exists; a VPREC '❌ where bits str: def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() - if not verrou_bin or not os.path.isfile(verrou_bin): + if not verrou_bin or not (os.path.isfile(verrou_bin) and os.access(verrou_bin, os.X_OK)): if ARG("verrou_binary"): raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.") verrou_bin = _install_verrou() diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 8e0a2580b8..6b8f07079c 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -51,7 +51,7 @@ def _more_md(total: int, shown: int, noun: str) -> str: or '' when nothing was truncated.""" if total <= shown: return "" - return f"- _…and {total - shown} more {noun}; see fp-stability-logs/_" + return f"- …and {total - shown} more {noun}; see `fp-stability-logs/`" def _emit_github_summary(results: list, n_samples: int): @@ -90,8 +90,7 @@ def _emit_github_summary(results: list, n_samples: int): md.append("") # Cancellation ORIGINS — where ill-conditioning actually arises, led with the - # most severe (most bits lost). The numerically interesting signal; the - # sensitivity list further down is dominated by the (benign) time integrator. + # most severe (most bits lost). 
cases_with_cancel = [r for r in results if r.get("cancellation_locs")] if cases_with_cancel: md.append("### Catastrophic cancellation origins (ranked by digits lost)\n") @@ -119,7 +118,8 @@ def _emit_github_summary(results: list, n_samples: int): md.append(footer) md.append("") - # VPREC sweep — one column per bit level, ❌ where bits retained < floor + # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at + # that reduced precision (💥 crash = run diverged/failed, — = not measured). if any(r["vprec"] for r in results): _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 12af6041c3..7c2519f018 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -40,9 +40,11 @@ def _find_verrou() -> str: if os.path.isfile(candidate) and os.access(candidate, os.X_OK) and _has_verrou_tool(candidate, _verrou_env(candidate)): return candidate # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system - # valgrind must read as "Verrou absent" so it gets installed, not misused. + # valgrind must read as "Verrou absent" so it gets installed, not misused. Verify + # with VALGRIND_LIB too, so a relocated prebuilt on PATH (env.sh not sourced) isn't + # wrongly judged absent. path_vg = shutil.which("valgrind") - if path_vg and _has_verrou_tool(path_vg): + if path_vg and _has_verrou_tool(path_vg, _verrou_env(path_vg)): return path_vg return "" diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 38d49d60eb..b8b6104403 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -260,7 +260,7 @@ def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch): # VERROU_HOME has no valgrind; a plain valgrind is on PATH but lacks the tool. 
monkeypatch.setenv("VERROU_HOME", str(tmp_path)) monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind") - monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: False) + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False) assert runners._find_verrou() == "" @@ -269,7 +269,7 @@ def test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch) monkeypatch.setenv("VERROU_HOME", str(tmp_path)) monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind") - monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin: True) + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True) assert runners._find_verrou() == "/opt/verrou/bin/valgrind" From 39a1b0f99a49a6102bb4f396b9aa161a66245319 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 09:01:08 -0400 Subject: [PATCH 21/25] fp-stability: give _run_simulation_verrou sole ownership of run_dir creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The makedirs(run_dir) sat after the --log-file path and the sim.out open that both need it, so it was a dead no-op that worked only because every caller pre-created run_dir — misleading, and a future caller would hit FileNotFoundError with no hint. Moved it to the top of _run_simulation_verrou (before those uses) and dropped the now-redundant pre-creates in all callers (_run_cancellation_check, _run_float_max_check, _run_float_proxy, _run_vprec_sweep, and _run_case's ref + sample runs). Single, clear ownership. (Claude Code Review finding.) 
--- toolchain/mfc/fp_stability.py | 2 -- toolchain/mfc/fp_stability_runners.py | 10 ++++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index b0a2f5b058..7fce9ae6fe 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -399,7 +399,6 @@ def _run_case( _run_preprocess(pp_bin, case["pre"], work_dir) ref_dir = os.path.join(work_dir, "ref") - os.makedirs(ref_dir) cons.print(" [dim]reference run (rounding=nearest)...[/dim]") _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest") @@ -420,7 +419,6 @@ def _run_case( cons.print(f" [dim]random-rounding runs (N={n_samples})...[/dim]") for i in range(n_samples): run_dir = os.path.join(work_dir, f"run_{i:02d}") - os.makedirs(run_dir) _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random") max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index 7c2519f018..f282f47e12 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -94,11 +94,14 @@ def _run_simulation_verrou( rounding_mode: str = None, extra_flags: list = None, ): - """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output. + """Create run_dir, copy ICs into a fresh tmpdir, run simulation under verrou, + and collect its D/ output into run_dir. Owns run_dir creation, so callers need + not pre-create it. rounding_mode is passed as --rounding-mode= when not None. extra_flags are appended before the binary (e.g. --backend=vprec ...). 
""" + os.makedirs(run_dir, exist_ok=True) # needed before --log-file / sim.out below with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir: for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]: src = os.path.join(work_dir, fname) @@ -121,7 +124,6 @@ def _run_simulation_verrou( tag = rounding_mode or "vprec" raise MFCException(f"simulation ({tag}) exited {result.returncode}. See {run_dir}/sim.out") - os.makedirs(run_dir, exist_ok=True) for fn in os.listdir(os.path.join(tmpdir, "D")): shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) @@ -132,7 +134,6 @@ def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, thresh or None if the run itself failed (distinct from [] = ran and found none).""" tag = f"cancellation_{threshold}" run_dir = os.path.join(work_dir, tag) - os.makedirs(run_dir, exist_ok=True) gen_path = os.path.join(run_dir, "cancel_gen.txt") flags = [ "--check-cancellation=yes", @@ -151,7 +152,6 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): """Run with --check-max-float=yes; return [(fname, line)] of overflow sites, or None if the run failed (distinct from [] = ran and found none).""" run_dir = os.path.join(work_dir, "float_max") - os.makedirs(run_dir, exist_ok=True) try: _run_simulation_verrou( verrou_bin, @@ -170,7 +170,6 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref.""" run_dir = os.path.join(work_dir, "float_proxy") - os.makedirs(run_dir) _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") return _max_diff_np(ref_dir, run_dir, case["compare"]) @@ -180,7 +179,6 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r results = [] for bits in VPREC_MANTISSA_BITS: run_dir = os.path.join(work_dir, 
f"vprec_{bits}") - os.makedirs(run_dir) flags = [ "--backend=vprec", "--vprec-mode=full", From d809997822d17717d7b0d51410563cde4d4de364 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 09:21:00 -0400 Subject: [PATCH 22/25] fp-stability: prune unit tests to the high-value contracts (33 -> 17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kept the behavioral contracts (verrou-absent/broken detection — the case a real bug hit, scale-free pass/fail invariant + zero-scale safety, cancellation severity, output autodetect, blank-result emitter KeyError guard, fypp-ambiguity annotation, VALGRIND_LIB relocation incl. don't-clobber, install no-binary hard-fail) and the subtle edges. Dropped redundant enumerations (5 of 8 macro-context micro-cases, 3 of 5 sig-bits math cases) and trivial-math/empty-input/constant assertions (digits_left, min_sig_bits==24, *_empty, omits-when-absent, the obvious bootstrap-returncode guard). -125 lines; the dropped paths are covered by the kept tests' shared code or the end-to-end CI job. --- toolchain/mfc/test_fp_stability.py | 147 +++-------------------------- 1 file changed, 11 insertions(+), 136 deletions(-) diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index b8b6104403..6521705b96 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -1,30 +1,20 @@ -"""Unit tests for the pure helpers behind the FP-stability cancellation pass and -its fypp macro-expansion flagging. +"""Unit tests for the pure helpers behind the FP-stability cancellation pass, its +fypp macro-expansion flagging, scale-free pass/fail, and Verrou discovery/install. The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job; here we test only the pure functions that decide what to instrument and how to -label results, so they can run without Verrou or built binaries. 
+label results, so they can run without Verrou or built binaries. We keep the tests +that pin a real behavioral contract or a subtle edge, not every micro-variation. """ from mfc.fp_stability_metrics import ( - MIN_SIG_BITS, _autodetect_compare, _cancellation_severity, - _digits_left, _macro_context_in_lines, _sig_bits, ) -# --- #2: fypp macro-expansion context detection --- - - -def test_macro_context_none_outside_any_block(): - lines = [ - "subroutine s_foo()\n", - " a = b - c\n", - "end subroutine\n", - ] - assert _macro_context_in_lines(lines, 2) is None +# --- fypp macro-expansion context detection (a #:for/#:def line maps to N instances) --- def test_macro_context_inside_for_loop_body(): @@ -37,6 +27,7 @@ def test_macro_context_inside_for_loop_body(): def test_macro_context_if_block_is_not_duplicating(): + # #:if selects code but does not duplicate it, so it must NOT be flagged. lines = [ "#:if FOO\n", " a = b - c\n", @@ -45,50 +36,12 @@ def test_macro_context_if_block_is_not_duplicating(): assert _macro_context_in_lines(lines, 2) is None -def test_macro_context_reports_innermost_duplicating_block(): - lines = [ - "#:def MACRO(x)\n", - " #:if cond\n", - " #:for j in range(3)\n", - " y = ${x}$ - z\n", - " #:endfor\n", - " #:endif\n", - "#:enddef\n", - ] - assert _macro_context_in_lines(lines, 4) == "#:for" - - -def test_macro_context_balances_closers(): - lines = [ - "#:for i in [1, 2]\n", - " a = b - c\n", - "#:endfor\n", - "d = e - f\n", - ] - # line 4 is after the loop closed -> not in any duplicating block - assert _macro_context_in_lines(lines, 4) is None - - -def test_macro_context_def_body_when_no_inner_loop(): - lines = [ - "#:def GEOM(n)\n", - " r = x - y\n", - "#:enddef\n", - ] - assert _macro_context_in_lines(lines, 2) == "#:def" - - -def test_macro_context_block_and_call_are_duplicating(): - assert _macro_context_in_lines(["#:block B\n", " a = b - c\n", "#:endblock\n"], 2) == "#:block" - assert _macro_context_in_lines(["#:call M()\n", " a = b - 
c\n", "#:endcall\n"], 2) == "#:call" - - def test_macro_context_unbalanced_close_is_safe(): # a stray #:endfor with an empty stack must not crash or misreport assert _macro_context_in_lines(["#:endfor\n", " a = b - c\n"], 2) is None -# --- per-site cancellation severity (bits lost), from a threshold sweep --- +# --- per-site cancellation severity (highest bit-threshold a site survives) --- def test_cancellation_severity_takes_highest_surviving_threshold(): @@ -101,10 +54,6 @@ def test_cancellation_severity_takes_highest_surviving_threshold(): assert _cancellation_severity(level_sites) == {("a.fpp", 1): 30, ("b.fpp", 2): 10} -def test_cancellation_severity_empty(): - assert _cancellation_severity([]) == {} - - # --- auto-detect which output files to compare (for a user case) --- @@ -123,47 +72,20 @@ def test_autodetect_compare_falls_back_to_prim_when_no_cons(): assert _autodetect_compare(fns) == ["prim.1.00.000010.dat", "prim.3.00.000010.dat"] -def test_autodetect_compare_empty_when_no_field_output(): - assert _autodetect_compare(["indices.dat", "pre_time_data.dat", "foo.txt"]) == [] - - # --- scale-free pass/fail: significant bits retained --- -def test_sig_bits_relative_deviation(): - # max_dev/ref_scale = 1e-14 -> ~46.5 retained bits - assert 46 < _sig_bits(1e-14, 1.0) < 47 - - def test_sig_bits_is_scale_free(): # same relative deviation -> same bits regardless of absolute magnitude assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9 -def test_sig_bits_zero_deviation_is_full_precision(): - assert _sig_bits(0.0, 1.0) == 53.0 - - def test_sig_bits_zero_scale_is_safe(): + # a zero/degenerate field scale must not divide-by-zero; report full precision assert _sig_bits(1e-12, 0.0) == 53.0 -def test_sig_bits_deviation_at_scale_is_unstable(): - # deviation as large as the field -> <= 0 retained bits - assert _sig_bits(1.0, 1.0) <= 0.0 - - -def test_min_sig_bits_is_single_precision_floor(): - assert MIN_SIG_BITS == 24 - - -def 
test_digits_left_full_and_clamped(): - assert 15.5 < _digits_left(0) < 16.0 # full double ~ 16 sig digits - assert _digits_left(53) == 0.0 - assert _digits_left(60) == 0.0 # clamp: never negative - - -# --- report emitters: must survive blank and populated result dicts (CI-only path) --- +# --- report emitters: must survive the CI-only path without KeyError / regressions --- def _emit_to_tmp(results, tmp_path, monkeypatch): @@ -185,26 +107,6 @@ def test_emit_summary_survives_blank_result(tmp_path, monkeypatch): assert "0 passed, 1 failed" in text -def test_emit_summary_populated_result(tmp_path, monkeypatch): - from mfc.fp_stability import _blank_result - - r = _blank_result("demo") - r.update( - passed=False, - max_dev=1e-9, - sig_bits=30.0, - float_proxy=1e-6, - vprec=[(52, 1e-14), (23, float("inf"))], # exercises the "crash" branch - cancellation_locs=[("src/x/m_a.fpp", 5)], - cancellation_bits={("src/x/m_a.fpp", 5): 40}, - cancellation_macro={("src/x/m_a.fpp", 5): "#:for"}, - float_max_locs=[("m_a.fpp", 9)], - ) - text = _emit_to_tmp([r], tmp_path, monkeypatch) - assert "💥 crash" in text and "digits lost" in text - assert "may represent multiple instances" in text # fypp-ambiguous marker - - def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys): from mfc import fp_stability_report as report from mfc.fp_stability import _blank_result @@ -222,7 +124,7 @@ def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatc assert "multiple instances" in out # fypp-expanded cancellation site flagged -# --- Verrou discovery: a bare system valgrind must read as "Verrou absent" --- +# --- Verrou discovery: a bare/broken valgrind must read as "Verrou absent" --- def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch): @@ -264,15 +166,6 @@ def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch): assert runners._find_verrou() == "" -def 
test_find_verrou_accepts_verrou_enabled_path_valgrind(tmp_path, monkeypatch): - from mfc import fp_stability_runners as runners - - monkeypatch.setenv("VERROU_HOME", str(tmp_path)) - monkeypatch.setattr(runners.shutil, "which", lambda _name: "/opt/verrou/bin/valgrind") - monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True) - assert runners._find_verrou() == "/opt/verrou/bin/valgrind" - - def test_has_verrou_tool_reflects_exit_code(monkeypatch): from mfc import fp_stability_runners as runners @@ -304,14 +197,6 @@ def test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind") -def test_verrou_env_omits_valgrind_lib_when_libexec_absent(tmp_path, monkeypatch): - from mfc import fp_stability_runners as runners - - monkeypatch.delenv("VALGRIND_LIB", raising=False) - env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) - assert "VALGRIND_LIB" not in env - - def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch): from mfc import fp_stability_runners as runners @@ -321,17 +206,7 @@ def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch): assert env["VALGRIND_LIB"] == "/user/chosen/lib" # not clobbered -# --- auto-install hard-fail guards --- - - -def test_install_verrou_raises_when_bootstrap_fails(monkeypatch): - import pytest - - from mfc import fp_stability as fps - - monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 1})()) - with pytest.raises(fps.MFCException, match="Verrou install failed"): - fps._install_verrou() +# --- auto-install hard-fail guard (a green bootstrap that produced no binary) --- def test_install_verrou_raises_when_no_binary_appears(monkeypatch): From a9dbb4237d4c37de8ae9ee63645d38b4a6f5533c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 09:38:55 -0400 Subject: [PATCH 23/25] ci(fp-stability): derive Verrou cache key from verrou.sh content 
(no hand-synced version) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cache key hardcoded verrou-a58d434-valgrind-3.26.0, synced to the installer's pins only by a comment — if verrou.sh bumped but the key didn't, CI would restore the stale cached tree and silently never exercise the new version. Key off hashFiles('toolchain/bootstrap/verrou.sh') so any pin change (or edit) auto-busts the cache. Also dropped the version literals from the workflow's header comment; the pinned versions now live solely in verrou.sh. --- .github/workflows/fp-stability.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 203cff3ad4..b3c9a6b5cc 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -24,8 +24,9 @@ name: FP Stability # On FAIL: verrou_dd_sym runs to identify the responsible function symbols. # Logs are uploaded as CI artifacts. # -# Verrou (Valgrind 3.26.0 + edf-hpc/verrou@a58d434) is built once and cached. -# Build takes ~20 min uncached; cached runs restore in ~30 s. +# Verrou (the pinned Valgrind+Verrou pair; versions live in toolchain/bootstrap/verrou.sh) +# is installed by fp-stability on first use and cached. The prebuilt download is seconds; +# a cache miss with no prebuilt falls back to a ~20-min source build. on: push: @@ -68,8 +69,10 @@ jobs: uses: actions/cache@v4 with: path: ~/.local/verrou - # Keep these versions in sync with toolchain/bootstrap/verrou.sh (the installer). - key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }} + # Key off the installer's content so any version bump (or other edit) in + # verrou.sh auto-busts the cache and forces a fresh install — no hand-synced + # version string to drift out of date. 
+ key: verrou-${{ hashFiles('toolchain/bootstrap/verrou.sh') }}-${{ runner.os }} - name: Install system dependencies run: | From c58d44fd3df2587febf68f2d414fc2ad3b65260c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 09:42:57 -0400 Subject: [PATCH 24/25] fp-stability: remove emoji from console + GitHub-summary output (ASCII only) Replaced the check/cross marks in the results list (PASS/FAIL), the summary table status (PASS/FAIL), and the VPREC 'crash' cell + its comment. No emoji in the toolchain output now. --- toolchain/mfc/fp_stability.py | 4 ++-- toolchain/mfc/fp_stability_report.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 7fce9ae6fe..8710bb717c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -639,8 +639,8 @@ def fp_stability(): cons.print(f"[bold]Results[/bold] ({elapsed:.0f}s): [green]{n_pass} passed[/green] [red]{n_fail} failed[/red]") for r in results: - mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]" - cons.print(f" {mark} {r['name']}") + mark = "[green]PASS[/green]" if r["passed"] else "[red]FAIL[/red]" + cons.print(f" {mark} {r['name']}") _emit_github_summary(results, n_samples) _emit_github_annotations(results) diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 6b8f07079c..2ea90be081 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -83,7 +83,7 @@ def _emit_github_summary(results: list, n_samples: int): md.append("| Case | Status | bits retained | max\\_dev | Float proxy |") md.append("|------|:------:|:------:|--------:|--------:|") for r in results: - status = "✅" if r["passed"] else "❌" + status = "PASS" if r["passed"] else "FAIL" bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" md.append(f"| 
`{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |") @@ -119,7 +119,7 @@ def _emit_github_summary(results: list, n_samples: int): md.append("") # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at - # that reduced precision (💥 crash = run diverged/failed, — = not measured). + # that reduced precision ("crash" = run diverged/failed; dash = not measured). if any(r["vprec"] for r in results): _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) @@ -135,7 +135,7 @@ def _emit_github_summary(results: list, n_samples: int): if d is None: cols.append("—") elif d == float("inf"): - cols.append("💥 crash") + cols.append("crash") else: cols.append(f"{d:.2e}") md.append(f"| `{r['name']}` | {' | '.join(cols)} |") From 009967422bc982fd9625159d9df5d92a896e55d1 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 3 Jun 2026 09:50:19 -0400 Subject: [PATCH 25/25] =?UTF-8?q?fp-stability:=20ascii-only=20=E2=80=94=20?= =?UTF-8?q?convert=20em-dash/arrow/math=20glyphs=20in=20comments=20+=20out?= =?UTF-8?q?put?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced non-ASCII in the toolchain (em-dash/en-dash -> '-', '->' for arrows, '>=' for the greater-than-or-equal sign, '+/-' for the plus-minus sign, '~' for approx, 'inf'/'Linf' for the infinity glyph, '...' for ellipsis) across fp_stability*.py, verrou.sh, and the fp-stability command help. Display/comment text only; no logic change. The viz command's pre-existing glyphs are left untouched (not part of this PR). 
--- toolchain/bootstrap/verrou.sh | 20 ++++++++--------- toolchain/mfc/cli/commands.py | 8 +++---- toolchain/mfc/fp_stability.py | 22 +++++++++--------- toolchain/mfc/fp_stability_metrics.py | 4 ++-- toolchain/mfc/fp_stability_report.py | 32 +++++++++++++-------------- toolchain/mfc/fp_stability_runners.py | 6 ++--- 6 files changed, 46 insertions(+), 46 deletions(-) diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh index dfbd462231..dd2a67c653 100755 --- a/toolchain/bootstrap/verrou.sh +++ b/toolchain/bootstrap/verrou.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by -# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package — it is a fork of +# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package - it is a fork of # Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds); # if none is available for this tag/arch it falls back to a source build (~20 min). # fp-stability auto-runs this on first use when Verrou is absent (printing what it @@ -19,7 +19,7 @@ set -euo pipefail VALGRIND_VERSION="3.26.0" VERROU_COMMIT="a58d434" # Prebuilt artifacts (built once per arch) live in a small companion repo. The tag -# pins to the (valgrind, verrou) pair above — bump all three together. +# pins to the (valgrind, verrou) pair above - bump all three together. VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}" VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}" PREFIX="${VERROU_HOME:-$HOME/.local/verrou}" @@ -46,7 +46,7 @@ case "$(uname -m)" in aarch64|arm64) arch_tag="aarch64" echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2 - echo " best-validated on x86_64 — treat results as experimental on this arch." >&2 + echo " best-validated on x86_64 - treat results as experimental on this arch." >&2 ;; *) echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." 
>&2 @@ -73,31 +73,31 @@ try_prebuilt() { if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi } if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then - echo "==> No prebuilt for this tag/arch — building from source instead." + echo "==> No prebuilt for this tag/arch - building from source instead." rm -rf "$dl"; return 1 fi if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then - echo "WARNING: prebuilt checksum mismatch — building from source instead." >&2 + echo "WARNING: prebuilt checksum mismatch - building from source instead." >&2 rm -rf "$dl"; return 1 fi # Extract + verify in a staging dir, then swap into $PREFIX atomically. set -e # is suppressed inside a function used as an `if` condition, so check each step - # explicitly — otherwise a failed extract would fall through and the source + # explicitly - otherwise a failed extract would fall through and the source # build would install on top of a half-written tree (or a stale one on --force). local stage="$dl/stage" mkdir -p "$stage" if tar --zstd --help >/dev/null 2>&1; then - tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; } + tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; } else - zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed — building from source instead." >&2; rm -rf "$dl"; return 1; } + zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; } fi # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets # VALGRIND_LIB relative to the tree so the relocated install works. Verify the # staged tree runs before committing it. if ! ( . 
"${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then - echo "WARNING: prebuilt did not run — building from source instead." >&2 + echo "WARNING: prebuilt did not run - building from source instead." >&2 rm -rf "$dl"; return 1 fi @@ -105,7 +105,7 @@ try_prebuilt() { mkdir -p "$(dirname "$PREFIX")" rm -rf "$PREFIX" if ! mv "$stage" "$PREFIX"; then - echo "WARNING: could not install prebuilt to ${PREFIX} — building from source instead." >&2 + echo "WARNING: could not install prebuilt to ${PREFIX} - building from source instead." >&2 rm -rf "$dl"; return 1 fi rm -rf "$dl" diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 7d9771f772..b0591fc9a6 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -899,25 +899,25 @@ help="Run floating-point stability tests using Verrou.", description=( "Runs Verrou random-rounding stability analysis on a built-in suite of small " - "1-D cases, or — given a case .py (positional INPUT) — on your own case. Each " + "1-D cases, or - given a case .py (positional INPUT) - on your own case. Each " "case is run N times under Verrou's random IEEE-754 rounding and compared " "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must " "retain at least ~24 significant bits (single precision) under random rounding " "(no per-case thresholds).\n\n" "With a case .py, that case is run as a SINGLE serial CPU process under Verrou " - "(~30x slower, and run many times), so it must be a small, short proxy — large " + "(~30x slower, and run many times), so it must be a small, short proxy - large " "grids or long runs are rejected with guidance; serial .dat I/O is forced. 
" "Example: ./mfc.sh fp-stability my_case.py\n\n" "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to " "$HOME/.local/verrou); if absent it is installed automatically (a pinned, " - "hash-verified prebuilt is downloaded, with a source build as fallback) — " + "hash-verified prebuilt is downloaded, with a source build as fallback) - " "aborts if that install fails. The simulation and pre_process binaries must " "be serial (no-MPI, no-GPU) debug builds.\n\n" "Analysis passes (skip with --no-* flags):\n" " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" " cancellation --check-cancellation origins, ranked by significant digits lost\n" - " float-max --check-max-float detection of double→float overflow sites\n" + " float-max --check-max-float detection of double->float overflow sites\n" ), include_common=["mfc_config", "verbose", "debug_log"], positionals=[ diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 8710bb717c..200748203c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -8,7 +8,7 @@ (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold). B. Float proxy (--no-float-proxy to skip) - One run with --rounding-mode=float — deterministic proxy for + One run with --rounding-mode=float - deterministic proxy for single-precision sensitivity without recompiling. C. VPREC precision sweep (--no-vprec to skip) @@ -24,7 +24,7 @@ E. Float-max overflow detection (--no-float-max to skip) One run with --check-max-float=yes; reports locations where a - double→float conversion would overflow to ±Inf. + double->float conversion would overflow to +/-Inf. Logs are saved to fp-stability-logs/ and uploaded as CI artifacts. 
On GitHub Actions: a step summary table and ::warning:: file annotations @@ -334,7 +334,7 @@ def _merge(*dicts): "name": "low_mach", "description": "1-D water shock with low_Mach=1 HLLC correction active", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0", + "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M~0", "pre": _merge( _BASE_PRE, _WATER_EOS, @@ -475,10 +475,10 @@ def _run_case( result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))} if locs: worst = max(bits.values()) if bits else 0 - cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits") + cons.print(f" cancellation: {len(locs)} site(s), worst loses >= {worst / math.log2(10):.0f} of ~16 digits") n_macro = len(result["cancellation_macro"]) if n_macro: - cons.print(f" [dim]{n_macro} inside fypp expansions — line maps to multiple instances[/dim]") + cons.print(f" [dim]{n_macro} inside fypp expansions - line maps to multiple instances[/dim]") else: cons.print(" cancellation: none detected") except Exception as exc: @@ -518,7 +518,7 @@ def _load_user_case(input_path: str) -> dict: """Build a single fp-stability case from a user case .py. The case is run as ONE serial CPU process under Verrou (so it must be small - and short — a coarsened proxy of a production run, not the real thing); a grid + and short - a coarsened proxy of a production run, not the real thing); a grid too large to be feasible errors. The output files to compare are auto-detected from the reference run, so 'compare' is left empty here. 
""" @@ -533,14 +533,14 @@ def _load_user_case(input_path: str) -> dict: t_stop = int(params.get("t_step_stop", 0) or 0) work = cells * max(t_stop, 1) if cells > FP_CASE_MAX_CELLS: - raise MFCException(f"case has {cells:,} cells — too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).") + raise MFCException(f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).") if work > FP_CASE_MAX_WORK: raise MFCException( - f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) — too slow under " + f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under " f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)." ) stem = os.path.splitext(os.path.basename(input_path))[0] - if stem == "case": # examples//case.py — the dir name is more telling + if stem == "case": # examples//case.py - the dir name is more telling stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem return { "name": stem, @@ -554,10 +554,10 @@ def _load_user_case(input_path: str) -> dict: def _install_verrou() -> str: """Verrou is absent: install it via the bootstrap (downloads a pinned, hash-verified - prebuilt; source build as fallback) and return the valgrind path. Aborts on failure — + prebuilt; source build as fallback) and return the valgrind path. 
Aborts on failure - fp-stability cannot run without Verrou, so this is a hard error, not a skip.""" script = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh") - cons.print("[bold]Verrou not found — installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]") + cons.print("[bold]Verrou not found - installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]") if subprocess.run(["bash", script], check=False).returncode != 0: raise MFCException("Verrou install failed (see output above). Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.") verrou_bin = _find_verrou() diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py index a985b363af..4917e293f5 100644 --- a/toolchain/mfc/fp_stability_metrics.py +++ b/toolchain/mfc/fp_stability_metrics.py @@ -155,7 +155,7 @@ def _max_abs_np(ref_dir: str, compare_files: list) -> float: def _parse_cancel_gen(gen_path: str) -> list: - """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources.""" + """Parse cc-gen-file TSV (file\\tline\\tsymbol) -> sorted unique [(fname, line)] for MFC sources.""" if not os.path.isfile(gen_path): return [] locs = [] @@ -215,7 +215,7 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: # Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity # filter: a site is reported only if it lost >= the threshold bits. Sweeping these # levels and taking the highest each site survives gives a per-site "bits lost" -# severity (a lower bound — no false positives). 48 is near the full 53-bit +# severity (a lower bound - no false positives). 48 is near the full 53-bit # double mantissa (the top of the sweep), not the mantissa width itself. 
CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 2ea90be081..2ca469b9e9 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -36,10 +36,10 @@ def _emit_github_annotations(results: list): note = "catastrophic cancellation site" bits = site_bits.get((fname, lineno)) if bits: - note += f" — loses ≥ {bits / math.log2(10):.0f} of ~16 digits" + note += f" - loses >= {bits / math.log2(10):.0f} of ~16 digits" macro = macro_sites.get((fname, lineno)) if macro: - note += f" — inside a {macro}-expanded line, may represent multiple instances" + note += f" - inside a {macro}-expanded line, may represent multiple instances" print(f"::notice {loc},title={title}::{note}", flush=True) n_cc = len(r.get("cancellation_locs", [])) if n_cc > 3: @@ -51,7 +51,7 @@ def _more_md(total: int, shown: int, noun: str) -> str: or '' when nothing was truncated.""" if total <= shown: return "" - return f"- …and {total - shown} more {noun}; see `fp-stability-logs/`" + return f"- ...and {total - shown} more {noun}; see `fp-stability-logs/`" def _emit_github_summary(results: list, n_samples: int): @@ -70,26 +70,26 @@ def _emit_github_summary(results: list, n_samples: int): md = [] md.append("## FP Stability Results\n") - md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n") + md.append(f"**{n_pass} passed, {n_fail} failed** - {n_samples} random-rounding samples per case\n") md.append( f"> **Coverage:** {len(results)} one-dimensional case(s) " f"({', '.join(r['name'] for r in results)}). 
A pass means stable in the code paths these " - "cases exercise — not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " + "cases exercise - not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " "they do not reach.\n" ) - # Main results table — pass/fail is scale-free: bits retained vs a single floor + # Main results table - pass/fail is scale-free: bits retained vs a single floor md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") md.append("| Case | Status | bits retained | max\\_dev | Float proxy |") md.append("|------|:------:|:------:|--------:|--------:|") for r in results: status = "PASS" if r["passed"] else "FAIL" - bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—" - fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" + bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "-" + fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "-" md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |") md.append("") - # Cancellation ORIGINS — where ill-conditioning actually arises, led with the + # Cancellation ORIGINS - where ill-conditioning actually arises, led with the # most severe (most bits lost). cases_with_cancel = [r for r in results if r.get("cancellation_locs")] if cases_with_cancel: @@ -98,7 +98,7 @@ def _emit_github_summary(results: list, n_samples: int): "> Subtraction of nearly-equal values loses leading significant digits. A double carries " "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away " "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only " - "single-precision trust. Site *count* is not severity — one site losing many digits outweighs " + "single-precision trust. 
Site *count* is not severity - one site losing many digits outweighs " "many mild ones.\n" ) for r in cases_with_cancel: @@ -108,17 +108,17 @@ def _emit_github_summary(results: list, n_samples: int): ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"])) if ordered: w = ordered[0] - md.append(f"**`{r['name']}`** — {len(ordered)} site(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n") + md.append(f"**`{r['name']}`** - {len(ordered)} site(s); worst loses >= {w['bits'] / math.log2(10):.0f} of ~16 digits\n") for e in ordered[:15]: lost = e["bits"] / math.log2(10) - ambiguous = f" — _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else "" - md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`{ambiguous}") + ambiguous = f" - _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else "" + md.append(f"- **>= {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) - `{e['where']}`{ambiguous}") footer = _more_md(len(ordered), 15, "site(s)") if footer: md.append(footer) md.append("") - # VPREC sweep — one column per mantissa-bit level showing the L∞ deviation at + # VPREC sweep - one column per mantissa-bit level showing the Linf deviation at # that reduced precision ("crash" = run diverged/failed; dash = not measured). 
if any(r["vprec"] for r in results): _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} @@ -133,7 +133,7 @@ def _emit_github_summary(results: list, n_samples: int): for b in VPREC_MANTISSA_BITS: d = vmap.get(b) if d is None: - cols.append("—") + cols.append("-") elif d == float("inf"): cols.append("crash") else: @@ -146,7 +146,7 @@ def _emit_github_summary(results: list, n_samples: int): if cases_with_fmax: md.append("### Float32 overflow sites (check\\_max\\_float)\n") for r in cases_with_fmax: - md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") + md.append(f"**`{r['name']}`** - {len(r['float_max_locs'])} site(s)\n") for fname, lineno in r["float_max_locs"][:10]: md.append(f"- `{fname}:{lineno}`") footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py index f282f47e12..3809ee9992 100644 --- a/toolchain/mfc/fp_stability_runners.py +++ b/toolchain/mfc/fp_stability_runners.py @@ -22,7 +22,7 @@ def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool: """True if this valgrind actually provides the 'verrou' tool. A plain system - valgrind does not — accepting one would only fail later at run time. Pass env + valgrind does not - accepting one would only fail later at run time. Pass env (with VALGRIND_LIB) to verify a relocated prebuilt tree, which cannot load its tool without it.""" try: @@ -57,7 +57,7 @@ def _find_binary(name: str) -> str: def _verrou_env(verrou_bin: str) -> dict: """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt - artifact extracted to a new prefix) can locate its tool — Valgrind bakes its + artifact extracted to a new prefix) can locate its tool - Valgrind bakes its build prefix into the binary otherwise. Harmless for a source-built tree, where VALGRIND_LIB just equals the compiled-in path. 
A VALGRIND_LIB already in the environment (user sourced env.sh) is left untouched.""" @@ -168,7 +168,7 @@ def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: - """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref.""" + """One run with --rounding-mode=float; returns Linf deviation from nearest-ref.""" run_dir = os.path.join(work_dir, "float_proxy") _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") return _max_diff_np(ref_dir, run_dir, case["compare"])