From 7a65a99a20a86b2d3a2d33e34a1d434c8bffe2f5 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 4 Jun 2026 00:02:10 -0400 Subject: [PATCH 1/5] Fix fp-stability: preserve logs and write summary.md locally fp-stability-logs/ was created but never written to: all logs went to a tempdir deleted in _run_case's finally block, so the directory was empty after every run, locally and in CI (the artifact upload shipped nothing). The markdown report was also only emitted to GITHUB_STEP_SUMMARY, so local runs got no report at all. - preserve each case's text artifacts (*.log, *.out, *.inp, cancel_gen.txt) into fp-stability-logs// before the work dir is removed, mirroring the run-dir layout; bulky .dat field data is skipped - always write the report to fp-stability-logs/summary.md and print its path; still appended to GITHUB_STEP_SUMMARY when set --- toolchain/mfc/fp_stability.py | 31 ++++++++++++++++++++++++---- toolchain/mfc/fp_stability_report.py | 22 ++++++++++++-------- toolchain/mfc/test_fp_stability.py | 28 +++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 200748203c..020adc965d 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -26,9 +26,11 @@ One run with --check-max-float=yes; reports locations where a double->float conversion would overflow to +/-Inf. -Logs are saved to fp-stability-logs/ and uploaded as CI artifacts. -On GitHub Actions: a step summary table and ::warning:: file annotations -are emitted automatically so failing source lines appear in the PR diff. +Per-case logs (verrou.log, sim.out, pre.log, .inp, cancel_gen.txt) are saved to +fp-stability-logs// and the markdown report to fp-stability-logs/summary.md; +CI uploads the directory as an artifact. On GitHub Actions, the report is also +appended to the step summary and file annotations are emitted so failing source +lines appear in the PR diff. Requires: - Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind @@ -371,11 +373,29 @@ def _blank_result(name: str) -> dict: } +def _preserve_logs(work_dir: str, dest_dir: str) -> None: + """Copy a case's small text artifacts (*.log, *.out, *.inp, cancel_gen.txt) + from its scratch work_dir into fp-stability-logs//, mirroring the + run-dir layout, before the work_dir is deleted. Field-data .dat files are + skipped (they can be large for a user case).""" + keep = (".log", ".out", ".inp", ".txt") + if os.path.isdir(dest_dir): + shutil.rmtree(dest_dir) # stale logs from a previous invocation + for root, _dirs, files in os.walk(work_dir): + rel = os.path.relpath(root, work_dir) + for fn in files: + if fn.endswith(keep): + target = os.path.normpath(os.path.join(dest_dir, rel)) + os.makedirs(target, exist_ok=True) + shutil.copy2(os.path.join(root, fn), target) + + def _run_case( case: dict, verrou_bin: str, sim_bin: str, pp_bin: str, + log_dir: str, n_samples: int, run_float: bool, run_vprec: bool, @@ -501,6 +521,7 @@ def _run_case( cons.print(f" [bold yellow]float-max check error[/bold yellow]: {exc}") finally: + _preserve_logs(work_dir, os.path.join(log_dir, name)) shutil.rmtree(work_dir, ignore_errors=True) cons.unindent() cons.print() @@ -622,6 +643,7 @@ def fp_stability(): verrou_bin, sim_bin, pp_bin, + log_dir, n_samples, run_float, run_vprec, @@ -642,7 +664,8 @@ def fp_stability(): mark = "[green]PASS[/green]" if r["passed"] else "[red]FAIL[/red]" cons.print(f" {mark} {r['name']}") - _emit_github_summary(results, n_samples) + _emit_github_summary(results, n_samples, log_dir) _emit_github_annotations(results) + cons.print(f" report: {os.path.join(log_dir, 'summary.md')}") sys.exit(0 if n_fail == 0 else 1) diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 2ca469b9e9..26c585a03d 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -54,17 +54,15 @@ def _more_md(total: int, shown: int, noun: str) -> str: return f"- ...and {total - shown} more {noun}; see `fp-stability-logs/`" -def _emit_github_summary(results: list, n_samples: int): - """Write a markdown results table to GITHUB_STEP_SUMMARY. +def _emit_github_summary(results: list, n_samples: int, log_dir: str = None): + """Write the markdown results report. - Visible directly in the Actions run UI without downloading artifacts. + Always written to /summary.md (when log_dir is given), so local runs + get the same report CI shows; additionally appended to GITHUB_STEP_SUMMARY when + set, where it is visible in the Actions run UI without downloading artifacts. Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), and catastrophic-cancellation source locations for any failing cases. """ - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if not summary_path: - return - n_pass = sum(1 for r in results if r["passed"]) n_fail = len(results) - n_pass @@ -154,5 +152,11 @@ def _emit_github_summary(results: list, n_samples: int): md.append(footer) md.append("") - with open(summary_path, "a") as f: - f.write("\n".join(md) + "\n") + text = "\n".join(md) + "\n" + if log_dir: + with open(os.path.join(log_dir, "summary.md"), "w") as f: + f.write(text) + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if summary_path: + with open(summary_path, "a") as f: + f.write(text) diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index 6521705b96..be6cfa32df 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -99,6 +99,34 @@ def _emit_to_tmp(results, tmp_path, monkeypatch): return out.read_text() +def test_emit_summary_writes_local_summary_md_without_ci_env(tmp_path, monkeypatch): + # outside GitHub Actions the same report must land in fp-stability-logs/summary.md + from mfc import fp_stability_report as report + from mfc.fp_stability import _blank_result + + monkeypatch.delenv("GITHUB_STEP_SUMMARY", raising=False) + report._emit_github_summary([_blank_result("x")], 5, log_dir=str(tmp_path)) + assert "0 passed, 1 failed" in (tmp_path / "summary.md").read_text() + + +def test_preserve_logs_keeps_text_artifacts_skips_field_data(tmp_path): + # logs/.inp/cancel_gen.txt must survive the work_dir rmtree; bulky .dat must not + from mfc.fp_stability import _preserve_logs + + work = tmp_path / "work" + (work / "run_00").mkdir(parents=True) + (work / "pre.log").write_text("pre") + (work / "simulation.inp").write_text("inp") + (work / "run_00" / "verrou.log").write_text("v") + (work / "run_00" / "cons.1.00.000050.dat").write_text("data") + dest = tmp_path / "logs" / "case" + _preserve_logs(str(work), str(dest)) + assert (dest / "pre.log").is_file() + assert (dest / "simulation.inp").is_file() + assert (dest / "run_00" / "verrou.log").is_file() + assert not (dest / "run_00" / "cons.1.00.000050.dat").exists() + + def test_emit_summary_survives_blank_result(tmp_path, monkeypatch): # the dict produced on the per-case error path must not KeyError the emitter from mfc.fp_stability import _blank_result From bd39f27dbbffa040c47bb929783acec96239916b Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 4 Jun 2026 00:08:29 -0400 Subject: [PATCH 2/5] fp-stability: print ranked cancellation and float-max source lines to console The file:line sites were already collected and reported in summary.md and GitHub annotations, but the console only printed counts. Print the top 5 cancellation sites (ranked by digits lost, fypp-expansion ambiguity marked) and float-max overflow sites inline, eliding the rest to summary.md. --- toolchain/mfc/fp_stability.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 020adc965d..eae179b7d2 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -496,6 +496,13 @@ def _run_case( if locs: worst = max(bits.values()) if bits else 0 cons.print(f" cancellation: {len(locs)} site(s), worst loses >= {worst / math.log2(10):.0f} of ~16 digits") + ranked = sorted(locs, key=lambda s: (-bits.get(s, 0), s)) + for path, line in ranked[:5]: + lost = bits.get((path, line), 0) / math.log2(10) + macro = " [dim](fypp-expanded)[/dim]" if (path, line) in result["cancellation_macro"] else "" + cons.print(f" >= {lost:.0f} digits lost {path}:{line}{macro}") + if len(ranked) > 5: + cons.print(f" [dim]...and {len(ranked) - 5} more; see fp-stability-logs/summary.md[/dim]") n_macro = len(result["cancellation_macro"]) if n_macro: cons.print(f" [dim]{n_macro} inside fypp expansions - line maps to multiple instances[/dim]") @@ -515,6 +522,10 @@ def _run_case( result["float_max_locs"] = locs if locs: cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)") + for path, line in locs[:5]: + cons.print(f" {path}:{line}") + if len(locs) > 5: + cons.print(f" [dim]...and {len(locs) - 5} more; see fp-stability-logs/summary.md[/dim]") else: cons.print(" float-max: no overflows") except Exception as exc: From a395cb11c0a9172d71304aece50bc3db9af316da Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 4 Jun 2026 00:14:37 -0400 Subject: [PATCH 3/5] fp-stability: include source snippets in summary.md For each cancellation and float-max site shown in the report, embed a line-numbered excerpt of the source (marked line plus 3 lines of context) as a nested fenced code block, so the offending code is readable without opening the file. Unresolvable sources degrade to no snippet. --- toolchain/mfc/fp_stability_metrics.py | 12 ++++++++++++ toolchain/mfc/fp_stability_report.py | 16 +++++++++++++++- toolchain/mfc/test_fp_stability.py | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py index 4917e293f5..05da02c5a4 100644 --- a/toolchain/mfc/fp_stability_metrics.py +++ b/toolchain/mfc/fp_stability_metrics.py @@ -113,6 +113,18 @@ def _macro_context_in_lines(lines: list, lineno: int) -> str: return None +def _source_snippet(fname: str, lineno: int, context: int = 3) -> str: + """Return a line-numbered source excerpt around `lineno` (1-based), the marked + line prefixed with '>', or '' if the file cannot be resolved or the line is + out of range. Used to show the offending code inline in reports.""" + lines = _read_source_lines(fname) + if not lines or not 1 <= lineno <= len(lines): + return "" + beg = max(1, lineno - context) + end = min(len(lines), lineno + context) + return "\n".join(f"{'>' if i == lineno else ' '}{i:5d} | {lines[i - 1].rstrip()}" for i in range(beg, end + 1)) + + def _macro_context(fname: str, lineno: int) -> str: """File-backed wrapper around _macro_context_in_lines; '' path safe.""" lines = _read_source_lines(fname) diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index 26c585a03d..e07e86e63e 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -11,9 +11,19 @@ MIN_SIG_BITS, VPREC_MANTISSA_BITS, _digits_left, + _source_snippet, ) +def _snippet_md(fname: str, lineno: int) -> list: + """Markdown lines for a source excerpt nested under a list item (2-space + indented fenced block), or [] if the source cannot be resolved.""" + snippet = _source_snippet(fname, lineno) + if not snippet: + return [] + return ["", " ```f90", *(" " + ln for ln in snippet.splitlines()), " ```", ""] + + def _emit_github_annotations(results: list): """Emit GitHub annotations for FP cancellation sites. @@ -102,7 +112,9 @@ def _emit_github_summary(results: list, n_samples: int, log_dir: str = None): for r in cases_with_cancel: site_bits = r.get("cancellation_bits") or {} macro_sites = r.get("cancellation_macro") or {} - sites = [{"where": f"{fname}:{lineno}", "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"]] + sites = [ + {"where": f"{fname}:{lineno}", "loc": (fname, lineno), "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"] + ] ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"])) if ordered: w = ordered[0] @@ -111,6 +123,7 @@ def _emit_github_summary(results: list, n_samples: int, log_dir: str = None): lost = e["bits"] / math.log2(10) ambiguous = f" - _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else "" md.append(f"- **>= {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) - `{e['where']}`{ambiguous}") + md.extend(_snippet_md(*e["loc"])) footer = _more_md(len(ordered), 15, "site(s)") if footer: md.append(footer) @@ -147,6 +160,7 @@ def _emit_github_summary(results: list, n_samples: int, log_dir: str = None): md.append(f"**`{r['name']}`** - {len(r['float_max_locs'])} site(s)\n") for fname, lineno in r["float_max_locs"][:10]: md.append(f"- `{fname}:{lineno}`") + md.extend(_snippet_md(fname, lineno)) footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") if footer: md.append(footer) diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py index be6cfa32df..f43e127311 100644 --- a/toolchain/mfc/test_fp_stability.py +++ b/toolchain/mfc/test_fp_stability.py @@ -80,6 +80,20 @@ def test_sig_bits_is_scale_free(): assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9 +def test_source_snippet_marks_line_with_context(tmp_path): + from mfc.fp_stability_metrics import _source_snippet + + f = tmp_path / "m_x.fpp" + f.write_text("".join(f"line{i}\n" for i in range(1, 11))) + rows = _source_snippet(str(f), 5, context=2).splitlines() + assert len(rows) == 5 # lines 3..7 + assert rows[2].startswith(">") and "line5" in rows[2] + assert "line3" in rows[0] and "line7" in rows[-1] + # unresolvable file or out-of-range line must degrade to '' (no snippet) + assert _source_snippet(str(tmp_path / "nope.fpp"), 5) == "" + assert _source_snippet(str(f), 99) == "" + + def test_sig_bits_zero_scale_is_safe(): # a zero/degenerate field scale must not divide-by-zero; report full precision assert _sig_bits(1e-12, 0.0) == 53.0 From 4c1c1d92be6d7fd732e7bc2c9626739ce43271fc Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 4 Jun 2026 00:28:25 -0400 Subject: [PATCH 4/5] fp-stability: add --force to bypass the user-case size guard The cells/cell-steps feasibility guard protects against accidentally launching hours of Verrou runs, but offered no escape hatch for users who knowingly want a larger grid or more time steps. --force runs the case anyway, printing the expected cost multiple and a reminder to trim passes (-N 1, --no-*). Guard errors now mention the flag. --- toolchain/mfc/cli/commands.py | 10 ++++++++++ toolchain/mfc/fp_stability.py | 26 ++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index b0591fc9a6..069e3c9d12 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -983,6 +983,12 @@ default=False, dest="no_float_max", ), + Argument( + name="force", + help="Run a user case that exceeds the size feasibility guard anyway. Expect long runtimes: ~30x Verrou slowdown per run, times up to ~12 runs (trim passes with -N 1 and --no-* flags).", + action=ArgAction.STORE_TRUE, + default=False, + ), ], examples=[ Example("./mfc.sh fp-stability", "Auto-discover binaries and run the built-in suite"), @@ -994,6 +1000,10 @@ Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"), Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"), Example("./mfc.sh fp-stability --no-cancellation --no-float-max", "Skip analysis passes"), + Example( + "./mfc.sh fp-stability big_case.py --force -N 1 --no-vprec --no-float-proxy", + "Run a case beyond the size guard, trimming passes to keep it tractable", + ), ], key_options=[ ("--sim-binary PATH", "Serial simulation binary (debug, no-MPI)"), diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index eae179b7d2..f0562a5a26 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -45,8 +45,9 @@ ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH A user case .py is run as a single serial CPU process under Verrou, so it must be -a small, short proxy (a feasibility guard rejects large grids / long runs); output -is forced to serial .dat I/O and the files to diff are auto-detected. +a small, short proxy (a feasibility guard rejects large grids / long runs; --force +overrides it, at proportionally long runtimes); output is forced to serial .dat +I/O and the files to diff are auto-detected. """ import math @@ -564,12 +565,21 @@ def _load_user_case(input_path: str) -> dict: cells = (m + 1) * (n + 1) * (p + 1) t_stop = int(params.get("t_step_stop", 0) or 0) work = cells * max(t_stop, 1) - if cells > FP_CASE_MAX_CELLS: - raise MFCException(f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).") - if work > FP_CASE_MAX_WORK: - raise MFCException( - f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under " - f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)." + if not ARG("force"): + if cells > FP_CASE_MAX_CELLS: + raise MFCException( + f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells), or pass --force to run anyway." + ) + if work > FP_CASE_MAX_WORK: + raise MFCException( + f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under " + f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps), or pass --force to run anyway." + ) + elif cells > FP_CASE_MAX_CELLS or work > FP_CASE_MAX_WORK: + cons.print( + f" [bold yellow]--force[/bold yellow]: case is ~{work:,} cell-steps " + f"(guard is {FP_CASE_MAX_WORK:,}) - expect roughly {work / FP_CASE_MAX_WORK:.0f}x the usual per-run time, " + "for every enabled pass (trim with -N 1 and --no-* flags)." ) stem = os.path.splitext(os.path.basename(input_path))[0] if stem == "case": # examples//case.py - the dir name is more telling From 451ab76d1a779bb15ecbe19167b5040cd85df9bc Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 4 Jun 2026 00:33:05 -0400 Subject: [PATCH 5/5] fp-stability: harden log preservation and report writing (review feedback) - _preserve_logs runs in _run_case's finally block; an OSError there (disk full, dest collision) would replace the case's real outcome and, being uncatchable by the suite's MFCException handler, abort the whole run. Make it best-effort with a printed warning, matching the adjacent ignore_errors rmtree. - _emit_github_summary creates log_dir if missing instead of raising FileNotFoundError at the very end of a run. --- toolchain/mfc/fp_stability.py | 7 ++++++- toolchain/mfc/fp_stability_report.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index f0562a5a26..07ef7aae05 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -533,7 +533,12 @@ def _run_case( cons.print(f" [bold yellow]float-max check error[/bold yellow]: {exc}") finally: - _preserve_logs(work_dir, os.path.join(log_dir, name)) + # best-effort, like the rmtree below: a failed log copy must not replace + # the case's real outcome (this runs even when the try block is raising) + try: + _preserve_logs(work_dir, os.path.join(log_dir, name)) + except OSError as exc: + cons.print(f" [bold yellow]could not preserve logs[/bold yellow]: {exc}") shutil.rmtree(work_dir, ignore_errors=True) cons.unindent() cons.print() diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py index e07e86e63e..f2408be4ea 100644 --- a/toolchain/mfc/fp_stability_report.py +++ b/toolchain/mfc/fp_stability_report.py @@ -168,6 +168,7 @@ def _emit_github_summary(results: list, n_samples: int, log_dir: str = None): text = "\n".join(md) + "\n" if log_dir: + os.makedirs(log_dir, exist_ok=True) with open(os.path.join(log_dir, "summary.md"), "w") as f: f.write(text) summary_path = os.environ.get("GITHUB_STEP_SUMMARY")