diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 8a977cfcb3..b3c9a6b5cc 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -24,8 +24,9 @@ name: FP Stability # On FAIL: verrou_dd_sym runs to identify the responsible function symbols. # Logs are uploaded as CI artifacts. # -# Verrou (Valgrind 3.26.0 + edf-hpc/verrou@a58d434) is built once and cached. -# Build takes ~20 min uncached; cached runs restore in ~30 s. +# Verrou (the pinned Valgrind+Verrou pair; versions live in toolchain/bootstrap/verrou.sh) +# is installed by fp-stability on first use and cached. The prebuilt download is seconds; +# a cache miss with no prebuilt falls back to a ~20-min source build. on: push: @@ -68,37 +69,21 @@ jobs: uses: actions/cache@v4 with: path: ~/.local/verrou - key: verrou-a58d434-valgrind-3.26.0-${{ runner.os }} + # Key off the installer's content so any version bump (or other edit) in + # verrou.sh auto-busts the cache and forces a fresh install — no hand-synced + # version string to drift out of date. 
+ key: verrou-${{ hashFiles('toolchain/bootstrap/verrou.sh') }}-${{ runner.os }} - name: Install system dependencies run: | sudo apt-get update -y sudo apt-get install -y \ build-essential automake python3 python3-numpy libc6-dbg \ - cmake gfortran + cmake gfortran zstd - - name: Build Verrou - if: steps.cache-verrou.outputs.cache-hit != 'true' - run: | - cd /tmp - wget -q https://sourceware.org/pub/valgrind/valgrind-3.26.0.tar.bz2 - tar xf valgrind-3.26.0.tar.bz2 - - git clone https://github.com/edf-hpc/verrou.git - git -C verrou checkout a58d434 - - # Merge Verrou into Valgrind source tree and patch - cp -r verrou valgrind-3.26.0/verrou - cd valgrind-3.26.0 - cat verrou/valgrind.*diff | patch -p1 - - ./autogen.sh - ./configure --enable-only64bit --prefix="$HOME/.local/verrou" - make -j"$(nproc)" - make install - - - name: Verify Verrou - run: ~/.local/verrou/bin/valgrind --version + # Verrou is installed by `fp-stability` itself on first use (prebuilt download, + # source-build fallback; aborts only if the install fails). The cache above restores + # it across runs so the install only happens on a cache miss. - name: Build MFC (debug, serial) # FFLAGS=-fno-inline prevents gfortran from inlining small functions into diff --git a/toolchain/bootstrap/verrou.sh b/toolchain/bootstrap/verrou.sh new file mode 100755 index 0000000000..dd2a67c653 --- /dev/null +++ b/toolchain/bootstrap/verrou.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# +# Opt-in installer for Verrou (the Valgrind FP-perturbation tool used by +# `./mfc.sh fp-stability`). Verrou is NOT a Python/pip package - it is a fork of +# Valgrind. By default this downloads a prebuilt, hash-verified artifact (seconds); +# if none is available for this tag/arch it falls back to a source build (~20 min). +# fp-stability auto-runs this on first use when Verrou is absent (printing what it +# does); it is also safe to run by hand. A failed install aborts, never a silent skip. 
+# +# bash toolchain/bootstrap/verrou.sh # install into $HOME/.local/verrou +# VERROU_HOME=/path bash toolchain/bootstrap/verrou.sh +# bash toolchain/bootstrap/verrou.sh --force # reinstall even if present +# VERROU_BUILD_FROM_SOURCE=1 bash toolchain/bootstrap/verrou.sh # skip the prebuilt +# +# Versions are pinned here; the fp-stability CI workflow keys its cache off this file. + +set -euo pipefail + +VALGRIND_VERSION="3.26.0" +VERROU_COMMIT="a58d434" +# Prebuilt artifacts (built once per arch) live in a small companion repo. The tag +# pins to the (valgrind, verrou) pair above - bump all three together. +VERROU_DIST_REPO="${VERROU_DIST_REPO:-sbryngelson/verrou-dist}" +VERROU_DIST_TAG="${VERROU_DIST_TAG:-v1}" +PREFIX="${VERROU_HOME:-$HOME/.local/verrou}" +FORCE="${1:-}" + +echo "==> Verrou bootstrap (Valgrind ${VALGRIND_VERSION} + edf-hpc/verrou@${VERROU_COMMIT}) -> ${PREFIX}" + +# Idempotent: skip if already installed and working. Source env.sh first if present +# (a prebuilt tree needs VALGRIND_LIB to run; a source build works either way). +if [ "$FORCE" != "--force" ] && [ -x "${PREFIX}/bin/valgrind" ] \ + && ( [ -f "${PREFIX}/env.sh" ] && . "${PREFIX}/env.sh"; "${PREFIX}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then + echo "==> Verrou already installed at ${PREFIX} (use --force to rebuild). Nothing to do." + exit 0 +fi + +# Platform: Valgrind has no working modern-macOS support; Linux only. +if [ "$(uname -s)" != "Linux" ]; then + echo "ERROR: Verrou requires Linux (Valgrind does not support modern macOS, incl. Apple Silicon)." >&2 + exit 1 +fi +arch_tag="" +case "$(uname -m)" in + x86_64) arch_tag="x86_64" ;; + aarch64|arm64) + arch_tag="aarch64" + echo "WARNING: $(uname -m) detected. Valgrind builds here, but Verrou's FP backends are" >&2 + echo " best-validated on x86_64 - treat results as experimental on this arch." >&2 + ;; + *) + echo "WARNING: unrecognised arch $(uname -m); the build may fail. Proceeding anyway." 
>&2 + ;; +esac + +# Fast path: download a prebuilt, hash-verified artifact and source its relocatable +# env.sh, instead of building from source. Any failure (no asset for this arch/tag, +# missing zstd/sha256sum, checksum mismatch, won't run) falls through to the build. +try_prebuilt() { + [ -n "$arch_tag" ] || return 1 + [ "${VERROU_BUILD_FROM_SOURCE:-}" = "1" ] && return 1 + command -v sha256sum >/dev/null 2>&1 || return 1 + tar --zstd --help >/dev/null 2>&1 || command -v zstd >/dev/null 2>&1 || return 1 + command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || return 1 + + local asset base dl + asset="verrou-${VERROU_COMMIT}-valgrind-${VALGRIND_VERSION}-linux-${arch_tag}.tar.zst" + base="https://github.com/${VERROU_DIST_REPO}/releases/download/${VERROU_DIST_TAG}/${asset}" + dl="$(mktemp -d)" + + echo "==> Trying prebuilt ${VERROU_DIST_REPO}@${VERROU_DIST_TAG} (${asset})" + _fetch() { # url dest + if command -v curl >/dev/null 2>&1; then curl -fsSL -o "$2" "$1"; else wget -q -O "$2" "$1"; fi + } + if ! _fetch "$base" "$dl/$asset" || ! _fetch "$base.sha256" "$dl/$asset.sha256"; then + echo "==> No prebuilt for this tag/arch - building from source instead." + rm -rf "$dl"; return 1 + fi + if ! ( cd "$dl" && sha256sum -c "$asset.sha256" >/dev/null 2>&1 ); then + echo "WARNING: prebuilt checksum mismatch - building from source instead." >&2 + rm -rf "$dl"; return 1 + fi + + # Extract + verify in a staging dir, then swap into $PREFIX atomically. set -e + # is suppressed inside a function used as an `if` condition, so check each step + # explicitly - otherwise a failed extract would fall through and the source + # build would install on top of a half-written tree (or a stale one on --force). + local stage="$dl/stage" + mkdir -p "$stage" + if tar --zstd --help >/dev/null 2>&1; then + tar -C "$stage" --zstd -xf "$dl/$asset" || { echo "WARNING: prebuilt extract failed - building from source instead." 
>&2; rm -rf "$dl"; return 1; } + else + zstd -dc "$dl/$asset" | tar -C "$stage" -xf - || { echo "WARNING: prebuilt extract failed - building from source instead." >&2; rm -rf "$dl"; return 1; } + fi + + # Valgrind bakes its build prefix into the binary; the artifact's env.sh sets + # VALGRIND_LIB relative to the tree so the relocated install works. Verify the + # staged tree runs before committing it. + if ! ( . "${stage}/env.sh" && "${stage}/bin/valgrind" --tool=verrou --version >/dev/null 2>&1 ); then + echo "WARNING: prebuilt did not run - building from source instead." >&2 + rm -rf "$dl"; return 1 + fi + + # Commit only now: remove any existing $PREFIX, then move the verified tree in. + mkdir -p "$(dirname "$PREFIX")" + rm -rf "$PREFIX" + if ! mv "$stage" "$PREFIX"; then + echo "WARNING: could not install prebuilt to ${PREFIX} - building from source instead." >&2 + rm -rf "$dl"; return 1 + fi + rm -rf "$dl" + return 0 +} + +if try_prebuilt; then + echo "==> Verifying" + ( . "${PREFIX}/env.sh" && "${PREFIX}/bin/valgrind" --tool=verrou --version ) + echo "==> Done (prebuilt). Verrou installed at ${PREFIX}" + echo " Run: ./mfc.sh fp-stability (or set VERROU_HOME=${PREFIX} if you used a custom prefix)" + exit 0 +fi + +# Build dependencies. +missing="" +for tool in tar git make patch autoconf automake; do + command -v "$tool" >/dev/null 2>&1 || missing="$missing $tool" +done +command -v cc >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || missing="$missing gcc" +command -v wget >/dev/null 2>&1 || command -v curl >/dev/null 2>&1 || missing="$missing wget/curl" +if [ -n "$missing" ]; then + echo "ERROR: missing build dependencies:$missing" >&2 + echo " Install them (e.g. apt: build-essential automake autoconf libtool; or load HPC modules) and retry." 
>&2 + exit 1 +fi + +workdir="$(mktemp -d)" +trap 'rm -rf "$workdir"' EXIT +cd "$workdir" + +tarball="valgrind-${VALGRIND_VERSION}.tar.bz2" +url="https://sourceware.org/pub/valgrind/${tarball}" +echo "==> Downloading ${tarball}" +if command -v wget >/dev/null 2>&1; then + wget -q "$url" +else + curl -fsSL -o "$tarball" "$url" +fi +tar xf "$tarball" + +echo "==> Cloning Verrou @ ${VERROU_COMMIT}" +git clone --quiet https://github.com/edf-hpc/verrou.git +git -C verrou checkout --quiet "$VERROU_COMMIT" + +# Merge Verrou into the Valgrind tree and apply its patch. +cp -r verrou "valgrind-${VALGRIND_VERSION}/verrou" +cd "valgrind-${VALGRIND_VERSION}" +cat verrou/valgrind.*diff | patch -p1 + +echo "==> Building (this takes ~20 min)" +./autogen.sh +./configure --enable-only64bit --prefix="$PREFIX" +make -j"$(nproc)" +make install + +echo "==> Verifying" +"${PREFIX}/bin/valgrind" --tool=verrou --version +echo "==> Done. Verrou installed at ${PREFIX}" +echo " Run: ./mfc.sh fp-stability (or set VERROU_HOME=${PREFIX} if you used a custom prefix)" diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 54bbff4641..b0591fc9a6 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -898,27 +898,36 @@ name="fp-stability", help="Run floating-point stability tests using Verrou.", description=( - "Runs each registered test case N times under Verrou's random IEEE-754 " - "rounding mode and compares against a nearest-rounding reference run. " - "Reports the max L∞ deviation and PASS/FAIL against per-case thresholds.\n\n" - "Requires a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind " - "(defaults to $HOME/.local/verrou). 
The simulation and pre_process " - "binaries must be serial (no-MPI, no-GPU) debug builds.\n\n" - "Test cases:\n" - " sod_standard 1-D standard Sod, p_L/p_R=10 (well-conditioned baseline)\n" - " sod_strong 1-D Sod, p_L/p_R=100,000 — HLLC xi-factor cancellation\n" - " water_stiffened 1-D water shock (pi_inf=4046) — pressure-recovery cancellation\n" - " air_water_interface 1-D air/water contact (two-fluid) — mixed-cell cancellation\n\n" - "Additional features (skip with --no-* flags):\n" + "Runs Verrou random-rounding stability analysis on a built-in suite of small " + "1-D cases, or - given a case .py (positional INPUT) - on your own case. Each " + "case is run N times under Verrou's random IEEE-754 rounding and compared " + "against a nearest-rounding reference. PASS/FAIL is scale-free: a case must " + "retain at least ~24 significant bits (single precision) under random rounding " + "(no per-case thresholds).\n\n" + "With a case .py, that case is run as a SINGLE serial CPU process under Verrou " + "(~30x slower, and run many times), so it must be a small, short proxy - large " + "grids or long runs are rejected with guidance; serial .dat I/O is forced. " + "Example: ./mfc.sh fp-stability my_case.py\n\n" + "Uses a Verrou-enabled Valgrind at $VERROU_HOME/bin/valgrind (defaults to " + "$HOME/.local/verrou); if absent it is installed automatically (a pinned, " + "hash-verified prebuilt is downloaded, with a source build as fallback) - " + "aborts if that install fails. 
The simulation and pre_process binaries must " + "be serial (no-MPI, no-GPU) debug builds.\n\n" + "Analysis passes (skip with --no-* flags):\n" " float proxy One run with --rounding-mode=float (single-precision sensitivity)\n" " vprec sweep Runs at mantissa bits [52, 23, 16, 10] (precision floor curve)\n" - " dd_sym verrou_dd_sym bisection to responsible functions (on failure)\n" - " dd_line verrou_dd_line bisection to responsible source lines (on failure)\n" - " cancellation --check-cancellation detection of catastrophic cancellation sites\n" - " mca-sigbits Monte Carlo Arithmetic (mcaquad) significant-bits lower bound\n" - " float-max --check-max-float detection of double→float overflow sites\n" + " cancellation --check-cancellation origins, ranked by significant digits lost\n" + " float-max --check-max-float detection of double->float overflow sites\n" ), include_common=["mfc_config", "verbose", "debug_log"], + positionals=[ + Positional( + name="input", + help="Optional case .py to analyze instead of the built-in suite (run as a single serial CPU process under Verrou; must be small/short).", + nargs="?", + completion=Completion(type=CompletionType.FILES_PY), + ), + ], arguments=[ Argument( name="sim-binary", @@ -960,20 +969,6 @@ default=False, dest="no_vprec", ), - Argument( - name="no-dd-sym", - help="Skip verrou_dd_sym function-level delta-debug on failure.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_dd_sym", - ), - Argument( - name="no-dd-line", - help="Skip verrou_dd_line source-line delta-debug on failure.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_dd_line", - ), Argument( name="no-cancellation", help="Skip --check-cancellation catastrophic-cancellation detection.", @@ -981,13 +976,6 @@ default=False, dest="no_cancellation", ), - Argument( - name="no-mca", - help="Skip Monte Carlo Arithmetic (mcaquad) significant-bits estimate.", - action=ArgAction.STORE_TRUE, - default=False, - dest="no_mca", - ), Argument( 
name="no-float-max", help="Skip --check-max-float float32 overflow detection.", @@ -997,14 +985,15 @@ ), ], examples=[ - Example("./mfc.sh fp-stability", "Auto-discover binaries and run all cases"), + Example("./mfc.sh fp-stability", "Auto-discover binaries and run the built-in suite"), + Example("./mfc.sh fp-stability my_case.py", "Analyze your own case (small/short, serial, CPU)"), Example( "./mfc.sh fp-stability --sim-binary build/install/abc123/bin/simulation", "Specify simulation binary explicitly", ), Example("./mfc.sh fp-stability -N 10", "Run 10 random-rounding samples per case"), - Example("./mfc.sh fp-stability --no-vprec --no-dd-line", "Skip VPREC sweep and line debug"), - Example("./mfc.sh fp-stability --no-cancellation --no-mca --no-float-max", "Skip new analysis passes"), + Example("./mfc.sh fp-stability --no-vprec --no-cancellation", "Skip VPREC sweep and cancellation detection"), + Example("./mfc.sh fp-stability --no-cancellation --no-float-max", "Skip analysis passes"), ], key_options=[ ("--sim-binary PATH", "Serial simulation binary (debug, no-MPI)"), @@ -1013,10 +1002,7 @@ ("-N, --samples N", "Random-rounding samples per case (default: 5)"), ("--no-float-proxy", "Skip float-rounding proxy run"), ("--no-vprec", "Skip VPREC mantissa-bit sweep"), - ("--no-dd-sym", "Skip verrou_dd_sym on failure"), - ("--no-dd-line", "Skip verrou_dd_line on failure"), ("--no-cancellation", "Skip cancellation detection"), - ("--no-mca", "Skip MCA significant-bits estimate"), ("--no-float-max", "Skip float32 overflow detection"), ], ) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index dd848f046c..200748203c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -4,35 +4,27 @@ Features -------- A. Stability suite (always) - N random-rounding samples per case, threshold-based PASS/FAIL. 
+ N random-rounding samples per case; PASS/FAIL on significant bits retained + (scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold). B. Float proxy (--no-float-proxy to skip) - One run with --rounding-mode=float — deterministic proxy for + One run with --rounding-mode=float - deterministic proxy for single-precision sensitivity without recompiling. C. VPREC precision sweep (--no-vprec to skip) One run per mantissa-bit level [52,23,16,10] with --backend=vprec --vprec-mode=full; shows where each case breaks. -D. verrou_dd_sym on failure (--no-dd-sym to skip) - Delta-debug bisection isolates the minimal set of *functions* causing - instability. - -E. verrou_dd_line on failure, after dd_sym (--no-dd-line to skip) - Further bisects to exact *source lines* within the responsible functions. - -F. Cancellation detection (--no-cancellation to skip) +D. Cancellation detection (--no-cancellation to skip) One run with --check-cancellation=yes; reports MFC source lines that produce catastrophic cancellation (subtraction of nearly-equal doubles). - Uses --cc-gen-file for structured per-line output. - -G. MCA significant-bits estimate (--no-mca to skip) - N runs with --backend=mcaquad; max deviation vs nearest-rounding - reference gives a lower bound on significant bits: s = -log2(dev/scale). + Uses --cc-gen-file for structured per-line output. A cancellation site whose + .fpp line sits inside a #:for/#:def expansion is flagged as instance-ambiguous + (the line maps to multiple generated instances). -H. Float-max overflow detection (--no-float-max to skip) +E. Float-max overflow detection (--no-float-max to skip) One run with --check-max-float=yes; reports locations where a - double→float conversion would overflow to ±Inf. + double->float conversion would overflow to +/-Inf. Logs are saved to fp-stability-logs/ and uploaded as CI artifacts. 
On GitHub Actions: a step summary table and ::warning:: file annotations @@ -45,119 +37,53 @@ - A serial pre_process binary (to generate initial conditions) Usage: - ./mfc.sh fp-stability - ./mfc.sh fp-stability --no-vprec --no-dd-line + ./mfc.sh fp-stability # built-in 1-D suite + ./mfc.sh fp-stability my_case.py # your own case (small/short, serial, CPU) + ./mfc.sh fp-stability --no-vprec --no-cancellation ./mfc.sh fp-stability --sim-binary PATH --pre-binary PATH + +A user case .py is run as a single serial CPU process under Verrou, so it must be +a small, short proxy (a feasibility guard rejects large grids / long runs); output +is forced to serial .dat I/O and the files to diff are auto-detected. """ -import glob import math import os -import re import shutil -import stat import subprocess import sys import tempfile -import textwrap import time from .common import MFC_ROOT_DIR, MFCException +from .fp_stability_metrics import ( + CANCEL_BIT_LEVELS, + MIN_SIG_BITS, + _autodetect_compare, + _cancellation_severity, + _macro_context, + _max_abs_np, + _max_diff_np, + _sig_bits, +) +from .fp_stability_report import ( + _emit_github_annotations, + _emit_github_summary, +) +from .fp_stability_runners import ( + _find_binary, + _find_verrou, + _run_cancellation_check, + _run_float_max_check, + _run_float_proxy, + _run_preprocess, + _run_simulation_verrou, + _run_vprec_sweep, + _write_inp, +) from .printer import cons from .state import ARG -# Mantissa-bit levels for the VPREC sweep (C). -# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. -VPREC_MANTISSA_BITS = [52, 23, 16, 10] - -# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary. -_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE) - -# Files to exclude from cancellation / float-max reports (runtime loaders, XALT). 
-_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so") - -# Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)". -_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)") - -# Lines that are clearly control-flow delimiters rather than arithmetic. -# dd_line sometimes reports these when the responsible arithmetic is on the -# preceding line but shares DWARF debug info with the delimiter (e.g. loop -# boundaries in #:for-expanded code, or inlined functions at call sites). -_CONTROL_FLOW_RE = re.compile( - r"^\s*(" - r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" - r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) - r"|else(\s+if\s*\(.*\)\s*then)?\s*$" # else / else if (...) then - r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+" # subroutine declaration - r"|\$:END_GPU\w+" # fypp GPU macro closers - r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) - r"|\s*!\s*$" # comment-only lines - r"|\s*$" # blank lines - r")", - re.IGNORECASE, -) - - -def _read_source_line(fname: str, lineno: int) -> str: - """Return the raw source line at lineno (1-based), or '' if unavailable.""" - if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - return "" - try: - with open(candidates[0]) as fh: - lines = fh.readlines() - return lines[lineno - 1] if 0 < lineno <= len(lines) else "" - except OSError: - return "" - - -def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: - """Return True if any line in [start, end] contains non-trivial arithmetic. - - Filters out loop delimiters and fypp directive lines that dd_line sometimes - reports when the responsible arithmetic shares DWARF info with its enclosing - control-flow boundary (inlining, #:for template expansion, etc.). 
- Returns True (keep) when uncertain so we never silently drop real hotspots. - """ - for lineno in range(start, end + 1): - line = _read_source_line(fname, lineno) - if not line: - return True # can't read — keep to be safe - if not _CONTROL_FLOW_RE.match(line): - return True - return False - - -def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: - """Return a annotated source snippet around lineno, or '' if file not found. - - fname may be a bare basename (e.g. 'm_weno.fpp') or a relative path. - Searches recursively under MFC_ROOT_DIR/src/ first, then the whole tree. - """ - if os.path.isabs(fname) and os.path.isfile(fname): - candidates = [fname] - else: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) - if not candidates: - candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "**", os.path.basename(fname)), recursive=True) - if not candidates: - return "" - try: - with open(candidates[0]) as fh: - lines = fh.readlines() - except OSError: - return "" - start = max(0, lineno - context - 1) - end = min(len(lines), lineno + context) - rows = [] - for i, line in enumerate(lines[start:end], start=start + 1): - marker = ">" if i == lineno else " " - rows.append(f"{marker}{i:5d} | {line.rstrip()}") - return "\n".join(rows) - def _merge(*dicts): """Merge dicts left-to-right; later entries override earlier ones.""" @@ -225,8 +151,9 @@ def _merge(*dicts): # name - unique identifier used in log paths and console output # description - human-readable summary # compare - D/ output files compared between reference and perturbed runs -# threshold - max L∞ deviation allowed before the case is declared FAIL # ill_cond - known source of cancellation (empty string = none expected) +# Pass/fail is scale-free (>= MIN_SIG_BITS significant bits retained), so cases +# need no per-case deviation threshold regardless of field magnitude. 
# pre - parameters for pre_process (generates initial conditions) # sim - parameters for simulation CASES = [ @@ -234,7 +161,6 @@ def _merge(*dicts): "name": "sod_standard", "description": "1-D standard Sod, p_L/p_R=10, ideal gas (well-conditioned baseline)", "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"], - "threshold": 1e-13, "ill_cond": "", "pre": _merge( _BASE_PRE, @@ -257,7 +183,6 @@ def _merge(*dicts): "name": "sod_strong", "description": "1-D Sod, p_L/p_R=100,000, ideal gas", "compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"], - "threshold": 1e-10, "ill_cond": "HLLC xi factor: (s_L - vel_L)/(s_L - s_S) cancels near sonic contact", "pre": _merge( _BASE_PRE, @@ -280,8 +205,7 @@ def _merge(*dicts): "name": "water_stiffened", "description": "1-D water shock, stiffened EOS (pi_inf=4046)", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 1e-8, - "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000) [threshold loosened until reduced-energy (Etilde) scheme is merged]", + "ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000)", "pre": _merge( _BASE_PRE, _WATER_EOS, @@ -303,7 +227,6 @@ def _merge(*dicts): "name": "air_water_interface", "description": "1-D air/water isobaric contact (two-fluid, pi_inf=4046)", "compare": ["cons.1.00.000050.dat", "cons.4.00.000050.dat", "cons.5.00.000050.dat"], - "threshold": 1e-10, "ill_cond": "Mixed-cell pressure recovery: E-alpha_w*gamma_w*pi_inf cancels when alpha_w<<1", "pre": _merge( _BASE_PRE, @@ -344,7 +267,6 @@ def _merge(*dicts): "name": "bubble_rp", "description": "1-D bubbly water, pressure step 2:1 driving Rayleigh-Plesset oscillations (nb=1, Keller-Miksis)", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 1e-8, "ill_cond": "RP ODE: (p_bub - p_ext) cancels near bubble equilibrium", "pre": _merge( _BASE_PRE, @@ -412,8 +334,7 @@ def _merge(*dicts): "name": "low_mach", 
"description": "1-D water shock with low_Mach=1 HLLC correction active", "compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"], - "threshold": 2e-7, - "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0 (threshold loosened to 2e-7 to absorb MCA sampling variance)", + "ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M~0", "pre": _merge( _BASE_PRE, _WATER_EOS, @@ -434,511 +355,20 @@ def _merge(*dicts): ] -def _find_verrou() -> str: - verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) - candidate = os.path.join(verrou_home, "bin", "valgrind") - if os.path.isfile(candidate) and os.access(candidate, os.X_OK): - return candidate - return shutil.which("valgrind") or "" - - -def _find_binary(name: str) -> str: - install_dir = os.path.join(MFC_ROOT_DIR, "build", "install") - candidates = glob.glob(os.path.join(install_dir, "*", "bin", name)) - return max(candidates, key=os.path.getmtime) if candidates else "" - - -def _find_dd_sym(verrou_bin: str) -> str: - c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_sym") - return c if os.path.isfile(c) else "" - - -def _find_dd_line(verrou_bin: str) -> str: - c = os.path.join(os.path.dirname(verrou_bin), "verrou_dd_line") - return c if os.path.isfile(c) else "" - - -def _verrou_pythonpath(verrou_bin: str) -> str: - """Path that must be on PYTHONPATH for verrou_dd_* imports (valgrind/ subdir).""" - verrou_home = os.path.dirname(os.path.dirname(verrou_bin)) - matches = glob.glob(os.path.join(verrou_home, "lib", "python*", "site-packages", "valgrind")) - return matches[0] if matches else "" - - -def _write_inp(params: dict, target_name: str, work_dir: str) -> None: - """Write a Fortran namelist .inp file from a Python params dict.""" - from .run import case_dicts - - master_keys = case_dicts.get_input_dict_keys(target_name) - lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys] - with 
open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh: - fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n") - - -def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str): - _write_inp(pre_params, "pre_process", work_dir) - with open(os.path.join(work_dir, "pre.log"), "w") as f: - result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False) - if result.returncode != 0: - raise MFCException(f"pre_process failed (rc={result.returncode}). See {work_dir}/pre.log") - - -def _run_simulation_verrou( - verrou_bin: str, - sim_bin: str, - work_dir: str, - run_dir: str, - rounding_mode: str = None, - extra_flags: list = None, -): - """Copy ICs into a fresh tmpdir, run simulation under verrou, collect D/ output. - - rounding_mode is passed as --rounding-mode= when not None. - extra_flags are appended before the binary (e.g. --backend=vprec ...). - """ - with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir: - for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]: - src = os.path.join(work_dir, fname) - if os.path.exists(src): - shutil.copy2(src, tmpdir) - shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all")) - os.makedirs(os.path.join(tmpdir, "D")) - - log_path = os.path.join(run_dir, "verrou.log") - cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"] - if rounding_mode: - cmd.append(f"--rounding-mode={rounding_mode}") - cmd.extend(extra_flags or []) - cmd.append(sim_bin) - - with open(os.path.join(run_dir, "sim.out"), "w") as f: - result = subprocess.run(cmd, cwd=tmpdir, stdout=f, stderr=subprocess.STDOUT, check=False) - - if result.returncode != 0: - tag = rounding_mode or "vprec" - raise MFCException(f"simulation ({tag}) exited {result.returncode}. 
See {run_dir}/sim.out") - - os.makedirs(run_dir, exist_ok=True) - for fn in os.listdir(os.path.join(tmpdir, "D")): - shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) - - -def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float: - import numpy as np - - total = 0.0 - for fname in compare_files: - ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname) - if not os.path.exists(ref_p) or not os.path.exists(run_p): - return float("inf") - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] - total = max(total, float(np.max(np.abs(ref - run)))) - return total - - -def _max_abs_np(ref_dir: str, compare_files: list) -> float: - """Return the maximum absolute value across all reference output files.""" - import numpy as np - - total = 0.0 - for fname in compare_files: - ref_p = os.path.join(ref_dir, fname) - if not os.path.exists(ref_p): - continue - ref = np.loadtxt(ref_p)[:, 1] - total = max(total, float(np.max(np.abs(ref)))) - return total - - -def _parse_cancel_gen(gen_path: str) -> list: - """Parse cc-gen-file TSV (file\\tline\\tsymbol) → sorted unique [(fname, line)] for MFC sources.""" - if not os.path.isfile(gen_path): - return [] - locs = [] - seen = set() - with open(gen_path) as fh: - for raw in fh: - parts = raw.rstrip("\n").split("\t") - if len(parts) < 2: - continue - fname = parts[0].strip() - if any(ext in fname for ext in _EXTERNAL_SRCS): - continue - if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")): - continue - try: - lineno = int(parts[1].strip()) - except ValueError: - continue - key = (fname, lineno) - if key not in seen: - seen.add(key) - locs.append(key) - return locs - - -def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list: - """Extract first MFC-source frame from each Valgrind error matching error_keyword.""" - if not os.path.isfile(log_path): - return [] - locs = [] - seen = set() - in_error = False - with open(log_path) as fh: - for raw in fh: - line = 
re.sub(r"^==\d+== ?", "", raw) - if error_keyword in line: - in_error = True - continue - if in_error: - if " at " in line or " by " in line: - m = _VGFRAME_RE.search(line) - if m: - fname = m.group(1) - if any(ext in fname for ext in _EXTERNAL_SRCS): - continue - lineno = int(m.group(2)) - key = (fname, lineno) - if key not in seen: - seen.add(key) - locs.append(key) - in_error = False - elif line.strip() == "": - in_error = False - return locs - - -def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list: - """Run with --check-cancellation=yes; return [(fname, line)] of MFC cancellation sites.""" - run_dir = os.path.join(work_dir, "cancellation") - os.makedirs(run_dir, exist_ok=True) - gen_path = os.path.join(run_dir, "cancel_gen.txt") - flags = [ - "--check-cancellation=yes", - "--cc-threshold-double=10", - f"--cc-gen-file={gen_path}", - ] - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) - except MFCException: - pass - raw = _parse_cancel_gen(gen_path) - filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] - skipped = len(raw) - len(filtered) - if skipped: - cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") - return filtered - - -def _run_mca_samples( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - ref_dir: str, - n_mca: int, -) -> tuple: - """Run N mcaquad samples; return (max_dev, sig_bits_lower_bound).""" - compare = case["compare"] - ref_scale = _max_abs_np(ref_dir, compare) - max_dev = 0.0 - flags = ["--backend=mcaquad", "--mca-mode=mca"] - for i in range(n_mca): - run_dir = os.path.join(work_dir, f"mca_{i:02d}") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) - max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - except MFCException: - pass - sig_bits = None - if max_dev > 0.0 and 
ref_scale > 0.0: - sig_bits = max(0, int(math.floor(-math.log2(max_dev / ref_scale)))) - return max_dev, sig_bits - - -def _run_float_max_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list: - """Run with --check-max-float=yes; return [(fname, line)] of overflow sites.""" - run_dir = os.path.join(work_dir, "float_max") - os.makedirs(run_dir, exist_ok=True) - try: - _run_simulation_verrou( - verrou_bin, - sim_bin, - work_dir, - run_dir, - rounding_mode="nearest", - extra_flags=["--check-max-float=yes"], - ) - except MFCException: - pass - return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float") - - -def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: - """One run with --rounding-mode=float; returns L∞ deviation from nearest-ref.""" - run_dir = os.path.join(work_dir, "float_proxy") - os.makedirs(run_dir) - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") - return _max_diff_np(ref_dir, run_dir, case["compare"]) - - -def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list: - """Run at each mantissa-bit level. Returns [(bits, dev), ...].""" - results = [] - for bits in VPREC_MANTISSA_BITS: - run_dir = os.path.join(work_dir, f"vprec_{bits}") - os.makedirs(run_dir) - flags = [ - "--backend=vprec", - "--vprec-mode=full", - f"--vprec-precision-binary64={bits}", - "--vprec-range-binary64=11", - ] - try: - _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) - dev = _max_diff_np(ref_dir, run_dir, case["compare"]) - except MFCException: - dev = float("inf") - results.append((bits, dev)) - return results - - -def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): - """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. - - verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via - VERROU_EXCLUDE / VERROU_SOURCE environment variables. 
For test runs, we use - --rounding-mode=float (deterministic, same deviation every call, --nruns=1 suffices). - For the reference run, verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest in the - environment — we honour that so the reference is a stable nearest-rounding baseline - to compare against. CLI --rounding-mode would override the env var and break the - reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env bash - # Generated by mfc.sh fp-stability — do not edit by hand. - VERROU_BIN={verrou_bin!r} - SIM_BIN={sim_bin!r} - IC_DIR={ic_dir!r} - - RUNDIR="$1" - TMPDIR_RUN=$(mktemp -d) - trap 'rm -rf "$TMPDIR_RUN"' EXIT - - cp -r "$IC_DIR/p_all" "$TMPDIR_RUN/p_all" - cp "$IC_DIR/simulation.inp" "$TMPDIR_RUN/simulation.inp" - for fname in indices.dat pre_time_data.dat io_time_data.dat; do - [ -f "$IC_DIR/$fname" ] && cp "$IC_DIR/$fname" "$TMPDIR_RUN/" - done - mkdir -p "$TMPDIR_RUN/D" - - # verrou_dd_sym sets VERROU_ROUNDING_MODE=nearest for its reference run and - # leaves it unset for test runs. Defaulting to float gives deterministic - # test steps while letting the reference use nearest-rounding. - ROUND="${{VERROU_ROUNDING_MODE:-float}}" - - # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation). - # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to). - # Forward them as valgrind flags when set. - EXTRA="" - [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE" - [ -n "${{VERROU_SOURCE:-}}" ] && EXTRA="$EXTRA --source=$VERROU_SOURCE" - - cd "$TMPDIR_RUN" - "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN" - rc=$? - - [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." 
"$RUNDIR/" - exit $rc - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _write_dd_cmp_py(path: str, compare_files: list, threshold: float): - """Generate dd_cmp.py for verrou_dd_sym / verrou_dd_line. - - verrou_dd_* calls: dd_cmp.py REF_DIR RUN_DIR - Exits 0 (stable) or 1 (unstable) based on threshold. - """ - content = textwrap.dedent(f"""\ - #!/usr/bin/env python3 - # Generated by mfc.sh fp-stability — do not edit by hand. - import sys, os, numpy as np - - COMPARE_FILES = {compare_files!r} - THRESHOLD = {threshold!r} - - ref_dir, run_dir = sys.argv[1], sys.argv[2] - max_dev = 0.0 - for fname in COMPARE_FILES: - ref_p = os.path.join(ref_dir, fname) - run_p = os.path.join(run_dir, fname) - if not os.path.exists(ref_p) or not os.path.exists(run_p): - print(f"MISSING: {{fname}}") - sys.exit(1) - ref = np.loadtxt(ref_p)[:, 1] - run = np.loadtxt(run_p)[:, 1] - dev = float(np.max(np.abs(ref - run))) - max_dev = max(max_dev, dev) - - print(f"max_dev={{max_dev:.3e}} threshold={{THRESHOLD:.0e}}") - sys.exit(0 if max_dev <= THRESHOLD else 1) - """) - with open(path, "w") as f: - f.write(content) - os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - - -def _dd_env(verrou_bin: str) -> dict: - """Environment with PYTHONPATH set for verrou_dd_* imports.""" - py_pkg = _verrou_pythonpath(verrou_bin) - env = os.environ.copy() - if py_pkg: - existing = env.get("PYTHONPATH", "") - env["PYTHONPATH"] = ":".join(filter(None, [py_pkg, existing])) - return env - - -def _parse_rddmin_locs(summary_path: str) -> list: - """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary. - - Filters out locations whose source lines are pure control-flow delimiters - (loop boundaries, fypp directive closers, blank/comment lines). 
These can - appear when the responsible arithmetic shares DWARF debug info with an - enclosing boundary due to inlining or #:for template expansion. - """ - if not os.path.isfile(summary_path): - return [] - locs = [] - skipped = [] - with open(summary_path) as fh: - for line in fh: - m = _LOC_RE.search(line) - if not m: - continue - path = m.group(1) - start = int(m.group(2)) - end = int(m.group(3)) if m.group(3) else start - try: - rel = os.path.relpath(path, MFC_ROOT_DIR) - if rel.startswith(".."): - rel = path - except ValueError: - rel = path - rel = rel.replace("\\", "/") - if _is_arithmetic_loc(path, start, end): - locs.append((rel, start, end)) - else: - skipped.append((rel, start, end)) - for rel, start, end in skipped: - loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}" - cons.print(f" [dim]dd_line: skipped control-flow boundary {loc}[/dim]") - return locs - - -def _parse_rddmin_syms(summary_path: str) -> list: - """Extract symbol/function names from a dd_sym rddmin_summary. - - rddmin_summary format: - ddmin0:\\tFail Ratio: ...\\tFail indexes: ... - \\t\\t - ddmin1:\\t... - \\t\\t - - Lines starting with 'ddmin' are metadata; function names are on the - indented (tab-prefixed) lines as the first tab-delimited field. - """ - if not os.path.isfile(summary_path): - return [] - syms = [] - with open(summary_path) as fh: - for ln in fh: - stripped = ln.strip() - if not stripped or stripped.startswith("ddmin"): - continue - sym = stripped.split("\t")[0].strip() - if sym: - syms.append(sym) - return syms - - -def _run_dd_tool( - dd_bin: str, - dd_dir: str, - dd_run_sh: str, - dd_cmp_py: str, - env: dict, - log_name: str, - summary_subdir: str, - label: str, -) -> list: - """Generic runner for verrou_dd_sym / verrou_dd_line. 
Returns raw summary lines.""" - log_file = os.path.join(dd_dir, log_name) - cmd = [dd_bin, "--nruns=1", "--rddmin=d", "--reference-rounding=nearest", dd_run_sh, dd_cmp_py] - cons.print(f" [dim]running {label} (--nruns=1 float-mode --rddmin=d)...[/dim]") - with open(log_file, "w") as f: - result = subprocess.run(cmd, cwd=dd_dir, env=env, stdout=f, stderr=subprocess.STDOUT, check=False) - summary_path = os.path.join(dd_dir, summary_subdir, "rddmin_summary") - summary_lines = [] - if result.returncode == 0: - if os.path.isfile(summary_path): - with open(summary_path) as f: - summary_lines = f.readlines() - cons.print(f" [bold yellow]{label} result[/bold yellow]:") - for line in summary_lines: - cons.print(f" {line.rstrip()}") - else: - cons.print(f" [dim]{label} done; see {log_file}[/dim]") - else: - cons.print(f" [bold yellow]{label} exited {result.returncode}[/bold yellow] (see {log_file})") - return summary_lines - - -def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: - """Run verrou_dd_sym; return list of responsible symbol names.""" - dd_bin = _find_dd_sym(verrou_bin) - if not dd_bin: - cons.print(" [dim]verrou_dd_sym not found; skipping delta-debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else case["threshold"]) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym") - cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]") - return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) - - -def _run_dd_line( - case: dict, - verrou_bin: str, - sim_bin: str, - work_dir: str, - log_dir: str, - threshold: float = None, -) -> list: - 
"""Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples.""" - dd_bin = _find_dd_line(verrou_bin) - if not dd_bin: - cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") - return [] - - dd_dir = os.path.join(log_dir, case["name"]) - os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") - dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") - effective_threshold = threshold if threshold is not None else case["threshold"] - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) - _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") - return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) +def _blank_result(name: str) -> dict: + """A result dict with every field at its empty/unmeasured default.""" + return { + "name": name, + "passed": False, + "max_dev": float("inf"), + "sig_bits": None, + "float_proxy": None, + "vprec": [], + "cancellation_locs": [], + "cancellation_bits": {}, + "cancellation_macro": {}, + "float_max_locs": [], + } def _run_case( @@ -947,64 +377,58 @@ def _run_case( sim_bin: str, pp_bin: str, n_samples: int, - log_dir: str, run_float: bool, run_vprec: bool, - run_dd_sym: bool, - run_dd_line: bool, run_cancellation: bool, - run_mca: bool, run_float_max: bool, ) -> dict: name = case["name"] - threshold = case["threshold"] compare = case["compare"] cons.print(f"[bold]{name}[/bold]: {case['description']}") cons.indent() if case["ill_cond"]: cons.print(f" ill-conditioning: {case['ill_cond']}") - cons.print(f" threshold: {threshold:.0e}") + cons.print(f" pass floor: >= {MIN_SIG_BITS} significant bits retained") work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-") - result = { - "name": name, - "passed": False, - "max_dev": float("inf"), - "threshold": threshold, - "float_proxy": None, - "vprec": [], - "dd_sym_syms": [], - 
"dd_line_locs": [], - "cancellation_locs": [], - "mca_dev": None, - "mca_sigbits": None, - "float_max_locs": [], - } + result = _blank_result(name) try: cons.print(" [dim]running pre_process...[/dim]") _write_inp(case["sim"], "simulation", work_dir) _run_preprocess(pp_bin, case["pre"], work_dir) ref_dir = os.path.join(work_dir, "ref") - os.makedirs(ref_dir) cons.print(" [dim]reference run (rounding=nearest)...[/dim]") _run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest") + # For a user case with no fixed compare list, diff whatever the reference + # run actually wrote (conserved vars at the final step). + if not compare: + compare = _autodetect_compare(os.listdir(ref_dir)) + case["compare"] = compare + if not compare: + raise MFCException("case produced no cons.*/prim.* output to compare (check t_step_save/t_step_stop and parallel_io)") + cons.print(f" [dim]comparing: {', '.join(compare)}[/dim]") + # --- A: random-rounding stability samples --- + # Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale), + # vs one global floor (no per-case hand-tuned absolute threshold). 
+ ref_scale = _max_abs_np(ref_dir, compare) max_dev = 0.0 cons.print(f" [dim]random-rounding runs (N={n_samples})...[/dim]") for i in range(n_samples): run_dir = os.path.join(work_dir, f"run_{i:02d}") - os.makedirs(run_dir) _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random") max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare)) - passed = max_dev <= threshold + sig_bits = _sig_bits(max_dev, ref_scale) + passed = sig_bits >= MIN_SIG_BITS result["passed"] = passed result["max_dev"] = max_dev + result["sig_bits"] = sig_bits tag = "[bold green]PASS[/bold green]" if passed else "[bold red]FAIL[/bold red]" - cons.print(f" {tag} max_dev={max_dev:.3e} threshold={threshold:.0e}") + cons.print(f" {tag} {sig_bits:.1f} bits retained (floor {MIN_SIG_BITS}) max_dev={max_dev:.3e}") # --- B: float proxy --- if run_float: @@ -1027,77 +451,52 @@ def _run_case( marker = "" if dev == float("inf"): marker = " [red]crashed[/red]" - elif dev > threshold: + elif _sig_bits(dev, ref_scale) < MIN_SIG_BITS: marker = " [red]FAIL[/red]" cons.print(f" {bits:2d} bits{label_str}: dev={dev:.3e}{marker}") - # --- D/E: delta-debug with float mode to find FP hotspots. - # dd_run.sh uses --rounding-mode=float (deterministic single-precision), - # so each bisection step is consistent and --nruns=1 suffices. Threshold - # = float_proxy/10: the full instrumented set produces ~float_proxy - # deviation; excluding the responsible function drops it to near zero; - # any subset missing the responsible function gives SAME. - # Skip when float_proxy is unavailable or too small to localize. 
- float_proxy = result.get("float_proxy") - _DD_FLOAT_MIN = 1e-6 - dd_threshold = float_proxy / 10.0 if float_proxy and float_proxy >= _DD_FLOAT_MIN else 0.0 - if dd_threshold > 0 and (run_dd_sym or run_dd_line): - cons.print(f" [dim]dd threshold: {dd_threshold:.1e} (float_proxy={float_proxy:.1e})[/dim]") - elif run_dd_sym or run_dd_line: - cons.print(f" [dim]skipping dd: float_proxy={float_proxy} < {_DD_FLOAT_MIN:.0e}[/dim]") - if dd_threshold > 0 and run_dd_sym: - try: - result["dd_sym_syms"] = _run_dd_sym(case, verrou_bin, sim_bin, work_dir, log_dir, threshold=dd_threshold) - except Exception as exc: - cons.print(f" [bold yellow]dd_sym error[/bold yellow]: {exc}") - if dd_threshold > 0 and run_dd_line: - try: - result["dd_line_locs"] = _run_dd_line( - case, - verrou_bin, - sim_bin, - work_dir, - log_dir, - threshold=dd_threshold, - ) - except Exception as exc: - cons.print(f" [bold yellow]dd_line error[/bold yellow]: {exc}") - - # --- F: cancellation detection --- + # --- D: cancellation detection --- if run_cancellation: cons.print(" [dim]cancellation detection...[/dim]") try: - locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir) - result["cancellation_locs"] = locs - if locs: - cons.print(f" cancellation: {len(locs)} unique source location(s)") + # sweep bit thresholds to get per-site severity (bits lost); each + # run returns None if it failed (distinct from [] = ran, found none) + level_sites = [(level, _run_cancellation_check(verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS] + locs = next((s for lvl, s in level_sites if lvl == CANCEL_BIT_LEVELS[0]), None) + if locs is None: + cons.print(" [bold yellow]cancellation: detection run failed (see logs); not reported[/bold yellow]") else: - cons.print(" cancellation: none detected") + bits = _cancellation_severity([(lvl, s) for lvl, s in level_sites if s is not None]) + result["cancellation_locs"] = locs + result["cancellation_bits"] = bits + # flag cancellation sites 
whose .fpp line is inside a #:for/#:def + # expansion: the line maps to multiple generated instances, so the + # report cannot pin it to a unique runtime instance. + result["cancellation_macro"] = {(path, line): macro for (path, line) in locs if (macro := _macro_context(path, line))} + if locs: + worst = max(bits.values()) if bits else 0 + cons.print(f" cancellation: {len(locs)} site(s), worst loses >= {worst / math.log2(10):.0f} of ~16 digits") + n_macro = len(result["cancellation_macro"]) + if n_macro: + cons.print(f" [dim]{n_macro} inside fypp expansions - line maps to multiple instances[/dim]") + else: + cons.print(" cancellation: none detected") except Exception as exc: cons.print(f" [bold yellow]cancellation check error[/bold yellow]: {exc}") - # --- G: MCA significant-bits estimate --- - if run_mca: - cons.print(f" [dim]MCA significant-bits estimate (N={n_samples})...[/dim]") - try: - mca_dev, mca_sigbits = _run_mca_samples(case, verrou_bin, sim_bin, work_dir, ref_dir, n_samples) - result["mca_dev"] = mca_dev - result["mca_sigbits"] = mca_sigbits - bits_str = f"~{mca_sigbits} sig bits" if mca_sigbits is not None else "n/a" - cons.print(f" MCA: dev={mca_dev:.3e} ({bits_str})") - except Exception as exc: - cons.print(f" [bold yellow]MCA error[/bold yellow]: {exc}") - - # --- H: float-max overflow detection --- + # --- E: float-max overflow detection --- if run_float_max: cons.print(" [dim]float-max overflow check...[/dim]") try: - locs = _run_float_max_check(case, verrou_bin, sim_bin, work_dir) - result["float_max_locs"] = locs - if locs: - cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow site(s)") + locs = _run_float_max_check(verrou_bin, sim_bin, work_dir) + if locs is None: + cons.print(" [bold yellow]float-max: run failed (see logs); not reported[/bold yellow]") else: - cons.print(" float-max: no overflows") + result["float_max_locs"] = locs + if locs: + cons.print(f" [bold yellow]float-max[/bold yellow]: {len(locs)} overflow 
site(s)") + else: + cons.print(" float-max: no overflows") except Exception as exc: cons.print(f" [bold yellow]float-max check error[/bold yellow]: {exc}") @@ -1108,150 +507,71 @@ def _run_case( return result -def _emit_github_annotations(results: list): - """Emit GitHub annotations for FP hotspots. - - Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations - appear inline on the responsible source lines in the PR diff view. - - Up to 3 dd_line locations are emitted as ::warning:: per case (minimal - responsible lines from delta-debug). Up to 3 cancellation sites per case - are emitted as ::notice:: so the diff also highlights subtraction- - cancellation hotspots identified by --check-cancellation. - """ - if not os.environ.get("GITHUB_ACTIONS"): - return - for r in results: - status = "FAIL" if not r["passed"] else "hotspot" - dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})" - - for rel_path, start, end in r.get("dd_line_locs", [])[:3]: - loc = f"file={rel_path},line={start}" - if end != start: - loc += f",endLine={end}" - title = f"FP {status} [{r['name']}]" - print(f"::warning {loc},title={title}::{dev_str}", flush=True) - - for fname, lineno in r.get("cancellation_locs", [])[:3]: - loc = f"file={fname},line={lineno}" - title = f"FP cancellation [{r['name']}]" - print(f"::notice {loc},title={title}::catastrophic cancellation site", flush=True) +# Verrou is ~30x slower and the suite runs the simulation many times, so a user +# case must be a small, short, single-process proxy. Work = cells x time steps; +# both a huge grid and a long run are rejected (built-in cases are ~1k cell-steps). +FP_CASE_MAX_CELLS = 100_000 +FP_CASE_MAX_WORK = 200_000 # cells x t_step_stop -def _emit_github_summary(results: list, n_samples: int): - """Write a markdown results table to GITHUB_STEP_SUMMARY. +def _load_user_case(input_path: str) -> dict: + """Build a single fp-stability case from a user case .py. 
- Visible directly in the Actions run UI without downloading artifacts. - Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), - and dd_line source locations for any failing cases. + The case is run as ONE serial CPU process under Verrou (so it must be small + and short - a coarsened proxy of a production run, not the real thing); a grid + too large to be feasible errors. The output files to compare are auto-detected + from the reference run, so 'compare' is left empty here. """ - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if not summary_path: - return - - n_pass = sum(1 for r in results if r["passed"]) - n_fail = len(results) - n_pass - - md = [] - md.append("## FP Stability Results\n") - md.append(f"**{n_pass} passed, {n_fail} failed** — {n_samples} random-rounding samples per case\n") - - # Main results table - md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |") - md.append("|------|:------:|--------:|--------:|--------:|:------:|") - for r in results: - status = "✅" if r["passed"] else "❌" - fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—" - sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—" - md.append(f"| `{r['name']}` | {status} | {r['max_dev']:.2e} | {r['threshold']:.0e} | {fp} | {sb} |") - md.append("") - - # VPREC sweep — one column per bit level, ❌ where dev > threshold - if any(r["vprec"] for r in results): - _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} - header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) - sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS) - md.append("### VPREC precision sweep\n") - md.append(f"| Case | {header} |") - md.append(f"|------|{sep}|") - for r in results: - vmap = {b: d for b, d in r["vprec"]} - cols = [] - for b in VPREC_MANTISSA_BITS: - d = vmap.get(b) - if d is None: - cols.append("—") - elif d == float("inf"): - cols.append("💥 crash") - else: - cols.append(f"{d:.2e}") - md.append(f"| 
`{r['name']}` | {' | '.join(cols)} |") - md.append("") - - # dd_line hotspot sources — always shown (top 10 per case) with source context - cases_with_locs = [r for r in results if r["dd_line_locs"]] - if cases_with_locs: - md.append("### Top FP hotspots (dd\\_line)\n") - for r in cases_with_locs: - status = "❌ FAIL" if not r["passed"] else "✅ pass" - md.append(f"**`{r['name']}`** ({status})\n") - for rel_path, start, end in r["dd_line_locs"][:10]: - loc = f"{rel_path}:{start}" if start == end else f"{rel_path}:{start}-{end}" - md.append(f"- `{loc}`") - snippet = _get_source_context(rel_path, start) - if snippet: - md.append(" ```fortran") - for line in snippet.splitlines(): - md.append(f" {line}") - md.append(" ```") - md.append("") - - # dd_sym function names (collapsed, since less actionable than dd_line) - cases_with_syms = [r for r in results if r["dd_sym_syms"]] - if cases_with_syms: - md.append("
") - md.append("Responsible functions (dd_sym)\n") - for r in cases_with_syms: - md.append(f"\n**`{r['name']}`**\n") - for sym in r["dd_sym_syms"]: - md.append(f"- `{sym}`") - md.append("\n
\n") - - # Cancellation hotspots - cases_with_cancel = [r for r in results if r.get("cancellation_locs")] - if cases_with_cancel: - md.append("### Catastrophic cancellation sites\n") - for r in cases_with_cancel: - md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s)\n") - for fname, lineno in r["cancellation_locs"][:15]: - md.append(f"- `{fname}:{lineno}`") - snippet = _get_source_context(fname, lineno) - if snippet: - md.append(" ```fortran") - for line in snippet.splitlines(): - md.append(f" {line}") - md.append(" ```") - md.append("") + from .run import input as run_input # lazy import: avoids a circular import + + params = run_input.load(input_path, None, {}, do_print=False).params + # Force serial .dat I/O: the suite runs the no-MPI binary as one process and + # diffs serial cons.*/prim.* files (not the parallel SILO/HDF5 path). + params["parallel_io"] = "F" + m, n, p = (int(params.get(k, 0) or 0) for k in ("m", "n", "p")) + cells = (m + 1) * (n + 1) * (p + 1) + t_stop = int(params.get("t_step_stop", 0) or 0) + work = cells * max(t_stop, 1) + if cells > FP_CASE_MAX_CELLS: + raise MFCException(f"case has {cells:,} cells - too large for Verrou (~30x slowdown, run many times). " f"Use a coarsened proxy (<= {FP_CASE_MAX_CELLS:,} cells).") + if work > FP_CASE_MAX_WORK: + raise MFCException( + f"case is ~{work:,} cell-steps ({cells:,} cells x {t_stop} time steps) - too slow under " + f"Verrou (~30x, run many times). Reduce m/n/p or t_step_stop (target <= {FP_CASE_MAX_WORK:,} cell-steps)." 
+ ) + stem = os.path.splitext(os.path.basename(input_path))[0] + if stem == "case": # examples//case.py - the dir name is more telling + stem = os.path.basename(os.path.dirname(os.path.abspath(input_path))) or stem + return { + "name": stem, + "description": f"user case {input_path} ({cells} cells, run single-rank on CPU)", + "compare": [], # auto-detected from the reference run's output + "ill_cond": "", + "pre": params, + "sim": params, + } - # Float-max overflow sites - cases_with_fmax = [r for r in results if r.get("float_max_locs")] - if cases_with_fmax: - md.append("### Float32 overflow sites (check\\_max\\_float)\n") - for r in cases_with_fmax: - md.append(f"**`{r['name']}`** — {len(r['float_max_locs'])} site(s)\n") - for fname, lineno in r["float_max_locs"][:10]: - md.append(f"- `{fname}:{lineno}`") - md.append("") - with open(summary_path, "a") as f: - f.write("\n".join(md) + "\n") +def _install_verrou() -> str: + """Verrou is absent: install it via the bootstrap (downloads a pinned, hash-verified + prebuilt; source build as fallback) and return the valgrind path. Aborts on failure - + fp-stability cannot run without Verrou, so this is a hard error, not a skip.""" + script = os.path.join(MFC_ROOT_DIR, "toolchain", "bootstrap", "verrou.sh") + cons.print("[bold]Verrou not found - installing it (downloads a prebuilt artifact, ~seconds; source build as fallback)...[/bold]") + if subprocess.run(["bash", script], check=False).returncode != 0: + raise MFCException("Verrou install failed (see output above). 
Fix the issue and re-run, install manually with `bash toolchain/bootstrap/verrou.sh`, or pass --verrou-binary PATH.") + verrou_bin = _find_verrou() + if not verrou_bin or not os.path.isfile(verrou_bin): + raise MFCException("Verrou install reported success but no valgrind binary was found under $VERROU_HOME.") + return verrou_bin def fp_stability(): verrou_bin = ARG("verrou_binary") or _find_verrou() - if not verrou_bin or not os.path.isfile(verrou_bin): - cons.print("[bold yellow]SKIP[/bold yellow]: verrou not found. Install at $HOME/.local/verrou or set VERROU_HOME.") - sys.exit(0) + if not verrou_bin or not (os.path.isfile(verrou_bin) and os.access(verrou_bin, os.X_OK)): + if ARG("verrou_binary"): + raise MFCException(f"--verrou-binary {ARG('verrou_binary')!r} not found or not executable.") + verrou_bin = _install_verrou() sim_bin = ARG("sim_binary") or _find_binary("simulation") if not sim_bin or not os.path.isfile(sim_bin): @@ -1264,12 +584,11 @@ def fp_stability(): n_samples = ARG("samples") run_float = not ARG("no_float_proxy") run_vprec = not ARG("no_vprec") - run_dd_sym = not ARG("no_dd_sym") - run_dd_line = not ARG("no_dd_line") run_cancellation = not ARG("no_cancellation") - run_mca = not ARG("no_mca") run_float_max = not ARG("no_float_max") + cases_to_run = [_load_user_case(ARG("input"))] if ARG("input") else CASES + log_dir = os.path.join(MFC_ROOT_DIR, "fp-stability-logs") os.makedirs(log_dir, exist_ok=True) @@ -1278,20 +597,16 @@ def fp_stability(): cons.print(f" verrou: {verrou_bin}") cons.print(f" simulation: {sim_bin}") cons.print(f" pre_process: {pp_bin}") + if ARG("input"): + cons.print(f" case: {ARG('input')} (single serial CPU run under Verrou)") cons.print(f" samples: {n_samples}") features = [] if run_float: features.append("float-proxy") if run_vprec: features.append("vprec-sweep") - if run_dd_sym: - features.append("dd_sym") - if run_dd_line: - features.append("dd_line") if run_cancellation: features.append("cancellation") - if run_mca: - 
features.append("mca-sigbits") if run_float_max: features.append("float-max") cons.print(f" features: {', '.join(features) if features else 'stability only'}") @@ -1300,7 +615,7 @@ def fp_stability(): start = time.time() results = [] - for case in CASES: + for case in cases_to_run: try: r = _run_case( case, @@ -1308,31 +623,14 @@ def fp_stability(): sim_bin, pp_bin, n_samples, - log_dir, run_float, run_vprec, - run_dd_sym, - run_dd_line, run_cancellation, - run_mca, run_float_max, ) except MFCException as exc: cons.print(f" [bold red]ERROR[/bold red]: {exc}") - r = { - "name": case["name"], - "passed": False, - "max_dev": float("inf"), - "threshold": case["threshold"], - "float_proxy": None, - "vprec": [], - "dd_sym_syms": [], - "dd_line_locs": [], - "cancellation_locs": [], - "mca_dev": None, - "mca_sigbits": None, - "float_max_locs": [], - } + r = _blank_result(case["name"]) results.append(r) elapsed = time.time() - start @@ -1341,11 +639,8 @@ def fp_stability(): cons.print(f"[bold]Results[/bold] ({elapsed:.0f}s): [green]{n_pass} passed[/green] [red]{n_fail} failed[/red]") for r in results: - mark = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]" - cons.print(f" {mark} {r['name']}") - - if n_fail > 0: - cons.print(f"\n dd_sym/dd_line logs in: {log_dir}") + mark = "[green]PASS[/green]" if r["passed"] else "[red]FAIL[/red]" + cons.print(f" {mark} {r['name']}") _emit_github_summary(results, n_samples) _emit_github_annotations(results) diff --git a/toolchain/mfc/fp_stability_metrics.py b/toolchain/mfc/fp_stability_metrics.py new file mode 100644 index 0000000000..4917e293f5 --- /dev/null +++ b/toolchain/mfc/fp_stability_metrics.py @@ -0,0 +1,237 @@ +"""Pure metrics, source-resolution, and parsing helpers for the FP-stability suite. + +Leaf module: imports only stdlib + MFC_ROOT_DIR. No sibling fp_stability* +imports, so the runners/report/orchestrator modules can all depend on it. 
+""" + +import glob +import math +import os +import re + +from .common import MFC_ROOT_DIR + +# Mantissa-bit levels for the VPREC sweep (C). +# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low. +VPREC_MANTISSA_BITS = [52, 23, 16, 10] + +_OUTPUT_DAT = re.compile(r"^(cons|prim)\.\d+\.\d+\.(\d+)\.dat$") + + +def _autodetect_compare(filenames: list) -> list: + """Pick the D/ output files to diff for a user-supplied case: the conserved- + variable files at the latest written time step (falling back to primitive + files if none are written). Returns [] if the case produced no field output.""" + by_step = {} + for f in filenames: + m = _OUTPUT_DAT.match(os.path.basename(f)) + if m: + by_step.setdefault(int(m.group(2)), {"cons": [], "prim": []})[m.group(1)].append(os.path.basename(f)) + if not by_step: + return [] + last = by_step[max(by_step)] + return sorted(last["cons"] or last["prim"]) + + +# Stability pass/fail (stage A) is scale-free: a case must retain at least this +# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)). +# 24 ~= single precision. One global floor replaces per-case absolute thresholds +# (which spanned 6 orders of magnitude purely from field scale + conditioning); +# normalising by the field scale collapses that, so a single number suffices. +MIN_SIG_BITS = 24 + + +def _sig_bits(max_dev: float, ref_scale: float) -> float: + """Significant bits retained = -log2(max_dev / ref_scale). + + Scale-free: dividing the deviation by the field's peak magnitude removes the + absolute scale, leaving only the conditioning. Zero deviation (or zero + scale) returns 53.0 = full double precision retained. + """ + if not (max_dev > 0) or not (ref_scale > 0): + return 53.0 + return -math.log2(max_dev / ref_scale) + + +# Files to exclude from cancellation / float-max reports (runtime loaders, XALT). 
_EXTERNAL_SRCS = ("xalt", "dl-init", "ld-linux", "libc.so", "libm.so")

# Matches a source-located frame in a Valgrind stack trace: "(file.fpp:LINE)".
_VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)")

# Per-line "==PID== " prefix Valgrind puts on every log line; compiled once
# because the log parser applies it to each line.
_VG_PREFIX_RE = re.compile(r"^==\d+== ?")

# Fypp block directives. The duplicating ones (#:for expands to N copies, #:def
# defines a macro instantiated at multiple call sites) collapse many distinct
# generated computations onto a single .fpp source line, so a cancellation site
# inside one cannot be pinned to a unique runtime instance. #:if/#:with/#:mute
# select code but do not duplicate it, so they are tracked for balance but not flagged.
_FYPP_BLOCK_OPEN = re.compile(r"^\s*#:(for|def|block|call|if|with|mute)\b", re.IGNORECASE)
_FYPP_BLOCK_CLOSE = re.compile(r"^\s*#:end(for|def|block|call|if|with|mute)?\b", re.IGNORECASE)
_FYPP_DUPLICATING = ("for", "def", "block", "call")


def _resolve_source(fname: str) -> str:
    """Resolve a (possibly bare) source filename to an existing path, or '' if not
    found. An absolute existing path is used as-is; otherwise the basename is
    located recursively under src/.

    When several files under src/ share the basename, the one sharing the most
    trailing path components with `fname` wins; remaining ties go to the
    lexicographically first path, so the answer does not depend on directory
    scan order."""
    if os.path.isabs(fname) and os.path.isfile(fname):
        return fname
    base = os.path.basename(fname)
    if not base:
        # An empty basename would make the glob match directories, not a source file.
        return ""
    candidates = sorted(glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", base), recursive=True))
    if not candidates:
        return ""
    wanted = os.path.normpath(fname).split(os.sep)[::-1]

    def _tail_overlap(path: str) -> int:
        """Number of trailing path components `path` has in common with `fname`."""
        shared = 0
        for want, have in zip(wanted, os.path.normpath(path).split(os.sep)[::-1]):
            if want != have:
                break
            shared += 1
        return shared

    # max() keeps the first of equally-ranked items, i.e. the sorted-first path.
    return max(candidates, key=_tail_overlap)


def _read_source_lines(fname: str) -> list:
    """Resolve `fname` and return its lines (with newlines), or [] if unreadable."""
    path = _resolve_source(fname)
    if not path:
        return []
    try:
        # errors="replace": a stray non-UTF-8 byte in a comment must not hide the file.
        with open(path, encoding="utf-8", errors="replace") as fh:
            return fh.readlines()
    except OSError:
        return []


def _macro_context_in_lines(lines: list, lineno: int) -> "str | None":
    """Return the innermost code-duplicating fypp block ('#:for'/'#:def'/...) that
    encloses `lineno` (1-based) in `lines`, or None if none does.

    Used to flag cancellation sites whose .fpp line is shared across multiple
    expanded instances (a #:for body, a #:def macro used in many places), where
    line-level attribution cannot identify which instance is responsible.
    """
    stack = []
    # Only directives strictly before `lineno` can enclose it.
    for raw in lines[: max(0, lineno - 1)]:
        opened = _FYPP_BLOCK_OPEN.match(raw)
        if opened:
            stack.append(opened.group(1).lower())
        elif stack and _FYPP_BLOCK_CLOSE.match(raw):
            # A close with an empty stack is a stray directive; ignore it.
            stack.pop()
    # Innermost first: the nearest enclosing duplicating block is the one reported.
    return next((f"#:{kw}" for kw in reversed(stack) if kw in _FYPP_DUPLICATING), None)


def _macro_context(fname: str, lineno: int) -> "str | None":
    """File-backed wrapper around _macro_context_in_lines. Returns None when the
    file cannot be resolved or read (or is empty)."""
    lines = _read_source_lines(fname)
    if not lines:
        return None
    return _macro_context_in_lines(lines, lineno)


def _dat_column(path: str):
    """Load column 1 (the field value) from an MFC .dat file, robust to a
    single-row file (np.loadtxt returns 1-D then, which [:, 1] would crash on)."""
    import numpy as np

    return np.atleast_2d(np.loadtxt(path))[:, 1]


def _max_diff_np(ref_dir: str, run_dir: str, compare_files: list) -> float:
    """Return the largest absolute difference between the reference and run
    value columns over all `compare_files`.

    Returns inf when the two runs cannot be certified as close: a file is
    missing on either side, the two files have a different number of rows, or
    the difference is not finite (NaN/inf in either output)."""
    import numpy as np

    worst = 0.0
    for fname in compare_files:
        ref_p, run_p = os.path.join(ref_dir, fname), os.path.join(run_dir, fname)
        if not (os.path.exists(ref_p) and os.path.exists(run_p)):
            return float("inf")
        ref_col, run_col = _dat_column(ref_p), _dat_column(run_p)
        if ref_col.shape != run_col.shape:
            # Subtracting would raise, or silently broadcast a one-row file.
            return float("inf")
        dev = float(np.max(np.abs(ref_col - run_col)))
        if not math.isfinite(dev):
            # max(x, nan) returns x, so a NaN would otherwise vanish and a run
            # that diverged to NaN would be reported as zero deviation.
            return float("inf")
        worst = max(worst, dev)
    return worst


def _max_abs_np(ref_dir: str, compare_files: list) -> float:
    """Return the maximum finite absolute value across all reference output files.

    Missing files are skipped. Non-finite samples are ignored so that a single
    NaN/inf cannot erase or blow up the field scale."""
    import numpy as np

    peak = 0.0
    for fname in compare_files:
        ref_p = os.path.join(ref_dir, fname)
        if not os.path.exists(ref_p):
            continue
        mags = np.abs(_dat_column(ref_p))
        mags = mags[np.isfinite(mags)]
        if mags.size:
            peak = max(peak, float(mags.max()))
    return peak


def _parse_cancel_gen(gen_path: str) -> list:
    """Parse cc-gen-file TSV (file\\tline\\tsymbol) -> unique [(fname, line)] for
    MFC sources, in first-seen order. Returns [] if the file does not exist."""
    if not os.path.isfile(gen_path):
        return []
    locs = {}  # dict used as an insertion-ordered set
    # errors="replace": one undecodable byte must not abort the whole pass.
    with open(gen_path, encoding="utf-8", errors="replace") as fh:
        for raw in fh:
            parts = raw.rstrip("\n").split("\t")
            if len(parts) < 2:
                continue
            fname = parts[0].strip()
            if any(ext in fname for ext in _EXTERNAL_SRCS):
                continue
            if not fname.endswith((".fpp", ".f90", ".F90", ".c", ".cpp")):
                continue
            try:
                lineno = int(parts[1].strip())
            except ValueError:
                continue
            locs.setdefault((fname, lineno))
    return list(locs)


def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
    """Extract first MFC-source frame from each Valgrind error matching error_keyword.

    Returns unique [(fname, line)] in first-seen order, or [] if the log does
    not exist."""
    if not os.path.isfile(log_path):
        return []
    locs = {}  # dict used as an insertion-ordered set
    in_error = False
    with open(log_path, encoding="utf-8", errors="replace") as fh:
        for raw in fh:
            line = _VG_PREFIX_RE.sub("", raw)
            if error_keyword in line:
                in_error = True
                continue
            if not in_error:
                continue
            if " at " in line or " by " in line:
                m = _VGFRAME_RE.search(line)
                if not m:
                    # Frame without source info: keep walking down the stack.
                    continue
                fname = m.group(1)
                if any(ext in fname for ext in _EXTERNAL_SRCS):
                    continue
                locs.setdefault((fname, int(m.group(2))))
                in_error = False  # first usable frame found for this error
            elif line.strip() == "":
                # Blank line ends the stack trace of the current error.
                in_error = False
    return list(locs)


# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
# levels and taking the highest each site survives gives a per-site "bits lost"
# severity (a lower bound - no false positives). 48 is near the full 53-bit
# double mantissa (the top of the sweep), not the mantissa width itself.
+CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48] + + +def _cancellation_severity(level_sites: list) -> dict: + """Given [(threshold, [sites])], return {site: highest threshold it survives} + = the per-site bits-lost severity (a lower bound).""" + sev = {} + for level, sites in level_sites: + for site in sites: + if level > sev.get(site, 0): + sev[site] = level + return sev + + +def _digits_left(bits_lost: float) -> float: + """Approximate trustworthy decimal digits remaining after losing `bits_lost` + bits of a double's 53-bit mantissa (~15.95 digits full).""" + return max(0.0, (53 - bits_lost) / math.log2(10)) diff --git a/toolchain/mfc/fp_stability_report.py b/toolchain/mfc/fp_stability_report.py new file mode 100644 index 0000000000..2ca469b9e9 --- /dev/null +++ b/toolchain/mfc/fp_stability_report.py @@ -0,0 +1,158 @@ +"""GitHub-output emitters for the FP-stability suite (step summary + annotations). + +Pure formatting of the result dicts produced by the runners; the metric helpers +it uses (digit math) live in fp_stability_metrics. +""" + +import math +import os + +from .fp_stability_metrics import ( + MIN_SIG_BITS, + VPREC_MANTISSA_BITS, + _digits_left, +) + + +def _emit_github_annotations(results: list): + """Emit GitHub annotations for FP cancellation sites. + + Only runs inside GitHub Actions (GITHUB_ACTIONS env var set). Annotations + appear inline on the responsible source lines in the PR diff view. + + Up to 3 cancellation sites per case are emitted as ::notice:: so the diff + highlights subtraction-cancellation hotspots from --check-cancellation. A site + whose .fpp line sits inside a #:for/#:def expansion (tracked in + cancellation_macro) is noted as possibly representing multiple instances. 
+ """ + if not os.environ.get("GITHUB_ACTIONS"): + return + for r in results: + site_bits = r.get("cancellation_bits") or {} + macro_sites = r.get("cancellation_macro") or {} + for fname, lineno in r.get("cancellation_locs", [])[:3]: + loc = f"file={fname},line={lineno}" + title = f"FP cancellation [{r['name']}]" + note = "catastrophic cancellation site" + bits = site_bits.get((fname, lineno)) + if bits: + note += f" - loses >= {bits / math.log2(10):.0f} of ~16 digits" + macro = macro_sites.get((fname, lineno)) + if macro: + note += f" - inside a {macro}-expanded line, may represent multiple instances" + print(f"::notice {loc},title={title}::{note}", flush=True) + n_cc = len(r.get("cancellation_locs", [])) + if n_cc > 3: + print(f"::notice title=FP cancellation [{r['name']}]::{n_cc - 3} more cancellation site(s) not annotated inline; see the step summary", flush=True) + + +def _more_md(total: int, shown: int, noun: str) -> str: + """Markdown bullet noting `total - shown` further items elided from a list, + or '' when nothing was truncated.""" + if total <= shown: + return "" + return f"- ...and {total - shown} more {noun}; see `fp-stability-logs/`" + + +def _emit_github_summary(results: list, n_samples: int): + """Write a markdown results table to GITHUB_STEP_SUMMARY. + + Visible directly in the Actions run UI without downloading artifacts. + Includes: pass/fail, max_dev, float proxy, VPREC sweep (failing levels), + and catastrophic-cancellation source locations for any failing cases. + """ + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + + n_pass = sum(1 for r in results if r["passed"]) + n_fail = len(results) - n_pass + + md = [] + md.append("## FP Stability Results\n") + md.append(f"**{n_pass} passed, {n_fail} failed** - {n_samples} random-rounding samples per case\n") + md.append( + f"> **Coverage:** {len(results)} one-dimensional case(s) " + f"({', '.join(r['name'] for r in results)}). 
A pass means stable in the code paths these " + "cases exercise - not a guarantee for multi-D, viscous, MHD, IGR, or bubble-dynamics paths " + "they do not reach.\n" + ) + + # Main results table - pass/fail is scale-free: bits retained vs a single floor + md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n") + md.append("| Case | Status | bits retained | max\\_dev | Float proxy |") + md.append("|------|:------:|:------:|--------:|--------:|") + for r in results: + status = "PASS" if r["passed"] else "FAIL" + bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "-" + fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "-" + md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} |") + md.append("") + + # Cancellation ORIGINS - where ill-conditioning actually arises, led with the + # most severe (most bits lost). + cases_with_cancel = [r for r in results if r.get("cancellation_locs")] + if cases_with_cancel: + md.append("### Catastrophic cancellation origins (ranked by digits lost)\n") + md.append( + "> Subtraction of nearly-equal values loses leading significant digits. A double carries " + "~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away " + "(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only " + "single-precision trust. 
Site *count* is not severity - one site losing many digits outweighs " + "many mild ones.\n" + ) + for r in cases_with_cancel: + site_bits = r.get("cancellation_bits") or {} + macro_sites = r.get("cancellation_macro") or {} + sites = [{"where": f"{fname}:{lineno}", "bits": site_bits.get((fname, lineno), 0), "macro": macro_sites.get((fname, lineno))} for fname, lineno in r["cancellation_locs"]] + ordered = sorted(sites, key=lambda e: (-e["bits"], e["where"])) + if ordered: + w = ordered[0] + md.append(f"**`{r['name']}`** - {len(ordered)} site(s); worst loses >= {w['bits'] / math.log2(10):.0f} of ~16 digits\n") + for e in ordered[:15]: + lost = e["bits"] / math.log2(10) + ambiguous = f" - _{e['macro']}-expanded, may represent multiple instances_" if e["macro"] else "" + md.append(f"- **>= {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) - `{e['where']}`{ambiguous}") + footer = _more_md(len(ordered), 15, "site(s)") + if footer: + md.append(footer) + md.append("") + + # VPREC sweep - one column per mantissa-bit level showing the Linf deviation at + # that reduced precision ("crash" = run diverged/failed; dash = not measured). 
+ if any(r["vprec"] for r in results): + _labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"} + header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS) + sep = " | ".join(":---:" for _ in VPREC_MANTISSA_BITS) + md.append("### VPREC precision sweep\n") + md.append(f"| Case | {header} |") + md.append(f"|------|{sep}|") + for r in results: + vmap = {b: d for b, d in r["vprec"]} + cols = [] + for b in VPREC_MANTISSA_BITS: + d = vmap.get(b) + if d is None: + cols.append("-") + elif d == float("inf"): + cols.append("crash") + else: + cols.append(f"{d:.2e}") + md.append(f"| `{r['name']}` | {' | '.join(cols)} |") + md.append("") + + # Float-max overflow sites + cases_with_fmax = [r for r in results if r.get("float_max_locs")] + if cases_with_fmax: + md.append("### Float32 overflow sites (check\\_max\\_float)\n") + for r in cases_with_fmax: + md.append(f"**`{r['name']}`** - {len(r['float_max_locs'])} site(s)\n") + for fname, lineno in r["float_max_locs"][:10]: + md.append(f"- `{fname}:{lineno}`") + footer = _more_md(len(r["float_max_locs"]), 10, "site(s)") + if footer: + md.append(footer) + md.append("") + + with open(summary_path, "a") as f: + f.write("\n".join(md) + "\n") diff --git a/toolchain/mfc/fp_stability_runners.py b/toolchain/mfc/fp_stability_runners.py new file mode 100644 index 0000000000..3809ee9992 --- /dev/null +++ b/toolchain/mfc/fp_stability_runners.py @@ -0,0 +1,194 @@ +"""Verrou subprocess runners for the FP-stability suite. + +Each routine drives the verrou/valgrind binary and returns parsed results. Pure +parsing / metric helpers live in fp_stability_metrics, which this module imports. 
+""" + +import glob +import os +import shutil +import subprocess +import tempfile + +from .common import MFC_ROOT_DIR, MFCException +from .fp_stability_metrics import ( + VPREC_MANTISSA_BITS, + _max_diff_np, + _parse_cancel_gen, + _parse_vg_error_locs, +) +from .printer import cons + + +def _has_verrou_tool(valgrind_bin: str, env: dict = None) -> bool: + """True if this valgrind actually provides the 'verrou' tool. A plain system + valgrind does not - accepting one would only fail later at run time. Pass env + (with VALGRIND_LIB) to verify a relocated prebuilt tree, which cannot load its + tool without it.""" + try: + return subprocess.run([valgrind_bin, "--tool=verrou", "--version"], env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode == 0 + except OSError: + return False + + +def _find_verrou() -> str: + verrou_home = os.environ.get("VERROU_HOME", os.path.join(os.path.expanduser("~"), ".local", "verrou")) + candidate = os.path.join(verrou_home, "bin", "valgrind") + # Require the $VERROU_HOME tree to actually run the verrou tool (with VALGRIND_LIB + # for a relocated prebuilt). A broken/stale/non-Verrou tree there must read as + # "absent" so it gets reinstalled, not used until it fails on every run. + if os.path.isfile(candidate) and os.access(candidate, os.X_OK) and _has_verrou_tool(candidate, _verrou_env(candidate)): + return candidate + # Fall back to a valgrind on PATH only if it is Verrou-enabled; a bare system + # valgrind must read as "Verrou absent" so it gets installed, not misused. Verify + # with VALGRIND_LIB too, so a relocated prebuilt on PATH (env.sh not sourced) isn't + # wrongly judged absent. 
+ path_vg = shutil.which("valgrind") + if path_vg and _has_verrou_tool(path_vg, _verrou_env(path_vg)): + return path_vg + return "" + + +def _find_binary(name: str) -> str: + install_dir = os.path.join(MFC_ROOT_DIR, "build", "install") + candidates = glob.glob(os.path.join(install_dir, "*", "bin", name)) + return max(candidates, key=os.path.getmtime) if candidates else "" + + +def _verrou_env(verrou_bin: str) -> dict: + """os.environ plus VALGRIND_LIB, so a relocated install tree (e.g. a prebuilt + artifact extracted to a new prefix) can locate its tool - Valgrind bakes its + build prefix into the binary otherwise. Harmless for a source-built tree, where + VALGRIND_LIB just equals the compiled-in path. A VALGRIND_LIB already in the + environment (user sourced env.sh) is left untouched.""" + env = os.environ.copy() + libdir = os.path.join(os.path.dirname(os.path.dirname(verrou_bin)), "libexec", "valgrind") + if "VALGRIND_LIB" not in env and os.path.isdir(libdir): + env["VALGRIND_LIB"] = libdir + return env + + +def _write_inp(params: dict, target_name: str, work_dir: str) -> None: + """Write a Fortran namelist .inp file from a Python params dict.""" + from .run import case_dicts + + master_keys = case_dicts.get_input_dict_keys(target_name) + lines = [f"{k} = {v}" for k, v in params.items() if k in master_keys] + with open(os.path.join(work_dir, f"{target_name}.inp"), "w") as fh: + fh.write("&user_inputs\n" + "\n".join(lines) + "\n&end/\n") + + +def _run_preprocess(pp_bin: str, pre_params: dict, work_dir: str): + _write_inp(pre_params, "pre_process", work_dir) + with open(os.path.join(work_dir, "pre.log"), "w") as f: + result = subprocess.run([pp_bin], cwd=work_dir, stdout=f, stderr=subprocess.STDOUT, check=False) + if result.returncode != 0: + raise MFCException(f"pre_process failed (rc={result.returncode}). 
See {work_dir}/pre.log") + + +def _run_simulation_verrou( + verrou_bin: str, + sim_bin: str, + work_dir: str, + run_dir: str, + rounding_mode: str = None, + extra_flags: list = None, +): + """Create run_dir, copy ICs into a fresh tmpdir, run simulation under verrou, + and collect its D/ output into run_dir. Owns run_dir creation, so callers need + not pre-create it. + + rounding_mode is passed as --rounding-mode= when not None. + extra_flags are appended before the binary (e.g. --backend=vprec ...). + """ + os.makedirs(run_dir, exist_ok=True) # needed before --log-file / sim.out below + with tempfile.TemporaryDirectory(prefix="mfc-fps-") as tmpdir: + for fname in ["simulation.inp", "indices.dat", "pre_time_data.dat", "io_time_data.dat"]: + src = os.path.join(work_dir, fname) + if os.path.exists(src): + shutil.copy2(src, tmpdir) + shutil.copytree(os.path.join(work_dir, "p_all"), os.path.join(tmpdir, "p_all")) + os.makedirs(os.path.join(tmpdir, "D")) + + log_path = os.path.join(run_dir, "verrou.log") + cmd = [verrou_bin, "--tool=verrou", "--error-limit=no", f"--log-file={log_path}"] + if rounding_mode: + cmd.append(f"--rounding-mode={rounding_mode}") + cmd.extend(extra_flags or []) + cmd.append(sim_bin) + + with open(os.path.join(run_dir, "sim.out"), "w") as f: + result = subprocess.run(cmd, cwd=tmpdir, env=_verrou_env(verrou_bin), stdout=f, stderr=subprocess.STDOUT, check=False) + + if result.returncode != 0: + tag = rounding_mode or "vprec" + raise MFCException(f"simulation ({tag}) exited {result.returncode}. 
See {run_dir}/sim.out") + + for fn in os.listdir(os.path.join(tmpdir, "D")): + shutil.copy2(os.path.join(tmpdir, "D", fn), run_dir) + + +def _run_cancellation_check(verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10): + """Run --check-cancellation at the given bit threshold; return [(fname, line)] + of MFC cancellation sites (subtractions losing >= `threshold` significant bits), + or None if the run itself failed (distinct from [] = ran and found none).""" + tag = f"cancellation_{threshold}" + run_dir = os.path.join(work_dir, tag) + gen_path = os.path.join(run_dir, "cancel_gen.txt") + flags = [ + "--check-cancellation=yes", + f"--cc-threshold-double={threshold}", + f"--cc-gen-file={gen_path}", + ] + try: + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) + except MFCException as exc: + cons.print(f" [yellow]cancellation run (threshold {threshold}) failed: {exc}[/yellow]") + return None + return _parse_cancel_gen(gen_path) + + +def _run_float_max_check(verrou_bin: str, sim_bin: str, work_dir: str): + """Run with --check-max-float=yes; return [(fname, line)] of overflow sites, + or None if the run failed (distinct from [] = ran and found none).""" + run_dir = os.path.join(work_dir, "float_max") + try: + _run_simulation_verrou( + verrou_bin, + sim_bin, + work_dir, + run_dir, + rounding_mode="nearest", + extra_flags=["--check-max-float=yes"], + ) + except MFCException as exc: + cons.print(f" [yellow]float-max run failed: {exc}[/yellow]") + return None + return _parse_vg_error_locs(os.path.join(run_dir, "verrou.log"), "Max float") + + +def _run_float_proxy(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> float: + """One run with --rounding-mode=float; returns Linf deviation from nearest-ref.""" + run_dir = os.path.join(work_dir, "float_proxy") + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="float") + return _max_diff_np(ref_dir, run_dir, 
case["compare"]) + + +def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, ref_dir: str) -> list: + """Run at each mantissa-bit level. Returns [(bits, dev), ...].""" + results = [] + for bits in VPREC_MANTISSA_BITS: + run_dir = os.path.join(work_dir, f"vprec_{bits}") + flags = [ + "--backend=vprec", + "--vprec-mode=full", + f"--vprec-precision-binary64={bits}", + "--vprec-range-binary64=11", + ] + try: + _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, extra_flags=flags) + dev = _max_diff_np(ref_dir, run_dir, case["compare"]) + except MFCException: + dev = float("inf") + results.append((bits, dev)) + return results diff --git a/toolchain/mfc/test_fp_stability.py b/toolchain/mfc/test_fp_stability.py new file mode 100644 index 0000000000..6521705b96 --- /dev/null +++ b/toolchain/mfc/test_fp_stability.py @@ -0,0 +1,220 @@ +"""Unit tests for the pure helpers behind the FP-stability cancellation pass, its +fypp macro-expansion flagging, scale-free pass/fail, and Verrou discovery/install. + +The Verrou subprocess machinery is exercised by the ./mfc.sh fp-stability CI job; +here we test only the pure functions that decide what to instrument and how to +label results, so they can run without Verrou or built binaries. We keep the tests +that pin a real behavioral contract or a subtle edge, not every micro-variation. +""" + +from mfc.fp_stability_metrics import ( + _autodetect_compare, + _cancellation_severity, + _macro_context_in_lines, + _sig_bits, +) + +# --- fypp macro-expansion context detection (a #:for/#:def line maps to N instances) --- + + +def test_macro_context_inside_for_loop_body(): + lines = [ + "#:for i in [1, 2, 3]\n", + " q(${i}$) = a - b\n", + "#:endfor\n", + ] + assert _macro_context_in_lines(lines, 2) == "#:for" + + +def test_macro_context_if_block_is_not_duplicating(): + # #:if selects code but does not duplicate it, so it must NOT be flagged. 
+ lines = [ + "#:if FOO\n", + " a = b - c\n", + "#:endif\n", + ] + assert _macro_context_in_lines(lines, 2) is None + + +def test_macro_context_unbalanced_close_is_safe(): + # a stray #:endfor with an empty stack must not crash or misreport + assert _macro_context_in_lines(["#:endfor\n", " a = b - c\n"], 2) is None + + +# --- per-site cancellation severity (highest bit-threshold a site survives) --- + + +def test_cancellation_severity_takes_highest_surviving_threshold(): + level_sites = [ + (10, [("a.fpp", 1), ("b.fpp", 2)]), + (20, [("a.fpp", 1)]), + (30, [("a.fpp", 1)]), + ] + # a.fpp:1 survives to 30 bits; b.fpp:2 only at 10 + assert _cancellation_severity(level_sites) == {("a.fpp", 1): 30, ("b.fpp", 2): 10} + + +# --- auto-detect which output files to compare (for a user case) --- + + +def test_autodetect_compare_picks_cons_at_latest_step(): + fns = [ + "cons.1.00.000000.dat", + "cons.1.00.000050.dat", + "cons.2.00.000050.dat", + "prim.1.00.000050.dat", + ] + assert _autodetect_compare(fns) == ["cons.1.00.000050.dat", "cons.2.00.000050.dat"] + + +def test_autodetect_compare_falls_back_to_prim_when_no_cons(): + fns = ["prim.1.00.000010.dat", "prim.3.00.000010.dat"] + assert _autodetect_compare(fns) == ["prim.1.00.000010.dat", "prim.3.00.000010.dat"] + + +# --- scale-free pass/fail: significant bits retained --- + + +def test_sig_bits_is_scale_free(): + # same relative deviation -> same bits regardless of absolute magnitude + assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9 + + +def test_sig_bits_zero_scale_is_safe(): + # a zero/degenerate field scale must not divide-by-zero; report full precision + assert _sig_bits(1e-12, 0.0) == 53.0 + + +# --- report emitters: must survive the CI-only path without KeyError / regressions --- + + +def _emit_to_tmp(results, tmp_path, monkeypatch): + """Run _emit_github_summary into a temp file under the GitHub-Actions env.""" + from mfc import fp_stability_report as report + + out = tmp_path / "summary.md" + 
monkeypatch.setenv("GITHUB_STEP_SUMMARY", str(out)) + monkeypatch.setenv("GITHUB_ACTIONS", "1") + report._emit_github_summary(results, 5) + return out.read_text() + + +def test_emit_summary_survives_blank_result(tmp_path, monkeypatch): + # the dict produced on the per-case error path must not KeyError the emitter + from mfc.fp_stability import _blank_result + + text = _emit_to_tmp([_blank_result("x")], tmp_path, monkeypatch) + assert "0 passed, 1 failed" in text + + +def test_emit_annotations_cancellation_notes_fypp_ambiguity(tmp_path, monkeypatch, capsys): + from mfc import fp_stability_report as report + from mfc.fp_stability import _blank_result + + monkeypatch.setenv("GITHUB_ACTIONS", "1") + r = _blank_result("demo") + r.update( + cancellation_locs=[("src/x/m_a.fpp", 5)], + cancellation_bits={("src/x/m_a.fpp", 5): 40}, + cancellation_macro={("src/x/m_a.fpp", 5): "#:for"}, + ) + report._emit_github_annotations([r]) + out = capsys.readouterr().out + assert "::notice" in out + assert "multiple instances" in out # fypp-expanded cancellation site flagged + + +# --- Verrou discovery: a bare/broken valgrind must read as "Verrou absent" --- + + +def test_find_verrou_prefers_verrou_home_candidate(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + vbin = tmp_path / "bin" / "valgrind" + vbin.parent.mkdir(parents=True) + vbin.write_text("#!/bin/sh\n") + vbin.chmod(0o755) + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + # The candidate must also verify as Verrou-enabled; stub that so the test + # exercises precedence, not a real valgrind invocation. 
+ monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: True) + assert runners._find_verrou() == str(vbin) + + +def test_find_verrou_rejects_broken_verrou_home_tree(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + # A valgrind exists at $VERROU_HOME but does not actually run the verrou tool + # (broken/stale/non-Verrou): it must read as absent, not be returned. + vbin = tmp_path / "bin" / "valgrind" + vbin.parent.mkdir(parents=True) + vbin.write_text("#!/bin/sh\n") + vbin.chmod(0o755) + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False) + monkeypatch.setattr(runners.shutil, "which", lambda _name: None) + assert runners._find_verrou() == "" + + +def test_find_verrou_rejects_non_verrou_path_valgrind(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + # VERROU_HOME has no valgrind; a plain valgrind is on PATH but lacks the tool. + monkeypatch.setenv("VERROU_HOME", str(tmp_path)) + monkeypatch.setattr(runners.shutil, "which", lambda _name: "/usr/bin/valgrind") + monkeypatch.setattr(runners, "_has_verrou_tool", lambda _bin, _env=None: False) + assert runners._find_verrou() == "" + + +def test_has_verrou_tool_reflects_exit_code(monkeypatch): + from mfc import fp_stability_runners as runners + + class _R: + def __init__(self, rc): + self.returncode = rc + + monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(0)) + assert runners._has_verrou_tool("/any/valgrind") is True + monkeypatch.setattr(runners.subprocess, "run", lambda *a, **k: _R(1)) + assert runners._has_verrou_tool("/any/valgrind") is False + + def _boom(*a, **k): + raise OSError("not executable") + + monkeypatch.setattr(runners.subprocess, "run", _boom) + assert runners._has_verrou_tool("/stale/valgrind") is False + + +# --- env composition for relocated (prebuilt) Verrou trees --- + + +def 
test_verrou_env_sets_valgrind_lib_when_libexec_present(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + (tmp_path / "libexec" / "valgrind").mkdir(parents=True) + monkeypatch.delenv("VALGRIND_LIB", raising=False) + env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) + assert env["VALGRIND_LIB"] == str(tmp_path / "libexec" / "valgrind") + + +def test_verrou_env_preserves_user_valgrind_lib(tmp_path, monkeypatch): + from mfc import fp_stability_runners as runners + + (tmp_path / "libexec" / "valgrind").mkdir(parents=True) + monkeypatch.setenv("VALGRIND_LIB", "/user/chosen/lib") + env = runners._verrou_env(str(tmp_path / "bin" / "valgrind")) + assert env["VALGRIND_LIB"] == "/user/chosen/lib" # not clobbered + + +# --- auto-install hard-fail guard (a green bootstrap that produced no binary) --- + + +def test_install_verrou_raises_when_no_binary_appears(monkeypatch): + import pytest + + from mfc import fp_stability as fps + + monkeypatch.setattr(fps.subprocess, "run", lambda *a, **k: type("R", (), {"returncode": 0})()) + monkeypatch.setattr(fps, "_find_verrou", lambda: "") + with pytest.raises(fps.MFCException, match="no valgrind binary"): + fps._install_verrou()