diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index fe44810..c0b58a9 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -1434,9 +1434,18 @@ mod tests { ..CompileConfig::default() }; - // Optimized path: `(local.get 0) >>> 32; wrap_i64` + // #518: the i64 value must NOT come from an i64 PARAM — the optimized + // path now declines i64-param functions to the direct selector (it homed + // an i64 param in R4:R5 instead of R0:R1, a silent miscompile this test's + // byte-size-only assertion masked). The canonical #94 case is a u64 from + // an FFI return, not a param, anyway. Source the i64 from a sign-extended + // i32 param (`extend_i32_s`): a runtime, non-constant-foldable i64 that + // stays on the optimized path, so the shift-by-32 hi-extract peephole is + // still exercised on CORRECT code. + // Optimized path: `(i64.extend_i32_s (local.get 0)) >>> 32; wrap_i64` let ops_hi32 = vec![ - WasmOp::LocalGet(0), // i64 param in R0:R1 + WasmOp::LocalGet(0), // i32 param in R0 + WasmOp::I64ExtendI32S, WasmOp::I64Const(32), WasmOp::I64ShrU, WasmOp::I32WrapI64, @@ -1445,11 +1454,11 @@ mod tests { .compile_function("hi32_extract", &ops_hi32, &config) .unwrap(); - // Generic path: `(local.get 0) >>> 7; wrap_i64` — same shape, but the - // shift amount is not a multiple of 32, so it falls through to the - // 38-byte runtime shift. + // Generic path: `... >>> 7; wrap_i64` — same shape, but the shift amount + // is not a multiple of 32, so it falls through to the runtime shift. 
let ops_generic = vec![ WasmOp::LocalGet(0), + WasmOp::I64ExtendI32S, WasmOp::I64Const(7), WasmOp::I64ShrU, WasmOp::I32WrapI64, diff --git a/crates/synth-synthesis/src/instruction_selector.rs b/crates/synth-synthesis/src/instruction_selector.rs index 8686f69..4ea7025 100644 --- a/crates/synth-synthesis/src/instruction_selector.rs +++ b/crates/synth-synthesis/src/instruction_selector.rs @@ -132,6 +132,56 @@ fn index_to_reg(index: u8) -> Reg { reg } +/// AAPCS core-register assignment for a function's first parameters (#518). +/// +/// Walks the declared param widths left-to-right over the 4 core argument +/// registers R0..R3. An i32/f32 param takes the next register; an i64/f64 param +/// takes an EVEN-ALIGNED consecutive pair (rounding the cursor up to an even +/// index first) and the returned register is its LOW half (hi = [`i64_pair_hi`]). +/// Returns the lo register for every param that lands wholly within R0..R3; a +/// param that would spill past R3 is stack-passed and is absent from the map +/// (an i64 there is the open #503-i64 case, declined by the caller). +/// +/// For an all-i32 signature this is exactly `index_to_reg(i)` for each `i`, so +/// the param→register mapping is byte-identical to the legacy sequential scheme +/// whenever no i64/f64 param is present — the #518 fix only diverges for the +/// i64-param functions that were miscompiled. 
+fn aapcs_param_regs(num_params: u32, params_i64: &[bool]) -> std::collections::HashMap<u32, Reg> { + let mut map = std::collections::HashMap::new(); + let mut next: u8 = 0; // next free core arg-register index (0..=3) + for i in 0..num_params { + let is_wide = params_i64.get(i as usize).copied().unwrap_or(false); + if is_wide { + if !next.is_multiple_of(2) { + next += 1; // even-align the pair + } + if next <= 2 { + // the pair (next, next+1) lands wholly within R0..R3 + map.insert(i, index_to_reg(next)); + next += 2; + } else { + next = 4; // spilled to the stack (#503-i64) + } + } else if next <= 3 { + map.insert(i, index_to_reg(next)); + next += 1; + } else { + next = 4; // i32 stack param (#359 incoming-arg path) + } + } + map +} + +/// True if any declared i64/f64 param of this function lands (wholly or its +/// even-aligned start) PAST R3 — i.e. AAPCS passes it on the stack (#518/#503). +/// The register-pair homing below only covers i64 params resident in R0..R3; a +/// stack-passed 64-bit param is the open #503-i64 follow-up and is declined. +fn i64_param_overflows_core_regs(num_params: u32, params_i64: &[bool]) -> bool { + let regs = aapcs_param_regs(num_params, params_i64); + (0..num_params) + .any(|i| params_i64.get(i as usize).copied().unwrap_or(false) && !regs.contains_key(&i)) +} + /// A virtual-stack entry (#171). A value normally lives in a physical register /// ([`StackVal::Reg`] — for an i64, this is the `lo`; the `hi` is the /// conventional [`i64_pair_hi`]). When register pressure exhausts the @@ -5382,6 +5432,41 @@ impl InstructionSelector { ))); } + // #518 Ok-or-Err (#180/#185): an i64/f64 PARAM is correctly homed below + // (AAPCS register-pair, `aapcs_param_regs`) only for the LEAF, + // register-resident case.
Two sub-cases are declined LOUDLY here rather + // than silently miscompiled (the #518 defect): (1) an i64 param that + // AAPCS passes PAST R3 on the stack — the open #503-i64 follow-up; + // (2) a function that FRAME-BACKS its params — `has_call` (params spill + // to the frame to survive the call's caller-saved clobber, #204/#193) or + // the pair-exhaustion retry (`param_backing_on_exhaustion`) — where the + // `param_slots` path would size an i64 param's slot from `i64_set`, which + // does not include params, dropping the high half. Both fall back to a + // loud skip (warning + absent symbol), never wrong code. The common + // leaf-in-registers case is FIXED below. (Empty `params_i64` ⇒ all-i32 ⇒ + // this is a no-op and every existing fixture stays byte-identical.) + let has_i64_param = self.params_i64.iter().take(num_params as usize).any(|&w| w); + if has_i64_param { + if i64_param_overflows_core_regs(num_params, &self.params_i64) { + return Err(synth_core::Error::synthesis( + "#518/#503: an i64/f64 param is AAPCS-passed past R3 (on the \ + stack); 64-bit stack params are not yet lowered" + .to_string(), + )); + } + let has_call = wasm_ops + .iter() + .any(|op| matches!(op, Call(_) | CallIndirect { .. })); + if has_call || self.param_backing_on_exhaustion { + return Err(synth_core::Error::synthesis( + "#518: an i64/f64 param in a frame-backing function (contains a \ + call, or the register-pair-exhaustion retry) is not yet \ + lowered — the param_slots path drops the high half" + .to_string(), + )); + } + } + // #359: size of the outgoing stack-argument region = the max over all // Call/CallIndirect sites of `max(0, arg_count - 4) * 4`, rounded up to 8. // Reserved at the BOTTOM of the frame (offset 0) by `compute_local_layout`. 
@@ -5505,11 +5590,20 @@ impl InstructionSelector { // Map of local index -> register let mut local_to_reg: std::collections::HashMap<u32, Reg> = std::collections::HashMap::new(); - // First 4 params are in r0-r3 + // First 4 params are in r0-r3. + // #518: register homes follow the AAPCS core-register assignment + // (`aapcs_param_regs`) — an i64 param takes an even-aligned pair, so e.g. + // `(i32, i64)` puts the i64 in R2:R3, not the sequential R1:R2 that the + // old `index_to_reg(i)` mapping wrongly used. For an all-i32 signature + // this is identical to `index_to_reg(i)`, so non-i64-param functions are + // byte-identical. (Frame-backing i64 params are declined above, so any + // `param_slots` entry reached here is i32 — its sequential `index_to_reg` + // already equals the AAPCS reg; left unchanged to minimise the diff.) // #204/#193: a param the function reads is spilled to a frame slot at // entry and accessed only through it, so it can never be clobbered in // its home register between reads. Params with no slot (unused) stay // register-backed via local_to_reg. + let param_aapcs = aapcs_param_regs(num_params, &self.params_i64); for i in 0..num_params.min(4) { if let Some(&(off, is_i64)) = layout.param_slots.get(&i) { let reg = index_to_reg(i as u8); @@ -5530,11 +5624,27 @@ impl InstructionSelector { source_line: None, }); } else { - local_to_reg.insert(i, index_to_reg(i as u8)); + let reg = param_aapcs + .get(&i) + .copied() + .unwrap_or_else(|| index_to_reg(i as u8)); + local_to_reg.insert(i, reg); } } - let i64_locals = infer_i64_locals(wasm_ops, &self.func_ret_i64, &self.type_ret_i64); + let mut i64_locals = infer_i64_locals(wasm_ops, &self.func_ret_i64, &self.type_ret_i64); + // #518: an i64/f64 PARAM is 64-bit by signature even if it is only READ + // (never `local.set`/`tee`), which `infer_i64_locals` — driven by + // LocalSet/Tee/Call result widths — cannot see.
Seed those indices so a + `LocalGet` of an i64 param pushes a `StackVal::i64` (whose hi register + is reserved via `i64_pair_hi`) instead of an i32 entry that left the hi + unreserved — the exact direct-path #518 mechanism (a following + `i64.const` was then allocated into the param's hi register). + for (k, &wide) in self.params_i64.iter().take(num_params as usize).enumerate() { + if wide { + i64_locals.insert(k as u32); + } + } // VCR-RA local promotion (#390, #242): choose non-param i32 locals to keep // in callee-saved registers (r4..r8) instead of frame slots, and seed them @@ -5573,11 +5683,23 @@ impl InstructionSelector { } } let live_param_regs = |at: usize| -> Vec<Reg> { - param_regs - .iter() - .filter(|(p, _)| param_last_read.get(p).is_some_and(|&last| last >= at)) - .map(|(_, r)| *r) - .collect::<Vec<_>>() + let mut out = Vec::new(); + for (p, r) in &param_regs { + if param_last_read.get(p).is_some_and(|&last| last >= at) { + out.push(*r); + // #518: an i64 param occupies a register PAIR (lo in `r`, hi in + // `i64_pair_hi(r)`). Reserve the hi half too — otherwise a + // constant/temp allocated into it clobbers the param's high + // word before the i64 op reads it (the direct-path #518 bug: + // `movw r1,#K` overwrote R1 = hi of an i64 param in R0:R1). + if i64_locals.contains(p) + && let Ok(hi) = i64_pair_hi(*r) + { + out.push(hi); + } + } + } + out }; for (idx, op) in wasm_ops.iter().enumerate() { @@ -10415,6 +10537,48 @@ mod tests { use super::*; use crate::rules::RuleDatabase; + /// #518: the AAPCS core-register assignment for parameters. An i64 takes an + /// even-aligned consecutive pair (lo returned); a param that spills past R3 is + /// absent. For an all-i32 signature it must equal `index_to_reg(i)`, so + /// non-i64-param functions stay byte-identical.
+ #[test] + fn test_aapcs_param_regs_518() { + let m = |np: u32, w: &[bool]| aapcs_param_regs(np, w); + // single i64 -> R0:R1 (lo = R0) + assert_eq!(m(1, &[true]).get(&0), Some(&Reg::R0)); + // (i32, i64) -> R0, then even-aligned R2:R3 (R1 skipped) + let mixed = m(2, &[false, true]); + assert_eq!(mixed.get(&0), Some(&Reg::R0)); + assert_eq!(mixed.get(&1), Some(&Reg::R2)); + // (i64, i64) -> R0:R1, R2:R3 + let two = m(2, &[true, true]); + assert_eq!(two.get(&0), Some(&Reg::R0)); + assert_eq!(two.get(&1), Some(&Reg::R2)); + // (i32, i32, i64) -> R0, R1, even-aligned R2:R3 + let m3 = m(3, &[false, false, true]); + assert_eq!(m3.get(&2), Some(&Reg::R2)); + // (i64, i32) -> R0:R1, R2 + let i64_i32 = m(2, &[true, false]); + assert_eq!(i64_i32.get(&0), Some(&Reg::R0)); + assert_eq!(i64_i32.get(&1), Some(&Reg::R2)); + // all-i32 must match the legacy sequential index_to_reg (byte-identity) + let all_i32 = m(4, &[false, false, false, false]); + for i in 0..4u32 { + assert_eq!(all_i32.get(&i), Some(&index_to_reg(i as u8))); + } + // (i32, i32, i32, i64): the i64 even-aligns to R4:R5 -> past R3 -> absent + let overflow = m(4, &[false, false, false, true]); + assert_eq!(overflow.get(&3), None); + assert!(i64_param_overflows_core_regs( + 4, + &[false, false, false, true] + )); + // the in-register cases never report overflow + assert!(!i64_param_overflows_core_regs(1, &[true])); + assert!(!i64_param_overflows_core_regs(2, &[false, true])); + assert!(!i64_param_overflows_core_regs(2, &[true, true])); + } + #[test] fn test_register_allocation() { let mut regs = RegisterState::new(); diff --git a/crates/synth-synthesis/src/optimizer_bridge.rs b/crates/synth-synthesis/src/optimizer_bridge.rs index 59837ce..aa60549 100644 --- a/crates/synth-synthesis/src/optimizer_bridge.rs +++ b/crates/synth-synthesis/src/optimizer_bridge.rs @@ -2611,6 +2611,25 @@ impl OptimizerBridge { )); } + // #518: this lowering cannot home an i64 PARAM correctly — it has only the + // param COUNT, not the 
per-param TYPES, so it cannot compute the AAPCS + // even-aligned register pair an i64 param occupies (the `I64Load` arm's + // `num_params >= 2/4` guard conflated register-count with param-count and + // dropped the param). Decline any function that READS an i64 param (an + // `I64Load` from a param index) to the direct selector, which homes the + // pair via `aapcs_param_regs`. Honest degradation (the #359/#188/#507 + // pattern); functions with no i64-param read are untouched (byte-identical). + if instructions.iter().any(|inst| { + matches!(inst.opcode, Opcode::I64Load { addr, .. } if (addr as usize) < num_params) + }) { + return Err(synth_core::Error::synthesis( + "optimized path declines functions that read an i64 param (no \ + per-param AAPCS pair model — see #518); falling back to direct \ + selector" + .to_string(), + )); + } + let mut arm_instrs = Vec::new(); let mut vreg_to_arm: HashMap = HashMap::new(); @@ -6263,10 +6282,17 @@ mod tests { } #[test] - fn test_ir_to_arm_i64_add_uses_operand_regs() { - // Build IR that loads two i64 params (R0:R1 and R2:R3 via I64Load - // with addr=0/1 and num_params >= 4), then adds them. The fix should - // emit Adds/Adc that use the operand registers, NOT hardcoded ones. + fn test_ir_to_arm_i64_add_operand_regs_and_param_decline_518() { + // Build IR that loads two i64 values via I64Load (addr 0/1) then adds + // them. Two assertions: + // (1) #518: when those I64Loads are PARAM reads (addr < num_params) the + // optimized path DECLINES (it cannot AAPCS-home an i64 param pair), + // so ir_to_arm returns Err and the backend falls back to the direct + // selector. Correct param lowering is covered end-to-end by + // scripts/repro/i64_param_518_differential.py. + // (2) for NON-param i64 values (addr >= num_params) it still lowers, + // emitting Adds/Adc that use the operand registers, not hardcoded + // ones. 
let bridge = OptimizerBridge::new(); let instrs = vec![ // I64Load addr=0 → R0:R1 @@ -6306,9 +6332,16 @@ mod tests { is_dead: false, }, ]; - let arm = bridge.ir_to_arm(&instrs, 4).expect("valid IR lowers"); - // We must see at least one Adds and at least one Adc — that's the - // characteristic shape of a 64-bit add on 32-bit ARM. + // (1) addr 0/1 ARE params (num_params=4) → decline (#518). + assert!( + bridge.ir_to_arm(&instrs, 4).is_err(), + "#518: reading an i64 param must decline the optimized path" + ); + // (2) same IR, but num_params=0 → addr 0/1 are non-param i64 locals → + // lowers, using operand registers for the ADDS/ADC. + let arm = bridge + .ir_to_arm(&instrs, 0) + .expect("non-param i64 IR lowers"); let has_adds = arm.iter().any(|op| matches!(op, ArmOp::Adds { .. })); let has_adc = arm.iter().any(|op| matches!(op, ArmOp::Adc { .. })); assert!(has_adds, "i64.add should emit ADDS for the low half"); @@ -6514,7 +6547,8 @@ mod tests { WasmOp::I32WrapI64, ]; let (instrs, _, _) = bridge.optimize_full(&after_ops).unwrap(); - let arm_after = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + // #518: num_params=0 → non-param i64 source (i64-param reads decline). 
+ let arm_after = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); let bytes_after = count_arm_byte_size(&arm_after); // Pre-fix proxy: shift-by-7 takes the generic ArmOp::I64ShrU path, @@ -6526,7 +6560,7 @@ mod tests { WasmOp::I32WrapI64, ]; let (instrs, _, _) = bridge.optimize_full(&before_ops).unwrap(); - let arm_before = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + let arm_before = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); let bytes_before = count_arm_byte_size(&arm_before); println!( @@ -6549,17 +6583,19 @@ mod tests { let bridge = OptimizerBridge::new(); // (i64) -> i32, body = `local.get 0; i64.const 32; i64.shr_u; i32.wrap_i64` let wasm_ops = vec![ - WasmOp::LocalGet(0), // i64 param in R0:R1 + WasmOp::LocalGet(0), WasmOp::I64Const(32), // shift amount WasmOp::I64ShrU, WasmOp::I32WrapI64, ]; let (instrs, _, _) = bridge.optimize_full(&wasm_ops).unwrap(); - // num_params = 1 (one i64 param occupies R0:R1 per AAPCS — but we - // pass 2 to ir_to_arm because each i64 counts as two AAPCS slots - // in the codegen's accounting). - let arm = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + // #518: num_params = 0 → the i64 source is a NON-PARAM local. An i64 + // PARAM read is now declined to the direct selector (the optimized path + // can't AAPCS-home a param pair), so this peephole is exercised on a + // non-param i64; the i64-param path's correctness is covered by + // scripts/repro/i64_param_518_differential.py. + let arm = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); // After fix: NO ArmOp::I64ShrU should be emitted. The 38-byte runtime // shift sequence is the bug we're fixing. @@ -6596,7 +6632,8 @@ mod tests { ]; let (instrs, _, _) = bridge.optimize_full(&wasm_ops).unwrap(); - let arm = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + // #518: num_params=0 → non-param i64 source (i64-param reads decline). 
+ let arm = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); let has_runtime_shift = arm.iter().any(|op| matches!(op, ArmOp::I64ShrS { .. })); assert!( @@ -6639,7 +6676,8 @@ mod tests { ]; let (instrs, _, _) = bridge.optimize_full(&wasm_ops).unwrap(); - let arm = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + // #518: num_params=0 → non-param i64 source (i64-param reads decline). + let arm = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); // After fix: at most one AND should remain, and it shouldn't be the // pair of ANDs from the generic i64.and lowering. (We allow zero ANDs @@ -6669,7 +6707,8 @@ mod tests { ]; let (instrs, _, _) = bridge.optimize_full(&wasm_ops).unwrap(); - let arm = bridge.ir_to_arm(&instrs, 2).expect("valid IR lowers"); + // #518: num_params=0 → non-param i64 source (i64-param reads decline). + let arm = bridge.ir_to_arm(&instrs, 0).expect("valid IR lowers"); let has_runtime_shift = arm.iter().any(|op| matches!(op, ArmOp::I64ShrU { .. })); assert!( diff --git a/scripts/repro/i64_param_518_decline.wat b/scripts/repro/i64_param_518_decline.wat new file mode 100644 index 0000000..8469b24 --- /dev/null +++ b/scripts/repro/i64_param_518_decline.wat @@ -0,0 +1,31 @@ +;; #518 — the DECLINE half of the fix. The leaf, register-resident i64-param +;; cases are lowered correctly (see i64_param_518.wat); these two sub-cases are +;; instead declined LOUDLY (a `warning: skipping function …` + absence from the +;; symbol table) rather than silently miscompiled, because their correct lowering +;; is a tracked follow-up: +;; - d_past_r3: an i64 param AAPCS-passed PAST R3 (on the stack) — the open +;; #503-i64 stack-param case. Here three i32 params fill R0..R2, so the i64 +;; even-aligns to R4:R5 → stack. 
+;; - d_call: an i64 param in a function that CONTAINS A CALL — params are +;; frame-backed to survive the call's caller-saved clobber, and that +;; param_slots path sizes an i64 param's slot from a width set that excludes +;; params, dropping the high half. Declined until the frame-backed i64 path +;; lands. +;; d_leaf is the control: a leaf register-resident i64 param IS emitted (proves +;; the decline is specific to the two sub-cases, not a blanket i64-param refusal). +(module + (func $helper (param i32) (result i32) (local.get 0)) + + ;; i64 param past R3 (R0,R1,R2 = the three i32s; i64 -> R4:R5 = stack) -> decline + (func (export "d_past_r3") (param i32 i32 i32 i64) (result i64) + (i64.add (local.get 3) (i64.const 1))) + + ;; i64 param + a call -> frame-backed -> decline + (func (export "d_call") (param i64) (result i64) + (i64.add + (local.get 0) + (i64.extend_i32_u (call $helper (i32.const 7))))) + + ;; control: leaf i64 param in R0:R1 -> emitted correctly + (func (export "d_leaf") (param i64) (result i64) + (i64.add (local.get 0) (i64.const 3)))) diff --git a/scripts/repro/i64_param_518_differential.py b/scripts/repro/i64_param_518_differential.py index 3e7062c..756d760 100644 --- a/scripts/repro/i64_param_518_differential.py +++ b/scripts/repro/i64_param_518_differential.py @@ -54,12 +54,21 @@ ) WAT = Path(__file__).with_name("i64_param_518.wat") +# The DECLINE half: i64-param sub-cases that are loud-skipped (not lowered) — an +# i64 param past R3 (#503-i64 stack-param) and an i64 param in a frame-backing +# (has-call) function. d_leaf is the control: a leaf i64 param IS emitted. 
+DECLINE_WAT = Path(__file__).with_name("i64_param_518_decline.wat") +DECLINE_SKIPPED = ["d_past_r3", "d_call"] # must warn + be absent from symtab +DECLINE_EMITTED = ["d_leaf"] # must be emitted (non-vacuity) SYNTH = os.environ.get("SYNTH", "./target/debug/synth") CODE, STK, RET, R11 = 0x100000, 0x900000, 0x300000, 0x20000000 -# While the bug is live we EXPECT the i64-param functions to miscompile. Flip to -# False in the gated fix PR; the script then enforces correctness on every path. -EXPECT_MISCOMPILE = True +# The #518 fix is LANDED: the leaf register-resident i64-param cases are lowered +# correctly on both paths; the two unsupported sub-cases are declined loudly. So +# this is now the CORRECTNESS gate (every run must match wasmtime), not the +# pre-fix characterization. (Set True only to re-demonstrate the original bug on +# a pre-fix build.) +EXPECT_MISCOMPILE = False # (export, arg-list, signature) — args/sig drive AAPCS-correct placement so the # harness matches the TRUE calling convention, not synth's buggy mapping. @@ -190,9 +199,77 @@ def run_path(label, relocatable): return divergences, control_ok +def check_decline_contract(): + """The unsupported i64-param sub-cases must LOUD-SKIP (warn + absent symbol), + never silently emit wrong code; a leaf i64 param must still be emitted AND + execute correctly (non-vacuity). 
Returns the number of failures.""" + out = "/tmp/i518_decline.o" + cmd = [SYNTH, "compile", str(DECLINE_WAT), "-o", out, "-b", "arm", + "--target", "cortex-m4", "--all-exports", "--relocatable"] + r = subprocess.run(cmd, capture_output=True, text=True, + env={"PATH": "/usr/bin:/bin"}) + if r.returncode != 0: + print(f"FAIL: decline fixture failed to compile: {r.stderr}") + return 1 + code, base, syms = load(out) + print("\n=== decline contract (loud-skip, not silent miscompile) ===") + fails = 0 + for fn in DECLINE_SKIPPED: + warned = f"skipping function '{fn}'" in r.stderr + absent = fn not in syms + ok = warned and absent + if not ok: + fails += 1 + why = "" if ok else ( + " [" + ", ".join( + ([] if warned else ["no warning"]) + + ([] if absent else ["SILENTLY EMITTED — regression!"]) + ) + "]" + ) + print(f" [{'ok ' if ok else 'BUG'}] {fn}: loud-skipped{why}") + # leaf control: emitted AND correct under unicorn + for fn in DECLINE_EMITTED: + if fn not in syms: + print(f" [BUG] {fn}: expected emitted, but it was skipped") + fails += 1 + continue + # d_leaf: (param i64)(result i64) i64.add(p,3); run with p=7 -> 10 + ok = got_i64_leaf(code, base, syms[fn], 7) == leaf_expect(7) + print(f" [{'ok ' if ok else 'BUG'}] {fn}: emitted + executes correctly") + if not ok: + fails += 1 + return fails + + +def leaf_expect(p): + # d_leaf = (i64.add (local.get 0) (i64.const 3)) + return (p + 3) & 0xFFFFFFFFFFFFFFFF + + +def got_i64_leaf(code, base, faddr, arg): + foff = (faddr & ~1) - base + mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB) + mu.mem_map(CODE, 0x20000) + mu.mem_map(STK - 0x10000, 0x20000) + mu.mem_map(RET & ~0xFFF, 0x1000) + mu.mem_write(CODE, code) + mu.reg_write(UC_ARM_REG_SP, STK) + mu.reg_write(UC_ARM_REG_R11, R11) + mu.reg_write(UC_ARM_REG_R0, arg & 0xFFFFFFFF) + mu.reg_write(UC_ARM_REG_R1, (arg >> 32) & 0xFFFFFFFF) + mu.reg_write(UC_ARM_REG_LR, RET | 1) + try: + mu.emu_start((CODE + foff) | 1, RET, count=100000) + except UcError as e: + return f"ERR:{e}" + return 
(mu.reg_read(UC_ARM_REG_R0) & 0xFFFFFFFF) | ( + (mu.reg_read(UC_ARM_REG_R1) & 0xFFFFFFFF) << 32) + + def main(): opt_div, opt_ctrl = run_path("OPTIMIZED (--target cortex-m4)", relocatable=False) rel_div, rel_ctrl = run_path("DIRECT (--relocatable)", relocatable=True) + decline_fails = check_decline_contract() print("\n--- characterization verdict ---") # The i32 control must ALWAYS match; only i64-param fns are allowed to diverge @@ -213,12 +290,15 @@ def main(): "Re-examine before trusting the fix spec.") sys.exit(1) else: - total = opt_div + rel_div + total = opt_div + rel_div + decline_fails if total == 0: - print("RESULT: PASS — both paths now match wasmtime on every i64-param " - "function. #518 fixed.") + print("RESULT: PASS — both paths match wasmtime on every leaf i64-param " + "function across the full AAPCS matrix, AND the unsupported " + "sub-cases (i64 past R3, frame-backed i64 param) loud-skip rather " + "than miscompile. #518 fixed.") sys.exit(0) - print(f"RESULT: FAIL — {total} residual divergence(s); #518 not fully fixed.") + print(f"RESULT: FAIL — {opt_div + rel_div} value divergence(s) + " + f"{decline_fails} decline-contract failure(s); #518 not fully fixed.") sys.exit(1)