diff --git a/meld-core/src/dwarf.rs b/meld-core/src/dwarf.rs
index 8134482..073f5c0 100644
--- a/meld-core/src/dwarf.rs
+++ b/meld-core/src/dwarf.rs
@@ -575,6 +575,9 @@ fn rewrite_debug_sections(
     // `llvm-dwarfdump --verify` flags as out-of-parent / overlapping. Fix
     // the lengths to the fused layout now that `low_pc` is remapped.
     correct_high_pc_lengths(&mut write_dwarf, remap);
+    // #319 inc 2: fix DW_AT_ranges lists (base-relative offsets that gimli
+    // mis-routes through convert_address) from the read side.
+    correct_die_ranges(&mut write_dwarf, &read_dwarf, remap);
     // #144 inc 3: the synthetic `` unit rides the SAME
     // write as the remapped original units, so every cross-section
     // offset is computed in one shared offset space (appending
@@ -644,6 +647,149 @@ fn correct_high_pc_lengths(write_dwarf: &mut gimli::write::Dwarf, remap: &Addres
     }
 }
 
+/// Collect a write unit's DIE ids in first-child-first pre-order — the same
+/// order `gimli::read`'s `next_dfs` yields, so the two can be walked in
+/// lockstep. (Unlike [`correct_high_pc_lengths`]'s order-agnostic walk, the
+/// order here must align with the read side.)
+///
+/// Starts at `id` and appends every id of that subtree to `out`. Uses an
+/// explicit stack instead of recursion, so a deeply nested DIE tree cannot
+/// overflow the call stack and no per-node child `Vec` is allocated.
+fn write_dies_preorder(
+    unit: &gimli::write::Unit,
+    id: gimli::write::UnitEntryId,
+    out: &mut Vec<gimli::write::UnitEntryId>,
+) {
+    let mut pending = vec![id];
+    while let Some(current) = pending.pop() {
+        out.push(current);
+        // Children are pushed reversed so the first child is popped first.
+        pending.extend(unit.get(current).children().rev().copied());
+    }
+}
+
+/// Rewrite every `DW_AT_ranges` list to the fused layout (#319 inc 2).
+///
+/// DWARF v4 `.debug_ranges` entries are base-relative offset pairs. gimli
+/// parses them as `AddressOrOffsetPair` and routes the *offsets* through
+/// `convert_address` — which treats a small offset as an absolute code
+/// address and remaps it to an unrelated output location, so the resolved
+/// range escapes its parent (the `.debug_ranges` analogue of the inc-1
+/// `high_pc`-length bug). Fix it from the read side, where the base is still
+/// applied: resolve each DIE's absolute input ranges via `die_ranges`,
+/// `translate` both endpoints to the fused layout, and re-emit them as
+/// offsets relative to the output CU base. Drop sub-ranges (or the whole
+/// list) that can't be mapped — never emit a wrong or escaping range
+/// (LS-D-1).
+///
+/// Read and write units are paired by index and DIEs by pre-order position.
+/// If the tags at a position disagree the walk has lost alignment, so the
+/// rest of that unit is left untouched rather than risk rewriting the wrong
+/// DIE. Addresses that do not fit in `u32` are treated as unmappable, never
+/// truncated.
+fn correct_die_ranges<R: gimli::Reader<Offset = usize>>(
+    write_dwarf: &mut gimli::write::Dwarf,
+    read_dwarf: &gimli::read::Dwarf<R>,
+    remap: &AddressRemap,
+) {
+    use gimli::constants::{DW_AT_low_pc, DW_AT_ranges};
+    use gimli::write::{Address, AttributeValue, Range, RangeList};
+
+    let mut headers = read_dwarf.units();
+    let mut unit_idx = 0usize;
+    while let Ok(Some(header)) = headers.next() {
+        if unit_idx >= write_dwarf.units.count() {
+            break;
+        }
+        let uid = write_dwarf.units.id(unit_idx);
+        unit_idx += 1;
+        let Ok(unit) = read_dwarf.unit(header) else {
+            continue;
+        };
+
+        // Output CU base = the write root's already-remapped low_pc; v4
+        // range entries are emitted relative to it. No usable base (absent,
+        // not a constant address, or wider than 32 bits) → skip the unit.
+        let out_cu_base = {
+            let wunit = write_dwarf.units.get(uid);
+            let Some(AttributeValue::Address(Address::Constant(a))) =
+                wunit.get(wunit.root()).get(DW_AT_low_pc)
+            else {
+                continue;
+            };
+            let Ok(base) = u32::try_from(*a) else {
+                continue;
+            };
+            base
+        };
+
+        // Write DIE ids in the same pre-order the read cursor will walk.
+        let write_ids = {
+            let wunit = write_dwarf.units.get(uid);
+            let mut ids = Vec::new();
+            write_dies_preorder(wunit, wunit.root(), &mut ids);
+            ids
+        };
+        let mut write_ids = write_ids.into_iter();
+
+        let mut entries = unit.entries();
+        while let Ok(Some((_, entry))) = entries.next_dfs() {
+            let Some(die_id) = write_ids.next() else {
+                break;
+            };
+            // Lockstep guard: a tag mismatch means the two walks no longer
+            // describe the same DIE, so stop before touching the wrong one.
+            if write_dwarf.units.get(uid).get(die_id).tag() != entry.tag() {
+                break;
+            }
+
+            // Only DIEs that carry a range LIST (not low/high — those are the
+            // inc-1 path). `die_ranges` would otherwise synthesize a single
+            // range from low/high and we'd wrongly convert a subprogram.
+            if entry.attr_value(DW_AT_ranges).ok().flatten().is_none() {
+                continue;
+            }
+
+            let Ok(mut ranges) = read_dwarf.die_ranges(&unit, entry) else {
+                continue;
+            };
+            let mut new_ranges = Vec::new();
+            while let Ok(Some(r)) = ranges.next() {
+                // `die_ranges` yields absolute input addresses (base applied).
+                // `translate` takes a `u32`; an endpoint that does not fit is
+                // dropped as unmappable instead of being silently truncated.
+                let (Ok(begin), Ok(end)) = (u32::try_from(r.begin), u32::try_from(r.end)) else {
+                    continue;
+                };
+                let (Some(ob), Some(oe)) = (remap.translate(begin), remap.translate(end)) else {
+                    continue; // unmappable (dropped/tombstoned) sub-range → drop
+                };
+                // Require a strictly-positive extent: a degenerate `begin ==
+                // end` OffsetPair is rejected by gimli's range writer
+                // (`InvalidRange`), which would abort the whole DWARF write
+                // and strip. A zero-length range carries no information, so
+                // dropping it is lossless.
+                if ob >= out_cu_base && oe > ob {
+                    new_ranges.push(Range::OffsetPair {
+                        begin: u64::from(ob - out_cu_base),
+                        end: u64::from(oe - out_cu_base),
+                    });
+                }
+            }
+
+            let wunit = write_dwarf.units.get_mut(uid);
+            if new_ranges.is_empty() {
+                wunit.get_mut(die_id).delete(DW_AT_ranges);
+            } else {
+                let rid = wunit.ranges.add(RangeList(new_ranges));
+                wunit
+                    .get_mut(die_id)
+                    .set(DW_AT_ranges, AttributeValue::RangeListRef(rid));
+            }
+        }
+    }
+}
+
 /// Top-level entry point for [`crate::DwarfHandling::Remap`].
 ///
 /// Inspects the input components for `.debug_*` sections and, when
diff --git a/meld-core/tests/dwarf_remap_witness.rs b/meld-core/tests/dwarf_remap_witness.rs
index f73bbc6..2b79cc6 100644
--- a/meld-core/tests/dwarf_remap_witness.rs
+++ b/meld-core/tests/dwarf_remap_witness.rs
@@ -190,3 +190,99 @@ fn remapped_subprogram_low_pcs_match_fused_body_starts() {
          fused body starts, {tombstoned} dead-code subprograms tombstoned"
     );
 }
+
+/// A multi-inner-module fixture whose `.debug_ranges` carry inlined
+/// subroutines (base-relative offset pairs) — the #319 inc-2 surface.
+const RANGES_FIXTURE: &str = "../tests/wit_bindgen/fixtures/records.wasm";
+
+/// #319 inc 2: no `DW_AT_ranges` DIE may escape its enclosing subprogram in
+/// the remapped output. `records.wasm` carries inlined subroutines whose
+/// `.debug_ranges` are base-relative offset pairs; before the fix gimli
+/// mis-routed those *offsets* through `convert_address` (treating a small
+/// offset as an absolute address), sending the ranges to unrelated output
+/// locations — `llvm-dwarfdump --verify`: "DIE address ranges are not
+/// contained in its parent's ranges". This re-parses the fused DWARF and
+/// asserts every resolved range sits inside the enclosing subprogram.
+#[test]
+fn inc2_die_ranges_stay_within_enclosing_subprogram() {
+    if !std::path::Path::new(RANGES_FIXTURE).is_file() {
+        eprintln!("skipping: fixture not found at {RANGES_FIXTURE}");
+        return;
+    }
+    let input = std::fs::read(RANGES_FIXTURE).expect("read fixture");
+    let fused = fuse_remap(&input);
+    let sections = debug_sections(&fused);
+    assert!(
+        sections.contains_key(".debug_info"),
+        "Remap must emit remapped .debug_info for the ranges fixture"
+    );
+
+    let endian = gimli::LittleEndian;
+    let load = |id: gimli::SectionId| -> Result<gimli::EndianSlice<'_, gimli::LittleEndian>, gimli::Error> {
+        let data = sections.get(id.name()).map(|v| v.as_slice()).unwrap_or(&[]);
+        Ok(gimli::EndianSlice::new(data, endian))
+    };
+    let dwarf = gimli::Dwarf::load(load).expect("load output dwarf");
+
+    let mut checked = 0usize;
+    let mut units = dwarf.units();
+    while let Some(header) = units.next().expect("unit header") {
+        let unit = dwarf.unit(header).expect("parse unit");
+        let mut entries = unit.entries();
+        let mut depth = 0isize;
+        // Stack of enclosing subprogram ranges: (die_depth, low, end).
+        let mut encl: Vec<(isize, u64, u64)> = Vec::new();
+        while let Some((delta, entry)) = entries.next_dfs().expect("dfs walk") {
+            depth += delta;
+            while matches!(encl.last(), Some(&(d, _, _)) if d >= depth) {
+                encl.pop();
+            }
+            if entry.tag() == gimli::constants::DW_TAG_subprogram
+                && let Some(gimli::AttributeValue::Addr(low)) = entry
+                    .attr_value(gimli::constants::DW_AT_low_pc)
+                    .expect("low_pc")
+                && low != TOMBSTONE
+                && let Some(hp) = entry
+                    .attr_value(gimli::constants::DW_AT_high_pc)
+                    .expect("high_pc")
+            {
+                let end = match hp {
+                    gimli::AttributeValue::Udata(len) => low + len,
+                    gimli::AttributeValue::Addr(a) => a,
+                    _ => low,
+                };
+                encl.push((depth, low, end));
+            }
+            // A DIE carrying a range LIST, checked against its enclosing
+            // subprogram (the class that escaped in #319).
+            if entry
+                .attr_value(gimli::constants::DW_AT_ranges)
+                .expect("ranges attr")
+                .is_some()
+                && let Some(&(_, plow, pend)) = encl.last()
+            {
+                let mut ranges = dwarf.die_ranges(&unit, entry).expect("die_ranges");
+                while let Some(r) = ranges.next().expect("range") {
+                    // Skip tombstone / dead-code sentinel entries.
+                    if r.begin >= 0xffff_fffe {
+                        continue;
+                    }
+                    assert!(
+                        r.begin >= plow && r.end <= pend,
+                        "inc2: DW_AT_ranges [{:#x},{:#x}) escapes enclosing \
+                         subprogram [{plow:#x},{pend:#x}) — base-relative range \
+                         offset was remapped as an absolute address (#319)",
+                        r.begin,
+                        r.end
+                    );
+                    checked += 1;
+                }
+            }
+        }
+    }
+    assert!(
+        checked > 0,
+        "expected at least one DW_AT_ranges DIE inside a subprogram to check"
+    );
+    eprintln!("inc2: {checked} DW_AT_ranges entries all contained in their subprogram");
+}