Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions meld-core/src/dwarf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,9 @@ fn rewrite_debug_sections(
// `llvm-dwarfdump --verify` flags as out-of-parent / overlapping. Fix
// the lengths to the fused layout now that `low_pc` is remapped.
correct_high_pc_lengths(&mut write_dwarf, remap);
// #319 inc 2: fix DW_AT_ranges lists (base-relative offsets that gimli
// mis-routes through convert_address) from the read side.
correct_die_ranges(&mut write_dwarf, &read_dwarf, remap);
// #144 inc 3: the synthetic `<meld-adapter>` unit rides the SAME
// write as the remapped original units, so every cross-section
// offset is computed in one shared offset space (appending
Expand Down Expand Up @@ -644,6 +647,138 @@ fn correct_high_pc_lengths(write_dwarf: &mut gimli::write::Dwarf, remap: &Addres
}
}

/// Append the ids of `id` and all of its descendants in `unit` to `out`, in
/// first-child-first pre-order.
///
/// This is the order `gimli::read`'s `next_dfs` yields, which lets a caller
/// step through the write-side DIEs and a read-side cursor in lockstep.
/// ([`correct_high_pc_lengths`] uses an order-agnostic stack walk instead,
/// because it never has to line up with the read side.)
///
/// * `unit` — the write unit that owns the DIE tree.
/// * `id`   — the DIE to start from (pass the unit root to cover the unit).
/// * `out`  — receives the ids; existing contents are left untouched.
fn write_dies_preorder(
    unit: &gimli::write::Unit,
    id: gimli::write::UnitEntryId,
    out: &mut Vec<gimli::write::UnitEntryId>,
) {
    // Explicit work stack instead of recursion. Children are pushed in
    // reverse so that the first child is popped (and therefore emitted)
    // first, giving pre-order.
    let mut pending = vec![id];
    while let Some(current) = pending.pop() {
        out.push(current);
        pending.extend(unit.get(current).children().rev().copied());
    }
}

/// Rewrite every `DW_AT_ranges` list to the fused layout (#319 inc 2).
///
/// DWARF v4 `.debug_ranges` entries are base-relative offset pairs. gimli
/// parses them as `AddressOrOffsetPair` and routes the *offsets* through
/// `convert_address` — which treats a small offset as an absolute code
/// address and remaps it to an unrelated output location, so the resolved
/// range escapes its parent (the `.debug_ranges` analogue of the inc-1
/// `high_pc`-length bug). Fix it from the read side, where the base is still
/// applied: resolve each DIE's absolute input ranges via `die_ranges`,
/// `translate` both endpoints to the fused layout, and re-emit them as
/// offsets relative to the output CU base. Drop sub-ranges (or the whole
/// list) that can't be mapped — never emit a wrong or escaping range
/// (LS-D-1).
///
/// # Parameters
///
/// * `write_dwarf` — the already-converted write-side DWARF; its
///   `DW_AT_ranges` attributes are replaced (or deleted) in place.
/// * `read_dwarf` — the original input DWARF the write side was built from.
/// * `remap` — input-address → fused-address translation.
///
/// # Pairing of read and write sides
///
/// Units are paired by index, and DIEs inside a unit are paired by walking
/// the read cursor (`next_dfs`) and the write tree (pre-order) in lockstep.
/// If the two walks ever disagree on a DIE's tag, the rest of that unit is
/// left untouched rather than risk attaching ranges to the wrong DIE.
///
/// # Errors
///
/// Best-effort and infallible: read errors end the affected walk, and units
/// without a usable constant `DW_AT_low_pc` on the root are skipped.
fn correct_die_ranges<R: gimli::read::Reader<Offset = usize>>(
    write_dwarf: &mut gimli::write::Dwarf,
    read_dwarf: &gimli::read::Dwarf<R>,
    remap: &AddressRemap,
) {
    use gimli::constants::{DW_AT_low_pc, DW_AT_ranges};
    use gimli::write::{Address, AttributeValue, Range, RangeList};

    // Checked narrowing before translation: an address that does not fit in
    // 32 bits must not be truncated onto some unrelated valid address.
    let translate = |addr: u64| u32::try_from(addr).ok().and_then(|a| remap.translate(a));

    let mut headers = read_dwarf.units();
    let mut unit_idx = 0usize;
    while let Ok(Some(header)) = headers.next() {
        if unit_idx >= write_dwarf.units.count() {
            break;
        }
        let uid = write_dwarf.units.id(unit_idx);
        // Advance before any `continue` so read/write units stay paired.
        unit_idx += 1;
        let Ok(unit) = read_dwarf.unit(header) else {
            continue;
        };

        // Output CU base = the write root's already-remapped low_pc; v4
        // range entries are emitted relative to it. No usable base → skip.
        let out_cu_base = {
            let wunit = write_dwarf.units.get(uid);
            match wunit.get(wunit.root()).get(DW_AT_low_pc) {
                Some(AttributeValue::Address(Address::Constant(a))) => match u32::try_from(*a) {
                    Ok(base) => base,
                    // A base beyond 32 bits cannot be compared against
                    // translated addresses; treat it as "no usable base".
                    Err(_) => continue,
                },
                _ => continue,
            }
        };

        // Write DIE ids in the same pre-order the read cursor will walk.
        let mut write_ids = {
            let wunit = write_dwarf.units.get(uid);
            let mut ids = Vec::new();
            write_dies_preorder(wunit, wunit.root(), &mut ids);
            ids.into_iter()
        };

        let mut entries = unit.entries();
        while let Ok(Some((_, entry))) = entries.next_dfs() {
            let Some(die_id) = write_ids.next() else {
                break;
            };

            // Lockstep sanity check: paired DIEs must have the same tag. A
            // mismatch means the walks have desynchronised, and every later
            // pairing in this unit would be wrong — stop here.
            if write_dwarf.units.get(uid).get(die_id).tag() != entry.tag() {
                break;
            }

            // Only DIEs that carry a range LIST (not low/high — those are the
            // inc-1 path). `die_ranges` would otherwise synthesize a single
            // range from low/high and we'd wrongly convert a subprogram.
            if entry.attr_value(DW_AT_ranges).ok().flatten().is_none() {
                continue;
            }

            let Ok(mut ranges) = read_dwarf.die_ranges(&unit, entry) else {
                continue;
            };
            let mut new_ranges = Vec::new();
            while let Ok(Some(r)) = ranges.next() {
                // `die_ranges` yields absolute input addresses (base applied).
                let (Some(ob), Some(oe)) = (translate(r.begin), translate(r.end)) else {
                    continue; // unmappable (dropped/tombstoned) sub-range → drop
                };
                // Require a strictly-positive extent: a degenerate `begin ==
                // end` OffsetPair is rejected by gimli's range writer
                // (`InvalidRange`), which would abort the whole DWARF write
                // and strip. A zero-length range carries no information, so
                // dropping it is lossless. `ob >= out_cu_base` also guards
                // the subtractions below against underflow.
                if ob >= out_cu_base && oe > ob {
                    new_ranges.push(Range::OffsetPair {
                        begin: u64::from(ob - out_cu_base),
                        end: u64::from(oe - out_cu_base),
                    });
                }
            }

            let wunit = write_dwarf.units.get_mut(uid);
            if new_ranges.is_empty() {
                // Nothing survived the remap: remove the attribute entirely
                // rather than keep the mis-converted list.
                wunit.get_mut(die_id).delete(DW_AT_ranges);
            } else {
                let rid = wunit.ranges.add(RangeList(new_ranges));
                wunit
                    .get_mut(die_id)
                    .set(DW_AT_ranges, AttributeValue::RangeListRef(rid));
            }
        }
    }
}

/// Top-level entry point for [`crate::DwarfHandling::Remap`].
///
/// Inspects the input components for `.debug_*` sections and, when
Expand Down
96 changes: 96 additions & 0 deletions meld-core/tests/dwarf_remap_witness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,99 @@ fn remapped_subprogram_low_pcs_match_fused_body_starts() {
fused body starts, {tombstoned} dead-code subprograms tombstoned"
);
}

/// A multi-inner-module fixture whose `.debug_ranges` carry inlined
/// subroutines (base-relative offset pairs) — the #319 inc-2 surface.
///
/// The path is relative, so it is resolved against the working directory of
/// the test process. NOTE(review): presumably the crate directory under
/// `cargo test` — confirm. The test that reads it skips itself when the file
/// is not present.
const RANGES_FIXTURE: &str = "../tests/wit_bindgen/fixtures/records.wasm";

/// #319 inc 2: in the remapped output, a DIE's `DW_AT_ranges` must never
/// escape the subprogram that encloses it.
///
/// `records.wasm` carries inlined subroutines whose `.debug_ranges` are
/// base-relative offset pairs. Before the fix gimli mis-routed those
/// *offsets* through `convert_address` (treating a small offset as an
/// absolute address), sending the ranges to unrelated output locations —
/// `llvm-dwarfdump --verify`: "DIE address ranges are not contained in its
/// parent's ranges".
///
/// The test fuses the fixture, re-parses the emitted DWARF, tracks the
/// innermost live (non-tombstoned) subprogram while walking each unit, and
/// asserts that every resolved range of a range-list DIE lies inside it. It
/// is skipped (passes without checking anything) when the fixture file is
/// absent.
#[test]
fn inc2_die_ranges_stay_within_enclosing_subprogram() {
    use gimli::constants::{DW_AT_high_pc, DW_AT_low_pc, DW_AT_ranges, DW_TAG_subprogram};

    if !std::path::Path::new(RANGES_FIXTURE).is_file() {
        eprintln!("skipping: fixture not found at {RANGES_FIXTURE}");
        return;
    }

    let fixture_bytes = std::fs::read(RANGES_FIXTURE).expect("read fixture");
    let fused = fuse_remap(&fixture_bytes);
    let sections = debug_sections(&fused);
    assert!(
        sections.contains_key(".debug_info"),
        "Remap must emit remapped .debug_info for the ranges fixture"
    );

    // Section loader for gimli: a section missing from the output is
    // presented as an empty slice.
    let load = |id: gimli::SectionId| -> Result<gimli::EndianSlice<'_, gimli::LittleEndian>, gimli::Error> {
        let bytes: &[u8] = match sections.get(id.name()) {
            Some(v) => v.as_slice(),
            None => &[],
        };
        Ok(gimli::EndianSlice::new(bytes, gimli::LittleEndian))
    };
    let dwarf = gimli::Dwarf::load(load).expect("load output dwarf");

    let mut checked = 0usize;
    let mut unit_headers = dwarf.units();
    while let Some(header) = unit_headers.next().expect("unit header") {
        let unit = dwarf.unit(header).expect("parse unit");
        let mut cursor = unit.entries();
        let mut depth = 0isize;
        // Stack of enclosing subprogram ranges: (die_depth, low, end).
        let mut encl: Vec<(isize, u64, u64)> = Vec::new();

        while let Some((delta, entry)) = cursor.next_dfs().expect("dfs walk") {
            depth += delta;

            // Leave every subprogram that is not an ancestor of this DIE.
            while encl.last().is_some_and(|&(d, _, _)| d >= depth) {
                encl.pop();
            }

            // Enter a live subprogram: remember its [low, end) extent.
            if entry.tag() == DW_TAG_subprogram
                && let Some(gimli::AttributeValue::Addr(low)) =
                    entry.attr_value(DW_AT_low_pc).expect("low_pc")
                && low != TOMBSTONE
                && let Some(hp) = entry.attr_value(DW_AT_high_pc).expect("high_pc")
            {
                let end = match hp {
                    // Length form: high_pc is an offset from low_pc.
                    gimli::AttributeValue::Udata(len) => low + len,
                    // Address form: high_pc is already absolute.
                    gimli::AttributeValue::Addr(a) => a,
                    _ => low,
                };
                encl.push((depth, low, end));
            }

            // Only a DIE carrying a range LIST is of interest, checked
            // against its enclosing subprogram (the class that escaped in
            // #319).
            let has_range_list = entry
                .attr_value(DW_AT_ranges)
                .expect("ranges attr")
                .is_some();
            if !has_range_list {
                continue;
            }
            let Some(&(_, plow, pend)) = encl.last() else {
                continue;
            };

            let mut ranges = dwarf.die_ranges(&unit, entry).expect("die_ranges");
            while let Some(r) = ranges.next().expect("range") {
                // Tombstone / dead-code sentinel entries are not checked.
                if r.begin < 0xffff_fffe {
                    assert!(
                        r.begin >= plow && r.end <= pend,
                        "inc2: DW_AT_ranges [{:#x},{:#x}) escapes enclosing \
                         subprogram [{plow:#x},{pend:#x}) — base-relative range \
                         offset was remapped as an absolute address (#319)",
                        r.begin,
                        r.end
                    );
                    checked += 1;
                }
            }
        }
    }

    assert!(
        checked > 0,
        "expected at least one DW_AT_ranges DIE inside a subprogram to check"
    );
    eprintln!("inc2: {checked} DW_AT_ranges entries all contained in their subprogram");
}
Loading