From 7de033fde4548f8bd73c238711036803c26d733b Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Tue, 7 Mar 2023 12:19:37 +0000 Subject: [PATCH 1/2] Linking: fix up all types before handling functions or objects Function parameters may require application of type fixes: our linking ensures that all types have a unique tag, and some parameter's types may require such tag updates. We used to do this for objects already, but need to apply it to parameters as well. Also, the order matters: all types need to be sorted out first before attempting objects or functions. Spotted while trying to compile a particular version of Xen 4.11. Co-authored-by: Kiro --- .../ansi-c/linking_conflicts2/test.desc | 2 +- regression/cbmc/Linking10/main.c | 16 ++++++ regression/cbmc/Linking10/module.c | 16 ++++++ regression/cbmc/Linking10/test.desc | 13 +++++ regression/cbmc/Linking11/main.c | 16 ++++++ regression/cbmc/Linking11/module.c | 16 ++++++ regression/cbmc/Linking11/test.desc | 11 +++++ src/linking/linking.cpp | 49 ++++++++++++------- 8 files changed, 120 insertions(+), 19 deletions(-) create mode 100644 regression/cbmc/Linking10/main.c create mode 100644 regression/cbmc/Linking10/module.c create mode 100644 regression/cbmc/Linking10/test.desc create mode 100644 regression/cbmc/Linking11/main.c create mode 100644 regression/cbmc/Linking11/module.c create mode 100644 regression/cbmc/Linking11/test.desc diff --git a/regression/ansi-c/linking_conflicts2/test.desc b/regression/ansi-c/linking_conflicts2/test.desc index aa958cb6476..3d3eb99c6b5 100644 --- a/regression/ansi-c/linking_conflicts2/test.desc +++ b/regression/ansi-c/linking_conflicts2/test.desc @@ -3,6 +3,6 @@ main.c other.c ^EXIT=(64|1)$ ^SIGNAL=0$ -^\S+\.c(:\d+:\d+|\(\d+\)): error: conflicting function declarations 'foo'$ +^\S+\.c(:\d+:\d+|\(\d+\)): error: duplicate definition of function 'foo'$ -- ^warning: ignoring diff --git a/regression/cbmc/Linking10/main.c b/regression/cbmc/Linking10/main.c new file mode 100644 index 00000000000..b7bdd27a613 --- /dev/null +++ b/regression/cbmc/Linking10/main.c @@ -0,0 +1,16 @@ +// TU1: struct A is incomplete, struct B is complete +struct A; +struct B +{ + int y; +}; + +void f(struct A *a, struct B *b); + +int main() +{ + struct B b; + b.y = 10; + f((struct A *)0, &b); + return 0; +} diff --git a/regression/cbmc/Linking10/module.c b/regression/cbmc/Linking10/module.c new file mode 100644 index 00000000000..a43cd9ad156 --- /dev/null +++ b/regression/cbmc/Linking10/module.c @@ -0,0 +1,16 @@ +// TU2: struct A is complete, struct B is incomplete +// Param 1 (struct A *): old=incomplete, new=complete -> set_to_new=true +// Param 2 (struct B *): old=complete, new=incomplete -> set_to_new=false +// Without |= accumulation, replace would be false (from param 2), +// losing the replacement needed for param 1. +struct A +{ + int x; +}; +struct B; + +void f(struct A *a, struct B *b) +{ + if(a) + __CPROVER_assert(a->x == 0, "a->x accessible"); +} diff --git a/regression/cbmc/Linking10/test.desc b/regression/cbmc/Linking10/test.desc new file mode 100644 index 00000000000..f85ae6e0a95 --- /dev/null +++ b/regression/cbmc/Linking10/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +module.c +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Test that the replace flag is accumulated (|=) across function parameters +during linking. Param 1 has incomplete-to-complete struct (set_to_new=true), +param 2 has complete-to-incomplete struct (set_to_new=false). Without |=, +the replace flag from param 1 would be lost. diff --git a/regression/cbmc/Linking11/main.c b/regression/cbmc/Linking11/main.c new file mode 100644 index 00000000000..5050ae56953 --- /dev/null +++ b/regression/cbmc/Linking11/main.c @@ -0,0 +1,16 @@ +// TU1: struct S with member 'x' +struct S +{ + int x; +}; + +// Function declared here, defined in module.c with a different struct S +int g(struct S *p); + +int main() +{ + struct S s; + s.x = 42; + __CPROVER_assert(g(&s) == 42, "g returns s.x"); + return 0; +} diff --git a/regression/cbmc/Linking11/module.c b/regression/cbmc/Linking11/module.c new file mode 100644 index 00000000000..c2972a92f57 --- /dev/null +++ b/regression/cbmc/Linking11/module.c @@ -0,0 +1,16 @@ +// TU2: a different struct S (with members x and y) +// During linking, this struct S will be renamed because it conflicts +// with TU1's struct S. The parameter type of g must be updated to +// refer to TU1's struct S. +struct S +{ + int x; + int y; +}; + +// Declaration matching TU1's g — parameter type refers to this TU's struct S, +// which will be renamed during linking. +int g(struct S *p) +{ + return p->x; +} diff --git a/regression/cbmc/Linking11/test.desc b/regression/cbmc/Linking11/test.desc new file mode 100644 index 00000000000..84d09abb868 --- /dev/null +++ b/regression/cbmc/Linking11/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +module.c +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Test that struct tags in function parameter types are properly updated during +linking when the tag is renamed due to a conflict between translation units. diff --git a/src/linking/linking.cpp b/src/linking/linking.cpp index 6bde1eb04cd..93ae69394bf 100644 --- a/src/linking/linking.cpp +++ b/src/linking/linking.cpp @@ -215,9 +215,18 @@ void linkingt::duplicate_code_symbol( n_it!=new_t.parameters().end(); ++o_it, ++n_it) { - if(o_it->type() != n_it->type()) + if(o_it->type() == n_it->type()) + continue; + + adjust_type_infot info(old_symbol, new_symbol); + bool failed = adjust_object_type_rec(o_it->type(), n_it->type(), info); + replace |= info.set_to_new; + + if(failed) + { conflicts.push_back( std::make_pair(o_it->type(), n_it->type())); + } } if(o_it!=old_t.parameters().end()) { @@ -490,22 +499,20 @@ bool linkingt::adjust_object_type_rec( if(t1.id()==ID_pointer) { linking_diagnosticst diag{message_handler, ns}; -#if 0 - bool s=info.set_to_new; - if(adjust_object_type_rec(t1.subtype(), t2.subtype(), info)) + if(info.old_symbol.type.id() == ID_code) + { + diag.warning( + info.old_symbol, + info.new_symbol, + "pointer parameter types differ between declaration and definition"); + } + else { diag.warning( info.old_symbol, info.new_symbol, "conflicting pointer types for variable"); - info.set_to_new=s; } -#else - diag.warning( - info.old_symbol, - info.new_symbol, - "conflicting pointer types for variable"); -#endif if(info.old_symbol.is_extern && !info.new_symbol.is_extern) { @@ -1006,7 +1013,7 @@ void linkingt::copy_symbols( } // Move over all the non-colliding ones - std::unordered_set collisions; + std::unordered_set type_collisions, non_type_collisions; for(const auto &named_symbol : src_symbols) { @@ -1023,21 +1030,27 @@ void linkingt::copy_symbols( // new main_symbol_table.add(named_symbol.second); } + else if(named_symbol.second.is_type) + type_collisions.insert(named_symbol.first); else - collisions.insert(named_symbol.first); + non_type_collisions.insert(named_symbol.first); } } // Now do the collisions - for(const irep_idt &collision : collisions) + for(const irep_idt &collision : type_collisions) { symbolt &old_symbol = main_symbol_table.get_writeable_ref(collision); symbolt &new_symbol=src_symbols.at(collision); - if(new_symbol.is_type) - duplicate_type_symbol(old_symbol, new_symbol); - else - duplicate_non_type_symbol(old_symbol, new_symbol); + duplicate_type_symbol(old_symbol, new_symbol); + } + for(const irep_idt &collision : non_type_collisions) + { + symbolt &old_symbol = main_symbol_table.get_writeable_ref(collision); + symbolt &new_symbol = src_symbols.at(collision); + + duplicate_non_type_symbol(old_symbol, new_symbol); } // Apply type updates to initializers From 7fd4d33f795a6665df421d44c94a3e5886aa1b92 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 15 May 2026 19:38:27 +0000 Subject: [PATCH 2/2] Linking: structural-equivalence-aware type comparison When comparing types across translation units, the linker used to treat two struct/union types with the same C source tag as different whenever their irept representations differed at the byte level. In practice this happens for any non-trivial header chain where one TU sees a sub-type as complete and another sees only a forward declaration: type-name canonicalisation propagates the completion state up through any anonymous tag identifiers nested inside the outer type, and the byte-level comparison rejects the pair. The original RENAME_NEW path then cascades through do_type_dependencies() to every type that transitively references the mismatching pair, and through copy_symbols()'s rename_new_symbol pass all references in function/object types are rewritten to ST[...]-style synthetic identifiers. duplicate_code_symbol() compares the rewritten new-side type against the un-rewritten main-side type, sees them as different, and emits 'ignoring conflicting function declarations'. Add a recursive structural-equivalence check that follows tag references through both namespaces and returns true when: - a tag pair resolves (via main_ns / src_ns) to equivalent types; - one side is incomplete and the other complete (linking will keep the complete one); - both are complete struct/union with matching component count and names, and pairwise structurally-equivalent component types. Wire it into needs_renaming_type() and duplicate_type_symbol() as a final pass before defaulting to RENAME_NEW or to the 'unexpected difference between type symbols' error. The existing incomplete-vs-complete special cases retain priority so that the linker continues to merge incomplete declarations into complete ones. Add regression test Linking12 covering the exact pattern: an outer struct containing an anonymous union whose synthetic tag identifier diverges between TUs because of an inner forward-vs-complete type. Co-authored-by: Kiro --- regression/cbmc/Linking12/main.c | 45 ++++++ regression/cbmc/Linking12/module.c | 21 +++ regression/cbmc/Linking12/test.desc | 21 +++ src/linking/linking.cpp | 215 +++++++++++++++++++++++++++- src/linking/linking_class.h | 16 ++- 5 files changed, 309 insertions(+), 9 deletions(-) create mode 100644 regression/cbmc/Linking12/main.c create mode 100644 regression/cbmc/Linking12/module.c create mode 100644 regression/cbmc/Linking12/test.desc diff --git a/regression/cbmc/Linking12/main.c b/regression/cbmc/Linking12/main.c new file mode 100644 index 00000000000..7111ce81c17 --- /dev/null +++ b/regression/cbmc/Linking12/main.c @@ -0,0 +1,45 @@ +// Regression test for structural-equivalence-aware linker. +// +// Two translation units share the same C type `struct S`, but the irept +// representations of `S` differ at the byte level because the inner type +// `struct Inner` is COMPLETE in this TU and INCOMPLETE in module.c. +// CBMC's compile-time canonicalisation bakes that completion state into +// any anonymous-tag identifiers that transitively reference Inner; the +// linker used to flag this as a "conflicting function declarations" warning +// and produce 'pointer parameter types differ between declaration and +// definition' diagnostics. +// +// With the structural-equivalence-aware needs_renaming_type / duplicate_ +// type_symbol pair, the linker recognises the two `struct S` types as the +// same C type and merges them silently. + +struct Inner +{ + int payload; +}; + +struct S +{ + // Anonymous union whose synthetic tag identifier serializes Inner's + // completion state, making the byte-level encoding of S differ between + // this TU (Inner complete here) and module.c (Inner forward-declared + // there). + union + { + struct Inner *via_struct; + int *via_int; + } u; + int sentinel; +}; + +int read_sentinel(struct S *p); + +int main(void) +{ + struct Inner inner = {7}; + struct S s; + s.u.via_struct = &inner; + s.sentinel = 42; + __CPROVER_assert(read_sentinel(&s) == 42, "sentinel survived linking"); + return 0; +} diff --git a/regression/cbmc/Linking12/module.c b/regression/cbmc/Linking12/module.c new file mode 100644 index 00000000000..5a01a4f0b26 --- /dev/null +++ b/regression/cbmc/Linking12/module.c @@ -0,0 +1,21 @@ +// Translation unit where `struct Inner` is only forward-declared. +// This makes anonymous-tag identifiers inside `struct S` serialize +// differently from main.c's view, exercising the structural-equivalence +// path of the linker. + +struct Inner; + +struct S +{ + union + { + struct Inner *via_struct; + int *via_int; + } u; + int sentinel; +}; + +int read_sentinel(struct S *p) +{ + return p->sentinel; +} diff --git a/regression/cbmc/Linking12/test.desc b/regression/cbmc/Linking12/test.desc new file mode 100644 index 00000000000..b4023ec1d0f --- /dev/null +++ b/regression/cbmc/Linking12/test.desc @@ -0,0 +1,21 @@ +CORE +main.c +module.c --validate-goto-model --validate-ssa-equation +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +^.*conflicting (function|pointer parameter types|return types).*$ +^.*unexpected difference between type symbols.*$ +-- +Two translation units describe the same C type `struct S`, but the +byte-level irept encoding differs because the inner type `struct Inner` +is COMPLETE in main.c and INCOMPLETE in module.c. Type-name +canonicalisation propagates the completion state into anonymous-tag +identifiers nested inside `struct S`, so the linker used to see the +types as different and flag the function declaration as conflicting. + +The structural-equivalence-aware linker recognises the two types as +the same C type, allows the link to succeed silently, and produces a +goto model that passes --validate-goto-model. diff --git a/src/linking/linking.cpp b/src/linking/linking.cpp index 93ae69394bf..a2e205a4bba 100644 --- a/src/linking/linking.cpp +++ b/src/linking/linking.cpp @@ -750,9 +750,18 @@ void linkingt::duplicate_non_type_symbol( old_symbol.is_extern=old_symbol.is_extern && new_symbol.is_extern; } +// Forward declaration; defined after duplicate_type_symbol. +static bool structurally_equivalent( + const typet &t1, + const typet &t2, + const namespacet &main_ns, + const namespacet &src_ns, + std::unordered_set &visited); + void linkingt::duplicate_type_symbol( symbolt &old_symbol, - const symbolt &new_symbol) + const symbolt &new_symbol, + const namespacet &src_ns) { PRECONDITION(new_symbol.is_type); @@ -831,6 +840,21 @@ void linkingt::duplicate_type_symbol( } } + // Final permissive check: two complete struct/union types may differ at + // the irept level only because nested anonymous tags carry synthetic + // identifiers that depend on per-translation-unit completion state of + // sub-types. Run a recursive structural-equivalence check that follows + // tag references through both namespaces; if they describe the same C + // type, keep the main-table version. + { + std::unordered_set visited; + if(structurally_equivalent( + old_symbol.type, new_symbol.type, ns, src_ns, visited)) + { + return; + } + } + linking_diagnosticst diag{message_handler, ns}; diag.detailed_conflict_report( @@ -840,9 +864,169 @@ void linkingt::duplicate_type_symbol( old_symbol, new_symbol, "unexpected difference between type symbols"); } +/// Recursive structural equivalence under tag canonicalization. +/// +/// Two types are equivalent when: +/// - Their `id()` matches, the recursion can drill into them, and at the +/// leaves they compare byte-equal; OR +/// - They are tag references whose lookup in the respective namespaces +/// resolves to equivalent types; OR +/// - One is an incomplete struct/union with the same source tag as the other +/// complete struct/union (the linker keeps the complete one). +/// +/// `main_ns` resolves tag references on the `t1` (old / main symbol-table) +/// side; `src_ns` resolves tag references on the `t2` (new / src +/// symbol-table) side. This lets two complete structs whose bodies refer to +/// inner anonymous types whose synthetic identifiers differ only because of +/// nested complete-vs-incomplete state still be recognised as the same C type. +static bool structurally_equivalent( + const typet &t1, + const typet &t2, + const namespacet &main_ns, + const namespacet &src_ns, + std::unordered_set &visited) +{ + if(t1 == t2) + return true; + + // Both are tag references of the same kind: follow into their definitions. + if( + t1.id() == t2.id() && (t1.id() == ID_struct_tag || + t1.id() == ID_union_tag || t1.id() == ID_c_enum_tag)) + { + const irep_idt &id1 = to_tag_type(t1).get_identifier(); + const irep_idt &id2 = to_tag_type(t2).get_identifier(); + + // Cycle guard: once we have started comparing this pair, treat them as + // equivalent at any deeper occurrence to avoid infinite recursion on + // mutually-recursive struct definitions like list_head. + std::string key = id2string(id1) + "\x01" + id2string(id2); + if(!visited.insert(key).second) + return true; + + return structurally_equivalent( + follow_tags_symbols(main_ns, t1), + follow_tags_symbols(src_ns, t2), + main_ns, + src_ns, + visited); + } + + // One side is a tag reference, the other an inline struct/union/enum. + // Follow the tag side and retry. + if( + t1.id() == ID_struct_tag || t1.id() == ID_union_tag || + t1.id() == ID_c_enum_tag) + { + return structurally_equivalent( + follow_tags_symbols(main_ns, t1), t2, main_ns, src_ns, visited); + } + if( + t2.id() == ID_struct_tag || t2.id() == ID_union_tag || + t2.id() == ID_c_enum_tag) + { + return structurally_equivalent( + t1, follow_tags_symbols(src_ns, t2), main_ns, src_ns, visited); + } + + // Different ids and neither is a tag reference: not structurally equivalent. + if(t1.id() != t2.id()) + return false; + + // Two struct or union types. + if(t1.id() == ID_struct || t1.id() == ID_union) + { + const auto &s1 = to_struct_union_type(t1); + const auto &s2 = to_struct_union_type(t2); + + bool inc1 = s1.is_incomplete(); + bool inc2 = s2.is_incomplete(); + + // If either side is incomplete, accept the pair: linking will keep the + // complete one. We do not require the C source tag to match here because + // anonymous inner types may carry differing synthetic identifiers across + // translation units. + if(inc1 || inc2) + return true; + + // Both complete: same component count and components must match + // pairwise by name (and recursively by type). We do NOT require the C + // source tag attribute to match: at the outer call site this function is + // invoked because the symbol-table keys already agree, and recursive + // calls via tag references may compare anonymous structs whose synthetic + // tags legitimately differ. + const auto &c1 = s1.components(); + const auto &c2 = s2.components(); + if(c1.size() != c2.size()) + return false; + + for(std::size_t i = 0; i < c1.size(); ++i) + { + if(c1[i].get_name() != c2[i].get_name()) + return false; + if(!structurally_equivalent( + c1[i].type(), c2[i].type(), main_ns, src_ns, visited)) + return false; + } + return true; + } + + // Pointer types: follow base type. + if(t1.id() == ID_pointer) + { + return structurally_equivalent( + to_pointer_type(t1).base_type(), + to_pointer_type(t2).base_type(), + main_ns, + src_ns, + visited); + } + + // Array types: same element type and same size expression. + if(t1.id() == ID_array) + { + const auto &a1 = to_array_type(t1); + const auto &a2 = to_array_type(t2); + if(a1.size() != a2.size()) + return false; + return structurally_equivalent( + a1.element_type(), a2.element_type(), main_ns, src_ns, visited); + } + + // Code (function) types. + if(t1.id() == ID_code) + { + const auto &k1 = to_code_type(t1); + const auto &k2 = to_code_type(t2); + if(k1.has_ellipsis() != k2.has_ellipsis()) + return false; + if(k1.parameters().size() != k2.parameters().size()) + return false; + if(!structurally_equivalent( + k1.return_type(), k2.return_type(), main_ns, src_ns, visited)) + return false; + for(std::size_t i = 0; i < k1.parameters().size(); ++i) + { + if(!structurally_equivalent( + k1.parameters()[i].type(), + k2.parameters()[i].type(), + main_ns, + src_ns, + visited)) + return false; + } + return true; + } + + // Fall-back: byte-equality already failed at the top of this function and + // we do not have a more permissive notion for this type class. + return false; +} + linkingt::renamingt linkingt::needs_renaming_type( const symbolt &old_symbol, - const symbolt &new_symbol) + const symbolt &new_symbol, + const namespacet &src_ns) { PRECONDITION(new_symbol.is_type); @@ -906,6 +1090,22 @@ linkingt::renamingt linkingt::needs_renaming_type( } } + // Last chance: types that look different at the irept level may still + // describe the same C type when nested anonymous tags carry synthetic + // identifiers that depend on the per-translation-unit completion state of + // sub-types. Run a recursive structural-equivalence check that follows + // tag references through their respective namespaces. If it accepts the + // pair, no rename is needed -- linking will keep the main-table version + // and the new-side reference is recognised as semantically the same type. + { + std::unordered_set visited; + if(structurally_equivalent( + old_symbol.type, new_symbol.type, ns, src_ns, visited)) + { + return renamingt::NO_RENAMING; + } + } + return renamingt::RENAME_NEW; // different } @@ -1038,12 +1238,17 @@ void linkingt::copy_symbols( } // Now do the collisions + // Construct a namespace over the src symbol table so that tag references + // appearing on the new side can be resolved. This is needed for the + // structural-equivalence pre-checks below. + namespacet src_ns(src_symbol_table); + for(const irep_idt &collision : type_collisions) { symbolt &old_symbol = main_symbol_table.get_writeable_ref(collision); symbolt &new_symbol=src_symbols.at(collision); - duplicate_type_symbol(old_symbol, new_symbol); + duplicate_type_symbol(old_symbol, new_symbol, src_ns); } for(const irep_idt &collision : non_type_collisions) { @@ -1070,6 +1275,8 @@ bool linkingt::link(const symbol_table_baset &src_symbol_table) const unsigned errors_before = message_handler.get_message_count(messaget::M_ERROR); + namespacet src_ns(src_symbol_table); + // We do this in three phases. We first figure out which symbols need to // be renamed, and then build the renaming, and finally apply this // renaming in the second pass over the symbol table. @@ -1086,7 +1293,7 @@ bool linkingt::link(const symbol_table_baset &src_symbol_table) if(m_it != main_symbol_table.symbols.end()) { // duplicate - switch(needs_renaming(m_it->second, symbol_pair.second)) + switch(needs_renaming(m_it->second, symbol_pair.second, src_ns)) { case renamingt::NO_RENAMING: break; diff --git a/src/linking/linking_class.h b/src/linking/linking_class.h index 0f5c3f1cf23..49fe7509324 100644 --- a/src/linking/linking_class.h +++ b/src/linking/linking_class.h @@ -51,16 +51,21 @@ class linkingt RENAME_NEW }; - renamingt - needs_renaming_type(const symbolt &old_symbol, const symbolt &new_symbol); + renamingt needs_renaming_type( + const symbolt &old_symbol, + const symbolt &new_symbol, + const namespacet &src_ns); renamingt needs_renaming_non_type(const symbolt &old_symbol, const symbolt &new_symbol); - renamingt needs_renaming(const symbolt &old_symbol, const symbolt &new_symbol) + renamingt needs_renaming( + const symbolt &old_symbol, + const symbolt &new_symbol, + const namespacet &src_ns) { if(new_symbol.is_type) - return needs_renaming_type(old_symbol, new_symbol); + return needs_renaming_type(old_symbol, new_symbol, src_ns); else return needs_renaming_non_type(old_symbol, new_symbol); } @@ -114,7 +119,8 @@ class linkingt void duplicate_type_symbol( symbolt &old_symbol, - const symbolt &new_symbol); + const symbolt &new_symbol, + const namespacet &src_ns); symbol_table_baset &main_symbol_table; namespacet ns;