From 51b95fb0cee168003dab201cb019d489d2260a7a Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 28 May 2026 06:26:49 +0000 Subject: [PATCH 1/2] boolbv_index: handle incomplete extern array types in symbol registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When boolbv flattens an array index expression where the array is a symbol of unbounded array_typet (e.g. an incomplete declaration like 'extern T arr[]'), it tried to register that symbol with boolbv_mapt::get_literals using a width of array_width_opt.value_or(0). Passing 0 created a zero-width entry that tripped the size-equals-width invariant in get_literals when the same symbol was — or had been — registered at a non-zero width via its element-typed access path (e.g. T arr[i] returns a T-width value). Skip the registration when the array width is unknown. The unknown-width array cannot be flattened anyway; the array decision procedure handles it through the byte-operator and free-variable branches that this registration is purely a caching helper for. This was first surfaced by integration/linux scans of kernel sources that include , which declares 'extern const unsigned char _ctype[]' — an incomplete extern array referenced by the is*() classifier macros that expand to '_ctype[c]'. CBMC aborted at boolbv_map.cpp:68 on every such TU. Regression test: regression/cbmc/incomplete_extern_array1/ Co-authored-by: Kiro --- .../cbmc/incomplete_extern_array1/main.c | 26 +++++++++++++++ .../cbmc/incomplete_extern_array1/test.desc | 15 +++++++++ src/solvers/flattening/boolbv_index.cpp | 32 +++++++++++++++---- 3 files changed, 67 insertions(+), 6 deletions(-) create mode 100644 regression/cbmc/incomplete_extern_array1/main.c create mode 100644 regression/cbmc/incomplete_extern_array1/test.desc diff --git a/regression/cbmc/incomplete_extern_array1/main.c b/regression/cbmc/incomplete_extern_array1/main.c new file mode 100644 index 00000000000..815dd157aa4 --- /dev/null +++ b/regression/cbmc/incomplete_extern_array1/main.c @@ -0,0 +1,26 @@ +// Regression test for the fix to: +// src/solvers/flattening/boolbv_index.cpp +// where indexing into an `extern T arr[]` (incomplete array) +// at multiple indices used to register the symbol with width +// 0 in boolbv_mapt, then trip the size-equals-width invariant +// in get_literals. +// +// The Linux kernel hits this via 's +// `extern const unsigned char _ctype[]` declaration combined +// with the `__ismask(c)` macro that expands to `_ctype[c]`. + +extern const unsigned char _ctype[]; + +int nondet_int(void); + +int main(void) +{ + int c = nondet_int(); + if(c >= 0 && c < 128) + { + unsigned char m1 = _ctype[c]; + unsigned char m2 = _ctype[c + 1]; + return m1 + m2; + } + return 0; +} diff --git a/regression/cbmc/incomplete_extern_array1/test.desc b/regression/cbmc/incomplete_extern_array1/test.desc new file mode 100644 index 00000000000..b2eec64d70d --- /dev/null +++ b/regression/cbmc/incomplete_extern_array1/test.desc @@ -0,0 +1,15 @@ +CORE no-new-smt +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +^Invariant check failed +-- +This exercises the boolbv (SAT) flattening path. Tagged no-new-smt +because the incremental SMT2 back-end has a separate, unrelated defect on +incomplete extern arrays (it declares the array symbol twice, e.g. +"constant '_ctype#1' ... already declared"); that is out of scope for this +boolbv_index fix. diff --git a/src/solvers/flattening/boolbv_index.cpp b/src/solvers/flattening/boolbv_index.cpp index 2e400a57345..2880a46b904 100644 --- a/src/solvers/flattening/boolbv_index.cpp +++ b/src/solvers/flattening/boolbv_index.cpp @@ -51,10 +51,22 @@ bvt boolbvt::convert_index(const index_exprt &expr) final_array.id() == ID_symbol || final_array.id() == ID_nondet_symbol) { const auto &array_width_opt = bv_width.get_width_opt(array_type); - (void)map.get_literals( - final_array.get(ID_identifier), - array_type, - array_width_opt.value_or(0)); + const std::size_t array_width = array_width_opt.value_or(0); + // Record the array symbol's type. Skip only when the symbol is + // already registered at a *different* width: an incomplete extern + // array (e.g. `extern T arr[]`, width 0 here) may already be + // registered at a non-zero width via its element-typed access path, + // and re-registering at width 0 would trip get_literals' + // size-equals-width invariant. A first-time registration (the + // common case, including string refinement) is preserved. + const irep_idt &array_id = final_array.get(ID_identifier); + const auto existing = map.get_map_entry(array_id); + if( + !existing.has_value() || + existing->get().literal_map.size() == array_width) + { + (void)map.get_literals(array_id, array_type, array_width); + } } // make sure we have the index in the cache @@ -71,8 +83,16 @@ bvt boolbvt::convert_index(const index_exprt &expr) if(array.id() == ID_symbol || array.id() == ID_nondet_symbol) { const auto &array_width_opt = bv_width.get_width_opt(array_type); - (void)map.get_literals( - array.get(ID_identifier), array_type, array_width_opt.value_or(0)); + // See comment above for the same case in the byte-operator branch. + const std::size_t array_width = array_width_opt.value_or(0); + const irep_idt &array_id = array.get(ID_identifier); + const auto existing = map.get_map_entry(array_id); + if( + !existing.has_value() || + existing->get().literal_map.size() == array_width) + { + (void)map.get_literals(array_id, array_type, array_width); + } } // make sure we have the index in the cache From 232ff031e7a451c8afa31d76b1f87438fb9f2e67 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 19 Jun 2026 08:54:06 +0000 Subject: [PATCH 2/2] smt2_incremental: declare each SSA symbol at most once The incremental SMT2 back-end keyed its set of already-declared functions on the full expression (irept). An incomplete `extern T arr[]` symbol can be reached via two expressions that share the same SSA identifier but differ only in their (sort-equivalent) array-type irep -- e.g. _ctype[c] and _ctype[c + 1] from . Those are distinct expression keys, so both reached send_function_definition and emitted a second (declare-fun |_ctype#1| () (Array ...)), which z3 rejects with "constant '_ctype#1' (with the given signature) already declared". Deduplicate by SSA identifier in send_function_definition: if the symbol is already in identifier_table, map the later expression to the existing declaration instead of re-declaring it. With this, regression/cbmc/incomplete_extern_array1 also passes under the incremental SMT2 back-end, so its no-new-smt tag is removed. Co-authored-by: Kiro --- regression/cbmc/incomplete_extern_array1/test.desc | 8 +------- .../smt2_incremental_decision_procedure.cpp | 13 +++++++++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/regression/cbmc/incomplete_extern_array1/test.desc b/regression/cbmc/incomplete_extern_array1/test.desc index b2eec64d70d..813f0f82075 100644 --- a/regression/cbmc/incomplete_extern_array1/test.desc +++ b/regression/cbmc/incomplete_extern_array1/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE main.c ^EXIT=0$ @@ -7,9 +7,3 @@ main.c -- ^warning: ignoring ^Invariant check failed --- -This exercises the boolbv (SAT) flattening path. Tagged no-new-smt -because the incremental SMT2 back-end has a separate, unrelated defect on -incomplete extern arrays (it declares the array symbol twice, e.g. -"constant '_ctype#1' ... already declared"); that is out of scope for this -boolbv_index fix. diff --git a/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp b/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp index 2817ba012fe..4003dfa4fcc 100644 --- a/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp +++ b/src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp @@ -171,6 +171,19 @@ void send_function_definition( &expression_identifiers, std::unordered_map &identifier_table) { + // The same SSA identifier can be reached via two expressions that differ + // only in their (sort-equivalent) type irep -- e.g. an incomplete + // `extern T arr[]` whose array type is represented differently at + // different access sites. Such expressions are distinct keys in + // expression_identifiers, so both reach this point, but they denote the + // same symbol and must be declared to the solver only once. Map any + // later expression to the already-declared identifier. + const auto existing = identifier_table.find(symbol_identifier); + if(existing != identifier_table.end()) + { + expression_identifiers.emplace(expr, existing->second); + return; + } const smt_declare_function_commandt function{ smt_identifier_termt( symbol_identifier, convert_type_to_smt_sort(expr.type())),