From 9d2fb95d9a598bf89ecebe6cae1bf9f3f9cfb645 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 18 Jun 2026 21:14:11 +0000 Subject: [PATCH] SMT2: encode structs and multi-constructor ADTs as SMT-LIB datatypes Declare/parse struct and #adt_constructors (algebraic data) types as SMT-LIB datatypes keyed by tag/#index_type, and handle their construction, member access, and 'with' updates (parse_struct, convert_struct, convert_member, convert_with, find_symbols_rec). Co-authored-by: Kiro --- src/solvers/smt2/smt2_conv.cpp | 398 ++++++++++++++++++++++++++------- 1 file changed, 316 insertions(+), 82 deletions(-) diff --git a/src/solvers/smt2/smt2_conv.cpp b/src/solvers/smt2/smt2_conv.cpp index e5a7d4ff0f2..3f96247f805 100644 --- a/src/solvers/smt2/smt2_conv.cpp +++ b/src/solvers/smt2/smt2_conv.cpp @@ -691,7 +691,49 @@ smt2_convt::parse_struct(const irept &src, const struct_typet &type) if(use_datatypes) { - // Structs look like: + // Check for multi-constructor ADT + const auto &adt_ctors = type.find(irep_idt("#adt_constructors")); + if( + adt_ctors.is_not_nil() && adt_ctors.get_sub().size() > 1 && + !components.empty() && components[0].get_name() == "$tag") + { + // Parse multi-constructor value like (from_int 42) + // The first sub is the constructor name + const irep_idt &ctor_name = + src.get_sub().empty() ? src.id() : src.get_sub()[0].id(); + // Find matching constructor to get tag value and field mapping + for(std::size_t ci = 0; ci < adt_ctors.get_sub().size(); ++ci) + { + const auto &ctor = adt_ctors.get_sub()[ci]; + if(ctor.find(ID_name).id() == ctor_name) + { + // Set $tag + result.operands()[0] = from_integer(ci, components[0].type()); + // Parse constructor fields + const auto &fields = ctor.find(irep_idt("fields")).get_sub(); + std::size_t arg_idx = 1; // skip constructor name in src + for(const auto &field : fields) + { + for(std::size_t k = 0; k < components.size(); ++k) + { + if( + components[k].get_name() == field.id() && + arg_idx < src.get_sub().size()) + { + result.operands()[k] = + parse_rec(src.get_sub()[arg_idx], components[k].type()); + ++arg_idx; + } + } + } + return result; + } + } + // Constructor not found — return default + return result; + } + + // Single-constructor: Structs look like: // (mk-struct.1 ... ) std::size_t j = 1; for(std::size_t i=0; i 1 && + !components.empty() && components[0].get_name() == "$tag") + { + const auto &tag_op = expr.operands()[0]; + const auto &ctors = adt_ctors_cs.get_sub(); + // Optimization: if tag is constant, directly emit the matching + // constructor + mp_integer const_tag = -1; + if(tag_op.is_constant()) + to_integer(to_constant_expr(tag_op), const_tag); + + if(const_tag >= 0 && const_tag < (mp_integer)ctors.size()) + { + std::size_t ci = numeric_cast_v(const_tag); + const auto &ctor_fields = ctors[ci].find(irep_idt("fields")).get_sub(); + if(ctor_fields.empty()) + out << ctors[ci].find(ID_name).id(); + else + { + out << "(" << ctors[ci].find(ID_name).id(); + for(const auto &field : ctor_fields) + for(std::size_t j = 0; j < components.size(); ++j) + if(components[j].get_name() == field.id()) + { + out << " "; + convert_expr(expr.operands()[j]); + } + out << ")"; + } + return; + } + // Non-constant tag: emit ite chain + for(std::size_t ci = 0; ci < ctors.size(); ++ci) + { + if(ci + 1 < ctors.size()) + { + out << "(ite (= "; + convert_expr(tag_op); + out << " " << ci << ") "; + } + const auto &ctor_fields = ctors[ci].find(irep_idt("fields")).get_sub(); + if(ctor_fields.empty()) + out << ctors[ci].find(ID_name).id(); + else + { + out << "(" << ctors[ci].find(ID_name).id(); + for(const auto &field : ctor_fields) + for(std::size_t j = 0; j < components.size(); ++j) + if(components[j].get_name() == field.id()) + { + out << " "; + convert_expr(expr.operands()[j]); + } + out << ")"; + } + if(ci + 1 < ctors.size()) + out << " "; + } + for(std::size_t ci = 0; ci + 1 < ctors.size(); ++ci) + out << ")"; + return; + } out << "(mk-" << smt_typename; std::size_t i=0; @@ -4624,11 +4729,54 @@ void smt2_convt::convert_with(const with_exprt &expr) { const std::string &smt_typename = datatype_map.at(expr_type); - out << "(update-" << smt_typename << "." << component_name << " "; - convert_expr(expr.old()); - out << " "; - convert_expr(value); - out << ")"; + const auto &adt_ctors_cw = + struct_type.find(irep_idt("#adt_constructors")); + if(adt_ctors_cw.is_not_nil() && adt_ctors_cw.get_sub().size() > 1) + { + const auto &ctors = adt_ctors_cw.get_sub(); + for(std::size_t ci = 0; ci < ctors.size(); ++ci) + { + const auto &cn = ctors[ci].find(ID_name).id(); + const auto &flds = ctors[ci].find(irep_idt("fields")).get_sub(); + if(ci + 1 < ctors.size()) + { + out << "(ite ((_ is " << cn << ") "; + convert_expr(expr.old()); + out << ") "; + } + if(flds.empty()) + out << cn; + else + { + out << "(" << cn; + for(const auto &fld : flds) + { + out << " "; + if(fld.id() == component_name) + convert_expr(value); + else + { + out << "(" << smt_typename << "." << fld.id() << " "; + convert_expr(expr.old()); + out << ")"; + } + } + out << ")"; + } + if(ci + 1 < ctors.size()) + out << " "; + } + for(std::size_t ci = 0; ci + 1 < ctors.size(); ++ci) + out << ")"; + } + else + { + out << "(update-" << smt_typename << "." << component_name << " "; + convert_expr(expr.old()); + out << " "; + convert_expr(value); + out << ")"; + } } else { @@ -4849,11 +4997,34 @@ void smt2_convt::convert_member(const member_exprt &expr) { const std::string &smt_typename = datatype_map.at(struct_type); - out << "(" << smt_typename << "." - << struct_type.get_component(name).get_name() - << " "; - convert_expr(struct_op); - out << ")"; + const auto &adt_ctors_cm = + struct_type.find(irep_idt("#adt_constructors")); + if( + adt_ctors_cm.is_not_nil() && adt_ctors_cm.get_sub().size() > 1 && + name == "$tag") + { + const auto &ctors = adt_ctors_cm.get_sub(); + for(std::size_t i = 0; i < ctors.size(); ++i) + { + if(i + 1 < ctors.size()) + { + out << "(ite ((_ is " << ctors[i].find(ID_name).id() << ") "; + convert_expr(struct_op); + out << ") " << i << " "; + } + else + out << i; + } + for(std::size_t i = 0; i + 1 < ctors.size(); ++i) + out << ")"; + } + else + { + out << "(" << smt_typename << "." + << struct_type.get_component(name).get_name() << " "; + convert_expr(struct_op); + out << ")"; + } } else { @@ -5426,6 +5597,7 @@ void smt2_convt::find_symbols(const exprt &expr) for(const auto &symbol : q_expr.variables()) { const auto identifier = symbol.identifier(); + find_symbols(symbol.type()); auto id_entry = identifier_map.insert({identifier, identifiert{symbol.type(), true}}); shadowed_syms.insert( @@ -6184,6 +6356,10 @@ void smt2_convt::find_symbols_rec( const array_typet &array_type=to_array_type(type); find_symbols(array_type.size()); find_symbols_rec(array_type.element_type(), recstack); + const typet &idx_type = + static_cast(type.find(ID_C_index_type)); + if(idx_type.is_not_nil()) + find_symbols_rec(idx_type, recstack); } else if(type.id()==ID_complex) { @@ -6217,10 +6393,50 @@ void smt2_convt::find_symbols_rec( if(use_datatypes && datatype_map.find(type)==datatype_map.end()) { - const std::string smt_typename = - "struct." + std::to_string(datatype_map.size()); + // Reuse an existing datatype for a struct with the same tag name only + // when its components match: same-tagged variants may differ in + // non-semantic annotations (e.g. array sizes), which is fine, but they + // may also genuinely differ in their fields (e.g. a forward-declared vs. + // a fully-defined struct). Reusing across differing field sets would + // emit member selectors (e.g. struct.N._flags) that the chosen datatype + // never declared, which the solver rejects as an unknown constant. + const irep_idt &tag = to_struct_type(type).get_tag(); + const struct_typet::componentst &new_components = + to_struct_type(type).components(); + const auto same_component_names = + [&new_components](const struct_typet &other) + { + const struct_typet::componentst &other_components = other.components(); + if(other_components.size() != new_components.size()) + return false; + for(std::size_t i = 0; i < new_components.size(); ++i) + { + if(other_components[i].get_name() != new_components[i].get_name()) + return false; + } + return true; + }; + std::string smt_typename; + if(!tag.empty()) + { + for(const auto &entry : datatype_map) + { + if( + entry.first.id() == ID_struct && + to_struct_type(entry.first).get_tag() == tag && + same_component_names(to_struct_type(entry.first))) + { + smt_typename = entry.second; + break; + } + } + } + if(smt_typename.empty()) + { + smt_typename = "struct." + std::to_string(datatype_map.size()); + need_decl = true; + } datatype_map[type] = smt_typename; - need_decl=true; } const struct_typet::componentst &components = @@ -6234,81 +6450,99 @@ void smt2_convt::find_symbols_rec( { const std::string &smt_typename = datatype_map.at(type); - // We're going to create a datatype named something like `struct.0'. - // It's going to have a single constructor named `mk-struct.0' with an - // argument for each member of the struct. The declaration that - // creates this type looks like: - // - // (declare-datatypes ((struct.0 0)) (((mk-struct.0 - // (struct.0.component1 type1) - // ... - // (struct.0.componentN typeN))))) - out << "(declare-datatypes ((" << smt_typename << " 0)) " - << "(((mk-" << smt_typename << " "; - - for(const auto &component : components) + const auto &adt_ctors = + to_struct_type(type).find(irep_idt("#adt_constructors")); + if(adt_ctors.is_not_nil() && adt_ctors.get_sub().size() > 1) { - if(is_zero_width(component.type(), ns)) - continue; - - out << "(" << smt_typename << "." << component.get_name() - << " "; - convert_type(component.type()); - out << ") "; + out << "(declare-datatypes ((" << smt_typename << " 0)) (("; + for(const auto &ctor : adt_ctors.get_sub()) + { + const auto &decl_fields = ctor.find(irep_idt("fields")).get_sub(); + if(decl_fields.empty()) + out << "(" << ctor.find(ID_name).id() << ") "; + else + { + out << "(" << ctor.find(ID_name).id(); + for(const auto &field : decl_fields) + for(const auto &comp : components) + if(comp.get_name() == field.id()) + { + out << " (" << smt_typename << "." << field.id() << " "; + convert_type(comp.type()); + out << ")"; + } + out << ") "; + } + } + out << ")))\n"; } - - out << "))))" << "\n"; - - // Let's also declare convenience functions to update individual - // members of the struct whil we're at it. The functions are - // named like `update-struct.0.component1'. Their declarations - // look like: - // - // (declare-fun update-struct.0.component1 - // ((s struct.0) ; first arg -- the struct to update - // (v type1)) ; second arg -- the value to update - // struct.0 ; the output type - // (mk-struct.0 ; build the new struct... - // v ; the updated value - // (struct.0.component2 s) ; retain the other members - // ... - // (struct.0.componentN s))) - - for(struct_union_typet::componentst::const_iterator - it=components.begin(); - it!=components.end(); - ++it) + else { - if(is_zero_width(it->type(), ns)) - continue; + out << "(declare-datatypes ((" << smt_typename << " 0)) " + << "(((mk-" << smt_typename << " "; + for(const auto &component : components) + { + if(is_zero_width(component.type(), ns)) + continue; + out << "(" << smt_typename << "." << component.get_name() << " "; + convert_type(component.type()); + out << ") "; + } + out << "))))" + << "\n"; + } - const struct_union_typet::componentt &component=*it; - out << "(define-fun update-" << smt_typename << "." - << component.get_name() << " " - << "((s " << smt_typename << ") " - << "(v "; - convert_type(component.type()); - out << ")) " << smt_typename << " " - << "(mk-" << smt_typename - << " "; - - for(struct_union_typet::componentst::const_iterator - it2=components.begin(); - it2!=components.end(); - ++it2) + if(adt_ctors.is_nil() || adt_ctors.get_sub().size() <= 1) + { + // Let's also declare convenience functions to update individual + // members of the struct whil we're at it. The functions are + // named like `update-struct.0.component1'. Their declarations + // look like: + // + // (declare-fun update-struct.0.component1 + // ((s struct.0) ; first arg -- the struct to update + // (v type1)) ; second arg -- the value to update + // struct.0 ; the output type + // (mk-struct.0 ; build the new struct... + // v ; the updated value + // (struct.0.component2 s) ; retain the other members + // ... + // (struct.0.componentN s))) + + for(struct_union_typet::componentst::const_iterator it = + components.begin(); + it != components.end(); + ++it) { - if(it==it2) - out << "v "; - else if(!is_zero_width(it2->type(), ns)) + if(is_zero_width(it->type(), ns)) + continue; + + const struct_union_typet::componentt &component = *it; + out << "(define-fun update-" << smt_typename << "." + << component.get_name() << " " + << "((s " << smt_typename << ") " + << "(v "; + convert_type(component.type()); + out << ")) " << smt_typename << " " + << "(mk-" << smt_typename << " "; + + for(struct_union_typet::componentst::const_iterator it2 = + components.begin(); + it2 != components.end(); + ++it2) { - out << "(" << smt_typename << "." - << it2->get_name() << " s) "; + if(it == it2) + out << "v "; + else if(!is_zero_width(it2->type(), ns)) + { + out << "(" << smt_typename << "." << it2->get_name() << " s) "; + } } - } - - out << "))" << "\n"; - } + out << "))" + << "\n"; + } + } // end skip update for multi-constructor out << "\n"; } }