From 8d36980649ea727f719120e38f6c4e1da1fce9fd Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 18 Jun 2026 21:14:11 +0000 Subject: [PATCH] SMT2: encode mathematical and string/regex types and operations Convert mathematical integer/real types to SMT-LIB Int/Real (type2id, convert_type), encode mathematical mod/div semantics, string/regex types, string constants, and map Strata's Str.*/Re.* function applications to the corresponding SMT-LIB string/regex operators in convert_expr. Co-authored-by: Kiro --- src/solvers/smt2/smt2_conv.cpp | 125 ++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 3 deletions(-) diff --git a/src/solvers/smt2/smt2_conv.cpp b/src/solvers/smt2/smt2_conv.cpp index e5a7d4ff0f2..6dcc69279cc 100644 --- a/src/solvers/smt2/smt2_conv.cpp +++ b/src/solvers/smt2/smt2_conv.cpp @@ -1155,6 +1155,27 @@ std::string smt2_convt::type2id(const typet &type) const { return "A" + type2id(to_array_type(type).element_type()); } + else if(type.id() == ID_integer) + { + return "Int"; + } + else if(type.id() == ID_real) + { + return "Real"; + } + else if(type.id() == ID_string) + { + return "String"; + } + else if(type.id() == ID_mathematical_function) + { + std::string result = "MF"; + const auto &mf = to_mathematical_function_type(type); + for(const auto &d : mf.domain()) + result += "_" + type2id(d); + result += "_" + type2id(mf.codomain()); + return result; + } else { UNREACHABLE; @@ -2687,8 +2708,66 @@ void smt2_convt::convert_expr(const exprt &expr) else if(expr.id() == ID_function_application) { const auto &function_application_expr = to_function_application_expr(expr); + + // Check for string operations by looking at the function symbol name + std::string fn_name; + if(function_application_expr.function().id() == ID_symbol) + fn_name = id2string( + to_symbol_expr(function_application_expr.function()).get_identifier()); + + // Map Strata function names to SMT-LIB names for string/regex ops + std::string smt_name; + if(fn_name == "Str.Concat") + smt_name = "str.++"; + else if(fn_name == "Str.Length") + smt_name = "str.len"; + else if(fn_name == "Str.Substr") + smt_name = "str.substr"; + else if(fn_name == "Str.ToRegEx") + smt_name = "str.to_re"; + else if(fn_name == "Str.InRegEx") + smt_name = "str.in_re"; + else if(fn_name == "Re.AllChar") + smt_name = "re.allchar"; + else if(fn_name == "Re.All") + smt_name = "re.all"; + else if(fn_name == "Re.Range") + smt_name = "re.range"; + else if(fn_name == "Re.Concat") + smt_name = "re.concat"; + else if(fn_name == "Re.Star") + smt_name = "re.*"; + else if(fn_name == "Re.Plus") + smt_name = "re.+"; + else if(fn_name == "Re.Loop") + smt_name = "re.loop"; + else if(fn_name == "Re.Union") + smt_name = "re.union"; + else if(fn_name == "Re.Inter") + smt_name = "re.inter"; + else if(fn_name == "Re.Comp") + smt_name = "re.comp"; + else if(fn_name == "Re.None") + smt_name = "re.none"; + + if(!smt_name.empty()) + { + const auto &args = function_application_expr.arguments(); + if(args.empty()) + out << smt_name; + else + { + out << '(' << smt_name; + for(const auto &arg : args) + { + out << ' '; + convert_expr(arg); + } + out << ')'; + } + } // do not use parentheses if there function is a constant - if(function_application_expr.arguments().empty()) + else if(function_application_expr.arguments().empty()) { convert_expr(function_application_expr.function()); } @@ -3798,6 +3877,21 @@ void smt2_convt::convert_constant(const constant_exprt &expr) const auto value_int = numeric_cast_v(expr); out << "(_ bv" << (value_int - range_type.from()) << " " << width << ")"; } + else if(expr_type.id()==ID_string) + { + const std::string &value = id2string(expr.get_value()); + out << "\""; + for(char c : value) + { + if(c == '"') + out << "\"\""; + else if(c == '\\') + out << "\\\\"; + else + out << c; + } + out << "\""; + } else UNEXPECTEDCASE("unknown constant: "+expr_type.id_string()); } @@ -3832,6 +3926,20 @@ void smt2_convt::convert_mod(const mod_exprt &expr) convert_expr(expr.op1()); out << ")"; } + else if(expr.type().id() == ID_integer) + { + // SMT-LIB2 mod for mathematical integers (truncation semantics) + // Use (- a (* (div a b) b)) to get truncation mod + out << "(- "; + convert_expr(expr.op0()); + out << " (* (div "; + convert_expr(expr.op0()); + out << " "; + convert_expr(expr.op1()); + out << ") "; + convert_expr(expr.op1()); + out << "))"; + } else UNEXPECTEDCASE("unsupported type for mod: "+expr.type().id_string()); } @@ -4368,7 +4476,15 @@ void smt2_convt::convert_div(const div_exprt &expr) expr.type().id() == ID_rational || expr.type().id() == ID_integer || expr.type().id() == ID_natural || expr.type().id() == ID_real) { - out << "(/ "; + if( + expr.type().id() == ID_integer || expr.type().id() == ID_natural) + { + out << "(div "; + } + else + { + out << "(/ "; + } convert_expr(expr.op0()); out << " "; convert_expr(expr.op1()); @@ -6038,7 +6154,6 @@ void smt2_convt::convert_type(const typet &type) if(type.id()==ID_array) { const array_typet &array_type=to_array_type(type); - CHECK_RETURN(array_type.size().is_not_nil()); // we always use array theory for top-level arrays const typet &subtype = array_type.element_type(); @@ -6163,6 +6278,10 @@ void smt2_convt::convert_type(const typet &type) UNEXPECTEDCASE("unsupported range type"); out << "(_ BitVec " << address_bits(range_type.size()) << ")"; } + else if(type.id()==ID_string) + out << "String"; + else if(type.id()==ID_regex) + out << "RegLan"; else { UNEXPECTEDCASE("unsupported type: "+type.id_string());