diff --git a/doc/man/cbmc.1 b/doc/man/cbmc.1 index e9b5d0dfc5d..1773ed7f834 100644 --- a/doc/man/cbmc.1 +++ b/doc/man/cbmc.1 @@ -182,6 +182,11 @@ disable built\-in abstract C library limit size of nondet (e.g. input) object tree; at level N pointers are set to null .TP +\fB\-\-max\-dynamic\-object\-instances\fR N +hard cap on total dynamic allocations the object +factory will emit when nondet\-initialising a +single root; beyond N, pointers are null +.TP \fB\-\-min\-null\-tree\-depth\fR N minimum level at which a pointer can first be NULL in a recursively nondet initialized struct diff --git a/doc/man/goto-instrument.1 b/doc/man/goto-instrument.1 index 5d971063565..9e37f03abfc 100644 --- a/doc/man/goto-instrument.1 +++ b/doc/man/goto-instrument.1 @@ -758,6 +758,11 @@ replace calls to \fIf\fR with calls to \fIg\fR limit size of nondet (e.g. input) object tree; at level N pointers are set to null .TP +\fB\-\-max\-dynamic\-object\-instances\fR \fIN\fR +hard cap on total dynamic allocations the object +factory will emit when nondet\-initialising a +single root; beyond N, pointers are null +.TP \fB\-\-min\-null\-tree\-depth\fR \fIN\fR minimum level at which a pointer can first be NULL in a recursively nondet initialized struct diff --git a/regression/goto-instrument/generate-function-body-deep-struct-cap/main.c b/regression/goto-instrument/generate-function-body-deep-struct-cap/main.c new file mode 100644 index 00000000000..767f76dfdc9 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-deep-struct-cap/main.c @@ -0,0 +1,63 @@ +// Regression test for LIM-008: --generate-function-body havoc on a +// function whose return type transitively reaches wide, deep, but +// *non-recursive* struct hierarchies used to hang (and then OOM) +// because the `max_nondet_tree_depth` cap only fires when the same +// struct tag re-appears on the pointer chain. Kernel-style struct +// hierarchies are rarely self-referential in the first few levels, +// so the cap never fires and the object factory generates an +// exponentially large init body. +// +// The new `max_dynamic_object_instances` parameter hard-caps the +// total number of dynamic objects the factory will emit, making +// the body-generation terminate in linear time regardless of +// struct-hierarchy topology. +// +// This test wires up a moderate-depth, branching, non-recursive +// struct hierarchy and asserts that body generation completes +// quickly (no hang) and that the resulting goto binary is cbmc- +// verifiable in reasonable time. + +typedef struct l4 +{ + int a, b, c, d; +} l4_t; + +typedef struct l3 +{ + l4_t *p1; + l4_t *p2; + l4_t *p3; + int x; +} l3_t; + +typedef struct l2 +{ + l3_t *p1; + l3_t *p2; + l3_t *p3; + int x; +} l2_t; + +typedef struct l1 +{ + l2_t *p1; + l2_t *p2; + l2_t *p3; + int x; +} l1_t; + +// Function with an l1_t * return type: without the cap, havoc-body +// generation recursively nondet-inits l1 -> l2 -> l3 -> l4, +// branching 3-way at each level, producing an exponentially large +// init body. With the cap, generation terminates after a bounded +// number of allocations. +l1_t *make_l1(void); + +int main(void) +{ + l1_t *p = make_l1(); + // No substantive property under test; the point of the regression + // is that goto-instrument completes in bounded time / space and + // cbmc then trivially verifies. + return p != (l1_t *)0 ? 1 : 0; +} diff --git a/regression/goto-instrument/generate-function-body-deep-struct-cap/test.desc b/regression/goto-instrument/generate-function-body-deep-struct-cap/test.desc new file mode 100644 index 00000000000..81286fca38a --- /dev/null +++ b/regression/goto-instrument/generate-function-body-deep-struct-cap/test.desc @@ -0,0 +1,17 @@ +CORE +main.c +--generate-function-body make_l1 --generate-function-body-options havoc --max-dynamic-object-instances 50 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^\*\*\*\* WARNING: no body for function +-- +LIM-008 regression. Without the per-root allocation cap introduced +in c_object_factory_parameterst, havoc body generation for a +function returning a pointer to a wide, deep, non-recursive struct +hierarchy hangs and then OOMs. With the cap (explicit here to +keep the test fast; the default is higher), generation terminates +in bounded time. EXIT=0 suffices — the point is that goto- +instrument does not hang or abort; no substantive property on the +resulting goto binary. diff --git a/src/ansi-c/ansi_c_language.h b/src/ansi-c/ansi_c_language.h index 1fdf9db2321..9d075998e95 100644 --- a/src/ansi-c/ansi_c_language.h +++ b/src/ansi-c/ansi_c_language.h @@ -20,16 +20,19 @@ Author: Daniel Kroening, kroening@kroening.com // clang-format off #define OPT_ANSI_C_LANGUAGE \ "(max-nondet-tree-depth):" \ + "(max-dynamic-object-instances):" \ "(min-null-tree-depth):" #define HELP_ANSI_C_LANGUAGE \ " {y--max-nondet-tree-depth} {uN} \t " \ "limit size of nondet (e.g. input) object tree; at level {uN} pointers are " \ "set to null\n" \ + " {y--max-dynamic-object-instances} {uN} \t " \ + "hard cap on total dynamic allocations the object factory will emit when " \ + "nondet-initialising a single root; beyond {uN}, pointers are null\n" \ " {y--min-null-tree-depth} {uN} \t " \ "minimum level at which a pointer can first be NULL in a recursively " \ - "nondet initialized struct\n" \ -// clang-format on + "nondet initialized struct\n" // clang-format on class ansi_c_languaget:public languaget { diff --git a/src/ansi-c/c_nondet_symbol_factory.cpp b/src/ansi-c/c_nondet_symbol_factory.cpp index 7f32b16e122..b45716ddc70 100644 --- a/src/ansi-c/c_nondet_symbol_factory.cpp +++ b/src/ansi-c/c_nondet_symbol_factory.cpp @@ -80,6 +80,22 @@ void symbol_factoryt::gen_nondet_init( } } + // Belt-and-braces: cap the total number of dynamic objects the + // factory will emit for a single nondet-init root. The depth cap + // above only fires when the same struct tag re-appears on the + // pointer chain; wide-but-non-recursive struct hierarchies (most + // kernel structs) bypass it entirely and blow up exponentially in + // the absence of this cap. See LIM-008. + if( + dynamic_object_instance_count >= + object_factory_params.max_dynamic_object_instances) + { + assignments.add( + code_frontend_assignt{expr, null_pointer_exprt{pointer_type}, loc}); + return; + } + ++dynamic_object_instance_count; + code_blockt non_null_inst; typet object_type = base_type; diff --git a/src/ansi-c/c_nondet_symbol_factory.h b/src/ansi-c/c_nondet_symbol_factory.h index 2d7d768233c..c42e30eb01d 100644 --- a/src/ansi-c/c_nondet_symbol_factory.h +++ b/src/ansi-c/c_nondet_symbol_factory.h @@ -29,6 +29,14 @@ class symbol_factoryt const lifetimet lifetime; + /// Running count of dynamic objects this factory has allocated so + /// far. Compared against + /// `object_factory_params.max_dynamic_object_instances`; when the + /// cap is hit, pointers are initialised to NULL instead of + /// recursively expanded. See the parameter's docstring for the + /// kernel-struct motivation. + std::size_t dynamic_object_instance_count = 0; + public: typedef std::set recursion_sett; diff --git a/src/util/object_factory_parameters.cpp b/src/util/object_factory_parameters.cpp index 9926f6e86d9..3537243cf47 100644 --- a/src/util/object_factory_parameters.cpp +++ b/src/util/object_factory_parameters.cpp @@ -23,6 +23,11 @@ void object_factory_parameterst::set(const optionst &options) max_nondet_tree_depth = options.get_unsigned_int_option("max-nondet-tree-depth"); } + if(options.is_set("max-dynamic-object-instances")) + { + max_dynamic_object_instances = + options.get_unsigned_int_option("max-dynamic-object-instances"); + } if(options.is_set("min-null-tree-depth")) { min_null_tree_depth = @@ -63,6 +68,12 @@ void parse_object_factory_options(const cmdlinet &cmdline, optionst &options) options.set_option( "max-nondet-tree-depth", cmdline.get_value("max-nondet-tree-depth")); } + if(cmdline.isset("max-dynamic-object-instances")) + { + options.set_option( + "max-dynamic-object-instances", + cmdline.get_value("max-dynamic-object-instances")); + } if(cmdline.isset("min-null-tree-depth")) { options.set_option( diff --git a/src/util/object_factory_parameters.h b/src/util/object_factory_parameters.h index 12073e28b41..de8c5de6c72 100644 --- a/src/util/object_factory_parameters.h +++ b/src/util/object_factory_parameters.h @@ -57,6 +57,29 @@ struct object_factory_parameterst /// initialized to their full depth. size_t max_nondet_tree_depth = 5; + /// Maximum total number of dynamic objects the object factory will + /// allocate on behalf of a single nondet-initialisation root. + /// + /// The ``max_nondet_tree_depth`` cap above only fires when the same + /// struct tag appears twice on the same pointer chain, which is the + /// pattern the object factory historically worried about (linked + /// lists, trees). Kernel struct hierarchies, by contrast, are + /// typically *wide and deep without revisiting the same type*: a + /// single ``struct sock *`` pointer transitively reaches hundreds + /// of further pointer-to-struct fields, none of which cycle back + /// to ``struct sock``, so the depth cap never fires and the object + /// factory generates an exponentially large init body. + /// + /// This hard cap on allocation count provides a belt-and-braces + /// termination guarantee independent of the depth cap. When the + /// cap is hit, further pointers are initialized to NULL rather + /// than to freshly-allocated sub-structs. LIM-008 in + /// integration/linux/CBMC_LIMITATIONS.md is the concrete motivator: + /// ``goto-instrument --generate-function-body af_alg_alloc_areq + /// --generate-function-body-options havoc`` on a real kernel goto + /// binary used to hang indefinitely because of this runaway. + size_t max_dynamic_object_instances = 1000; + /// To force a certain depth of non-null objects. /// The default is that objects are 'maybe null' up to the nondet tree depth. /// Examples: diff --git a/src/util/symbol_table_base.h b/src/util/symbol_table_base.h index 976d108f4b7..a07af780f35 100644 --- a/src/util/symbol_table_base.h +++ b/src/util/symbol_table_base.h @@ -54,10 +54,19 @@ class symbol_table_baset virtual ~symbol_table_baset(); +protected: + /// Per-prefix "next search-start" hint consulted by + /// `next_unused_suffix(prefix)`. Mutable so the hint can be updated + /// through the const read API. See that function's docstring for + /// semantics. + mutable std::unordered_map suffix_hint_cache; + +public: /// Find smallest unused integer i so that prefix + std::to_string(i) - /// does not exist in the list \p symbols. + /// does not exist in the list \p symbols, starting the search at + /// \p start_number. /// \param prefix: A string denoting the prefix we want to find the - /// smallest suffix of. + /// smallest unused suffix of. /// \param start_number: The starting suffix number to search from. /// \return The small unused suffix size. std::size_t @@ -70,9 +79,32 @@ class symbol_table_baset return start_number; } + /// Find an unused integer i so that prefix + std::to_string(i) does + /// not exist in the list \p symbols. + /// + /// Uses a per-prefix hint cache so that repeated allocation of names + /// sharing the same prefix is amortised O(1) per call rather than + /// O(N) where N is the number of already-allocated symbols under + /// that prefix. The object factory (see + /// `symbol_factoryt::gen_nondet_init` for a \c struct field expansion + /// of a deep kernel struct) can otherwise call this function + /// O(N) times with a linear scan each, producing O(N^2) behaviour + /// that manifests as a goto-instrument hang on large goto binaries. + /// + /// The returned suffix is guaranteed to be unused, but is no longer + /// guaranteed to be the strictly smallest such value: if a prior + /// symbol with a lower suffix has been erased from the table since + /// the hint was last updated, this function will not re-discover + /// that gap. No caller in-tree today relies on the "smallest" + /// property (callers only need uniqueness); the derived + /// `symbol_table_builder` has already had this semantics since + /// its introduction and remains correct under this change. virtual std::size_t next_unused_suffix(const std::string &prefix) const { - return next_unused_suffix(prefix, 0); + auto it = suffix_hint_cache.insert({prefix, 0}).first; + const std::size_t free_suffix = next_unused_suffix(prefix, it->second); + it->second = free_suffix + 1; + return free_suffix; } /// Permits implicit cast to const symbol_tablet &