cprover
Loading...
Searching...
No Matches
enumerative_loop_contracts_synthesizer.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Enumerative Loop Contracts Synthesizer
4
5Author: Qinheping Hu
6
7\*******************************************************************/
8
11
13
14#include <util/arith_tools.h>
15#include <util/c_types.h>
16#include <util/find_symbols.h>
17#include <util/format_expr.h>
19#include <util/replace_symbol.h>
20#include <util/simplify_expr.h>
21
28
29#include "cegis_verifier.h"
30#include "expr_enumerator.h"
31
32// substitute all tmp_post variables with their origins in `expr`
34 exprt &dest,
35 const std::unordered_map<exprt, exprt, irep_hash> &tmp_post_map)
36{
38 for(const auto &tmp_post_entry : tmp_post_map)
39 {
41 can_cast_expr<symbol_exprt>(tmp_post_entry.first),
42 "tmp_post variables must be symbol expression.");
43 const auto &tmp_post_symbol =
44 expr_dynamic_cast<symbol_exprt>(tmp_post_entry.first);
45 r.insert(tmp_post_symbol, tmp_post_entry.second);
46 }
47 r.replace(dest);
48}
49
50std::vector<exprt> construct_terminals(const std::set<symbol_exprt> &symbols)
51{
52 // Construct a vector of terminal expressions.
53 // Terminals include:
54 // 1: scalar type variables and their loop_entry version.
55 // 2: offsets of pointers and loop_entry of pointers.
56 // 3: constants 0 and 1.
57
58 std::vector<exprt> result;
59 for(const auto &e : symbols)
60 {
61 if(e.type().id() == ID_unsignedbv)
62 {
63 // For a variable v with primitive type, we add
64 // v, __CPROVER_loop_entry(v)
65 // into the result.
66 result.push_back(typecast_exprt(e, size_type()));
67 result.push_back(
68 typecast_exprt(unary_exprt(ID_loop_entry, e, e.type()), size_type()));
69 }
70 if((e.type().id() == ID_signedbv))
71 {
72 result.push_back(e);
73 result.push_back(unary_exprt(ID_loop_entry, e, e.type()));
74 }
75 if((e.type().id() == ID_pointer))
76 {
77 // For a variable v with pointer type, we add
78 // __CPROVER_pointer_offset(v),
79 // __CPROVER_pointer_offset(__CPROVER_loop_entry(v))
80 // into the result.
81 result.push_back(pointer_offset_exprt(e, size_type()));
82 result.push_back(pointer_offset_exprt(
83 unary_exprt(ID_loop_entry, e, e.type()), size_type()));
84 }
85 }
86 result.push_back(from_integer(1, signed_short_int_type()));
87 result.push_back(from_integer(0, signed_short_int_type()));
88 return result;
89}
90
92{
93 for(auto &function_p : goto_model.goto_functions.function_map)
94 {
95 natural_loops_mutablet natural_loops;
96 natural_loops(function_p.second.body);
97
98 // TODO: use global may alias instead.
99 local_may_aliast local_may_alias(function_p.second);
100
101 // Initialize invariants for unannotated loops as true
102 for(const auto &loop_head_and_content : natural_loops.loop_map)
103 {
104 // Ignore empty loops and self-looped node.
105 if(loop_head_and_content.second.size() <= 1)
106 continue;
107
108 goto_programt::targett loop_end =
110 loop_head_and_content.first, loop_head_and_content.second);
111
112 loop_idt new_id(function_p.first, loop_end->loop_number);
113 loop_cfg_infot cfg_info(function_p.second, loop_head_and_content.second);
114
115 log.progress() << "Initialize candidates for the loop at "
116 << loop_end->source_location() << messaget::eom;
117
118 // Turn do while loops of form
119 //
120 // do
121 // { loop body }
122 // while (0)
123 //
124 // into non-loop block
125 //
126 // { loop body }
127 // skip
128 //
129 if(
130 loop_end->is_goto() &&
131 simplify_expr(loop_end->condition(), ns) == false_exprt())
132 {
133 loop_end->turn_into_skip();
134 continue;
135 }
136
137 // we only synthesize invariants and assigns for unannotated loops
138 if(loop_end->condition().find(ID_C_spec_loop_invariant).is_nil())
139 {
140 // Store the loop guard if exists.
141 auto loop_head = get_loop_head(
142 loop_end->loop_number,
143 goto_model.goto_functions.function_map[function_p.first]);
144
145 if(loop_head->has_condition())
146 neg_guards[new_id] = loop_head->condition();
147
148 // Initialize invariant clauses as `true`.
151
152 // Initialize assigns clauses.
153 if(loop_end->condition().find(ID_C_spec_assigns).is_nil())
154 {
155 assigns_map[new_id] = {};
156
157 // Infer loop assigns using alias analysis.
159 local_may_alias, loop_head_and_content.second, assigns_map[new_id]);
160
161 // remove loop-local symbols from the inferred set
162 cfg_info.erase_locals(assigns_map[new_id]);
163
164 // If the set contains pairs (i, a[i]),
165 // we widen them to (i, __CPROVER_POINTER_OBJECT(a))
166 widen_assigns(assigns_map[new_id], ns);
167 }
168 }
169 }
170 }
171 log.debug() << "Finished candidates initialization." << messaget::eom;
172}
173
175 const exprt &checked_pointer,
176 const std::list<loop_idt> cause_loop_ids)
177{
178 auto new_assign = checked_pointer;
179
180 // Add the new assigns target to the most-inner loop that doesn't contain
181 // the new assigns target yet.
182 for(const auto &loop_id : cause_loop_ids)
183 {
184 // Widen index and dereference to whole object.
185 if(new_assign.id() == ID_index || new_assign.id() == ID_dereference)
186 {
187 address_of_exprt address_of_new_assigns(new_assign);
189 if(!is_constant(address_of_new_assigns))
190 {
191 new_assign = pointer_object(address_of_new_assigns);
192 }
193 }
194
195 const auto &source_location =
197 loop_id.loop_number,
198 goto_model.goto_functions.function_map[loop_id.function_id])
199 ->source_location();
200
201 // Simplify expr to avoid offset that is out of scope.
202 // In the case of nested loops, After widening, pointer_object(ptr + i)
203 // can contain the pointer ptr in the scope of both loops, and the offset
204 // i which is only in the scope of the inner loop.
205 // After simplification, pointer_object(ptr + i) -> pointer_object(ptr).
206 new_assign = simplify_expr(new_assign, ns);
207 new_assign.add_source_location() = source_location;
208
209 // Avoid adding same target.
210 if(assigns_map[loop_id].insert(new_assign).second)
211 return;
212 }
213 INVARIANT(false, "Failed to synthesize a new assigns target.");
214}
215
217{
218 for(auto &goto_function : goto_model.goto_functions.function_map)
219 {
220 for(const auto &instruction : goto_function.second.body.instructions)
221 {
222 // tmp_post variables are symbol lhs of ASSIGN.
223 if(!instruction.is_assign() || instruction.assign_lhs().id() != ID_symbol)
224 continue;
225
226 const auto symbol_lhs =
227 expr_try_dynamic_cast<symbol_exprt>(instruction.assign_lhs());
228
229 // tmp_post variables have identifiers with the prefix tmp::tmp_post.
230 if(
231 id2string(symbol_lhs->get_identifier()).find("tmp::tmp_post") !=
232 std::string::npos)
233 {
234 tmp_post_map[instruction.assign_lhs()] = instruction.assign_rhs();
235 }
236 }
237 }
238}
239
240std::set<symbol_exprt>
242 const loop_idt &cause_loop_id,
243 const exprt &new_clause,
244 const std::set<exprt> &live_vars)
245{
246 // We overapproximate dependent symbols as all symbols in live variables.
247 // TODO: using flow-dependency analysis to rule out not dependent symbols.
248
249 std::set<symbol_exprt> result;
250 for(const auto &e : live_vars)
251 find_symbols(e, result);
252
253 // Erase all variables added during loop transformations---they are not in
254 // the original symbol table.
255 for(auto it = result.begin(); it != result.end();)
256 {
257 if(original_symbol_table.lookup(it->get_identifier()) == nullptr)
258 {
259 it = result.erase(it);
260 }
261 else
262 it++;
263 }
264
265 return result;
266}
267
269 const exprt &violated_predicate)
270{
271 // For the case where the violated predicate is dependent on no instruction
272 // other than loop havocing, the violated_predicate is
273 // WLP(loop_body_before_violation, violated_predicate).
274 // TODO: implement a more complete WLP algorithm.
275 return violated_predicate;
276}
277
279 const exprt &checked_pointer)
280{
281 // The same object predicate says that the checked pointer points to the
282 // same object as it pointed before entering the loop.
283 // It works for the array-manipulating loops where only offset of pointer
284 // are modified but not the object pointers point to.
285 return same_object(
286 checked_pointer, unary_exprt(ID_loop_entry, checked_pointer));
287}
288
290 const std::vector<exprt> terminal_symbols,
291 const loop_idt &cause_loop_id,
292 const irep_idt &violation_id)
293{
294 // Synthesis of strengthening clauses is a enumerate-and-check process.
295 // We first construct the enumerator for the following grammar.
296 // And then enumerate clause and check that if it can make the invariant
297 // inductive.
298
299 // Initialize factory representing grammar
300 // StartBool -> StartBool && StartBool | Start == Start
301 // | StartBool <= StartBool | StartBool < StartBool
302 // Start -> Start + Start | terminal_symbols
303 // where a0, and a1 are symbol expressions.
305 recursive_enumerator_placeholdert start_bool_ph(factory, "StartBool", ns);
306 recursive_enumerator_placeholdert start_ph(factory, "Start", ns);
307
308 // terminals
309 expr_sett leafexprs(terminal_symbols.begin(), terminal_symbols.end());
310
311 // rules for Start
312 enumeratorst start_args;
313 // Start -> terminals
314 leaf_enumeratort leaf_g(leafexprs, ns);
315 start_args.push_back(&leaf_g);
316
317 // Start -> Start + Start
319 ID_plus,
320 start_ph,
321 start_ph,
322 [](const partitiont &partition) {
323 if(partition.size() <= 1)
324 return true;
325 return partition.front() == 1;
326 },
327 ns);
328 start_args.push_back(&plus_g);
329
330 // rules for StartBool
331 enumeratorst start_bool_args;
332 // StartBool -> StartBool && StartBool
333 binary_functional_enumeratort and_g(ID_and, start_bool_ph, start_bool_ph, ns);
334 start_bool_args.push_back(&and_g);
335 // StartBool -> Start == Start
336 binary_functional_enumeratort equal_g(ID_equal, start_ph, start_ph, ns);
337 start_bool_args.push_back(&equal_g);
338 // StartBool -> Start <= Start
339 binary_functional_enumeratort le_g(ID_le, start_ph, start_ph, ns);
340 start_bool_args.push_back(&le_g);
341 // StartBool -> Start < Start
342 binary_functional_enumeratort lt_g(ID_lt, start_ph, start_ph, ns);
343 start_bool_args.push_back(&lt_g);
344
345 // add the two nonterminals to the factory
346 factory.attach_productions("Start", start_args);
347 factory.attach_productions("StartBool", start_bool_args);
348
349 // size of candidates we are searching now,
350 // starting from 0
351 size_t size_bound = 0;
352
353 // Start to enumerate and check.
354 while(true)
355 {
356 size_bound++;
357
358 // generate candidate and verify
359 for(auto strengthening_candidate : start_bool_ph.enumerate(size_bound))
360 {
361 log.progress() << "Verifying candidate: "
362 << format(strengthening_candidate) << messaget::eom;
364 new_in_clauses[cause_loop_id] =
365 and_exprt(new_in_clauses[cause_loop_id], strengthening_candidate);
367 new_pos_clauses[cause_loop_id] =
368 and_exprt(new_pos_clauses[cause_loop_id], strengthening_candidate);
369 const auto &combined_invariant = combine_in_and_post_invariant_clauses(
370 new_in_clauses, new_pos_clauses, neg_guards);
371
372 // The verifier we use to check current invariant candidates.
373 cegis_verifiert verifier(
374 combined_invariant, assigns_map, goto_model, options, log);
375
376 // A good strengthening clause if
377 // 1. all checks pass, or
378 // 2. the loop invariant is inductive and hold upon the entry.
379 const auto &return_cex = verifier.verify();
380 if(
381 !return_cex.has_value() ||
382 (verifier.properties.at(violation_id).status !=
384 return_cex->violation_type !=
386 return_cex->violation_type !=
388 {
389 return strengthening_candidate;
390 }
391 }
392 }
394}
395
397{
400
401 // The invariants we are going to synthesize and verify are the combined
402 // invariants from in- and post- invariant clauses.
403 auto combined_invariant = combine_in_and_post_invariant_clauses(
405
406 // The verifier we use to check current invariant candidates.
407 cegis_verifiert verifier(
408 combined_invariant, assigns_map, goto_model, options, log);
409
410 // Set of symbols the violation may be dependent on.
411 // We enumerate strengthening clauses built from symbols from the set.
412 std::set<symbol_exprt> dependent_symbols;
413 // Set of symbols we used to enumerate strengthening clauses.
414 std::vector<exprt> terminal_symbols;
415
416 log.debug() << "Start the first synthesis iteration." << messaget::eom;
417 auto return_cex = verifier.verify();
418
419 while(return_cex.has_value())
420 {
421 exprt new_invariant_clause = true_exprt();
422 // Synthesize the new_clause
423 // We use difference strategies for different type of violations.
424 switch(return_cex->violation_type)
425 {
427 new_invariant_clause =
428 synthesize_range_predicate(return_cex->violated_predicate);
429 break;
430
431 case cext::violation_typet ::cex_null_pointer:
432 new_invariant_clause =
433 synthesize_same_object_predicate(return_cex->checked_pointer);
434 break;
435
438 return_cex->checked_pointer, return_cex->cause_loop_ids);
439 break;
440
442 // Update the dependent symbols.
443 dependent_symbols = compute_dependent_symbols(
444 return_cex->cause_loop_ids.front(),
445 new_invariant_clause,
446 return_cex->live_variables);
448 terminal_symbols = construct_terminals(dependent_symbols);
449 new_invariant_clause = synthesize_strengthening_clause(
450 terminal_symbols,
451 return_cex->cause_loop_ids.front(),
452 verifier.target_violation_id);
453 break;
454
456 INVARIANT(false, "unsupported violation type");
457 break;
458 }
459
460 // Assigns map has already been updated in the switch block.
461 // Update invariants map for other types of violations.
462 if(return_cex->violation_type != cext::violation_typet::cex_assignable)
463 {
464 INVARIANT(!return_cex->cause_loop_ids.empty(), "No cause loop found!");
465 INVARIANT(
466 new_invariant_clause != true_exprt(),
467 "failed to synthesized meaningful clause");
468
469 // There could be tmp_post variables in the synthesized clause.
470 // We substitute them with their original variables.
471 replace_tmp_post(new_invariant_clause, tmp_post_map);
472
473 const auto &cause_loop_id = return_cex->cause_loop_ids.front();
474 // Update the dependent symbols.
475 dependent_symbols = compute_dependent_symbols(
476 cause_loop_id, new_invariant_clause, return_cex->live_variables);
477
478 // add the new clause to the candidate invariants.
479 if(
480 return_cex->violation_location ==
482 {
483 // When the violation happens in the loop guard, the new clause
484 // should hold for the both cases of
485 // 1. loop guard holds --- loop_guard -> in_invariant
486 // 2. loop guard doesn't hold --- !loop_guard -> pos_invariant
487 in_invariant_clause_map[cause_loop_id] = and_exprt(
488 in_invariant_clause_map[cause_loop_id], new_invariant_clause);
489 pos_invariant_clause_map[cause_loop_id] = and_exprt(
490 pos_invariant_clause_map[cause_loop_id], new_invariant_clause);
491 }
492 else if(
493 return_cex->violation_location == cext::violation_locationt::in_loop)
494 {
495 // When the violation happens in the loop body, the new clause
496 // should hold for the case of
497 // loop guard holds --- loop_guard -> in_invariant
498 in_invariant_clause_map[cause_loop_id] = and_exprt(
499 in_invariant_clause_map[cause_loop_id], new_invariant_clause);
500 }
501 else
502 {
503 // When the violation happens after the loop body, the new clause
504 // should hold for the case of
505 // loop guard doesn't hold --- !loop_guard -> pos_invariant
506 pos_invariant_clause_map[cause_loop_id] = and_exprt(
507 pos_invariant_clause_map[cause_loop_id], new_invariant_clause);
508 }
509
510 // Re-combine invariant clauses and update the candidate map.
511 combined_invariant = combine_in_and_post_invariant_clauses(
513 }
514
515 return_cex = verifier.verify();
516 }
517
518 log.result() << "result : " << log.green << "PASS" << log.reset
519 << messaget::eom;
520
521 return combined_invariant;
522}
523
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
unsignedbv_typet size_type()
Definition c_types.cpp:55
signedbv_typet signed_short_int_type()
Definition c_types.cpp:34
Verifier for Counterexample-Guided Synthesis.
Classes providing CFG-based information about symbols to guide simplifications in function and loop a...
Operator to return the address of an object.
Boolean AND.
Definition std_expr.h:2071
Enumerator that enumerates binary functional expressions.
Verifier that take a goto program as input, and output formatted counterexamples for counterexample-g...
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:39
exprt synthesize_strengthening_clause(const std::vector< exprt > terminal_symbols, const loop_idt &cause_loop_id, const irep_idt &violation_id)
Synthesize strengthening clause for invariant-not-preserved violation.
std::map< loop_idt, std::set< exprt > > assigns_map
Synthesized assigns clauses.
invariant_mapt synthesize_all() override
This synthesizer guarantees that, with the synthesized loop contracts, all checks in the annotated GO...
exprt synthesize(loop_idt loop_id) override
Synthesize loop invariant for a specified loop in the goto_model.
exprt synthesize_range_predicate(const exprt &violated_predicate)
Synthesize range predicate for OOB violation with violated_predicate.
const symbol_tablet original_symbol_table
Symbol table of the input goto model.
std::set< symbol_exprt > compute_dependent_symbols(const loop_idt &cause_loop_id, const exprt &new_clause, const std::set< exprt > &live_vars)
Compute the dependent symbols for a loop with invariant-not-preserved violation which happen after ne...
invariant_mapt in_invariant_clause_map
Synthesize invariant of form (in_inv || !guard) && (!guard -> pos_inv)
void build_tmp_post_map()
Scan all ASSIGN instructions to build the map from tmp_post variables to their original variables.
std::unordered_map< exprt, exprt, irep_hash > tmp_post_map
Map from tmp_post variables to their original variables.
void init_candidates()
Initialize invariants as true for all unannotated loops.
void synthesize_assigns(const exprt &checked_pointer, const std::list< loop_idt > cause_loop_ids)
Synthesize assigns target and update assigns_map.
exprt synthesize_same_object_predicate(const exprt &checked_pointer)
Synthesize same object predicate for null-pointer violation for checked_pointer.
Factory for enumerator that can be used to represent a tree grammar.
void attach_productions(const std::string &id, const enumeratorst &enumerators)
Attach enumerators to the placeholder with id.
Base class for all expressions.
Definition expr.h:56
The Boolean constant false.
Definition std_expr.h:3017
function_mapt function_map
goto_functionst goto_functions
GOTO functions.
Definition goto_model.h:34
instructionst::iterator targett
A class containing utility functions for havocing expressions.
Definition havoc_utils.h:28
Enumerator that enumerates leaf expressions from a given list.
loop_mapt loop_map
void erase_locals(std::set< exprt > &exprs)
Definition cfg_info.h:171
static const commandt reset
return to default formatting, as defined by the terminal
Definition message.h:343
mstreamt & debug() const
Definition message.h:429
static const commandt green
render text with green foreground color
Definition message.h:349
mstreamt & progress() const
Definition message.h:424
mstreamt & result() const
Definition message.h:409
static eomt eom
Definition message.h:297
The offset (in bytes) of a pointer relative to the object.
Placeholders for actual enumerators, which represent nonterminals.
expr_sett enumerate(const std::size_t size) const override
Replace a symbol expression by a given expression.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
The Boolean constant true.
Definition std_expr.h:3008
Semantic type conversion.
Definition std_expr.h:2017
Generic base class for unary expressions.
Definition std_expr.h:314
std::unordered_set< exprt, irep_hash > expr_sett
static const exprt::operandst & get_assigns(const goto_programt::const_targett &latch_target)
Extracts the assigns clause expression from the latch condition.
std::vector< exprt > construct_terminals(const std::set< symbol_exprt > &symbols)
void replace_tmp_post(exprt &dest, const std::unordered_map< exprt, exprt, irep_hash > &tmp_post_map)
Enumerative Loop Contracts Synthesizer.
auto expr_try_dynamic_cast(TExpr &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TExpr >::type
Try to cast a reference to a generic exprt to a specific derived class.
Definition expr_cast.h:81
auto expr_dynamic_cast(TExpr &base) -> typename detail::expr_dynamic_cast_return_typet< T, TExpr >::type
Cast a reference to a generic exprt to a specific derived class.
Definition expr_cast.h:207
Enumerator Interface.
std::list< const enumerator_baset * > enumeratorst
std::list< std::size_t > partitiont
static bool find_symbols(symbol_kindt, const typet &, std::function< bool(const symbol_exprt &)>, std::unordered_set< irep_idt > &bindings)
static format_containert< T > format(const T &o)
Definition format.h:37
Utilities for building havoc code for expressions.
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
static int8_t r
Definition irep_hash.h:60
Field-insensitive, location-sensitive may-alias analysis.
Helper functions for k-induction and loop invariants.
Compute natural loops in a goto_function.
exprt same_object(const exprt &p1, const exprt &p2)
exprt pointer_object(const exprt &p)
Various predicates over pointers in programs.
@ FAIL
The property was violated.
exprt simplify_expr(exprt src, const namespacet &ns)
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition std_expr.h:206
bool is_constant(const typet &type)
This method tests, if the given typet is a constant.
Definition std_types.h:29
Loop id used to identify loops.
Definition loop_ids.h:28
invariant_mapt combine_in_and_post_invariant_clauses(const invariant_mapt &in_clauses, const invariant_mapt &post_clauses, const invariant_mapt &neg_guards)
Combine invariant of form (in_inv || !guard) && (!guard -> pos_inv)
goto_programt::targett get_loop_head(const unsigned int target_loop_number, goto_functiont &function)
Find and return the first instruction of the natural loop with loop_number in function.
Definition utils.cpp:695
void widen_assigns(assignst &assigns, const namespacet &ns)
Widen expressions in assigns with the following strategy.
Definition utils.cpp:341
goto_programt::targett get_loop_end_from_loop_head_and_content_mutable(const goto_programt::targett &loop_head, const loop_templatet< goto_programt::targett, goto_programt::target_less_than > &loop)
Find the goto instruction of loop that jumps to loop_head
Definition utils.cpp:639
std::map< loop_idt, exprt > invariant_mapt
Definition utils.h:30