Skip to content

Commit 0490f57

Browse files
committed
Initial commit of symex complexity work
This work allows for a more intelligent way of pathing around / slicing away problematic code dynamically as symex runs. Every path will have a 'complexity' rating that shows currently how difficult the code it's running is for symex to analyze (alternatively also for the SAT solver to solve) and when that rating breaches a certain range, that path will be shut down. This means we won't be able to generate traces that would use the discarded path, but when that path is not important it's a net benefit, and when it was important CBMC wouldn't have completed anyway so we trade an inaccurate trace for nothing at all. Note: There is special consideration for loops: when any of a loops children fails (inc. loops / recursion) it increments a counter, and when that counter reaches a certain limit the whole loop itself gets cancelled. This is done because when a branch deep within a loop gets cancelled along all paths the loop will simply try again even though it's been proven that the code will become too complicated to analyze.
1 parent 4871953 commit 0490f57

12 files changed

+526
-7
lines changed

src/goto-symex/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ SRC = auto_objects.cpp \
3535
symex_target.cpp \
3636
symex_target_equation.cpp \
3737
symex_throw.cpp \
38+
complexity_limiter.cpp \
3839
# Empty last line
3940

4041
INCLUDES= -I ..
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/*******************************************************************\
2+
3+
Module: Symbolic Execution
4+
5+
Author: John Dumbell
6+
7+
\*******************************************************************/
8+
9+
#include "complexity_limiter.h"
10+
#include "goto_symex_state.h"
11+
#include <cmath>
12+
13+
complexity_limitert::complexity_limitert(
14+
message_handlert &message_handler,
15+
const optionst &options)
16+
: log(message_handler)
17+
{
18+
std::size_t limit = options.get_signed_int_option("symex-complexity-limit");
19+
if((complexity_active = limit > 0))
20+
{
21+
// This gives a curve that allows low limits to be rightly restrictive,
22+
// while larger numbers are very large.
23+
max_complexity = static_cast<std::size_t>((limit * limit) * 25);
24+
25+
const std::size_t failed_child_loops_limit = options.get_signed_int_option(
26+
"symex-complexity-failed-child-loops-limit");
27+
const std::size_t unwind = options.get_signed_int_option("unwind");
28+
29+
// If we have complexity enabled, try to work out a failed_children_limit.
30+
// In order of priority:
31+
// * explicit limit
32+
// * inferred limit from unwind
33+
// * best limit we can apply with very little information.
34+
if(failed_child_loops_limit > 0)
35+
max_loops_complexity = failed_child_loops_limit;
36+
else if(unwind > 0)
37+
max_loops_complexity = std::max(static_cast<int>(floor(unwind / 3)), 1);
38+
else
39+
max_loops_complexity = limit;
40+
}
41+
}
42+
43+
framet::active_loop_infot *
44+
complexity_limitert::get_current_active_loop(call_stackt &current_call_stack)
45+
{
46+
for(auto frame_iter = current_call_stack.rbegin();
47+
frame_iter != current_call_stack.rend();
48+
++frame_iter)
49+
{
50+
// As we're walking bottom-up, the first frame we find with active loops
51+
// is our closest active one.
52+
if(!frame_iter->active_loops.empty())
53+
{
54+
return &frame_iter->active_loops.back();
55+
}
56+
}
57+
58+
return {};
59+
}
60+
61+
bool complexity_limitert::in_blacklisted_loop(
62+
const call_stackt &current_call_stack,
63+
const goto_programt::const_targett &instr)
64+
{
65+
for(auto frame_iter = current_call_stack.rbegin();
66+
frame_iter != current_call_stack.rend();
67+
++frame_iter)
68+
{
69+
for(auto &loop_iter : frame_iter->active_loops)
70+
{
71+
for(auto &blacklisted_loop : loop_iter.blacklisted_loops)
72+
{
73+
if(blacklisted_loop.get().contains(instr))
74+
{
75+
return true;
76+
}
77+
}
78+
}
79+
}
80+
81+
return false;
82+
}
83+
84+
bool complexity_limitert::are_loop_children_too_complicated(
85+
call_stackt &current_call_stack)
86+
{
87+
std::size_t sum_complexity = 0;
88+
89+
// This will walk all currently active loops, from inner-most to outer-most,
90+
// and sum the times their branches have failed.
91+
//
92+
// If we find that this sum is higher than our max_loops_complexity we take
93+
// note of the loop that happens in and then cause every parent still
94+
// executing that loop to blacklist it.
95+
//
96+
// This acts as a context-sensitive loop cancel, so if we've got unwind 20
97+
// and find out at the 3rd iteration a particular nested loop is too
98+
// complicated, we make sure we don't execute it the other 17 times. But as
99+
// soon as we're running the loop again via a different context it gets a
100+
// chance to redeem itself.
101+
optionalt<std::reference_wrapper<lexical_loopst::loopt>> loop_to_blacklist;
102+
for(auto frame_iter = current_call_stack.rbegin();
103+
frame_iter != current_call_stack.rend();
104+
++frame_iter)
105+
{
106+
for(auto it = frame_iter->active_loops.rbegin();
107+
it != frame_iter->active_loops.rend();
108+
it++)
109+
{
110+
auto &loop_info = *it;
111+
112+
// Because we're walking in reverse this will only be non-empty for
113+
// parents of the loop that's been blacklisted. We then add that to their
114+
// internal lists of blacklisted children.
115+
if(loop_to_blacklist)
116+
{
117+
loop_info.blacklisted_loops.emplace_back(*loop_to_blacklist);
118+
}
119+
else
120+
{
121+
sum_complexity += loop_info.children_too_complex;
122+
if(sum_complexity > max_loops_complexity)
123+
{
124+
loop_to_blacklist =
125+
std::reference_wrapper<lexical_loopst::loopt>(loop_info.loop);
126+
}
127+
}
128+
}
129+
}
130+
131+
return !loop_to_blacklist;
132+
}
133+
134+
complexity_violationt
135+
complexity_limitert::check_complexity(goto_symex_statet &state)
136+
{
137+
if(!complexity_limits_active() || !state.reachable)
138+
return complexity_violationt::NONE;
139+
140+
std::size_t complexity = state.complexity();
141+
if(complexity == 0)
142+
return complexity_violationt::NONE;
143+
144+
auto &current_call_stack = state.call_stack();
145+
146+
// Check if this branch is too complicated to continue.
147+
auto active_loop = get_current_active_loop(current_call_stack);
148+
if(complexity >= max_complexity)
149+
{
150+
// If we're too complex, add a counter to the current loop we're in and
151+
// check if we've violated child-loop complexity limits.
152+
if(active_loop != nullptr)
153+
{
154+
active_loop->children_too_complex++;
155+
156+
// If we're considered too complex, cancel branch.
157+
if(are_loop_children_too_complicated(current_call_stack))
158+
{
159+
log.warning()
160+
<< "[symex-complexity] Loop operations considered too complex"
161+
<< (state.source.pc->source_location.is_not_nil()
162+
? " at: " + state.source.pc->source_location.as_string()
163+
: ", location number: " +
164+
std::to_string(state.source.pc->location_number) + ".")
165+
<< messaget::eom;
166+
167+
return complexity_violationt::LOOP;
168+
}
169+
}
170+
171+
log.warning() << "[symex-complexity] Branch considered too complex"
172+
<< (state.source.pc->source_location.is_not_nil()
173+
? " at: " + state.source.pc->source_location.as_string()
174+
: ", location number: " +
175+
std::to_string(state.source.pc->location_number) +
176+
".")
177+
<< messaget::eom;
178+
179+
// Then kill this branch.
180+
return complexity_violationt::BRANCH;
181+
}
182+
183+
// If we're not in any loop, return with no violation.
184+
if(!active_loop)
185+
return complexity_violationt::NONE;
186+
187+
// Check if we've entered a loop that has been previously black-listed, and
188+
// if so then cancel before we go any further.
189+
if(in_blacklisted_loop(current_call_stack, state.source.pc))
190+
{
191+
log.warning() << "[symex-complexity] Trying to enter blacklisted loop"
192+
<< (state.source.pc->source_location.is_not_nil()
193+
? " at: " + state.source.pc->source_location.as_string()
194+
: ", location number: " +
195+
std::to_string(state.source.pc->location_number) +
196+
".")
197+
<< messaget::eom;
198+
199+
return complexity_violationt::LOOP;
200+
}
201+
202+
return complexity_violationt::NONE;
203+
}
204+
205+
void complexity_limitert::run_transformations(
206+
complexity_violationt complexity_violation,
207+
goto_symex_statet &current_state)
208+
{
209+
if(violation_transformations.empty())
210+
default_transformation.transform(complexity_violation, current_state);
211+
else
212+
for(auto transform_lambda : violation_transformations)
213+
transform_lambda.transform(complexity_violation, current_state);
214+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*******************************************************************\
2+
3+
Module: Symbolic Execution
4+
5+
Author: John Dumbell
6+
7+
\*******************************************************************/
8+
9+
/// \file
10+
/// Symbolic Execution
11+
12+
#ifndef CPROVER_GOTO_SYMEX_COMPLEXITY_LIMITER_H
13+
#define CPROVER_GOTO_SYMEX_COMPLEXITY_LIMITER_H
14+
15+
#include "complexity_violation.h"
16+
#include "goto_symex_state.h"
17+
#include "symex_complexity_limit_exceeded_action.h"
18+
19+
/// Symex complexity module.
20+
///
21+
/// Dynamically sets branches as unreachable symex considers them too complex.
22+
/// A branch is too complex when runtime may be beyond reasonable
23+
/// bounds due to internal structures becoming too large or solving conditions
24+
/// becoming too big for the SAT solver to deal with.
25+
///
26+
/// On top of the branch cancelling there is special consideration for loops.
27+
/// As branches get cancelled it keeps track of what loops are currently active
28+
/// up through the stack. If a loop has too many of its child branches killed
29+
/// (including additional loops or recursion) it won't do any more unwinds of
30+
/// that particular loop and will blacklist it.
31+
///
32+
/// This blacklist is only held for as long as any loop is still active. As soon
33+
/// as the loop iteration ends and the context in which the code is being
34+
/// executed changes it'll be able to be run again.
35+
///
36+
/// Example of loop blacklisting:
37+
///
38+
/// loop A: (complexity: 5070)
39+
/// loop B: (complexity: 5000)
40+
/// loop C: (complexity: 70)
41+
///
42+
/// In the above loop B will be blacklisted if we have a complexity limitation
43+
/// < 5000, but loop A and C will still be run, because when loop B is removed
44+
/// the complexity of the loop as a whole is acceptable.
45+
class complexity_limitert
46+
{
47+
public:
48+
complexity_limitert() = default;
49+
50+
complexity_limitert(message_handlert &logger, const optionst &options);
51+
52+
/// Is the complexity module active?
53+
/// \return Whether complexity limits are active or not.
54+
inline bool complexity_limits_active()
55+
{
56+
return this->complexity_active;
57+
}
58+
59+
/// Checks the passed-in state to see if its become too complex for us to deal
60+
/// with, and if so set its guard to false.
61+
/// \param state goto_symex_statet you want to check the complexity of.
62+
/// \return True/false depending on whether this branch should be abandoned.
63+
complexity_violationt check_complexity(goto_symex_statet &state);
64+
65+
/// Runs a suite of transformations on the state and symex executable,
66+
/// performing whatever transformations are required depending on
67+
/// the modules that have been loaded.
68+
void run_transformations(
69+
complexity_violationt complexity_violation,
70+
goto_symex_statet &current_state);
71+
72+
protected:
73+
mutable messaget log;
74+
75+
/// Is the complexity module active, usually coincides with a max_complexity
76+
/// value above 0.
77+
bool complexity_active = false;
78+
79+
/// Functions called when the heuristic has been violated. Can perform
80+
/// any form of branch, state or full-program transformation.
81+
std::vector<symex_complexity_limit_exceeded_actiont>
82+
violation_transformations;
83+
84+
/// Default heuristic transformation. Sets state as unreachable.
85+
symex_complexity_limit_exceeded_actiont default_transformation;
86+
87+
/// The max complexity rating that a branch can be before it's abandoned. The
88+
/// internal representation for computing complexity, has no relation to the
89+
/// argument passed in via options.
90+
std::size_t max_complexity = 0;
91+
92+
/// The amount of branches that can fail within the scope of a loops execution
93+
/// before the entire loop is abandoned.
94+
std::size_t max_loops_complexity = 0;
95+
96+
/// Checks whether the current loop execution stack has violated
97+
/// max_loops_complexity.
98+
bool are_loop_children_too_complicated(call_stackt &current_call_stack);
99+
100+
/// Checks whether we're in a loop that is currently considered blacklisted,
101+
/// and shouldn't be executed.
102+
static bool in_blacklisted_loop(
103+
const call_stackt &current_call_stack,
104+
const goto_programt::const_targett &instr);
105+
106+
/// Returns inner-most currently active loop. This is frame-agnostic, so if
107+
/// we're in a loop further up the stack that will still be returned as
108+
/// the 'active' one.
109+
static framet::active_loop_infot *
110+
get_current_active_loop(call_stackt &current_call_stack);
111+
};
112+
113+
#endif // CPROVER_GOTO_SYMEX_COMPLEXITY_LIMITER_H
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*******************************************************************\
2+
3+
Module: Symbolic Execution
4+
5+
Author: John Dumbell
6+
7+
\*******************************************************************/
8+
9+
#ifndef CPROVER_GOTO_SYMEX_COMPLEXITY_VIOLATION_H
10+
#define CPROVER_GOTO_SYMEX_COMPLEXITY_VIOLATION_H
11+
12+
/// What sort of symex-complexity violation has taken place.
13+
///
14+
/// Loop: If we've violated a loop complexity boundary, or are part of
15+
/// a blacklisted loop.
16+
/// Branch: If this particular branch has been deemed too complex.
17+
enum class complexity_violationt
18+
{
19+
NONE,
20+
LOOP,
21+
BRANCH,
22+
};
23+
24+
#endif // CPROVER_GOTO_SYMEX_COMPLEXITY_VIOLATION_H

src/goto-symex/frame.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com
1414

1515
#include "goto_state.h"
1616
#include "symex_target.h"
17+
#include <analyses/lexical_loops.h>
1718

1819
/// Stack frames -- these are used for function calls and for exceptions
1920
struct framet
@@ -46,6 +47,27 @@ struct framet
4647
bool is_recursion = false;
4748
};
4849

50+
class active_loop_infot
51+
{
52+
public:
53+
explicit active_loop_infot(lexical_loopst::loopt &_loop) : loop(_loop)
54+
{
55+
}
56+
57+
/// Incremental counter on how many branches this loop has had killed.
58+
std::size_t children_too_complex = 0;
59+
60+
/// Set of loop ID's that have been blacklisted.
61+
std::vector<std::reference_wrapper<lexical_loopst::loopt>>
62+
blacklisted_loops;
63+
64+
// Loop information.
65+
lexical_loopst::loopt &loop;
66+
};
67+
68+
std::shared_ptr<lexical_loopst> loops_info;
69+
std::vector<active_loop_infot> active_loops;
70+
4971
std::unordered_map<irep_idt, loop_infot> loop_iterations;
5072

5173
framet(symex_targett::sourcet _calling_location, const guardt &state_guard)

0 commit comments

Comments
 (0)