-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparse_results.py
More file actions
executable file
·295 lines (242 loc) · 7.07 KB
/
parse_results.py
File metadata and controls
executable file
·295 lines (242 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/usr/bin/env python3
#
# Parse results in output directory and report interesting statistics.
#
import sys
import os
import re
from pathlib import Path
from functools import lru_cache
from subprocess import check_output
# Command line: RESULT_DIR is mandatory, any further arguments are binaries.
if len(sys.argv) <= 1:
    sys.exit(f'Usage: {sys.argv[0]} RESULT_DIR [BINARY ...]')

# Directory whose entries are scanned for results.
res_dir = Path(sys.argv[1])
# Binaries used to filter out non-exported functions; may be empty, in which
# case no filtering is done.
binaries = [Path(arg) for arg in sys.argv[2:]]
# Completion line of a result file; group 1 is the elapsed time in seconds,
# group 2 the memory usage in MiB.
time_mem_exp = re.compile(
    r'Completed in ([\d.]+) seconds, using (\d+) MiB of memory'
)

# One line of `nm` output for a symbol of type "T"; group 1 is the symbol name
# up to (not including) the first '@' or whitespace character.
nm_exp = re.compile(
    r'[\da-fA-F]+\s+T\s+([^@\s]+)'
)
class Res:
    """Outcome of analysing a single function, as parsed from a result file.

    The class only declares its attributes: nothing is set on construction,
    so reading an attribute that was never assigned raises AttributeError.
    `error` is only meaningful when `errored` is true and `ver_outcome` only
    when `verified` is true.
    """

    # Name of the analysed function.
    funcname: str
    # Elapsed time in seconds.
    time: float
    # Memory usage in MiB.
    mem: float
    # Whether the analysis found a result for the function.
    found: bool
    # Whether the analysis failed.
    errored: bool
    # Failure kind when errored: 'mem', 'time' or 'choked'.
    error: str
    # Whether a verification step was run on the result (a flag, not text).
    verified: bool
    # Verification outcome when verified: 'ok', 'fail' or 'error'.
    ver_outcome: str

    # Fixed attribute set: no per-instance __dict__, typos raise immediately.
    __slots__ = (
        'funcname',
        'time',
        'mem',
        'found',
        'errored',
        'error',
        'verified',
        'ver_outcome',
    )
def merge(r1, r2):
    """Combine two results recorded for the same function into one.

    When the two results disagree, the better one is returned unchanged.
    The criteria are checked in this order: not errored beats errored, found
    beats not found, verified beats not verified, and a verification outcome
    of 'ok' beats any other outcome.

    When they agree on all of the above, a new result is built: its time is
    the sum of both times, its memory the larger of the two, and the
    remaining fields (including the function name, if set) come from `r1`.

    Args:
        r1: First result.
        r2: Second result.

    Returns:
        Either `r1`, `r2`, or a newly created `Res` combining both.

    Raises:
        AssertionError: If both results are verified with different outcomes
            and neither outcome is 'ok'.
    """
    # A result that did not error always wins over one that did.
    if r1.errored and not r2.errored:
        return r2
    if r2.errored and not r1.errored:
        return r1

    # Then prefer the one that actually found something.
    if r1.found and not r2.found:
        return r1
    if r2.found and not r1.found:
        return r2

    # Then prefer the one that went through verification.
    if r1.verified and not r2.verified:
        return r1
    if r2.verified and not r1.verified:
        return r2

    # Both verified: a successful verification wins over a non-successful one.
    if r1.verified and r2.verified:
        if r1.ver_outcome == 'ok' and r2.ver_outcome != 'ok':
            return r1
        if r2.ver_outcome == 'ok' and r1.ver_outcome != 'ok':
            return r2

    # From here on the two results are equivalent in every flag.
    assert r1.found == r2.found
    assert r1.errored == r2.errored
    assert r1.verified == r2.verified

    r = Res()
    # Carry the name over so the merged result is as complete as its inputs;
    # otherwise the slot stays unset and reading it raises AttributeError.
    if hasattr(r1, 'funcname'):
        r.funcname = r1.funcname
    r.time = r1.time + r2.time
    r.mem = max(r1.mem, r2.mem)
    r.found = r1.found or r2.found
    r.errored = r1.errored
    r.verified = r1.verified

    if r1.errored:
        # r1 and r2 errors could be different, not important
        r.error = r1.error

    if r1.verified:
        assert r1.ver_outcome == r2.ver_outcome, f'{r1.ver_outcome} != {r2.ver_outcome}'
        r.ver_outcome = r1.ver_outcome

    return r
@lru_cache()
def nm(binary: str):
    """Return the set of type-"T" symbol names that `nm -D` lists for `binary`.

    Each output line is matched against `nm_exp`; lines that do not match are
    ignored. Results are cached per `binary`, so the external `nm` tool runs
    at most once for each distinct argument.
    """
    listing = check_output(('nm', '-D', binary), text=True)
    matches = map(nm_exp.match, listing.splitlines())
    return {hit.group(1) for hit in matches if hit is not None}
# Result kept for each function, keyed by function name (with the libxml2
# alias suffix removed). Several files may map to the same key; they are
# combined with merge().
funcs = {}
# Number of result files skipped because none of the given binaries exports
# the function.
unexported = 0

for f in res_dir.iterdir():
    # Only regular files can hold results; opening a sub-directory would raise.
    if not f.is_file():
        continue

    with f.open() as fobj:
        data = fobj.read()

    # Skip functions that weren't actually found
    if 'SymbolNotFound' in data:
        continue

    # The function name is whatever follows the first underscore of the file
    # stem (the whole stem when it contains no underscore).
    funcname = f.stem[f.stem.find('_') + 1:]

    # Skip functions that are not exported
    if binaries and not any(funcname in nm(binary) for binary in binaries):
        unexported += 1
        continue

    m = time_mem_exp.search(data)
    if not m:
        # Skip incomplete files
        continue

    cur = Res()
    cur.time, cur.mem = float(m.group(1)), float(m.group(2))

    # Classify failures first: an errored run is never found nor verified.
    if 'Exceeded maximum memory usage' in data:
        cur.errored = True
        cur.error = 'mem'
    elif 'Exceeded maximum execution time' in data:
        cur.errored = True
        cur.error = 'time'
    elif 'SymexecFailed' in data or 'Symexec failed' in data or 'argument evaluation failed' in data:
        cur.errored = True
        cur.error = 'choked'
    else:
        cur.errored = False

    if cur.errored:
        cur.found = False
        cur.verified = False
    else:
        # 'argument evaluation failed' cannot appear here: it is one of the
        # markers that put the file in the errored branch above.
        cur.found = 'Reached call to' in data

        if 'Verification successful' in data:
            cur.verified = True
            cur.ver_outcome = 'ok'
        elif 'Verification failed' in data:
            cur.verified = True
            cur.ver_outcome = 'fail'
        elif 'Verification errored' in data:
            cur.verified = True
            cur.ver_outcome = 'error'
        else:
            cur.verified = False

    cur.funcname = funcname

    # libxml2 quirk: fold "<name>__internal_alias" into "<name>". Only the
    # trailing suffix is removed, never an occurrence inside the name.
    alias_suffix = '__internal_alias'
    if funcname.endswith(alias_suffix):
        funcname = funcname[:-len(alias_suffix)]

    if funcname in funcs:
        cur = merge(funcs[funcname], cur)

    # Sanity checks: the detail field must be set whenever its flag is.
    if cur.errored:
        assert cur.error
    if cur.verified:
        assert cur.ver_outcome

    funcs[funcname] = cur
if unexported:
    print('Skipped', unexported, 'unexported functions', end='\n\n')

# Total funcs analyzed; without any there is nothing to report.
total = len(funcs)
if total == 0:
    print('Nothing to do here...')
    sys.exit(0)

# Outcome counters.
found = 0           # funcs for which symex found a result
notfound = 0        # funcs for which symex did not find a result
verified = 0        # funcs that also got verified on the found result
errored = 0         # funcs for which symex errored/failed/crashed/whatever

# Per-kind breakdowns, keyed by the values stored in Res.error and
# Res.ver_outcome respectively.
error_kinds = dict.fromkeys(('time', 'mem', 'choked'), 0)
outcome_kinds = dict.fromkeys(('ok', 'fail', 'error'), 0)

# Resource accumulators.
tot_time = 0        # total time spent doing everything
tot_mem = 0         # total mem used
found_time = 0      # total time spent on funcs that returned a found result
found_time_max = 0  # max time spent on a func that returned a found result
found_mem = 0       # total mem used on funcs that returned a found result
found_mem_max = 0   # max mem used on single func that returned a found result

for res in funcs.values():
    tot_time += res.time
    tot_mem += res.mem

    if res.found:
        found += 1
        found_mem += res.mem
        found_time += res.time
        found_time_max = max(found_time_max, res.time)
        found_mem_max = max(found_mem_max, res.mem)
    else:
        notfound += 1

    if res.errored:
        errored += 1
        if res.error in error_kinds:
            error_kinds[res.error] += 1
        else:
            # Unknown error kind
            assert False

    if res.verified:
        verified += 1
        if res.ver_outcome in outcome_kinds:
            outcome_kinds[res.ver_outcome] += 1
        else:
            # Unknown verification outcome
            assert False

err_time = error_kinds['time']      # ran out of time
err_mem = error_kinds['mem']        # ran out of memory
err_choked = error_kinds['choked']  # angr/claripy choked
ver_ok = outcome_kinds['ok']        # passed verification on the found result
ver_fail = outcome_kinds['fail']    # did not pass verification
ver_err = outcome_kinds['error']    # verification errored

# Consistency checks between the counters.
assert found + notfound == total
assert found == verified
assert ver_ok + ver_fail + ver_err == verified
assert err_mem + err_time + err_choked == errored

noterrored = notfound - errored

# Averages over every function, regardless of outcome.
mem_avg = tot_mem / total
time_avg = tot_time / total

# Averages over functions with a found result (0 when there are none).
found_time_avg = found_time / found if found > 0 else 0
found_mem_avg = found_mem / found if found > 0 else 0

print(f'''\
Total functions {total}
Solution found {found} ({found / total:.2%})
Verified {verified}''')

if verified:
    print(f'''\
Ver OK {ver_ok} ({ver_ok / verified:.2%})
Ver failed {ver_fail} ({ver_fail / verified:.2%})
Ver errored {ver_err} ({ver_err / verified:.2%})''')

print(f'''\
Solution not found {notfound} ({notfound / total:.2%})''')

if notfound:
    print(f'''\
Not errored {noterrored} ({noterrored / notfound:.2%})
Errored {errored} ({errored / notfound:.2%})''')

if errored:
    print(f'''\
Out of time {err_time} ({err_time / errored:.2%})
Out of memory {err_mem} ({err_mem / errored:.2%})
Symex error {err_choked} ({err_choked / errored:.2%})''')

print(f'''
Total time {tot_time:.2f} seconds
Average time {time_avg:.2f} seconds
Average memory {mem_avg:.2f} MiB
Found total time {found_time:.2f} seconds
Found avg time {found_time_avg:.2f} seconds
Found max time {found_time_max:.2f} seconds
Found avg memory {found_mem_avg:.0f} MiB
Found max memory {found_mem_max:.0f} MiB''')