From c4ecd79d8f3de4f7a3d0422ea464a6349fae80d4 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Fri, 24 Feb 2023 16:34:59 -0500 Subject: [PATCH 1/9] initial pycbat commit --- wp/pycbat/.gitignore | 1 + wp/pycbat/README.md | 22 ++ wp/pycbat/junk/README.md | 18 ++ wp/pycbat/junk/angrplay.ipynb | 417 ++++++++++++++++++++++++++++++ wp/pycbat/junk/angrtype.py | 15 ++ wp/pycbat/junk/foo.c | 6 + wp/pycbat/junk/foo.o | Bin 0 -> 2976 bytes wp/pycbat/junk/tests.py | 74 ++++++ wp/pycbat/pyproject.toml | 21 ++ wp/pycbat/requirements.txt | 2 + wp/pycbat/src/cbat/__init__.py | 446 +++++++++++++++++++++++++++++++++ 11 files changed, 1022 insertions(+) create mode 100644 wp/pycbat/.gitignore create mode 100644 wp/pycbat/README.md create mode 100644 wp/pycbat/junk/README.md create mode 100644 wp/pycbat/junk/angrplay.ipynb create mode 100644 wp/pycbat/junk/angrtype.py create mode 100644 wp/pycbat/junk/foo.c create mode 100644 wp/pycbat/junk/foo.o create mode 100644 wp/pycbat/junk/tests.py create mode 100644 wp/pycbat/pyproject.toml create mode 100644 wp/pycbat/requirements.txt create mode 100644 wp/pycbat/src/cbat/__init__.py diff --git a/wp/pycbat/.gitignore b/wp/pycbat/.gitignore new file mode 100644 index 00000000..ed8ebf58 --- /dev/null +++ b/wp/pycbat/.gitignore @@ -0,0 +1 @@ +__pycache__ \ No newline at end of file diff --git a/wp/pycbat/README.md b/wp/pycbat/README.md new file mode 100644 index 00000000..414d8521 --- /dev/null +++ b/wp/pycbat/README.md @@ -0,0 +1,22 @@ +# CBAT Python Bindings + + + + +Installation +``` +python3 -m pip install -e . +``` + +Usage: + +```python +import z3 +import cbat + +rax = z3.BitVec("RAX", 64) +postcond = rax == 4 +# Check that rax is always 4 at end of function "fact" in binary file myfactorial.o +cbat.run_wp("./myfactorial.o", func="fact", postcond=postcond) +``` + diff --git a/wp/pycbat/junk/README.md b/wp/pycbat/junk/README.md new file mode 100644 index 00000000..f09397f8 --- /dev/null +++ b/wp/pycbat/junk/README.md @@ -0,0 +1,18 @@ + + +Ok. + +Docker install cbat +pip install pycbat + +extract angr semantics +structs. Slam through? +Use angr to construct preconditions + +cbat jupyter notebook tutorial + + +Requirements: +Z3 +docker +angr \ No newline at end of file diff --git a/wp/pycbat/junk/angrplay.ipynb b/wp/pycbat/junk/angrplay.ipynb new file mode 100644 index 00000000..1ef66a7b --- /dev/null +++ b/wp/pycbat/junk/angrplay.ipynb @@ -0,0 +1,417 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "import angr" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING | 2023-02-24 14:22:16,654 | cle.loader | The main binary is a position-independent executable. It is being loaded with a base address of 0x400000.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slotnames__', '__str__', '__subclasshook__', '__weakref__', '_arch', '_base_name', '_can_refine_int', '_fields', '_init_str', '_refine', '_refine_dir', '_size', '_with_arch', 'alignment', 'arch', 'base', 'c_repr', 'copy', 'extract', 'extract_claripy', 'label', 'signed', 'size', 'store', 'with_arch']\n", + "True\n", + "\n", + "None\n", + "True\n", + "4\n" + ] + } + ], + "source": [ + "proj = angr.Project(\"foo.o\", load_debug_info=True)\n", + "print(proj.arch)\n", + "t = angr.types.parse_type('int')\n", + "t.arch = proj.arch\n", + "# print(inspect.getmembers(t))\n", + "print(dir(t))\n", + "print(t.base)\n", + "print(t.c_repr)\n", + "print(t.label)\n", + "print(t.signed)\n", + "t2 = t.with_arch(proj.arch)\n", + "print(t2.alignment)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "OrderedDict([('x', int), ('y', long)])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "angr.types.parse_type('struct aa {int x; long y;}').fields" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ARCH',\n", + " 'ARG_REGS',\n", + " 'ArgSession',\n", + " 'CALLEE_CLEANUP',\n", + " 'CALLER_SAVED_REGS',\n", + " 'FP_ARG_REGS',\n", + " 'FP_RETURN_VAL',\n", + " 'OVERFLOW_FP_RETURN_VAL',\n", + " 'OVERFLOW_RETURN_VAL',\n", + " 'RETURN_ADDR',\n", + " 'RETURN_VAL',\n", + " 'STACKARG_SP_BUFF',\n", + " 'STACKARG_SP_DIFF',\n", + " 'STACK_ALIGNMENT',\n", + " '__annotations__',\n", + " '__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_classify',\n", + " '_combine_classes',\n", + " '_flatten',\n", + " '_match',\n", + " '_standardize_value',\n", + " 'arch',\n", + " 'arg_locs',\n", + " 'arg_session',\n", + " 'find_cc',\n", + " 'fp_args',\n", + " 'get_arg_info',\n", + " 'get_args',\n", + " 'guess_prototype',\n", + " 'int_args',\n", + " 'is_fp_arg',\n", + " 'is_fp_value',\n", + " 'memory_args',\n", + " 'next_arg',\n", + " 'return_addr',\n", + " 'return_in_implicit_outparam',\n", + " 'return_val',\n", + " 'set_return_val',\n", + " 'setup_callsite',\n", + " 'stack_space',\n", + " 'teardown_callsite']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(proj.factory.cc()) # calling conventions. Interesting\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hmm. Here's an idea. Literally use angr to write specs?\n", + "mem.long.yada == foo\n", + "\n", + "https://api.angr.io/angr.html#module-angr.calling_conventions\n", + "\n", + "\n", + "state \n", + "\n", + "unroll_loop parameter. That's interesting." + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "st = proj.factory.entry_state()\n", + "st = proj.factory.entry_state(add_options={angr.options.LAZY_SOLVES, angr.options.SYMBOLIC, angr.options.SYMBOLIC_INITIAL_VALUES})\n", + "st.regs.rax\n", + "st.mem[0x601048].long\n", + "st.registers\n", + "st.memory\n", + "list(st.solver.get_variables(\"\"))\n", + "st.regs.rax.to_claripy()\n", + "st.registers." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "import claripy\n", + "s = claripy.Solver()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(,)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.add(st.regs.rax == st.regs.rdi)\n", + "()" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING | 2023-02-23 14:36:39,907 | angr.calling_conventions | Guessing call prototype. Please specify prototype.\n", + "WARNING | 2023-02-23 14:36:39,919 | angr.storage.memory_mixins.default_filler_mixin | The program is accessing register with an unspecified value. This could indicate unwanted behavior.\n", + "WARNING | 2023-02-23 14:36:39,921 | angr.storage.memory_mixins.default_filler_mixin | angr will cope with this by generating an unconstrained symbolic variable and continuing. You can resolve this by:\n", + "WARNING | 2023-02-23 14:36:39,922 | angr.storage.memory_mixins.default_filler_mixin | 1) setting a value to the initial state\n", + "WARNING | 2023-02-23 14:36:39,923 | angr.storage.memory_mixins.default_filler_mixin | 2) adding the state option ZERO_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to make unknown regions hold null\n", + "WARNING | 2023-02-23 14:36:39,925 | angr.storage.memory_mixins.default_filler_mixin | 3) adding the state option SYMBOL_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to suppress these messages.\n", + "WARNING | 2023-02-23 14:36:39,927 | angr.storage.memory_mixins.default_filler_mixin | Filling register rbp with 8 unconstrained bytes referenced from 0x400000 (foo+0x0 in foo.o (0x0))\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#https://api.angr.io/angr.html#angr.state_plugins.debug_variables.SimDebugVariablePlugin\n", + "#p = angr.Project(\"various_variables\", load_debug_info=True)\n", + "proj.kb.dvars.load_from_dwarf()\n", + "state = proj.factory.call_state(0x400000) # navigate to the state you want\n", + "#state.dvars.get_variable(\"x\").deref.mem\n", + "state.dvars.dwarf_cfa" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "angr.analyses.decompiler" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING | 2023-02-23 14:52:21,986 | angr.analyses.cfg.cfg_base | \"auto_load_libs\" is enabled. With libraries loaded in project, CFG will cover libraries, which may take significantly more time than expected. You may reload the binary with \"auto_load_libs\" disabled, or specify \"regions\" to limit the scope of CFG recovery.\n" + ] + }, + { + "data": { + "text/plain": [ + "'long long foo(unsigned long a0, unsigned long a1)\\n{\\n char v0; // [bp-0x20]\\n\\n v0 = a1;\\n return a0 + 3;\\n}\\n'" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#proj.analyses.Decompiler()\n", + "sym = proj.loader.find_symbol(\"foo\")\n", + "#proj.load_function(sym)\n", + "cfg = proj.analyses.CFGFast()\n", + "cfg.normalize()\n", + "foo_func = cfg.kb.functions[\"foo\"]\n", + "foo_func.normalize()\n", + "decomp = proj.analyses.Decompiler(foo_func)\n", + "decomp.codegen.cfunc.c_repr()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[int, char]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t = angr.sim_type.parse_signature(\"int foo(int x, char y)\")\n", + "t.arg_names\n", + "t.args\n", + "t." + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "proj.kb.defs\n", + "proj.kb.propagations\n", + "# angr.knowledge_plugins.patches.PatchManager\n", + "proj.kb.functions[\"foo\"] # need to fun CFG first" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "angr.analyses.bindiff.FunctionDiff\n", + "https://api.angr.io/angr.html#angr.analyses.bindiff.FunctionDiff\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.6 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/wp/pycbat/junk/angrtype.py b/wp/pycbat/junk/angrtype.py new file mode 100644 index 00000000..95c59d5e --- /dev/null +++ b/wp/pycbat/junk/angrtype.py @@ -0,0 +1,15 @@ +import angr +import inspect +proj = angr.Project("foo.o") +print(proj.arch) +t = angr.types.parse_type('int') +t.arch = proj.arch +# print(inspect.getmembers(t)) +print(dir(t)) +print(t.base) +print(t.c_repr) +print(t.label) +print(t.signed) +t2 = t.with_arch(proj.arch) +print(t2.alignment) +# print(help(t2.extract)) diff --git a/wp/pycbat/junk/foo.c b/wp/pycbat/junk/foo.c new file mode 100644 index 00000000..54d231a6 --- /dev/null +++ b/wp/pycbat/junk/foo.c @@ -0,0 +1,6 @@ +int foo(int x, char y) +{ + int bar; + bar = x; + return 3 + bar; +} \ No newline at end of file diff --git a/wp/pycbat/junk/foo.o b/wp/pycbat/junk/foo.o new file mode 100644 index 0000000000000000000000000000000000000000..ddde48ca429f348bc2d40fad82f7226064e63edb GIT binary patch literal 2976 zcmbtW-ESL35TEt=^1(H(6SvS1XfBAVeEAZWsI-YvECth6pa@k8;-Qe9FXs={zO(Mm zB`#_yLLj0@2m}v2@W}tb3lfo#c;=CJgoH#Ng5U`%!OY!`ec2vDz({*D^P8F7*`1xW z->$vOrw(F*0&?_uls;)XnNqZd?}HHfjTFXq@j0Pl2d0SUKE5gjaQs#*zLu}s z0SIj}v2;Ms>uoQBF_GjfbkHVN7R2g;IB#4opLw z^fOkC^Tz4f%Ox^YPu!dpC4l@gO%a=t3G~OoFfJGx?rcno1x)4{N(cu)yyaM;5AR|K zNMQrHj=oPJ7ef3|m?B2!j{}AIxD+`tpF5gN8^`GaCWmTCpZrZPG+kTH85ywtRNkEH}+^!wv_Y(+Ygg?}z2S_h#GM zE=P8~;b|@T{m>`-EvYh>j%;#LX*s+lpfD$oRxZScOLrr$H1~vsNg+pIe_A z+eC*1&iqmV^YF``Sw=0h(KrNl+RAL6g(^<}>a9ZP()E`uEslvljpmQ7Eag?b;h-iMQ9n*>wRR22? z=X^etI_E|>*PKjrCy3N}9!ubOUCRGysZ*yD(L98|LJCqPVMF{?sjo_|&TmSb_L|-= zSmB_HeTX~?65e7da(5!Iw(ZCUs~(161#ZW-w%vNaxz+X>KH6g0uGa(i#v!vE*k02; z9MWlfE{EU}EE2pG>yF>0mIcehcfi7~!FK@P1*tc^zJ)JD&kdr1T+3~3H3GZq%Dw#` zHiyAbZCUG^@)5ZvV`+VQMb-6th|!#!CyFtB zCl~sJ0m*rN=5HaM74OJqzxogHdw&!EwT$O?h^OIonSUzd+0OjuLyrxk_n-9_Xrj8P z?gtVuN1?IA=g06@C{QkHd{H*kId+s>jsFtytoq%N_2YX^^(*4wc*e8Vrll^)^}i 0){ + acc *= x; + x--; + } + return acc; +} +''' + + +code = ''' +struct abcd { + int x, + int y +} +int fact(struct abcd* x){ + return x->y + 3; +} +''' + +rax = z3.BitVec("RAX", 64) +init_rax = z3.BitVec("init_RAX", 64) +init_rdi = z3.BitVec("init_RDI", 64) + + +def run_code(code, postcond): + with tempfile.NamedTemporaryFile(suffix=".c") as fp: + with tempfile.TemporaryDirectory() as mydir: + fp.write(code.encode()) + fp.flush() + fp.seek(0) + print(fp.readlines()) + outfile = mydir + "/fact" + print(subprocess.run(["gcc", "-g", "-c", "-O1", + "-o", outfile, fp.name], check=True)) + + print(subprocess.run(["objdump", "-d", outfile], check=True)) + return run_wp(outfile, func="fact", postcond=postcond) + + +code = ''' +int fact(int x){ + return 3; +} +''' + +postcond = rax == 3 + +assert run_code(code, postcond)[0] == z3.unsat + +code = ''' +int fact(int x){ + return 3; +} +''' +postcond = rax == 4 + +assert run_code(code, postcond)[0] == z3.sat + +code = ''' +int fact(int x){ + return x + 3; +} +''' +postcond = z3.Extract(31, 0, rax) == z3.Extract(31, 0, init_rdi) + 3 +# postcond = rax == init_rdi + 3 + +assert run_code(code, postcond)[0] == z3.unsat diff --git a/wp/pycbat/pyproject.toml b/wp/pycbat/pyproject.toml new file mode 100644 index 00000000..016fd524 --- /dev/null +++ b/wp/pycbat/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "cbat" +version = "0.0.1" +authors = [ + { name="Example Author", email="author@example.com" }, +] +description = "Comparative Binary Analysis Tool" +readme = "README.md" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] + +[project.urls] +"Homepage" = "https://github.com/draperlaboratory/cbat_tools" \ No newline at end of file diff --git a/wp/pycbat/requirements.txt b/wp/pycbat/requirements.txt new file mode 100644 index 00000000..7ca67aa4 --- /dev/null +++ b/wp/pycbat/requirements.txt @@ -0,0 +1,2 @@ +z3 +docker diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py new file mode 100644 index 00000000..c9ac212a --- /dev/null +++ b/wp/pycbat/src/cbat/__init__.py @@ -0,0 +1,446 @@ + + +import subprocess +import z3 +import docker + +IMAGE = "philzook58/cbat_min" +client = docker.from_env() +print("Downloading CBAT Docker Image") +client.images.pull(IMAGE) +print("Download finished") + +''' +# import angr # , monkeyhex +class Reg(Z3BitVec): +class Mem(Z3Array): + +class +class PostCond(): + def __init__(self): + self.precond + @property + def init(self): + @property + def mod(self): + def orig(self): + def high # C? + def low + def sexpr(): + + def mem(): + +class HighVar(): + def __init__(self, typ, lowloc): + self.type = angr.type(typ) + def + +class Correspondence # Bisim? + +class LowVar(): + + +class NameBuilder() + def __init__(): + self.prefix = "" + self.suffix = "" + # This property stuff seems too clever for it's own good. + @property + def init + def orig + def mod + +variable dictionary: +{ + init: {reg : z3.BitVec(capital(f"init_{reg}"), reg.size()) for reg in state} + mod: {} + orig: {} + "C" : { cvar : z3.BitVec() for cvar in api } +} + +state1.high["foo"] == state2.high["foo"] +state1.high["foo"].field1.field2->field3 + +foo = state1.high["foo"] +state1.mem[] +Do lowering now or put it all in cbat? + + +class Z3View(Z3Expr): + + def __init__(self): + mem + addr + typ = angr.type() + def cast(): + return Concat(mem[addr], mem[addr+1], ...) + def as_type(): + def field(self, name): + def deref(self): + + + +mem[foo + fieldy].as64 + + + +class CBATModel(): + + +''' + + +def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, postcond=None, use_docker=True, **kwargs): + cmd = ["bap", "wp", "--no-cache", + "--show", "precond-smtlib", "--func", func] + if precond != None: + cmd.append("--precond") + cmd.append("(assert " + precond.sexpr() + ")") + if postcond != None: + cmd.append("--postcond") + cmd.append("(assert " + postcond.sexpr() + ")") + + # forward kwargs. Typo unfriendly + flags = ["check-invalid-derefs", "check-null-derefs"] # and so on + assert all(k in flags for k in kwargs.keys()) + cmd += [f"--{k}" for k, + v in kwargs.items() if v == True and k in flags] + + cmd.append(filename) + if filename2 != None: + cmd.append(filename2) + + if use_docker: + cmd = ["docker", "run", "-it", + "-v", f"{filename}:{filename}", IMAGE] + cmd + ''' + # Having a lot of trouble with the docker library here :( + try: + docker_res = client.containers.run("38c124f7f4f6", cmd, volumes={ + filename: {'bind': filename, 'mode': 'ro'}}, auto_remove=True, stderr=True) + except docker.errors.ContainerError as e: + print(e) + print(e.stderr) + raise e + print(docker_res) + stdout = docker_res + ''' + res = subprocess.run(cmd, check=False, capture_output=True) + print(res.args) + print(res.stdout.decode()) + stdout = res.stdout.decode() + smtlib = stdout.split("Z3 :")[1] + s = z3.Solver() + s.from_string(smtlib) + print(s) + res = s.check() + if res == z3.unsat: + return (z3.unsat, f"Property {postcond} proved") + elif res == z3.sat: + # raise? + return (z3.sat, s.model()) + + +""" + +run () { + bap wp main main.patched \ + --no-cache \ + --no-objdump \ + --ogre-orig=./vibes/loader.ogre \ + --ogre-mod=./main.patched.ogre \ + --func=main \ + --show=diagnostics \ + --postcond="(assert (= (bvadd R0_mod #x00000002) R0_orig))" +} +""" + + +""" +NAME + bap-wp - wP is a comparative analysis tool + +SYNOPSIS + bap wp [OPTION]… [VAL]… + + WP is a comparative analysis tool. It can compare two binaries and + check that they behave in similar ways. It can also be used to check a + single binary for certain behaviors. + +ARGUMENTS + VAL Path(s) to one or two binaries to analyze. If two binaries are + specified, WP will run a comparative analysis. If 0 or more than 2 + files are given, WP will exit with an error message. + +OPTIONS + --bildb-output=VAL + When WP results in SAT, outputs a BILDB initialization script to + the filename specified. This YAML file sets the registers and + memory to the values found in the countermodel, allowing BILDB to + follow the same execution trace. In the case WP returns UNSAT or + UNKNOWN, no script will be outputted. + + --check-invalid-derefs + This flag is only used in a comparative analysis. Checks that the + modified binary has no additional paths that result in dereferences + to invalid memory locations. That is, all memory dereferences are + either on the stack or heap. The stack is defined as the memory + region above the current stack pointer, and the heap is defined as + the memory region 0x256 bytes below the lowest address of the + stack. + + --check-null-derefs + Checks for inputs that would result in dereferencing a null value + during a memory read or write. In the case of a comparative + analysis, checks that the modified binary has no additional paths + with null dereferences in comparison with the original binary. + + --compare-func-calls + This flag is only used in a comparative analysis. Checks that + function calls do not occur in the modified binary if they have not + occurred in the original binary. + + --compare-post-reg-values=VAL + This flag is only used in a comparatve analysis. Compares the + values stored in the specified registers at the end of the + function's execution. For example, `RAX,RDI' compares the values of + RAX and RDI at the end of execution. If unsure about which + registers to compare, check the architecture's ABI. x86_64 + architectures place their output in RAX, and ARM architectures + place their output in R0. + + --debug=VAL + A list of various debugging statistics to display. Multiple + statistics may be specified in a comma-separated list. For example: + `--debug=z3-solver-stats,z3-verbose'. The options are: + `z3-solver-stats': Information and statistics about Z3's solver. It + includes information such as the maximum amount of memory used and + the number of allocations. `z3-verbose': Z3's verbosity level. It + outputs information such as the tactics the Z3 solver used. + `constraint-stats': Statistics regarding the internal `Constr.t' + data structure, including the number of goals, ITEs, clauses, and + substitutions. `eval-constraint-stats': Statistics regarding the + internal expression lists during evaluation of the `Constr.t' data + type. + + --ext-solver-path=VAL + Path of external smt solver to call. Boolector recommended. + + --fun-specs=VAL + List of built-in function summaries to be used at a function call + site in order of precedence. A target function will be mapped to a + function spec if it fulfills the spec's requirements. All function + specs set the target function as called and update the stack + pointer. The default specs set are verifier-assume, + varifier-nondet, empty, and chaos-caller-saved. Note that if a + function is set to be inlined, it will not use any of the following + function specs. Available built-in specs: `verifier-error': Used + for calls to __VERIFIER_error and __assert_fail. Looks for inputs + that would cause execution to reach these functions. + `verifier-assume': Used for calls to __VERIFIER_assume. Adds an + assumption to the precondition based on the argument to the + function call. `verifier-nondet': Used for calls to + nondeterministic functions such as __VERIFIER_nondet_*, calloc, and + malloc. Chaoses the output to the function call representing an + arbitrary pointer. `afl-maybe-log': Used for calls to + __afl_maybe_log. Chaoses the registers RAX, RCX, and RDX. + `arg-terms': Used when BAP's uplifter returns a nonempty list of + input and output registers for the target function. Chaoses this + list of output registers. `chaos-caller-saved': Used for the x86 + architecture. Chaoses the caller-saved registers. `rax-out': Chaos + RAX if it can be found on the left-hand side of an assignment in + the target function. `chaos-rax': Chaos RAX regardless if it has + been used on the left-hand side of an assignment in the target + function. `empty': Used for empty subroutines. Performs no actions. + + --func-name-map=VAL + Maps the subroutine names from the original binary to their names + in the modified binary based on the regex from the user. Usage: + --func-name-map=",". For example: --func-name-map="\(.*\),foo_\1" means that all + subroutines in the original binary have foo_ prepended in the + modified binary. Multiple patterns can be used to map function + names and are delimited with ';'s (i.e. + ",;,"). By default, WP + assumes subroutines have the same names between the two binaries. + + --function=VAL, --func=VAL + Determines which function to verify. WP verifies a single function, + though calling it on the `main' function along with the `inline' + option will analyze the whole program. If no function is specified + or the function cannot be found in the binary/binaries, WP will + exit with an error message. + + --gdb-output=VAL + When WP results in SAT, outputs a gdb script to the filename + specified. From within gdb, run `source filename.gdb' to set a + breakpoint at the function given by `--func' and fill the + appropriate registers with the values found in the countermodel. In + the case WP returns UNSAT or UNKNOWN, no script will be outputted. + + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --init-mem + This flag adds a number of hypotheses of the form mem[addr] == val, + where the (addr, val) pairs come from the .rodata section of the + binar(ies) under scrutiny. + + --inline=VAL + Functions specified by the provided POSIX regular expression will + be inlined. When functions are not inlined, heuristic function + summaries are used at function call sites. For example, if you want + to inline the functions `foo' and `bar', you can write + `--inline=foo|bar'. To inline everything, use `--inline=.*' (not + generally recommended). + + -L VAL, --plugin-path=VAL, --load-path=VAL + Adds folder to the list of plugins search paths + + --logdir=VAL, --log-dir=VAL (absent BAP_LOG_DIR env) + A folder for log files. + + --loop-invariant=VAL + Usage: `(((address ) (invariant )) (...))'. Assumes + the subroutine contains unnested while loops with one entry point + and one exit each. Checks the loop invariant written in smt-lib2 + format for the loop with its header at the given address. The + address should be written in BAP's bitvector string format. Only + supported for a single binary analysis. + + --mem-offset + This flag is only used in a comparative analysis. Maps the symbols + in the data and bss sections from their addresses in the original + binary to their addresses in the modified binary. If this flag is + not present, WP assumes that memory between both binaries start at + the same offset. + + --no-chaos=VAL + By default, nondeterministic functions and functions that allocate + memory will use the Chaos function spec that freshens the output + variable, "chaosing" its value as a result. Setting this flag will + turn off this feature and treat the specified list as other + functions that will use other function specs. + + --num-unroll=VAL + Specifies the number of times to unroll each loop. WP will unroll + each loop 5 times by default. + + --ogre=VAL + Path of an ogre file, which will be used instead of the default BAP + lifter. This is useful for stripped binaries, or to lift only a + portion of a large binary. + + --ogre-mod=VAL + Path of an ogre file, which will be used instead of the default BAP + lifter for the modified binary in comparative analysis. This is + useful for stripped binaries, or to lift only a portion of a large + binary. + + --ogre-orig=VAL + Path of an ogre file, which will be used instead of the default BAP + lifter for the original binary in comparative analysis. This is + useful for stripped binaries, or to lift only a portion of a large + binary. + + --pointer-reg-list=VAL + This flag specifies a comma delimited list of input registers to be + treated as pointers at the start of program execution. This means + that these registers are restricted in value to point to memory + known to be initialized at the start of the function. For example, + `RSI,RDI` would specify that `RSI` and `RDI`'s values should be + restricted to initialized memory at the start of execution. + + --postcond=VAL + Allows you to specify an assertion that WP will assume is true at + the end of the function it is analyzing, using the smt-lib2 format. + Similar to `--precond', one may create comparative postconditions + on variables by appending `_orig' and `_mod' to variable names. If + no postcondition is specified, a trivial postcondition of `true' + will be used. + + --precond=VAL + Allows you to specify an assertion that WP will assume is true at + the beginning of the function it is analyzing. Assertions are + specified in the smt-lib2 format. For comparative predicates, one + may refer to variables in the original and modified programs by + appending the suffix `_orig' and `_mod' to variable names in the + smt-lib expression. For example, `--precond="(assert (= RDI_mod + # x0000000000000003)) (assert (= RDI_orig #x0000000000000003))".' If + no precondition is specified, a trivial precondition of `true' will + be used. + + --recipe=VAL + Load the specified recipe + + --rewrite-addresses + This flag is only used in a comparative analysis. Rewrites the + concrete addresses in the modified binary to the same address in + the original binary if they point to the same symbol. This flag + should not be used in conjunction with the `--mem-offset' flag. + + --show=VAL + A list of details to print out from the analysis. Multiple options + can be specified as a comma-separated list. For example: + `--show=bir,refuted-goals'. The options are: `bir': The code of the + binary/binaries in BAP Intermediate Representation. + `refuted-goals': In the case WP results in SAT, a list of goals + refuted in the model that contains their tagged names, their + concrete values, and their Z3 representation. `paths': The + execution path of the binary that results in a refuted goal. The + path contains information about the jumps taken, their addresses, + and the values of the registers at each jump. This option + automatically prints out the refuted goals. `precond-smtlib': The + precondition printed out in Z3's SMT-LIB2 format. + `precond-internal': The precondition printed out in WP's internal + format for the `Constr.t' type. `colorful': precond-internal can + have color to highlight key words, making the output easier to + read. Warning: Coloring will change the syntax, so don't use this + flag if you wish to pass the printed output as an input elsewhere. + `diagnostics': Prints out debugging information about runtime to + stderr. + + --stack-base=VAL + Sets the location of the stack frame for the function under + analysis. By default, WP assumes the stack frame for the current + function is between 0x40000000 and 0x3F800080. + + --stack-size=VAL + Sets the size of the stack, which should be denoted in bytes. By + default, the size of the stack is 0x800000 which is 8MB. + + --trip-asserts + Looks for inputs to the subroutine that would cause an + `__assert_fail' or `__VERIFIER_error' to be reached. + + --use-fun-input-regs + At a function call site, uses all possible input registers as + arguments to a function symbol generated for an output register. If + this flag is not present, no registers will be used. + + --user-func-specs=VAL + List of user-defined subroutine specifications. For each + subroutine, it creates the weakest precondition given the name of + the subroutine and its pre and post-conditions. Usage: + --user-func-specs=",,". For + example, --user-func-specs="foo,(assert (= RAX RDI)),(assert (= RAX + init_RDI))" means "for subroutine named foo, specify that its + precondition is RAX = RDI and its postcondition is RAX = init_RDI". + Multiple subroutine specifications are delimited with ';'s, i.e.: + --user-func-specs="foo,(assert (= RAX RDI)),(assert (= RAX + init_RDI)); bar,(assert (= RAX RDI)),(assert (= RAX init_RDI))". + + --user-func-specs-mod=VAL + List of user-defined subroutine specifications to be used only for + the modified binary in comparative analysis. Usage: See + --user-func-specs. + + --user-func-specs-orig=VAL + List of user-defined subroutine specifications to be used only for + the original binary in comparative analysis. Usage: See + --user-func-specs. + + --version + Show version information. +""" From b91dea7be767ef7aa010df5f1ce518989556f704 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Tue, 28 Feb 2023 15:04:43 -0500 Subject: [PATCH 2/9] minimum dockerfile and cleanup --- Dockerfile.min | 37 ++ wp/pycbat/.gitignore | 3 +- wp/pycbat/README.md | 1 + wp/pycbat/junk/README.md | 18 - wp/pycbat/junk/angrplay.ipynb | 417 ------------------ wp/pycbat/junk/angrtype.py | 15 - wp/pycbat/junk/foo.c | 6 - wp/pycbat/junk/foo.o | Bin 2976 -> 0 bytes wp/pycbat/src/cbat/__init__.py | 415 +---------------- .../{junk/tests.py => tests/test_cbat.py} | 71 ++- 10 files changed, 74 insertions(+), 909 deletions(-) create mode 100644 Dockerfile.min delete mode 100644 wp/pycbat/junk/README.md delete mode 100644 wp/pycbat/junk/angrplay.ipynb delete mode 100644 wp/pycbat/junk/angrtype.py delete mode 100644 wp/pycbat/junk/foo.c delete mode 100644 wp/pycbat/junk/foo.o rename wp/pycbat/{junk/tests.py => tests/test_cbat.py} (54%) diff --git a/Dockerfile.min b/Dockerfile.min new file mode 100644 index 00000000..acceb8de --- /dev/null +++ b/Dockerfile.min @@ -0,0 +1,37 @@ +FROM ubuntu:20.04 +# Multi-stage docker build +# Copying over only needed compiled objects from development docker to get CBAT to run +# https://docs.docker.com/build/building/multi-stage/ + +WORKDIR /root/ +# Bap cli binary +COPY --from=cbat:latest /home/opam/.opam/4.14/bin/bap /home/opam/.opam/4.14/bin/bap +ENV PATH="$PATH:/home/opam/.opam/4.14/bin" + +# Plugins +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/bap/*.plugin /home/opam/.opam/4.14/lib/bap/ +# Removing some unneeded larger plugins mostly related to primus +RUN rm /home/opam/.opam/4.14/lib/bap/primus* \ + /home/opam/.opam/4.14/lib/bap/beagle.plugin \ + /home/opam/.opam/4.14/lib/bap/run.plugin \ + /home/opam/.opam/4.14/lib/bap/constant_tracker.plugin +# Do we need this? /home/opam/.opam/4.14/lib/bap/frontc_parser.plugin \ + +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/bap/primus_lisp.plugin /home/opam/.opam/4.14/lib/bap/ + +# Some primus semantics +COPY --from=cbat:latest /home/opam/.opam/4.14/share/bap/primus/ /home/opam/.opam/4.14/share/bap/primus/ + +# Get Z3 library +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/stublibs/libz3.so /home/opam/.opam/4.14/lib/stublibs/libz3.so + +# Objdump and Objdump dependencies +COPY --from=cbat:latest /usr/bin/objdump /usr/bin/objdump +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libopcodes-2.34-multiarch.so /lib/x86_64-linux-gnu/libopcodes-2.34-multiarch.so +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libbfd-2.34-multiarch.so /lib/x86_64-linux-gnu/libbfd-2.34-multiarch.so +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libctf-multiarch.so.0 /lib/x86_64-linux-gnu/libctf-multiarch.so.0 + +# Removing unneeded bytecode .cma from plugins +COPY --from=cbat:latest /usr/bin/zip /usr/bin/zip +RUN for file in /home/opam/.opam/4.14/lib/bap/*.plugin; do zip --delete $file "*.cma"; done +RUN rm /usr/bin/zip diff --git a/wp/pycbat/.gitignore b/wp/pycbat/.gitignore index ed8ebf58..f0712403 100644 --- a/wp/pycbat/.gitignore +++ b/wp/pycbat/.gitignore @@ -1 +1,2 @@ -__pycache__ \ No newline at end of file +__pycache__ +.pytest_cache \ No newline at end of file diff --git a/wp/pycbat/README.md b/wp/pycbat/README.md index 414d8521..35de37b3 100644 --- a/wp/pycbat/README.md +++ b/wp/pycbat/README.md @@ -5,6 +5,7 @@ Installation ``` +docker pull philzook58/cbat_min python3 -m pip install -e . ``` diff --git a/wp/pycbat/junk/README.md b/wp/pycbat/junk/README.md deleted file mode 100644 index f09397f8..00000000 --- a/wp/pycbat/junk/README.md +++ /dev/null @@ -1,18 +0,0 @@ - - -Ok. - -Docker install cbat -pip install pycbat - -extract angr semantics -structs. Slam through? -Use angr to construct preconditions - -cbat jupyter notebook tutorial - - -Requirements: -Z3 -docker -angr \ No newline at end of file diff --git a/wp/pycbat/junk/angrplay.ipynb b/wp/pycbat/junk/angrplay.ipynb deleted file mode 100644 index 1ef66a7b..00000000 --- a/wp/pycbat/junk/angrplay.ipynb +++ /dev/null @@ -1,417 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [], - "source": [ - "import angr" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING | 2023-02-24 14:22:16,654 | cle.loader | The main binary is a position-independent executable. It is being loaded with a base address of 0x400000.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slotnames__', '__str__', '__subclasshook__', '__weakref__', '_arch', '_base_name', '_can_refine_int', '_fields', '_init_str', '_refine', '_refine_dir', '_size', '_with_arch', 'alignment', 'arch', 'base', 'c_repr', 'copy', 'extract', 'extract_claripy', 'label', 'signed', 'size', 'store', 'with_arch']\n", - "True\n", - "\n", - "None\n", - "True\n", - "4\n" - ] - } - ], - "source": [ - "proj = angr.Project(\"foo.o\", load_debug_info=True)\n", - "print(proj.arch)\n", - "t = angr.types.parse_type('int')\n", - "t.arch = proj.arch\n", - "# print(inspect.getmembers(t))\n", - "print(dir(t))\n", - "print(t.base)\n", - "print(t.c_repr)\n", - "print(t.label)\n", - "print(t.signed)\n", - "t2 = t.with_arch(proj.arch)\n", - "print(t2.alignment)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "OrderedDict([('x', int), ('y', long)])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "angr.types.parse_type('struct aa {int x; long y;}').fields" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['ARCH',\n", - " 'ARG_REGS',\n", - " 'ArgSession',\n", - " 'CALLEE_CLEANUP',\n", - " 'CALLER_SAVED_REGS',\n", - " 'FP_ARG_REGS',\n", - " 'FP_RETURN_VAL',\n", - " 'OVERFLOW_FP_RETURN_VAL',\n", - " 'OVERFLOW_RETURN_VAL',\n", - " 'RETURN_ADDR',\n", - " 'RETURN_VAL',\n", - " 'STACKARG_SP_BUFF',\n", - " 'STACKARG_SP_DIFF',\n", - " 'STACK_ALIGNMENT',\n", - " '__annotations__',\n", - " '__class__',\n", - " '__delattr__',\n", - " '__dict__',\n", - " '__dir__',\n", - " '__doc__',\n", - " '__eq__',\n", - " '__format__',\n", - " '__ge__',\n", - " '__getattribute__',\n", - " '__gt__',\n", - " '__hash__',\n", - " '__init__',\n", - " '__init_subclass__',\n", - " '__le__',\n", - " '__lt__',\n", - " '__module__',\n", - " '__ne__',\n", - " '__new__',\n", - " '__reduce__',\n", - " '__reduce_ex__',\n", - " '__repr__',\n", - " '__setattr__',\n", - " '__sizeof__',\n", - " '__str__',\n", - " '__subclasshook__',\n", - " '__weakref__',\n", - " '_classify',\n", - " '_combine_classes',\n", - " '_flatten',\n", - " '_match',\n", - " '_standardize_value',\n", - " 'arch',\n", - " 'arg_locs',\n", - " 'arg_session',\n", - " 'find_cc',\n", - " 'fp_args',\n", - " 'get_arg_info',\n", - " 'get_args',\n", - " 'guess_prototype',\n", - " 'int_args',\n", - " 'is_fp_arg',\n", - " 'is_fp_value',\n", - " 'memory_args',\n", - " 'next_arg',\n", - " 'return_addr',\n", - " 'return_in_implicit_outparam',\n", - " 'return_val',\n", - " 'set_return_val',\n", - " 'setup_callsite',\n", - " 'stack_space',\n", - " 'teardown_callsite']" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dir(proj.factory.cc()) # calling conventions. Interesting\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Hmm. Here's an idea. Literally use angr to write specs?\n", - "mem.long.yada == foo\n", - "\n", - "https://api.angr.io/angr.html#module-angr.calling_conventions\n", - "\n", - "\n", - "state \n", - "\n", - "unroll_loop parameter. That's interesting." - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "st = proj.factory.entry_state()\n", - "st = proj.factory.entry_state(add_options={angr.options.LAZY_SOLVES, angr.options.SYMBOLIC, angr.options.SYMBOLIC_INITIAL_VALUES})\n", - "st.regs.rax\n", - "st.mem[0x601048].long\n", - "st.registers\n", - "st.memory\n", - "list(st.solver.get_variables(\"\"))\n", - "st.regs.rax.to_claripy()\n", - "st.registers." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "import claripy\n", - "s = claripy.Solver()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(,)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.add(st.regs.rax == st.regs.rdi)\n", - "()" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING | 2023-02-23 14:36:39,907 | angr.calling_conventions | Guessing call prototype. Please specify prototype.\n", - "WARNING | 2023-02-23 14:36:39,919 | angr.storage.memory_mixins.default_filler_mixin | The program is accessing register with an unspecified value. This could indicate unwanted behavior.\n", - "WARNING | 2023-02-23 14:36:39,921 | angr.storage.memory_mixins.default_filler_mixin | angr will cope with this by generating an unconstrained symbolic variable and continuing. You can resolve this by:\n", - "WARNING | 2023-02-23 14:36:39,922 | angr.storage.memory_mixins.default_filler_mixin | 1) setting a value to the initial state\n", - "WARNING | 2023-02-23 14:36:39,923 | angr.storage.memory_mixins.default_filler_mixin | 2) adding the state option ZERO_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to make unknown regions hold null\n", - "WARNING | 2023-02-23 14:36:39,925 | angr.storage.memory_mixins.default_filler_mixin | 3) adding the state option SYMBOL_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to suppress these messages.\n", - "WARNING | 2023-02-23 14:36:39,927 | angr.storage.memory_mixins.default_filler_mixin | Filling register rbp with 8 unconstrained bytes referenced from 0x400000 (foo+0x0 in foo.o (0x0))\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#https://api.angr.io/angr.html#angr.state_plugins.debug_variables.SimDebugVariablePlugin\n", - "#p = angr.Project(\"various_variables\", load_debug_info=True)\n", - "proj.kb.dvars.load_from_dwarf()\n", - "state = proj.factory.call_state(0x400000) # navigate to the state you want\n", - "#state.dvars.get_variable(\"x\").deref.mem\n", - "state.dvars.dwarf_cfa" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "angr.analyses.decompiler" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING | 2023-02-23 14:52:21,986 | angr.analyses.cfg.cfg_base | \"auto_load_libs\" is enabled. With libraries loaded in project, CFG will cover libraries, which may take significantly more time than expected. You may reload the binary with \"auto_load_libs\" disabled, or specify \"regions\" to limit the scope of CFG recovery.\n" - ] - }, - { - "data": { - "text/plain": [ - "'long long foo(unsigned long a0, unsigned long a1)\\n{\\n char v0; // [bp-0x20]\\n\\n v0 = a1;\\n return a0 + 3;\\n}\\n'" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#proj.analyses.Decompiler()\n", - "sym = proj.loader.find_symbol(\"foo\")\n", - "#proj.load_function(sym)\n", - "cfg = proj.analyses.CFGFast()\n", - "cfg.normalize()\n", - "foo_func = cfg.kb.functions[\"foo\"]\n", - "foo_func.normalize()\n", - "decomp = proj.analyses.Decompiler(foo_func)\n", - "decomp.codegen.cfunc.c_repr()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[int, char]" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "t = angr.sim_type.parse_signature(\"int foo(int x, char y)\")\n", - "t.arg_names\n", - "t.args\n", - "t." - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "proj.kb.defs\n", - "proj.kb.propagations\n", - "# angr.knowledge_plugins.patches.PatchManager\n", - "proj.kb.functions[\"foo\"] # need to fun CFG first" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "angr.analyses.bindiff.FunctionDiff\n", - "https://api.angr.io/angr.html#angr.analyses.bindiff.FunctionDiff\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.6 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/wp/pycbat/junk/angrtype.py b/wp/pycbat/junk/angrtype.py deleted file mode 100644 index 95c59d5e..00000000 --- a/wp/pycbat/junk/angrtype.py +++ /dev/null @@ -1,15 +0,0 @@ -import angr -import inspect -proj = angr.Project("foo.o") -print(proj.arch) -t = angr.types.parse_type('int') -t.arch = proj.arch -# print(inspect.getmembers(t)) -print(dir(t)) -print(t.base) -print(t.c_repr) -print(t.label) -print(t.signed) -t2 = t.with_arch(proj.arch) -print(t2.alignment) -# print(help(t2.extract)) diff --git a/wp/pycbat/junk/foo.c b/wp/pycbat/junk/foo.c deleted file mode 100644 index 54d231a6..00000000 --- a/wp/pycbat/junk/foo.c +++ /dev/null @@ -1,6 +0,0 @@ -int foo(int x, char y) -{ - int bar; - bar = x; - return 3 + bar; -} \ No newline at end of file diff --git a/wp/pycbat/junk/foo.o b/wp/pycbat/junk/foo.o deleted file mode 100644 index ddde48ca429f348bc2d40fad82f7226064e63edb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2976 zcmbtW-ESL35TEt=^1(H(6SvS1XfBAVeEAZWsI-YvECth6pa@k8;-Qe9FXs={zO(Mm zB`#_yLLj0@2m}v2@W}tb3lfo#c;=CJgoH#Ng5U`%!OY!`ec2vDz({*D^P8F7*`1xW z->$vOrw(F*0&?_uls;)XnNqZd?}HHfjTFXq@j0Pl2d0SUKE5gjaQs#*zLu}s z0SIj}v2;Ms>uoQBF_GjfbkHVN7R2g;IB#4opLw z^fOkC^Tz4f%Ox^YPu!dpC4l@gO%a=t3G~OoFfJGx?rcno1x)4{N(cu)yyaM;5AR|K zNMQrHj=oPJ7ef3|m?B2!j{}AIxD+`tpF5gN8^`GaCWmTCpZrZPG+kTH85ywtRNkEH}+^!wv_Y(+Ygg?}z2S_h#GM zE=P8~;b|@T{m>`-EvYh>j%;#LX*s+lpfD$oRxZScOLrr$H1~vsNg+pIe_A z+eC*1&iqmV^YF``Sw=0h(KrNl+RAL6g(^<}>a9ZP()E`uEslvljpmQ7Eag?b;h-iMQ9n*>wRR22? z=X^etI_E|>*PKjrCy3N}9!ubOUCRGysZ*yD(L98|LJCqPVMF{?sjo_|&TmSb_L|-= zSmB_HeTX~?65e7da(5!Iw(ZCUs~(161#ZW-w%vNaxz+X>KH6g0uGa(i#v!vE*k02; z9MWlfE{EU}EE2pG>yF>0mIcehcfi7~!FK@P1*tc^zJ)JD&kdr1T+3~3H3GZq%Dw#` zHiyAbZCUG^@)5ZvV`+VQMb-6th|!#!CyFtB zCl~sJ0m*rN=5HaM74OJqzxogHdw&!EwT$O?h^OIonSUzd+0OjuLyrxk_n-9_Xrj8P z?gtVuN1?IA=g06@C{QkHd{H*kId+s>jsFtytoq%N_2YX^^(*4wc*e8Vrll^)^}ifield3 - -foo = state1.high["foo"] -state1.mem[] -Do lowering now or put it all in cbat? - - -class Z3View(Z3Expr): - - def __init__(self): - mem - addr - typ = angr.type() - def cast(): - return Concat(mem[addr], mem[addr+1], ...) - def as_type(): - def field(self, name): - def deref(self): - - -mem[foo + fieldy].as64 + - -class CBATModel(): - - -''' - - -def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, postcond=None, use_docker=True, **kwargs): +def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, postcond=None, docker_image="philzook58/cbat_min", **kwargs): cmd = ["bap", "wp", "--no-cache", "--show", "precond-smtlib", "--func", func] if precond != None: @@ -109,21 +22,11 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, if filename2 != None: cmd.append(filename2) - if use_docker: - cmd = ["docker", "run", "-it", - "-v", f"{filename}:{filename}", IMAGE] + cmd - ''' - # Having a lot of trouble with the docker library here :( - try: - docker_res = client.containers.run("38c124f7f4f6", cmd, volumes={ - filename: {'bind': filename, 'mode': 'ro'}}, auto_remove=True, stderr=True) - except docker.errors.ContainerError as e: - print(e) - print(e.stderr) - raise e - print(docker_res) - stdout = docker_res - ''' + if docker_image != None: + flags = ["-it", "-v", f"{filename}:{filename}"] + if filename2 != None: + flags += ["-v", f"{filename2}:{filename2}"] + cmd = ["docker", "run"] + flags + [docker_image] + cmd res = subprocess.run(cmd, check=False, capture_output=True) print(res.args) print(res.stdout.decode()) @@ -138,309 +41,3 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, elif res == z3.sat: # raise? return (z3.sat, s.model()) - - -""" - -run () { - bap wp main main.patched \ - --no-cache \ - --no-objdump \ - --ogre-orig=./vibes/loader.ogre \ - --ogre-mod=./main.patched.ogre \ - --func=main \ - --show=diagnostics \ - --postcond="(assert (= (bvadd R0_mod #x00000002) R0_orig))" -} -""" - - -""" -NAME - bap-wp - wP is a comparative analysis tool - -SYNOPSIS - bap wp [OPTION]… [VAL]… - - WP is a comparative analysis tool. It can compare two binaries and - check that they behave in similar ways. It can also be used to check a - single binary for certain behaviors. - -ARGUMENTS - VAL Path(s) to one or two binaries to analyze. If two binaries are - specified, WP will run a comparative analysis. If 0 or more than 2 - files are given, WP will exit with an error message. - -OPTIONS - --bildb-output=VAL - When WP results in SAT, outputs a BILDB initialization script to - the filename specified. This YAML file sets the registers and - memory to the values found in the countermodel, allowing BILDB to - follow the same execution trace. In the case WP returns UNSAT or - UNKNOWN, no script will be outputted. - - --check-invalid-derefs - This flag is only used in a comparative analysis. Checks that the - modified binary has no additional paths that result in dereferences - to invalid memory locations. That is, all memory dereferences are - either on the stack or heap. The stack is defined as the memory - region above the current stack pointer, and the heap is defined as - the memory region 0x256 bytes below the lowest address of the - stack. - - --check-null-derefs - Checks for inputs that would result in dereferencing a null value - during a memory read or write. In the case of a comparative - analysis, checks that the modified binary has no additional paths - with null dereferences in comparison with the original binary. - - --compare-func-calls - This flag is only used in a comparative analysis. Checks that - function calls do not occur in the modified binary if they have not - occurred in the original binary. - - --compare-post-reg-values=VAL - This flag is only used in a comparatve analysis. Compares the - values stored in the specified registers at the end of the - function's execution. For example, `RAX,RDI' compares the values of - RAX and RDI at the end of execution. If unsure about which - registers to compare, check the architecture's ABI. x86_64 - architectures place their output in RAX, and ARM architectures - place their output in R0. - - --debug=VAL - A list of various debugging statistics to display. Multiple - statistics may be specified in a comma-separated list. For example: - `--debug=z3-solver-stats,z3-verbose'. The options are: - `z3-solver-stats': Information and statistics about Z3's solver. It - includes information such as the maximum amount of memory used and - the number of allocations. `z3-verbose': Z3's verbosity level. It - outputs information such as the tactics the Z3 solver used. - `constraint-stats': Statistics regarding the internal `Constr.t' - data structure, including the number of goals, ITEs, clauses, and - substitutions. `eval-constraint-stats': Statistics regarding the - internal expression lists during evaluation of the `Constr.t' data - type. - - --ext-solver-path=VAL - Path of external smt solver to call. Boolector recommended. - - --fun-specs=VAL - List of built-in function summaries to be used at a function call - site in order of precedence. A target function will be mapped to a - function spec if it fulfills the spec's requirements. All function - specs set the target function as called and update the stack - pointer. The default specs set are verifier-assume, - varifier-nondet, empty, and chaos-caller-saved. Note that if a - function is set to be inlined, it will not use any of the following - function specs. Available built-in specs: `verifier-error': Used - for calls to __VERIFIER_error and __assert_fail. Looks for inputs - that would cause execution to reach these functions. - `verifier-assume': Used for calls to __VERIFIER_assume. Adds an - assumption to the precondition based on the argument to the - function call. `verifier-nondet': Used for calls to - nondeterministic functions such as __VERIFIER_nondet_*, calloc, and - malloc. Chaoses the output to the function call representing an - arbitrary pointer. `afl-maybe-log': Used for calls to - __afl_maybe_log. Chaoses the registers RAX, RCX, and RDX. - `arg-terms': Used when BAP's uplifter returns a nonempty list of - input and output registers for the target function. Chaoses this - list of output registers. `chaos-caller-saved': Used for the x86 - architecture. Chaoses the caller-saved registers. `rax-out': Chaos - RAX if it can be found on the left-hand side of an assignment in - the target function. `chaos-rax': Chaos RAX regardless if it has - been used on the left-hand side of an assignment in the target - function. `empty': Used for empty subroutines. Performs no actions. - - --func-name-map=VAL - Maps the subroutine names from the original binary to their names - in the modified binary based on the regex from the user. Usage: - --func-name-map=",". For example: --func-name-map="\(.*\),foo_\1" means that all - subroutines in the original binary have foo_ prepended in the - modified binary. Multiple patterns can be used to map function - names and are delimited with ';'s (i.e. - ",;,"). By default, WP - assumes subroutines have the same names between the two binaries. - - --function=VAL, --func=VAL - Determines which function to verify. WP verifies a single function, - though calling it on the `main' function along with the `inline' - option will analyze the whole program. If no function is specified - or the function cannot be found in the binary/binaries, WP will - exit with an error message. - - --gdb-output=VAL - When WP results in SAT, outputs a gdb script to the filename - specified. From within gdb, run `source filename.gdb' to set a - breakpoint at the function given by `--func' and fill the - appropriate registers with the values found in the countermodel. In - the case WP returns UNSAT or UNKNOWN, no script will be outputted. - - --help[=FMT] (default=auto) - Show this help in format FMT. The value FMT must be one of auto, - pager, groff or plain. With auto, the format is pager or plain - whenever the TERM env var is dumb or undefined. - - --init-mem - This flag adds a number of hypotheses of the form mem[addr] == val, - where the (addr, val) pairs come from the .rodata section of the - binar(ies) under scrutiny. - - --inline=VAL - Functions specified by the provided POSIX regular expression will - be inlined. When functions are not inlined, heuristic function - summaries are used at function call sites. For example, if you want - to inline the functions `foo' and `bar', you can write - `--inline=foo|bar'. To inline everything, use `--inline=.*' (not - generally recommended). - - -L VAL, --plugin-path=VAL, --load-path=VAL - Adds folder to the list of plugins search paths - - --logdir=VAL, --log-dir=VAL (absent BAP_LOG_DIR env) - A folder for log files. - - --loop-invariant=VAL - Usage: `(((address ) (invariant )) (...))'. Assumes - the subroutine contains unnested while loops with one entry point - and one exit each. Checks the loop invariant written in smt-lib2 - format for the loop with its header at the given address. The - address should be written in BAP's bitvector string format. Only - supported for a single binary analysis. - - --mem-offset - This flag is only used in a comparative analysis. Maps the symbols - in the data and bss sections from their addresses in the original - binary to their addresses in the modified binary. If this flag is - not present, WP assumes that memory between both binaries start at - the same offset. - - --no-chaos=VAL - By default, nondeterministic functions and functions that allocate - memory will use the Chaos function spec that freshens the output - variable, "chaosing" its value as a result. Setting this flag will - turn off this feature and treat the specified list as other - functions that will use other function specs. - - --num-unroll=VAL - Specifies the number of times to unroll each loop. WP will unroll - each loop 5 times by default. - - --ogre=VAL - Path of an ogre file, which will be used instead of the default BAP - lifter. This is useful for stripped binaries, or to lift only a - portion of a large binary. - - --ogre-mod=VAL - Path of an ogre file, which will be used instead of the default BAP - lifter for the modified binary in comparative analysis. This is - useful for stripped binaries, or to lift only a portion of a large - binary. - - --ogre-orig=VAL - Path of an ogre file, which will be used instead of the default BAP - lifter for the original binary in comparative analysis. This is - useful for stripped binaries, or to lift only a portion of a large - binary. - - --pointer-reg-list=VAL - This flag specifies a comma delimited list of input registers to be - treated as pointers at the start of program execution. This means - that these registers are restricted in value to point to memory - known to be initialized at the start of the function. For example, - `RSI,RDI` would specify that `RSI` and `RDI`'s values should be - restricted to initialized memory at the start of execution. - - --postcond=VAL - Allows you to specify an assertion that WP will assume is true at - the end of the function it is analyzing, using the smt-lib2 format. - Similar to `--precond', one may create comparative postconditions - on variables by appending `_orig' and `_mod' to variable names. If - no postcondition is specified, a trivial postcondition of `true' - will be used. - - --precond=VAL - Allows you to specify an assertion that WP will assume is true at - the beginning of the function it is analyzing. Assertions are - specified in the smt-lib2 format. For comparative predicates, one - may refer to variables in the original and modified programs by - appending the suffix `_orig' and `_mod' to variable names in the - smt-lib expression. For example, `--precond="(assert (= RDI_mod - # x0000000000000003)) (assert (= RDI_orig #x0000000000000003))".' If - no precondition is specified, a trivial precondition of `true' will - be used. - - --recipe=VAL - Load the specified recipe - - --rewrite-addresses - This flag is only used in a comparative analysis. Rewrites the - concrete addresses in the modified binary to the same address in - the original binary if they point to the same symbol. This flag - should not be used in conjunction with the `--mem-offset' flag. - - --show=VAL - A list of details to print out from the analysis. Multiple options - can be specified as a comma-separated list. For example: - `--show=bir,refuted-goals'. The options are: `bir': The code of the - binary/binaries in BAP Intermediate Representation. - `refuted-goals': In the case WP results in SAT, a list of goals - refuted in the model that contains their tagged names, their - concrete values, and their Z3 representation. `paths': The - execution path of the binary that results in a refuted goal. The - path contains information about the jumps taken, their addresses, - and the values of the registers at each jump. This option - automatically prints out the refuted goals. `precond-smtlib': The - precondition printed out in Z3's SMT-LIB2 format. - `precond-internal': The precondition printed out in WP's internal - format for the `Constr.t' type. `colorful': precond-internal can - have color to highlight key words, making the output easier to - read. Warning: Coloring will change the syntax, so don't use this - flag if you wish to pass the printed output as an input elsewhere. - `diagnostics': Prints out debugging information about runtime to - stderr. - - --stack-base=VAL - Sets the location of the stack frame for the function under - analysis. By default, WP assumes the stack frame for the current - function is between 0x40000000 and 0x3F800080. - - --stack-size=VAL - Sets the size of the stack, which should be denoted in bytes. By - default, the size of the stack is 0x800000 which is 8MB. - - --trip-asserts - Looks for inputs to the subroutine that would cause an - `__assert_fail' or `__VERIFIER_error' to be reached. - - --use-fun-input-regs - At a function call site, uses all possible input registers as - arguments to a function symbol generated for an output register. If - this flag is not present, no registers will be used. - - --user-func-specs=VAL - List of user-defined subroutine specifications. For each - subroutine, it creates the weakest precondition given the name of - the subroutine and its pre and post-conditions. Usage: - --user-func-specs=",,". For - example, --user-func-specs="foo,(assert (= RAX RDI)),(assert (= RAX - init_RDI))" means "for subroutine named foo, specify that its - precondition is RAX = RDI and its postcondition is RAX = init_RDI". - Multiple subroutine specifications are delimited with ';'s, i.e.: - --user-func-specs="foo,(assert (= RAX RDI)),(assert (= RAX - init_RDI)); bar,(assert (= RAX RDI)),(assert (= RAX init_RDI))". - - --user-func-specs-mod=VAL - List of user-defined subroutine specifications to be used only for - the modified binary in comparative analysis. Usage: See - --user-func-specs. - - --user-func-specs-orig=VAL - List of user-defined subroutine specifications to be used only for - the original binary in comparative analysis. Usage: See - --user-func-specs. - - --version - Show version information. -""" diff --git a/wp/pycbat/junk/tests.py b/wp/pycbat/tests/test_cbat.py similarity index 54% rename from wp/pycbat/junk/tests.py rename to wp/pycbat/tests/test_cbat.py index 190917ea..8abd0904 100644 --- a/wp/pycbat/junk/tests.py +++ b/wp/pycbat/tests/test_cbat.py @@ -2,27 +2,6 @@ import z3 import tempfile import subprocess -code = ''' -int fact(int x){ - int acc = 1; - while(x > 0){ - acc *= x; - x--; - } - return acc; -} -''' - - -code = ''' -struct abcd { - int x, - int y -} -int fact(struct abcd* x){ - return x->y + 3; -} -''' rax = z3.BitVec("RAX", 64) init_rax = z3.BitVec("init_RAX", 64) @@ -30,6 +9,7 @@ def run_code(code, postcond): + with tempfile.NamedTemporaryFile(suffix=".c") as fp: with tempfile.TemporaryDirectory() as mydir: fp.write(code.encode()) @@ -44,31 +24,36 @@ def run_code(code, postcond): return run_wp(outfile, func="fact", postcond=postcond) -code = ''' -int fact(int x){ - return 3; -} -''' +def test1(): + code = ''' + int fact(int x){ + return 3; + } + ''' -postcond = rax == 3 + postcond = rax == 3 -assert run_code(code, postcond)[0] == z3.unsat + assert run_code(code, postcond)[0] == z3.unsat -code = ''' -int fact(int x){ - return 3; -} -''' -postcond = rax == 4 -assert run_code(code, postcond)[0] == z3.sat +def test2(): + code = ''' + int fact(int x){ + return 3; + } + ''' + postcond = rax == 4 + + assert run_code(code, postcond)[0] == z3.sat -code = ''' -int fact(int x){ - return x + 3; -} -''' -postcond = z3.Extract(31, 0, rax) == z3.Extract(31, 0, init_rdi) + 3 -# postcond = rax == init_rdi + 3 -assert run_code(code, postcond)[0] == z3.unsat +def test3(): + code = ''' + int fact(int x){ + return x + 3; + } + ''' + postcond = z3.Extract(31, 0, rax) == z3.Extract(31, 0, init_rdi) + 3 + # postcond = rax == init_rdi + 3 + + assert run_code(code, postcond)[0] == z3.unsat From 8d87be7672d61ce513ed459a66a45d428ca70893 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Tue, 28 Feb 2023 15:39:44 -0500 Subject: [PATCH 3/9] cleanup --- wp/pycbat/src/cbat/__init__.py | 8 +++----- wp/pycbat/tests/test_cbat.py | 6 ++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index 87874dfe..30de633e 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -11,9 +11,11 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, if postcond != None: cmd.append("--postcond") cmd.append("(assert " + postcond.sexpr() + ")") + # TODO: Fill out invariants # forward kwargs. Typo unfriendly - flags = ["check-invalid-derefs", "check-null-derefs"] # and so on + # TODO: fill out other allowed flags + flags = ["check-invalid-derefs", "check-null-derefs"] assert all(k in flags for k in kwargs.keys()) cmd += [f"--{k}" for k, v in kwargs.items() if v == True and k in flags] @@ -28,13 +30,9 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, flags += ["-v", f"{filename2}:{filename2}"] cmd = ["docker", "run"] + flags + [docker_image] + cmd res = subprocess.run(cmd, check=False, capture_output=True) - print(res.args) - print(res.stdout.decode()) - stdout = res.stdout.decode() smtlib = stdout.split("Z3 :")[1] s = z3.Solver() s.from_string(smtlib) - print(s) res = s.check() if res == z3.unsat: return (z3.unsat, f"Property {postcond} proved") diff --git a/wp/pycbat/tests/test_cbat.py b/wp/pycbat/tests/test_cbat.py index 8abd0904..d61b0281 100644 --- a/wp/pycbat/tests/test_cbat.py +++ b/wp/pycbat/tests/test_cbat.py @@ -57,3 +57,9 @@ def test3(): # postcond = rax == init_rdi + 3 assert run_code(code, postcond)[0] == z3.unsat + + +# I dunno. Something weird is going on with pytest and IO. +test1() +test2() +test3() From 1a34d8ce331e407ffcb51cd0d0bb0226b24c84a6 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Tue, 28 Feb 2023 15:41:21 -0500 Subject: [PATCH 4/9] cleanup --- wp/pycbat/pyproject.toml | 4 ++-- wp/pycbat/requirements.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/wp/pycbat/pyproject.toml b/wp/pycbat/pyproject.toml index 016fd524..2f7f8feb 100644 --- a/wp/pycbat/pyproject.toml +++ b/wp/pycbat/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "cbat" version = "0.0.1" authors = [ - { name="Example Author", email="author@example.com" }, + { name="Philip Zucker", email="pzucker@draper.com" }, ] description = "Comparative Binary Analysis Tool" readme = "README.md" @@ -18,4 +18,4 @@ classifiers = [ ] [project.urls] -"Homepage" = "https://github.com/draperlaboratory/cbat_tools" \ No newline at end of file +"Homepage" = "https://github.com/draperlaboratory/cbat_tools" diff --git a/wp/pycbat/requirements.txt b/wp/pycbat/requirements.txt index 7ca67aa4..d4930567 100644 --- a/wp/pycbat/requirements.txt +++ b/wp/pycbat/requirements.txt @@ -1,2 +1,3 @@ z3 docker + From cd57ec05bc7f6aed53944c5959e90bce367cfb06 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Tue, 14 Mar 2023 15:12:18 -0400 Subject: [PATCH 5/9] added helpers --- wp/pycbat/src/cbat/__init__.py | 5 ++- wp/pycbat/src/cbat/helpers.py | 62 ++++++++++++++++++++++++++++++ wp/pycbat/tests/resources/test4.c | 6 +++ wp/pycbat/tests/resources/test4.o | Bin 0 -> 1208 bytes wp/pycbat/tests/test_cbat.py | 22 +++++++++++ 5 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 wp/pycbat/src/cbat/helpers.py create mode 100644 wp/pycbat/tests/resources/test4.c create mode 100644 wp/pycbat/tests/resources/test4.o diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index 30de633e..cb32340e 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -25,12 +25,13 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, cmd.append(filename2) if docker_image != None: - flags = ["-it", "-v", f"{filename}:{filename}"] + flags = ["-v", f"{filename}:{filename}"] if filename2 != None: flags += ["-v", f"{filename2}:{filename2}"] cmd = ["docker", "run"] + flags + [docker_image] + cmd res = subprocess.run(cmd, check=False, capture_output=True) - smtlib = stdout.split("Z3 :")[1] + print(res.stderr) + smtlib = res.stdout.decode().split("Z3 :")[1] s = z3.Solver() s.from_string(smtlib) res = s.check() diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py new file mode 100644 index 00000000..0a14981b --- /dev/null +++ b/wp/pycbat/src/cbat/helpers.py @@ -0,0 +1,62 @@ +import z3 +import angr + + +class MemView(): + def __init__(self, mem: z3.ArrayRef, addr: z3.BitVecRef, typ: angr.sim_type.SimType): + self.mem = mem + self.addr = addr + self.typ = typ + + def deref(self): + addr = self.z3() + return MemView(self.mem, addr, self.typ.pts_to) + + def z3(self): + # check endian + return z3.Concat([self.mem[self.addr + n] for n in range(self.typ.size // 8)]) + + def __getitem__(self, field): + addr = self.addr + self.typ.offsets[field] + return MemView(self.mem, addr, self.typ.fields[field]) + + def __repr__(self): + return f"({repr(self.typ)}){repr(self.mem)}[{repr(self.addr)}]" + + +class PropertyBuilder(): + def __init__(self, binary=None, headers=None): + if binary != None: + self.load_binary(binary) + if headers != None: + self.load_headers(headers) + + def make_mem(name): + return z3.Array(name, z3.BitVecSort(64), z3.BitVecSort(8)) + self.mem = make_mem("mem") + self.init_mem = make_mem("init_mem") + + def load_binary(self, filename): + self.proj = angr.Project(filename, load_debug_info=True) + self.cc = self.proj.factory.cc() + + def load_headers(self, header_str): + (defns, types) = angr.types.parse_file(header_str) + angr.types.register_types(types) + self.defns = defns + + def fun_args(self, func, prefix="", suffix=""): + funsig = self.defns[func] + funsig = funsig.with_arch(self.proj.arch) + # stack args not supported yet + assert len(funsig.args) <= len(self.cc.ARG_REGS) + return [z3.Extract(arg.size - 1, 0, z3.BitVec(prefix + reg.upper() + suffix, 64)) for arg, reg in zip(funsig.args, self.cc.ARG_REGS)] + + def init_fun_args(self, func): + return self.fun_args(func, prefix="init_") + + def ret_val(self, func): + funsig = self.defns[func] + funsig = funsig.with_arch(self.proj.arch) + reg = self.cc.RETURN_VAL.reg_name + return z3.Extract(funsig.returnty.size - 1, 0, z3.BitVec(reg.upper(), 64)) diff --git a/wp/pycbat/tests/resources/test4.c b/wp/pycbat/tests/resources/test4.c new file mode 100644 index 00000000..54d231a6 --- /dev/null +++ b/wp/pycbat/tests/resources/test4.c @@ -0,0 +1,6 @@ +int foo(int x, char y) +{ + int bar; + bar = x; + return 3 + bar; +} \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test4.o b/wp/pycbat/tests/resources/test4.o new file mode 100644 index 0000000000000000000000000000000000000000..3b7817577771052a4c4bc513312a545ff1ca8331 GIT binary patch literal 1208 zcmbVLPfG$(5T8{`DIg#x^4U^RfNkcI{LTH5%2 zYQ*26QL7!~^3B$;FNV2NiS4ptp)?G_(nGn-ij{2%2kZo{8Ppln#%U8`0B?lFQAt)O z2CVyYutpgxn@*;laSw#A%Unv@{|s$A0iQonHSvg(&SBw-hKDl*_eF)Zq2s#LD@O|o z?mN&RpoRHkN7ya2!kgHh@&q3VU^lj~fwg=eSle|R-bbwO3eNBv3cj%0w=(Ybhiu@v z1MZ12FpuBajQ6fOp6&2(+SE=KnRGX^Mu)15!*k1+YIWJffH3u5DtD|L!j`;E{RI-uggdVIUPjbyw6L-sHe$)kv#KU zCjSEK=zpoodS&+IM%N@j-!tt~@f995n Date: Tue, 14 Mar 2023 15:51:36 -0400 Subject: [PATCH 6/9] endian issues --- wp/pycbat/.gitignore | 2 +- wp/pycbat/src/cbat/__init__.py | 2 ++ wp/pycbat/src/cbat/helpers.py | 8 +++++-- wp/pycbat/tests/resources/test4.c | 2 +- wp/pycbat/tests/resources/test5.c | 12 ++++++++++ wp/pycbat/tests/resources/test5.o | Bin 0 -> 1208 bytes wp/pycbat/tests/test_cbat.py | 37 +++++++++++++++++++++++++++++- 7 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 wp/pycbat/tests/resources/test5.c create mode 100644 wp/pycbat/tests/resources/test5.o diff --git a/wp/pycbat/.gitignore b/wp/pycbat/.gitignore index f0712403..375de919 100644 --- a/wp/pycbat/.gitignore +++ b/wp/pycbat/.gitignore @@ -1,2 +1,2 @@ __pycache__ -.pytest_cache \ No newline at end of file +.pytest_cache diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index cb32340e..93019175 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -32,8 +32,10 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, res = subprocess.run(cmd, check=False, capture_output=True) print(res.stderr) smtlib = res.stdout.decode().split("Z3 :")[1] + print(smtlib) s = z3.Solver() s.from_string(smtlib) + print(s) res = s.check() if res == z3.unsat: return (z3.unsat, f"Property {postcond} proved") diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py index 0a14981b..f4b18c6c 100644 --- a/wp/pycbat/src/cbat/helpers.py +++ b/wp/pycbat/src/cbat/helpers.py @@ -3,10 +3,11 @@ class MemView(): - def __init__(self, mem: z3.ArrayRef, addr: z3.BitVecRef, typ: angr.sim_type.SimType): + def __init__(self, mem: z3.ArrayRef, addr: z3.BitVecRef, typ: angr.sim_type.SimType, le=True): self.mem = mem self.addr = addr self.typ = typ + self.le = True def deref(self): addr = self.z3() @@ -14,7 +15,10 @@ def deref(self): def z3(self): # check endian - return z3.Concat([self.mem[self.addr + n] for n in range(self.typ.size // 8)]) + bytes = range(self.typ.size // 8) + if self.le: + bytes = reversed(bytes) + return z3.Concat([self.mem[self.addr + n] for n in bytes]) def __getitem__(self, field): addr = self.addr + self.typ.offsets[field] diff --git a/wp/pycbat/tests/resources/test4.c b/wp/pycbat/tests/resources/test4.c index 54d231a6..5e18a82d 100644 --- a/wp/pycbat/tests/resources/test4.c +++ b/wp/pycbat/tests/resources/test4.c @@ -3,4 +3,4 @@ int foo(int x, char y) int bar; bar = x; return 3 + bar; -} \ No newline at end of file +} diff --git a/wp/pycbat/tests/resources/test5.c b/wp/pycbat/tests/resources/test5.c new file mode 100644 index 00000000..e6753d1d --- /dev/null +++ b/wp/pycbat/tests/resources/test5.c @@ -0,0 +1,12 @@ +typedef struct mystruct +{ + int f1; + char f2; +} mystruct; + +int foo(mystruct *x, char y) +{ + int bar; + bar = x->f1; + return 3 + bar; +} diff --git a/wp/pycbat/tests/resources/test5.o b/wp/pycbat/tests/resources/test5.o new file mode 100644 index 0000000000000000000000000000000000000000..c97f2e0b740eef532a0f0dcf281fdc2db262e2cc GIT binary patch literal 1208 zcmbVLK}!Nb6n>kQRz?;b0wcCl206Hv5FJ7g6Cxr4p-a?it3s_SyJKjFI&|#Pv7gdW z=+fVbZe8lVb!Hm|>EMGm?|t8U-@F-TeXJZGP8kMBGvEOJkYoYg6tWc3B}l2qM$Hn)M*g8*2=b6ux{T*u=M!=E7+aJz9MaI4c} zUEk|EJ|6({om-nZ?aQWbyG}H1;v|Dg`fX;k9ZCX6_ZFCXRog&7oO~}?JCcbOb^RvR zljxJ`w^2)b=FwG?^Qo?yoG)V$i!xXH?o^=UdT#}533PgY)%pUMDv3GWRkaHQw5P6r z6;1lRbp1K@(f_3+`eoP?2VE5eeb01Gsh9k?*(CX(1??(a0q8rRH`4XAul{x<$%^?4 W1QZi=#4KSpPJb#ITT$mI(e=L%_DFL8 literal 0 HcmV?d00001 diff --git a/wp/pycbat/tests/test_cbat.py b/wp/pycbat/tests/test_cbat.py index b6a3081c..d3489ec4 100644 --- a/wp/pycbat/tests/test_cbat.py +++ b/wp/pycbat/tests/test_cbat.py @@ -3,7 +3,7 @@ import z3 import tempfile import subprocess - +import angr rax = z3.BitVec("RAX", 64) init_rax = z3.BitVec("init_RAX", 64) init_rdi = z3.BitVec("init_RDI", 64) @@ -80,7 +80,42 @@ def test4(): assert res == z3.sat +def test5(): + header = ''' + typedef struct mystruct + { + int f1; + char f2; + } mystruct; + + int foo(mystruct *x, char y); + ''' + pb = PropertyBuilder(binary="resources/test5.o", headers=header) + x, y = pb.fun_args("foo") + init_x, init_y = pb.init_fun_args("foo") + retval = pb.ret_val("foo") + x_init = MemView(pb.init_mem, init_x, angr.types.parse_type( + "mystruct").with_arch(pb.proj.arch)) + postcond = retval == x_init["f1"].z3() + 3 + (res, model) = run_wp("resources/test5.o", func="foo", + postcond=postcond, docker_image=None) + retval = z3.Extract(31, 0, z3.BitVec("RAX0", 64)) + init_x = z3.BitVec("init_RDI0", 64) + init_mem = z3.Array("init_mem0", + z3.BitVecSort(64), z3.BitVecSort(8)) + x_init = MemView(init_mem, init_x, angr.types.parse_type( + "mystruct").with_arch(pb.proj.arch)) + postcond = retval == x_init["f1"].z3() + 3 + # print(postcond) + # print(model) + # print(model.eval(postcond)) + # print(model.eval(x_init["f1"].z3())) + #print(model.eval(x_init["f1"].z3() + 3)) + assert res == z3.unsat + + # I dunno. Something weird is going on with pytest and IO. +test5() test4() test1() test2() From f5327c01335d396e503ad424c28d538887db1737 Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Wed, 15 Mar 2023 16:12:11 -0400 Subject: [PATCH 7/9] switched to TypedView --- wp/pycbat/src/cbat/__init__.py | 13 +++++---- wp/pycbat/src/cbat/helpers.py | 53 ++++++++++++++++++++++++---------- wp/pycbat/tests/test_cbat.py | 21 ++++++-------- 3 files changed, 53 insertions(+), 34 deletions(-) diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index 93019175..1db6bfa4 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -2,7 +2,7 @@ import z3 -def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, postcond=None, docker_image="philzook58/cbat_min", **kwargs): +def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image="philzook58/cbat_min", **kwargs): cmd = ["bap", "wp", "--no-cache", "--show", "precond-smtlib", "--func", func] if precond != None: @@ -30,15 +30,16 @@ def run_wp(filename, filename2=None, func="main", invariants=[], precond=None, flags += ["-v", f"{filename2}:{filename2}"] cmd = ["docker", "run"] + flags + [docker_image] + cmd res = subprocess.run(cmd, check=False, capture_output=True) - print(res.stderr) + if debug: + print(res.stderr) smtlib = res.stdout.decode().split("Z3 :")[1] - print(smtlib) s = z3.Solver() s.from_string(smtlib) - print(s) + if debug: + print(s) res = s.check() if res == z3.unsat: - return (z3.unsat, f"Property {postcond} proved") + return (z3.unsat, s) elif res == z3.sat: # raise? - return (z3.sat, s.model()) + return (z3.sat, s) diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py index f4b18c6c..ada7b080 100644 --- a/wp/pycbat/src/cbat/helpers.py +++ b/wp/pycbat/src/cbat/helpers.py @@ -2,30 +2,45 @@ import angr -class MemView(): - def __init__(self, mem: z3.ArrayRef, addr: z3.BitVecRef, typ: angr.sim_type.SimType, le=True): +class TypedView(): + def __init__(self, value: z3.BitVecRef, typ: angr.sim_type.SimType, le=True, mem=None): self.mem = mem - self.addr = addr + self.value = value self.typ = typ self.le = True def deref(self): - addr = self.z3() - return MemView(self.mem, addr, self.typ.pts_to) - - def z3(self): - # check endian + assert self.mem != None bytes = range(self.typ.size // 8) if self.le: bytes = reversed(bytes) - return z3.Concat([self.mem[self.addr + n] for n in bytes]) + value = z3.Concat([self.mem[self.value + n] for n in bytes]) + return TypedView(value, self.typ.pts_to, le=self.le, mem=self.mem) def __getitem__(self, field): - addr = self.addr + self.typ.offsets[field] - return MemView(self.mem, addr, self.typ.fields[field]) + start = self.typ.offsets[field] + end = start + self.typ.fields[field].size + value = z3.Extract(end-1, start, self.value) + return TypedView(value, self.typ.fields[field], mem=self.mem, le=self.le) + + def __add__(self, b): + if isinstance(b, int): + return TypedView(self.value + b, self.typ, mem=self.mem, le=self.le) + elif isinstance(b, TypedView): + assert b.typ == self.typ and self.mem == b.mem and self.le == b.le + return TypedView(self.value + b.value, self.typ, mem=self.mem, le=self.le) + assert False, "Unexpected addition type" + + def __eq__(self, b): + assert self.typ == b.typ + return self.value == b.value def __repr__(self): - return f"({repr(self.typ)}){repr(self.mem)}[{repr(self.addr)}]" + return f"({repr(self.typ)}){repr(self.value)}" + + +def make_mem(name): + return z3.Array(name, z3.BitVecSort(64), z3.BitVecSort(8)) class PropertyBuilder(): @@ -35,10 +50,10 @@ def __init__(self, binary=None, headers=None): if headers != None: self.load_headers(headers) - def make_mem(name): - return z3.Array(name, z3.BitVecSort(64), z3.BitVecSort(8)) self.mem = make_mem("mem") self.init_mem = make_mem("init_mem") + self.mem0 = make_mem("mem0") + self.init_mem0 = make_mem("init_mem0") def load_binary(self, filename): self.proj = angr.Project(filename, load_debug_info=True) @@ -49,12 +64,18 @@ def load_headers(self, header_str): angr.types.register_types(types) self.defns = defns + def cast(self, value, typ, prefix="", suffix=""): + mem = make_mem(prefix+"mem"+suffix) + le = self.proj.arch.memory_endness == 'Iend_LE' + value = z3.Extract(typ.size - 1, 0, value) + return TypedView(value, typ, le=le, mem=mem) + def fun_args(self, func, prefix="", suffix=""): funsig = self.defns[func] funsig = funsig.with_arch(self.proj.arch) # stack args not supported yet assert len(funsig.args) <= len(self.cc.ARG_REGS) - return [z3.Extract(arg.size - 1, 0, z3.BitVec(prefix + reg.upper() + suffix, 64)) for arg, reg in zip(funsig.args, self.cc.ARG_REGS)] + return [self.cast(z3.BitVec(prefix + reg.upper() + suffix, 64), typ, prefix=prefix, suffix=suffix) for typ, reg in zip(funsig.args, self.cc.ARG_REGS)] def init_fun_args(self, func): return self.fun_args(func, prefix="init_") @@ -63,4 +84,4 @@ def ret_val(self, func): funsig = self.defns[func] funsig = funsig.with_arch(self.proj.arch) reg = self.cc.RETURN_VAL.reg_name - return z3.Extract(funsig.returnty.size - 1, 0, z3.BitVec(reg.upper(), 64)) + return self.cast(z3.BitVec(reg.upper(), 64), funsig.returnty) diff --git a/wp/pycbat/tests/test_cbat.py b/wp/pycbat/tests/test_cbat.py index d3489ec4..ddf7b6b9 100644 --- a/wp/pycbat/tests/test_cbat.py +++ b/wp/pycbat/tests/test_cbat.py @@ -1,5 +1,5 @@ from cbat import run_wp -from cbat.helpers import PropertyBuilder, MemView +from cbat.helpers import PropertyBuilder import z3 import tempfile import subprocess @@ -61,7 +61,6 @@ def test3(): def test4(): - print("running test4") pb = PropertyBuilder(binary="resources/test4.o", headers="int foo(int x, char y);") x, y = pb.fun_args("foo") @@ -94,18 +93,16 @@ def test5(): x, y = pb.fun_args("foo") init_x, init_y = pb.init_fun_args("foo") retval = pb.ret_val("foo") - x_init = MemView(pb.init_mem, init_x, angr.types.parse_type( - "mystruct").with_arch(pb.proj.arch)) - postcond = retval == x_init["f1"].z3() + 3 + postcond = retval.value == init_x.deref()["f1"].value + 3 (res, model) = run_wp("resources/test5.o", func="foo", postcond=postcond, docker_image=None) - retval = z3.Extract(31, 0, z3.BitVec("RAX0", 64)) - init_x = z3.BitVec("init_RDI0", 64) - init_mem = z3.Array("init_mem0", - z3.BitVecSort(64), z3.BitVecSort(8)) - x_init = MemView(init_mem, init_x, angr.types.parse_type( - "mystruct").with_arch(pb.proj.arch)) - postcond = retval == x_init["f1"].z3() + 3 + # debugging + #retval = z3.Extract(31, 0, z3.BitVec("RAX0", 64)) + #init_mem0 = pb.init_mem0 + #init_x0, init_y0 = pb.init_fun_args("foo", prefix="init_", suffix="0") + # x_init = MemView(init_mem0, init_x0, angr.types.parse_type( + # "mystruct").with_arch(pb.proj.arch)) + #postcond = retval == x_init0["f1"].z3() + 3 # print(postcond) # print(model) # print(model.eval(postcond)) From 6622dea3aafad394d64443d4e8bcfc214574570c Mon Sep 17 00:00:00 2001 From: Philip Zucker Date: Tue, 21 Mar 2023 10:16:33 -0400 Subject: [PATCH 8/9] added comparative condtions. refactoted property builder a little --- wp/pycbat/src/cbat/__init__.py | 11 +++++-- wp/pycbat/src/cbat/helpers.py | 47 +++++++++++++++++++++++----- wp/pycbat/tests/resources/Makefile | 4 +++ wp/pycbat/tests/resources/test6_1.c | 4 +++ wp/pycbat/tests/resources/test6_1.o | Bin 0 -> 1208 bytes wp/pycbat/tests/resources/test6_2.c | 4 +++ wp/pycbat/tests/resources/test6_2.o | Bin 0 -> 1208 bytes wp/pycbat/tests/test_cbat.py | 29 ++++++++++++----- 8 files changed, 80 insertions(+), 19 deletions(-) create mode 100644 wp/pycbat/tests/resources/Makefile create mode 100644 wp/pycbat/tests/resources/test6_1.c create mode 100644 wp/pycbat/tests/resources/test6_1.o create mode 100644 wp/pycbat/tests/resources/test6_2.c create mode 100644 wp/pycbat/tests/resources/test6_2.o diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index 1db6bfa4..a7aba743 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -2,7 +2,7 @@ import z3 -def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image="philzook58/cbat_min", **kwargs): +def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image=None, **kwargs): cmd = ["bap", "wp", "--no-cache", "--show", "precond-smtlib", "--func", func] if precond != None: @@ -16,7 +16,7 @@ def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, p # forward kwargs. Typo unfriendly # TODO: fill out other allowed flags flags = ["check-invalid-derefs", "check-null-derefs"] - assert all(k in flags for k in kwargs.keys()) + assert all(k in flags for k in kwargs.keys()), kwargs cmd += [f"--{k}" for k, v in kwargs.items() if v == True and k in flags] @@ -32,7 +32,12 @@ def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, p res = subprocess.run(cmd, check=False, capture_output=True) if debug: print(res.stderr) - smtlib = res.stdout.decode().split("Z3 :")[1] + smt = res.stdout.decode().split("Z3 :") + if len(smt) != 2: + print("SMT formula extraction failed", smt) + print(res.stderr) + assert False + smtlib = smt[1] s = z3.Solver() s.from_string(smtlib) if debug: diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py index ada7b080..00c877e9 100644 --- a/wp/pycbat/src/cbat/helpers.py +++ b/wp/pycbat/src/cbat/helpers.py @@ -1,5 +1,6 @@ import z3 import angr +from . import run_wp class TypedView(): @@ -44,11 +45,15 @@ def make_mem(name): class PropertyBuilder(): - def __init__(self, binary=None, headers=None): + def __init__(self, binary=None, binary2=None, func=None, headers=None): + self.binary = binary + self.binary2 = binary2 + self.func = func if binary != None: self.load_binary(binary) if headers != None: self.load_headers(headers) + self.posts = [] self.mem = make_mem("mem") self.init_mem = make_mem("init_mem") @@ -70,18 +75,44 @@ def cast(self, value, typ, prefix="", suffix=""): value = z3.Extract(typ.size - 1, 0, value) return TypedView(value, typ, le=le, mem=mem) - def fun_args(self, func, prefix="", suffix=""): - funsig = self.defns[func] + def fun_args(self, prefix="", suffix=""): + assert self.func != None + funsig = self.defns[self.func] funsig = funsig.with_arch(self.proj.arch) # stack args not supported yet assert len(funsig.args) <= len(self.cc.ARG_REGS) return [self.cast(z3.BitVec(prefix + reg.upper() + suffix, 64), typ, prefix=prefix, suffix=suffix) for typ, reg in zip(funsig.args, self.cc.ARG_REGS)] - def init_fun_args(self, func): - return self.fun_args(func, prefix="init_") + def init_fun_args(self): + return self.fun_args(prefix="init_") - def ret_val(self, func): - funsig = self.defns[func] + def init_fun_args_mod(self): + return self.fun_args(prefix="init_", suffix="_mod") + + def fun_args_mod(self): + return self.fun_args(suffix="_mod") + + def init_fun_args_orig(self): + return self.fun_args(prefix="init_", suffix="_orig") + + def fun_args_orig(self): + return self.fun_args(suffix="_orig") + + def ret_val(self, suffix=""): + assert self.func != None + funsig = self.defns[self.func] funsig = funsig.with_arch(self.proj.arch) reg = self.cc.RETURN_VAL.reg_name - return self.cast(z3.BitVec(reg.upper(), 64), funsig.returnty) + return self.cast(z3.BitVec(reg.upper() + suffix, 64), funsig.returnty) + + def ret_val_orig(self): + return self.ret_val(suffix="_orig") + + def ret_val_mod(self): + return self.ret_val(suffix="_mod") + + def add_post(self, post): + self.posts.append(post) + + def run_wp(self): + return run_wp(self.binary, func=self.func, filename2=self.binary2, postcond=z3.And(self.posts)) diff --git a/wp/pycbat/tests/resources/Makefile b/wp/pycbat/tests/resources/Makefile new file mode 100644 index 00000000..7b0fbd18 --- /dev/null +++ b/wp/pycbat/tests/resources/Makefile @@ -0,0 +1,4 @@ + +test6: test6_1.c test6_2.c + gcc -O1 -c test6_1.c -o test6_1.o + gcc -O1 -c test6_2.c -o test6_2.o \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test6_1.c b/wp/pycbat/tests/resources/test6_1.c new file mode 100644 index 00000000..5b1faf89 --- /dev/null +++ b/wp/pycbat/tests/resources/test6_1.c @@ -0,0 +1,4 @@ +int foo(int x) +{ + return x + 3; +} \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test6_1.o b/wp/pycbat/tests/resources/test6_1.o new file mode 100644 index 0000000000000000000000000000000000000000..1b1d87b9b65709e5186a776d056dd0456902a4cf GIT binary patch literal 1208 zcmbVL&r1S96n?9gmVy=?0we5{K@P6{AUcE~W`~Ffgf3yLtqSF??2e!v>Msa7_NR0V zy7ccvw=VVGI^+{{tF32A@CEYNAt4n#96og%5jh+vocYleM7jx&S$` zACtC)-9`t$Y}4}~TtP=bD+SD2tZb^VJF z=;zY)msm&tOIg%QI1q!b3St3~=9GHLPwP#R5Bi{82Va0iM0z7#Py6a`N0PkgpQb=D UL5Iu|deijhqOcQIjvZb93zFqW`2YX_ literal 0 HcmV?d00001 diff --git a/wp/pycbat/tests/resources/test6_2.c b/wp/pycbat/tests/resources/test6_2.c new file mode 100644 index 00000000..ac9fc555 --- /dev/null +++ b/wp/pycbat/tests/resources/test6_2.c @@ -0,0 +1,4 @@ +int foo(int x) +{ + return x + 4; +} \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test6_2.o b/wp/pycbat/tests/resources/test6_2.o new file mode 100644 index 0000000000000000000000000000000000000000..b4a483940232237b0f12f6f8a4d61167db52358c GIT binary patch literal 1208 zcmbVL&r1S96n?9gmVy=?0we5{K@Ms^hz_wJW`~Ffgf3yLtqSF??9PIAsJ|fS*q_od z=+eIv-MZ9!I-}FDbnwCK_ulv3H}B01`&d0aju-|=GvElOkz@g071A8hCZr$jLD& zhB!CPJ~ieKd)#UQ^SL+1uJpJ);J|JyZUJlhKCqVC>)8%+j>~O^*HG}e)w&gN#|c>9 zbNjZ(hrm4h&SG}A(e|vKJ#9Cumqj7n&AhgODdNewMNGA-ZD2s0d@q?hk&cGCeiQT5 zV^;k(YH80RF4bgz%Bv>(OWebx^wm1928yrORxuaH_2=y|9tcfNpalIsuP{M->iQQU z(9fmoFR_mP7gN+rI1q!b3St3~=9GHLhxI1O2Yt}4M_+(NM0z7#Py6a`N0PkgpQb=D UK~I<^^uqM#qOdcm96P%H7n Date: Mon, 10 Apr 2023 14:42:42 -0400 Subject: [PATCH 9/9] added inline parameter and small amount of commentary --- wp/pycbat/src/cbat/__init__.py | 6 +++++- wp/pycbat/src/cbat/helpers.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py index a7aba743..e749698a 100644 --- a/wp/pycbat/src/cbat/__init__.py +++ b/wp/pycbat/src/cbat/__init__.py @@ -2,7 +2,8 @@ import z3 -def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image=None, **kwargs): +def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image=None, inline=None, **kwargs): + '''Simple wrapper around cbat cli. For more information about the options, see `bap wp --help`''' cmd = ["bap", "wp", "--no-cache", "--show", "precond-smtlib", "--func", func] if precond != None: @@ -11,6 +12,9 @@ def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, p if postcond != None: cmd.append("--postcond") cmd.append("(assert " + postcond.sexpr() + ")") + if inline != None: + cmd.append("--inline") + cmd.append(inline) # TODO: Fill out invariants # forward kwargs. Typo unfriendly diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py index 00c877e9..f098543a 100644 --- a/wp/pycbat/src/cbat/helpers.py +++ b/wp/pycbat/src/cbat/helpers.py @@ -114,5 +114,5 @@ def ret_val_mod(self): def add_post(self, post): self.posts.append(post) - def run_wp(self): - return run_wp(self.binary, func=self.func, filename2=self.binary2, postcond=z3.And(self.posts)) + def run_wp(self, **kwargs): + return run_wp(self.binary, func=self.func, filename2=self.binary2, postcond=z3.And(self.posts), **kwargs)