diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf0bbdd..1383b89 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ set(EXAMPLES_WARNING_FLAGS -Wall -Wextra -Wpedantic -Werror)
 
 add_subdirectory(CFGraph)
 add_subdirectory(codeCoverage)
+add_subdirectory(compareLLVM)
 add_subdirectory(dataflowAPI)
 add_subdirectory(disassemble)
 add_subdirectory(DynC)
diff --git a/compareLLVM/CMakeLists.txt b/compareLLVM/CMakeLists.txt
new file mode 100644
index 0000000..eea2b20
--- /dev/null
+++ b/compareLLVM/CMakeLists.txt
@@ -0,0 +1,29 @@
+project(compareLLVM LANGUAGES CXX)
+
+add_executable(compareLLVM disassemble.cpp)
+target_compile_options(compareLLVM PRIVATE ${EXAMPLES_WARNING_FLAGS})
+target_link_libraries(compareLLVM PRIVATE Dyninst::dyninstAPI)
+
+# Symlink the helper scripts next to the executable so that run.sh can be
+# started from the build directory. The CMAKE_CURRENT_*_DIR variables keep the
+# paths valid when this tree is added as a subdirectory of a larger build.
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/parser.py
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+          "${CMAKE_CURRENT_SOURCE_DIR}/parser.py"
+          "${CMAKE_CURRENT_BINARY_DIR}/parser.py"
+  COMMENT "Creating Symlink for compareLLVM/parser.py"
+)
+
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/run.sh
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+          "${CMAKE_CURRENT_SOURCE_DIR}/run.sh"
+          "${CMAKE_CURRENT_BINARY_DIR}/run.sh"
+  COMMENT "Creating Symlink for compareLLVM/run.sh"
+)
+
+add_custom_target(create_my_link ALL
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/parser.py
+          ${CMAKE_CURRENT_BINARY_DIR}/run.sh
+)
diff --git a/compareLLVM/README.md b/compareLLVM/README.md
new file mode 100644
index 0000000..2a49cb8
--- /dev/null
+++ b/compareLLVM/README.md
@@ -0,0 +1,5 @@
+# CompareLLVM
+
+## Compare InstructionAPI disassembly to llvm-objdump
+
+
diff --git a/compareLLVM/disassemble.cpp b/compareLLVM/disassemble.cpp
new file mode 100644
index 0000000..5281a6d
--- /dev/null
+++ b/compareLLVM/disassemble.cpp
@@ -0,0 +1,100 @@
+/*
+   Copyright (C) 2015 Alin Mindroc
+   (mindroc dot alin at gmail dot com)
+
+   This is a sample program that shows how to use InstructionAPI in order to
+   print the assembly code and functions in a provided binary.
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+*/
+#include "CodeObject.h"
+#include "InstructionDecoder.h"
+
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <string>
+
+using namespace std;
+using namespace Dyninst;
+using namespace ParseAPI;
+using namespace InstructionAPI;
+
+// Prints every function that ParseAPI finds in the binary named on the command
+// line: two empty lines, a "<address> <name>:" header, then one
+// "<address>: <instruction>" line per decoded instruction. parser.py depends on
+// exactly this layout, so nothing else may be written to stdout.
+//
+// Errors are reported on stderr and make the program return -1.
+int main(int argc, char** argv) {
+  if(argc != 2) {
+    cerr << "Usage: " << argv[0] << " <binary path>\n";
+    return -1;
+  }
+  char* binaryPath = argv[1];
+
+  // make sure SymtabAPI can open the file before handing it to ParseAPI
+  SymtabAPI::Symtab* symTab = nullptr;
+  std::string binaryPathStr(binaryPath);
+  if(!SymtabAPI::Symtab::openFile(symTab, binaryPathStr)) {
+    cerr << "error: file can not be parsed\n";
+    return -1;
+  }
+
+  // sts is declared first so that it outlives the CodeObject that refers to it
+  std::unique_ptr<SymtabCodeSource> sts(new SymtabCodeSource(binaryPath));
+  std::unique_ptr<CodeObject> co(new CodeObject(sts.get()));
+
+  // parse the binary given as a command line arg
+  co->parse();
+
+  // get list of all functions in the binary
+  const CodeObject::funclist& all = co->funcs();
+  if(all.empty()) {
+    cerr << "error: no functions in file\n";
+    return -1;
+  }
+
+  // create an Instruction decoder which will convert the binary opcodes to strings
+  InstructionDecoder decoder(static_cast<const void*>(nullptr), 1, sts->getArch());
+
+  for(Function* f : all) {
+    // if current function has zero basic blocks, don't output it
+    if(f->blocks().empty())
+      continue;
+
+    cout << "\n\n"
+         << hex << setfill('0') << setw(2 * sts->getAddressWidth()) << f->addr()
+         << " <" << f->name() << ">:\n";
+
+    for(Block* b : f->blocks()) {
+      Address crtAddr = b->start();
+      const Address lastAddr = b->end();
+      while(crtAddr < lastAddr) {
+        // decode current instruction
+        const unsigned char* instr_ptr =
+            static_cast<const unsigned char*>(f->isrc()->getPtrToInstruction(crtAddr));
+
+        // no bytes are available at this address, so there is nothing to decode
+        if(instr_ptr == nullptr)
+          break;
+
+        Instruction instr = decoder.decode(instr_ptr);
+
+        // failed to decode the instruction
+        if(instr.size() == 0)
+          break;
+
+        // pretty print it
+        cout << hex << setfill(' ') << setw(8) << crtAddr << ": "
+             << instr.format() << "\n";
+
+        // go to the address of the next instruction
+        crtAddr += instr.size();
+      }
+    }
+  }
+  return 0;
+}
diff --git a/compareLLVM/parser.py b/compareLLVM/parser.py
new file mode 100755
index 0000000..ab1b588
--- /dev/null
+++ b/compareLLVM/parser.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+"""Report instruction addresses on which llvm-objdump and compareLLVM disagree.
+
+Usage: parser.py <llvm-objdump listing> <compareLLVM listing>
+"""
+
+import sys
+
+# address -> text after the ':' for every instruction line that was parsed
+llvm_addrs = dict()
+dyn_addrs = dict()
+
+
+def parse_func(llvmlines, llvmit, cutoff, addr_map):
+    """Parse one function listing whose header line is llvmlines[llvmit].
+
+    The header is skipped. Each following "<hex address>: <text>" line is
+    stored in addr_map until an empty line or index `cutoff` is reached.
+    Lines that start with '#' or carry no hex address before a ':' (for
+    example a "..." filler line) are ignored instead of raising ValueError.
+
+    Returns the index of the line at which parsing stopped.
+    """
+    llvmit += 1
+    while llvmit < cutoff and llvmlines[llvmit].strip() != "":
+        line = llvmlines[llvmit].strip()
+        llvmit += 1
+        addr_text, sep, command = line.partition(":")
+        if line.startswith("#") or not sep:
+            continue
+        try:
+            addr = int(addr_text, 16)
+        except ValueError:
+            continue
+        addr_map[addr] = command
+    return llvmit
+
+
+def parse_llvm(fname):
+    """Fill llvm_addrs from the llvm-objdump listing stored in fname."""
+    print("opening file", fname)
+    with open(fname, "r") as listing:
+        llvmlines = listing.readlines()
+    llvmlen = len(llvmlines)
+    print(llvmlen)
+    # the first section header is expected on the fourth line
+    llvmit = 3
+    secheader = "Disassembly of section"
+    while llvmit < llvmlen:
+        if llvmlines[llvmit].startswith(secheader):
+            # skip the section header and the line after it
+            llvmit += 2
+        else:
+            llvmit = parse_func(llvmlines, llvmit, llvmlen, llvm_addrs)
+            # step over the empty line that ended the function
+            llvmit += 1
+
+
+def parse_dyninst(fname):
+    """Fill dyn_addrs from the compareLLVM listing stored in fname."""
+    print("opening file", fname)
+    with open(fname, "r") as listing:
+        dynlines = listing.readlines()
+    dynlen = len(dynlines)
+    print(dynlen)
+    # compareLLVM writes two empty lines before every function header
+    dynit = 2
+    while dynit < dynlen:
+        dynit = parse_func(dynlines, dynit, dynlen, dyn_addrs)
+        dynit += 2
+
+
+def output_def(llvm_addrs, dyn_addrs):
+    """Print the sizes of both address maps and the addresses unique to each."""
+    print("Collected %d llvm addrs" % len(llvm_addrs))
+    print("Collected %d dyninst addrs" % len(dyn_addrs))
+    AnB = llvm_addrs.keys() - dyn_addrs.keys()
+    BnA = dyn_addrs.keys() - llvm_addrs.keys()
+    print("Number of address unique to llvm = %d" % len(AnB))
+    for addr in sorted(AnB):
+        print("%x : %s" % (addr, llvm_addrs[addr]))
+    print("Number of address unique to dyninst = %d" % len(BnA))
+    for addr in sorted(BnA):
+        print("%x : %s" % (addr, dyn_addrs[addr]))
+
+
+def main(argv):
+    """Parse both listings named in argv and print the comparison."""
+    if len(argv) != 3:
+        print("Usage: %s <llvm listing> <dyninst listing>" % argv[0], file=sys.stderr)
+        return 1
+    parse_llvm(argv[1])
+    parse_dyninst(argv[2])
+    output_def(llvm_addrs, dyn_addrs)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/compareLLVM/run.sh b/compareLLVM/run.sh
new file mode 100755
index 0000000..f12c944
--- /dev/null
+++ b/compareLLVM/run.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Disassemble a binary with both llvm-objdump and compareLLVM, then let
+# parser.py report the instruction addresses on which the listings disagree.
+# Run it from the directory that holds compareLLVM and parser.py.
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <binary>" >&2
+  exit 1
+fi
+filename=$1
+llvm-objdump -d --no-show-raw-insn "$filename" > ./res.llvm.out || exit 1
+./compareLLVM "$filename" > ./res.dyninst.out || exit 1
+./parser.py res.llvm.out res.dyninst.out