Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(EXAMPLES_WARNING_FLAGS -Wall -Wextra -Wpedantic -Werror)

# Each directory listed here is configured by its own CMakeLists.txt.
add_subdirectory(CFGraph)
add_subdirectory(codeCoverage)
add_subdirectory(compareLLVM)
add_subdirectory(dataflowAPI)
add_subdirectory(disassemble)
add_subdirectory(DynC)
Expand Down
25 changes: 25 additions & 0 deletions compareLLVM/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# compareLLVM: builds the Dyninst-based disassembler and stages the helper
# scripts (parser.py, run.sh) next to the executable in the build tree.
project(compareLLVM LANGUAGES CXX)

add_executable(compareLLVM disassemble.cpp)
target_compile_options(compareLLVM PRIVATE ${EXAMPLES_WARNING_FLAGS})
target_link_libraries(compareLLVM PRIVATE Dyninst::dyninstAPI)

# run.sh calls ./compareLLVM and ./parser.py relative to the working
# directory, so both scripts are symlinked into this directory's build tree.
# CMAKE_CURRENT_*_DIR is used instead of CMAKE_*_DIR so the paths stay correct
# when the examples are not the top-level project.
set(compareLLVM_script_links)
foreach(script_name IN ITEMS parser.py run.sh)
  add_custom_command(
    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${script_name}"
    COMMAND ${CMAKE_COMMAND} -E create_symlink
            "${CMAKE_CURRENT_SOURCE_DIR}/${script_name}"
            "${CMAKE_CURRENT_BINARY_DIR}/${script_name}"
    COMMENT "Creating Symlink for compareLLVM/${script_name}"
    VERBATIM
  )
  list(APPEND compareLLVM_script_links "${CMAKE_CURRENT_BINARY_DIR}/${script_name}")
endforeach()

# Part of ALL so the symlinks exist after a plain build.
add_custom_target(create_my_link ALL DEPENDS ${compareLLVM_script_links})
5 changes: 5 additions & 0 deletions compareLLVM/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CompareLLVM

## Compare InstructionAPI disassembly to llvm-objdump


98 changes: 98 additions & 0 deletions compareLLVM/disassemble.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
Copyright (C) 2015 Alin Mindroc
(mindroc dot alin at gmail dot com)

This is a sample program that shows how to use InstructionAPI in order to
print the assembly code and functions in a provided binary.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
*/
#include "CodeObject.h"
#include "InstructionDecoder.h"

#include <iostream>
#include <iomanip>
using namespace std;
using namespace Dyninst;
using namespace ParseAPI;
using namespace InstructionAPI;

// How many bytes of the instruction hex dump should be printed
// on the first line. The remaining will go to the second line
// on the assumption that an instruction is at most 15 bytes long.
static const int l1_width = 7;

int main(int argc, char** argv) {
if(argc != 2) {
printf("Usage: %s <binary path>\n", argv[0]);
return -1;
}
char* binaryPath = argv[1];

SymtabCodeSource* sts;
CodeObject* co;
Instruction instr;
SymtabAPI::Symtab* symTab;
std::string binaryPathStr(binaryPath);
bool isParsable = SymtabAPI::Symtab::openFile(symTab, binaryPathStr);
if(isParsable == false) {
const char* error = "error: file can not be parsed";
cout << error;
return -1;
}
sts = new SymtabCodeSource(binaryPath);
co = new CodeObject(sts);
// parse the binary given as a command line arg
co->parse();

// get list of all functions in the binary
const CodeObject::funclist& all = co->funcs();
if(all.size() == 0) {
const char* error = "error: no functions in file";
cout << error;
return -1;
}
// create an Instruction decoder which will convert the binary opcodes to strings
InstructionDecoder decoder((const void *)nullptr, 1, sts->getArch());
for(auto fit = all.begin(); fit != all.end(); ++fit) {
Function* f = *fit;
int instr_count = 0;
// if current function has zero basic blocks, d o n t output it
if(f->blocks().empty())
continue;
cout << "\n\n" << hex << setfill('0') << setw(2 * sts->getAddressWidth()) << f->addr() << " <" << f->name() << ">:\n";
auto fbl = f->blocks().end();
fbl--;
for (Block *b : f->blocks()) {
Address crtAddr = b->start();
Address lastAddr = b->end();
while(crtAddr < lastAddr) {
// decode current instruction
const unsigned char *instr_ptr = (const unsigned char *)f->isrc()->getPtrToInstruction(crtAddr);
instr = decoder.decode(instr_ptr);

// failed to decode the instruction
if (instr.size() == 0)
break;

// pretty print it
cout << hex << setfill(' ') << setw(8) << crtAddr << ": ";
cout << instr.format() << "\n";
if (instr.size() > l1_width) {
cout << hex << setfill(' ') << setw(8) << crtAddr + l1_width << ": ";
for (size_t i = l1_width; i < instr.size(); i++) {
cout << hex << setfill('0') << setw(2) << (unsigned)instr_ptr[i] << " ";
}
cout << "\n";
}

// go to the address of the next instruction
crtAddr += instr.size();
instr_count++;
}
}
}
return 0;
}
64 changes: 64 additions & 0 deletions compareLLVM/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/python3

import sys

# Command line: parser.py <llvm-objdump output> <Dyninst output>
# Fail with a usage message instead of an IndexError traceback when an
# argument is missing; extra arguments are still ignored.
if len(sys.argv) < 3:
    sys.exit("Usage: %s <llvm-objdump output> <dyninst output>" % sys.argv[0])

llvm_fname = sys.argv[1]
dyn_fname = sys.argv[2]

# Address (int) -> text following the first ':' on that address's line,
# filled in by parse_llvm and parse_dyninst respectively.
llvm_addrs = dict()
dyn_addrs = dict()

def parse_func(llvmlines,llvmit,cutoff,addr_map):
    """Parse one function listing and record its instruction lines.

    llvmlines -- list of text lines of the disassembly
    llvmit    -- index of the function's header line; parsing starts on the
                 line after it
    cutoff    -- exclusive upper bound on the line index
    addr_map  -- dict updated in place: int(address, 16) -> text after the
                 first ':' on the line

    Lines starting with '#' are skipped, and so are lines that do not have the
    form "<hex address>:<text>" (for example an elision marker such as "...").
    Returns the index of the blank line that ends the listing, or cutoff if
    the input ended first.
    """
    llvmit += 1  # step past the header line
    while llvmit < cutoff:
        line = llvmlines[llvmit].strip()
        if line == "":
            break
        llvmit += 1
        if line.startswith("#"):
            continue
        addr_text, sep, command = line.partition(":")
        if not sep:
            # no address separator: not an instruction line
            continue
        try:
            addr = int(addr_text, 16)
        except ValueError:
            # text before ':' is not a hex address
            continue
        addr_map[addr] = command
    return llvmit

def parse_llvm(fname):
    """Read the disassembly text in `fname` and fill the global llvm_addrs.

    Prints the file name and the number of lines read. Walks the file one
    function listing at a time via parse_func, stepping over
    "Disassembly of section" header lines.
    """
    print("opening file",fname)
    # The context manager closes the file even if reading fails.
    with open(fname,"r") as llvmfile:
        llvmlines = llvmfile.readlines()
    llvmlen = len(llvmlines)
    print(llvmlen)
    # Start at the fourth line; presumably the first three are llvm-objdump's
    # file-format banner -- TODO confirm against the llvm-objdump version used.
    llvmit = 3
    secheader = "Disassembly of section"
    while llvmit < llvmlen:
        if llvmlines[llvmit].startswith(secheader):
            # skip the section header and the line after it
            llvmit += 2
        else:
            # parse_func returns the index of the terminating blank line;
            # the extra +1 moves on to the line that follows it
            llvmit = parse_func(llvmlines,llvmit,llvmlen,llvm_addrs)
            llvmit += 1

def parse_dyninst(fname):
    """Read the disassembly text in `fname` and fill the global dyn_addrs.

    Prints the file name and the number of lines read, then parses one
    function listing at a time via parse_func.
    """
    print("opening file",fname)
    # The context manager closes the file even if reading fails.
    with open(fname,"r") as dynfile:
        dynlines = dynfile.readlines()
    dynlen = len(dynlines)
    print(dynlen)
    # Start at the third line; presumably the first header follows two blank
    # lines in the compareLLVM output -- TODO confirm.
    dynit = 2
    while dynit < dynlen:
        # parse_func returns the index of the terminating blank line; +2 skips
        # it and the line after it to reach the next header
        dynit = parse_func(dynlines,dynit,dynlen,dyn_addrs)
        dynit += 2

def output_def(llvm_addrs,dyn_addrs):
    """Print a summary of the addresses present only in llvm_addrs.

    llvm_addrs, dyn_addrs -- dicts mapping an int address to its line text.

    Prints the size of both maps, the count of addresses that are keys of
    llvm_addrs but not of dyn_addrs, and then those addresses in ascending
    order as "<hex address> : <text>".
    """
    print("Collected %d llvm addrs"%len(llvm_addrs))
    print("Collected %d dyninst addrs"%len(dyn_addrs))
    only_llvm = llvm_addrs.keys() - dyn_addrs.keys()
    print("Number of address unique to llvm = %d"%len(only_llvm))
    for addr in sorted(only_llvm):
        print("%x : %s"%(addr,llvm_addrs[addr]))

# Load both listings (llvm-objdump first, then Dyninst), then report the
# addresses found only in the llvm-objdump listing.
for load_listing, listing_path in ((parse_llvm, llvm_fname), (parse_dyninst, dyn_fname)):
    load_listing(listing_path)
output_def(llvm_addrs, dyn_addrs)

6 changes: 6 additions & 0 deletions compareLLVM/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Usage: ./run.sh <binary path>
#
# Disassembles <binary path> with llvm-objdump and with ./compareLLVM, writes
# the listings to res.llvm.out and res.dyninst.out in the current directory,
# and runs ./parser.py on the two listings.
# Must be run from the directory that contains compareLLVM and parser.py.

# Stop at the first failing step so parser.py never runs on a partial listing.
set -e

if [ "$#" -lt 1 ]; then
    echo "Usage: $0 <binary path>" >&2
    exit 1
fi

filename=$1

# Quote the path so that names containing spaces or glob characters work.
llvm-objdump -d --no-show-raw-insn "$filename" > ./res.llvm.out
./compareLLVM "$filename" > ./res.dyninst.out
./parser.py res.llvm.out res.dyninst.out