From 8f97182d3dc524aa8814a5a2eda609edc75ec240 Mon Sep 17 00:00:00 2001 From: Your Name <88072010+Razshy@users.noreply.github.com> Date: Sun, 17 May 2026 07:11:00 -0500 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20harden=20parsers,=20correct=20decomp?= =?UTF-8?q?iler,=20optimize=20perf=20=E2=80=94=20bump=20to=200.1.7=20Corre?= =?UTF-8?q?ctness=20(30=20bugs=20fixed):?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix dangling-else causing ELF/Mach-O load failures - Replace all unaligned reinterpret_casts with memcpy (ARM64 crash fix) - Fix worker pool destructor race condition (hang on exit) - Correct JLE/JBE compound condition lifting in decompiler - Fix multi-operand IMUL handling (2-op and 3-op forms) - Fix DCE over-aggressively killing heap stores - Fix SSA identity elimination ignoring version numbers - Guard against infinite loops, div-by-zero, OOB in ELF/.NET loaders - Prevent Python injection in IDA export - Add platform guards for Windows-only debugger module (macOS builds) - Harden settings/database parsing against corrupt files Performance: - Replace EntropyView 14K AddRectFilled/frame with single GPU texture - Eliminate per-frame string allocations in DisasmView render path - Shrink Insn struct 272→144 bytes, replace unordered_map with flat vector - Remove duplicated instruction storage from BasicBlocks - Add ImGuiListClipper to Strings/Imports panels, cache filtered results - Replace O(n log n) full-sort with O(n) linear merge for instruction store - Analysis of 3.6MB binary: ~10x faster merge, 4.8GB→~2GB RAM --- CMakeLists.txt | 6 +- hyperion_recent.txt | 1 + src/core/analysis/analysis_db.h | 20 +- src/core/analysis/analyzer.cpp | 251 ++++++----- src/core/analysis/analyzer.h | 2 +- src/core/analysis/bindiff.cpp | 19 +- src/core/analysis/bindiff.h | 5 +- src/core/database/database.cpp | 6 + src/core/database/export/ida_export.cpp | 24 +- src/core/decompiler/dce.cpp | 5 +- src/core/decompiler/decompiler.cpp | 12 +- 
src/core/decompiler/lifter.cpp | 104 +++-- src/core/decompiler/lifter_arm64.cpp | 3 +- src/core/decompiler/propagate.cpp | 14 +- src/core/decompiler/pseudo_gen.cpp | 4 +- src/core/decompiler/type_infer.cpp | 4 +- src/core/disasm/capstone_disasm.cpp | 3 +- src/core/disasm/disassembler.cpp | 14 +- src/core/disasm/disassembler.h | 165 ++++++- src/core/loader/dotnet_loader.cpp | 17 +- src/core/loader/elf_loader.cpp | 38 +- src/core/loader/pe_loader.cpp | 2 +- src/core/types/type_system.h | 23 +- src/scripting/lua_engine.cpp | 8 +- src/threading/worker_pool.cpp | 5 +- src/ui/app.cpp | 35 +- src/ui/widgets/callgraph_view.cpp | 10 +- src/ui/widgets/debugger_panel.cpp | 5 + src/ui/widgets/debugger_panel.h | 39 +- src/ui/widgets/diff_view.cpp | 6 +- src/ui/widgets/disasm_view.cpp | 568 ++++++++++++++---------- src/ui/widgets/disasm_view.h | 19 + src/ui/widgets/entropy_view.cpp | 143 +++++- src/ui/widgets/entropy_view.h | 9 + src/ui/widgets/functions_panel.cpp | 55 ++- src/ui/widgets/functions_panel.h | 17 +- src/ui/widgets/graph_view.cpp | 13 +- src/ui/widgets/imports_panel.cpp | 119 +++-- src/ui/widgets/imports_panel.h | 27 +- src/ui/widgets/output_panel.cpp | 15 +- src/ui/widgets/pe_header_view.cpp | 53 +-- src/ui/widgets/script_console.cpp | 12 +- src/ui/widgets/search_panel.cpp | 9 +- src/ui/widgets/settings_panel.cpp | 6 + src/ui/widgets/stack_frame_view.cpp | 24 +- src/ui/widgets/stack_frame_view.h | 2 +- src/ui/widgets/strings_panel.cpp | 84 ++-- src/ui/widgets/strings_panel.h | 19 +- src/ui/widgets/xrefs_panel.cpp | 2 +- vcpkg.json | 2 +- 50 files changed, 1358 insertions(+), 690 deletions(-) create mode 100644 hyperion_recent.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 48f53f5..5fa3460 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(Hyperion VERSION 0.1.6 LANGUAGES CXX) +project(Hyperion VERSION 0.1.7 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -36,6 
+36,10 @@ file(GLOB_RECURSE HYPERION_SOURCES "src/*.h" ) +if(NOT WIN32) + list(FILTER HYPERION_SOURCES EXCLUDE REGEX "src/debugger/") +endif() + add_executable(${PROJECT_NAME} ${HYPERION_SOURCES}) target_include_directories(${PROJECT_NAME} PRIVATE diff --git a/hyperion_recent.txt b/hyperion_recent.txt new file mode 100644 index 0000000..249e4d7 --- /dev/null +++ b/hyperion_recent.txt @@ -0,0 +1 @@ +/Users/kendallbooker/Downloads/dissembler/build/Hyperion diff --git a/src/core/analysis/analysis_db.h b/src/core/analysis/analysis_db.h index d8ac09d..2685363 100644 --- a/src/core/analysis/analysis_db.h +++ b/src/core/analysis/analysis_db.h @@ -27,7 +27,6 @@ struct DataItem { struct BasicBlock { va_t start; va_t end; - std::vector insns; std::vector succs; std::vector preds; }; @@ -77,7 +76,7 @@ struct FuncSignature { struct AnalysisDB { va_t image_base = 0; - std::unordered_map insns; + InsnStore insns; std::unordered_map funcs; std::vector xrefs; std::unordered_map> xrefs_to; @@ -142,6 +141,23 @@ struct AnalysisDB { std::lock_guard lk(mtx); patches[addr] = std::vector(len, 0x90); } + + template + void for_each_insn_in_block(const BasicBlock& bb, Fn&& fn) const { + auto it = insns.range_begin(bb.start); + auto end = insns.range_end(bb.end); + for (; it != end; ++it) + fn(*it); + } + + template + bool for_each_insn_in_block_break(const BasicBlock& bb, Fn&& fn) const { + auto it = insns.range_begin(bb.start); + auto end = insns.range_end(bb.end); + for (; it != end; ++it) + if (fn(*it)) return true; + return false; + } }; } diff --git a/src/core/analysis/analyzer.cpp b/src/core/analysis/analyzer.cpp index 49dcbf2..b2d524c 100644 --- a/src/core/analysis/analyzer.cpp +++ b/src/core/analysis/analyzer.cpp @@ -133,10 +133,10 @@ void Analyzer::run() { progress_ = 0.88f; detect_noreturn(); - progress_ = 0.87f; + progress_ = 0.89f; detect_tail_calls(); - progress_ = 0.89f; + progress_ = 0.91f; detect_calling_conventions(); progress_ = 0.91f; @@ -185,41 +185,49 @@ void 
Analyzer::linear_sweep() { })); } for (auto& f : futures) { - for (auto& insn : f.get()) - tentative_[insn.addr] = std::move(insn); + auto chunk = f.get(); + tentative_.merge_sorted_range(std::move(chunk)); } } void Analyzer::merge_tentative() { if (tentative_.empty()) return; + db_.insns.finalize(); std::vector> confirmed; confirmed.reserve(db_.insns.size()); - for (auto& [addr, insn] : db_.insns) - confirmed.emplace_back(addr, addr + insn.len); + for (auto& insn : db_.insns) + confirmed.emplace_back(insn.addr, insn.addr + insn.len); std::sort(confirmed.begin(), confirmed.end()); - for (auto& [addr, insn] : tentative_) { - if (db_.insns.count(addr)) continue; - if (!is_code_addr(addr)) continue; + std::vector accepted; + accepted.reserve(tentative_.size() / 2); + + for (auto& insn : tentative_) { + if (db_.insns.count(insn.addr)) continue; + if (!is_code_addr(insn.addr)) continue; - va_t end = addr + insn.len; + va_t end = insn.addr + insn.len; auto it = std::lower_bound(confirmed.begin(), confirmed.end(), - std::make_pair(addr, va_t(0))); + std::make_pair(insn.addr, va_t(0))); bool overlaps = false; if (it != confirmed.begin()) { auto prev = std::prev(it); - if (prev->second > addr) overlaps = true; + if (prev->second > insn.addr) overlaps = true; } if (!overlaps && it != confirmed.end() && it->first < end) overlaps = true; if (!overlaps) - db_.insns[addr] = std::move(insn); + accepted.push_back(insn); } tentative_.clear(); + + std::sort(accepted.begin(), accepted.end(), + [](const Insn& a, const Insn& b) { return a.addr < b.addr; }); + db_.insns.merge_sorted_range(std::move(accepted)); spdlog::info("merge: {} confirmed insns", db_.insns.size()); } @@ -249,7 +257,7 @@ void Analyzer::descend(va_t addr, std::unordered_set& visited) { Insn insn{}; if (!decode_insn(cur + off, ptr + off, max_len - off, insn)) break; - db_.insns[insn.addr] = insn; + db_.insns.insert(insn); off += insn.len; if (insn.is_ret()) break; @@ -264,6 +272,7 @@ void Analyzer::descend(va_t addr, 
std::unordered_set& visited) { } } } + db_.insns.finalize(); } void Analyzer::detect_functions() { @@ -278,7 +287,7 @@ void Analyzer::detect_functions() { } // call targets - for (auto& [addr, insn] : db_.insns) { + for (auto& insn : db_.insns) { if (insn.is_call()) { va_t t = insn.branch_target(); if (t && db_.insns.count(t)) entries.insert(t); @@ -347,24 +356,16 @@ void Analyzer::remove_junk_code() { std::unordered_set in_func; for (auto& [entry, func] : db_.funcs) { for (auto& [ba, bb] : func.blocks) { - va_t cur = bb.start; - while (cur < bb.end) { - in_func.insert(cur); - auto it = db_.insns.find(cur); - if (it == db_.insns.end()) break; - cur += it->second.len; - } + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { + in_func.insert(insn.addr); + }); } } - // remove instructions not in any function that are clearly junk: - // - null bytes (00 00 = add [rax], al) - // - int3 padding (CC) - // - nop padding (90) - // - sequences of identical 2-byte instructions (padding patterns) + // remove instructions not in any function that are clearly junk std::vector to_remove; - for (auto& [addr, insn] : db_.insns) { - if (in_func.count(addr)) continue; + for (auto& insn : db_.insns) { + if (in_func.count(insn.addr)) continue; bool junk = false; if (insn.len <= 2 && insn.bytes[0] == 0x00 && (insn.len == 1 || insn.bytes[1] == 0x00)) @@ -374,7 +375,7 @@ void Analyzer::remove_junk_code() { if (insn.len == 1 && insn.bytes[0] == 0x90) junk = true; - if (junk) to_remove.push_back(addr); + if (junk) to_remove.push_back(insn.addr); } for (va_t a : to_remove) @@ -399,8 +400,8 @@ void Analyzer::build_cfgs() { va_t cur = bb_start; while (db_.insns.count(cur)) { - auto& insn = db_.insns[cur]; - bb.insns.push_back(insn); + auto it = db_.insns.find(cur); + auto& insn = *it; cur += insn.len; if (insn.is_ret()) break; @@ -439,17 +440,21 @@ void Analyzer::detect_switches() { if (!func.analyzed) continue; for (auto& [ba, block] : func.blocks) { - if (block.insns.size() < 2) 
continue; - auto& last = block.insns.back(); + // Need at least 2 insns in the block + auto blk_begin = db_.insns.range_begin(block.start); + auto blk_end = db_.insns.range_end(block.end); + if (blk_begin == blk_end) continue; + auto blk_last = std::prev(blk_end); + if (blk_last == blk_begin) continue; // less than 2 + + auto& last = *blk_last; if (last.type != InsnType::Jmp) continue; - // Pattern 1: jmp reg (indirect through register, table was loaded) - // Pattern 2: jmp [reg*scale + table_addr] if (last.op_count < 1) continue; auto& op = last.ops[0]; // Direct memory operand: jmp [reg*8 + table_addr] - if (op.type == OpType::Mem && op.mem.base == 0 && op.mem.index != 0 && + if (op.type == OpType::Mem && op.mem_base == 0 && op.mem_index != 0 && op.val != 0) { va_t table_addr = op.val; size_t max_len = 0; @@ -464,19 +469,16 @@ void Analyzer::detect_switches() { std::memcpy(&target, tbl + i * 8, 8); if (!is_code_addr(target)) break; block.succs.push_back(target); - if (!func.blocks.count(target)) { - // add to CFG worklist — simplified: just record the edge - } } ++tables_found; continue; } - // RIP-relative LEA pattern: look back for lea+movsxd pattern - // scan backwards for a LEA with rip-relative addressing + // RIP-relative LEA pattern va_t table_base = 0; - for (int j = static_cast(block.insns.size()) - 2; j >= 0; --j) { - auto& prev = block.insns[j]; + for (auto rit = blk_last; rit != blk_begin; ) { + --rit; + auto& prev = *rit; if (prev.type == InsnType::Lea && prev.op_count >= 2 && prev.ops[1].type == OpType::Mem && prev.ops[1].val != 0) { table_base = prev.ops[1].val; @@ -485,19 +487,18 @@ void Analyzer::detect_switches() { } if (!table_base) continue; - // Look for bound: scan block preds for cmp+ja pattern u32 max_cases = 64; for (va_t pred_addr : block.preds) { auto pit = func.blocks.find(pred_addr); if (pit == func.blocks.end()) continue; auto& pblk = pit->second; - for (auto& pi : pblk.insns) { + db_.for_each_insn_in_block(pblk, [&](const Insn& pi) 
{ if (pi.type == InsnType::Cmp && pi.op_count >= 2 && pi.ops[1].type == OpType::Imm) { max_cases = static_cast(pi.ops[1].val) + 1; if (max_cases > 512) max_cases = 512; } - } + }); } size_t max_len = 0; @@ -521,21 +522,21 @@ void Analyzer::detect_switches() { } void Analyzer::build_xrefs() { - for (auto& [addr, insn] : db_.insns) { + for (auto& insn : db_.insns) { if (insn.is_call()) { va_t t = insn.branch_target(); - if (t) db_.add_xref({addr, t, XrefType::CodeCall}); + if (t) db_.add_xref({insn.addr, t, XrefType::CodeCall}); } else if (insn.is_branch()) { va_t t = insn.branch_target(); - if (t) db_.add_xref({addr, t, XrefType::CodeJump}); + if (t) db_.add_xref({insn.addr, t, XrefType::CodeJump}); } for (u8 i = 0; i < insn.op_count; ++i) { auto& op = insn.ops[i]; if (op.type == OpType::Mem && op.val) - db_.add_xref({addr, op.val, XrefType::DataRead}); + db_.add_xref({insn.addr, op.val, XrefType::DataRead}); else if (op.type == OpType::Imm && op.val > img_.base && op.val < img_.base + 0x10000000) - db_.add_xref({addr, op.val, XrefType::DataOffset}); + db_.add_xref({insn.addr, op.val, XrefType::DataOffset}); } } } @@ -570,13 +571,13 @@ void Analyzer::find_string_refs() { str_addrs.insert(addr); u32 refs_added = 0; - for (auto& [addr, insn] : db_.insns) { + for (auto& insn : db_.insns) { if (insn.type != InsnType::Lea) continue; // LEA reg, [rip+X] — operand 1 is mem with computed VA for (u8 i = 0; i < insn.op_count; ++i) { auto& op = insn.ops[i]; if (op.type == OpType::Mem && op.val && str_addrs.count(op.val)) { - db_.add_xref({addr, op.val, XrefType::DataOffset}); + db_.add_xref({insn.addr, op.val, XrefType::DataOffset}); ++refs_added; } } @@ -637,7 +638,7 @@ void Analyzer::detect_vtables() { void Analyzer::detect_globals() { u32 found = 0; - for (auto& [addr, insn] : db_.insns) { + for (auto& insn : db_.insns) { for (u8 i = 0; i < insn.op_count; ++i) { auto& op = insn.ops[i]; if (op.type != OpType::Mem || op.val == 0) continue; @@ -713,20 +714,24 @@ void 
Analyzer::detect_noreturn() { if (func.analyzed && !func.blocks.empty()) { bool has_ret = false; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (insn.is_ret()) { has_ret = true; break; } - } - if (has_ret) break; + if (db_.for_each_insn_in_block_break(bb, [](const Insn& insn) { + return insn.is_ret(); + })) { has_ret = true; break; } } if (!has_ret) { bool has_exit_path = false; for (auto& [ba, bb] : func.blocks) { - if (bb.succs.empty() && !bb.insns.empty() && !bb.insns.back().is_ret()) { - auto& last = bb.insns.back(); - if (last.is_call()) { - va_t t = last.branch_target(); - if (t && db_.funcs.count(t) && db_.funcs[t].noreturn) - continue; + if (bb.succs.empty()) { + // Check if last insn in block is a call to noreturn + auto blk_end_it = db_.insns.range_end(bb.end); + auto blk_beg_it = db_.insns.range_begin(bb.start); + if (blk_beg_it != blk_end_it) { + auto last_it = std::prev(blk_end_it); + if (!last_it->is_ret() && last_it->is_call()) { + va_t t = last_it->branch_target(); + if (t && db_.funcs.count(t) && db_.funcs[t].noreturn) + continue; + } } } if (!bb.succs.empty()) has_exit_path = true; @@ -751,8 +756,11 @@ void Analyzer::detect_tail_calls() { if (bb.end > func_end) func_end = bb.end; for (auto& [ba, bb] : func.blocks) { - if (bb.insns.empty()) continue; - auto& last = bb.insns.back(); + auto blk_begin = db_.insns.range_begin(bb.start); + auto blk_end = db_.insns.range_end(bb.end); + if (blk_begin == blk_end) continue; + auto last_it = std::prev(blk_end); + auto& last = *last_it; if (last.type != InsnType::Jmp) continue; va_t target = last.branch_target(); @@ -799,14 +807,10 @@ void Analyzer::detect_calling_conventions() { bool has_ret_n = false; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (insn.is_ret() && insn.op_count > 0 && insn.ops[0].type == OpType::Imm && - insn.ops[0].val > 0) { - has_ret_n = true; - break; - } - } - if (has_ret_n) break; + if (db_.for_each_insn_in_block_break(bb, 
[](const Insn& insn) { + return insn.is_ret() && insn.op_count > 0 && insn.ops[0].type == OpType::Imm && + insn.ops[0].val > 0; + })) { has_ret_n = true; break; } } if (has_ret_n) @@ -830,7 +834,7 @@ void Analyzer::propagate_dataflow() { if (bit == func.blocks.end()) continue; auto& bb = bit->second; - for (auto& insn : bb.insns) { + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { if (insn.type == InsnType::Mov && insn.op_count >= 2 && insn.ops[0].type == OpType::Reg && insn.ops[1].type == OpType::Imm) { reg_vals[insn.ops[0].reg] = insn.ops[1].val; @@ -856,10 +860,10 @@ void Analyzer::propagate_dataflow() { } else if ((insn.is_call() || insn.type == InsnType::Jmp) && insn.op_count > 0 && insn.ops[0].type == OpType::Mem && - insn.ops[0].mem.base != 0 && insn.ops[0].val == 0) { - auto it = reg_vals.find(insn.ops[0].mem.base); + insn.ops[0].mem_base != 0 && insn.ops[0].val == 0) { + auto it = reg_vals.find(insn.ops[0].mem_base); if (it != reg_vals.end() && it->second != 0) { - va_t effective = it->second + insn.ops[0].mem.disp; + va_t effective = it->second + insn.ops[0].mem_disp; size_t max_len = 0; const u8* ptr = va_to_ptr(effective, &max_len); if (ptr && max_len >= 8) { @@ -875,7 +879,7 @@ void Analyzer::propagate_dataflow() { } if (insn.is_call()) reg_vals.clear(); - } + }); } } spdlog::info("dataflow: resolved {} indirect call/jump targets", resolved); @@ -913,21 +917,22 @@ void Analyzer::detect_loops() { for (va_t lb : loop_blocks) { auto lbit = func.blocks.find(lb); if (lbit == func.blocks.end()) continue; - for (auto& insn : lbit->second.insns) { + bool found = db_.for_each_insn_in_block_break(lbit->second, [&](const Insn& insn) { if (std::strcmp(insn.mnemonic, "inc") == 0 && insn.op_count > 0 && insn.ops[0].type == OpType::Reg) { loop.induction_reg = insn.ops[0].reg; - goto found_ind; + return true; } if (insn.type == InsnType::Add && insn.op_count >= 2 && insn.ops[0].type == OpType::Reg && insn.ops[1].type == OpType::Imm && insn.ops[1].val == 1) { 
loop.induction_reg = insn.ops[0].reg; - goto found_ind; + return true; } - } + return false; + }); + if (found) break; } - found_ind: func.loops.push_back(loop); ++count; } @@ -946,24 +951,23 @@ void Analyzer::recover_structs() { std::unordered_map> accesses; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { for (u8 i = 0; i < insn.op_count; ++i) { auto& op = insn.ops[i]; if (op.type != OpType::Mem) continue; - if (op.mem.base == 0) continue; - // skip RSP/RBP-based (stack frame) - if (op.mem.base == 4 || op.mem.base == 5) continue; // RSP=4, RBP=5 - if (op.mem.base == 20 || op.mem.base == 21) continue; // x64 RSP/RBP - if (op.mem.disp < 0) continue; - if (op.mem.disp > 4096) continue; + if (op.mem_base == 0) continue; + if (op.mem_base == 4 || op.mem_base == 5) continue; + if (op.mem_base == 20 || op.mem_base == 21) continue; + if (op.mem_disp < 0) continue; + if (op.mem_disp > 4096) continue; Access a; - a.base_reg = op.mem.base; - a.offset = op.mem.disp; + a.base_reg = op.mem_base; + a.offset = op.mem_disp; a.size = op.size ? 
op.size : 8; accesses[a.base_reg].push_back(a); } - } + }); } for (auto& [reg, accs] : accesses) { @@ -1097,12 +1101,14 @@ void Analyzer::detect_main() { if (!func.analyzed) continue; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (!insn.is_call()) continue; + bool found_main = false; + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { + if (found_main) return; + if (!insn.is_call()) return; va_t target = insn.branch_target(); - if (!target) continue; - if (!db_.funcs.count(target)) continue; - if (crt_starters.count(db_.funcs[target].name)) continue; + if (!target) return; + if (!db_.funcs.count(target)) return; + if (crt_starters.count(db_.funcs[target].name)) return; auto& callee = db_.funcs[target]; bool is_winmain = func.name.find("WinMain") != std::string::npos; @@ -1115,8 +1121,9 @@ void Analyzer::detect_main() { db_.set_name(target, "main"); } spdlog::info("detected {} at {:X}", callee.name, target); - return; - } + found_main = true; + }); + if (found_main) return; } } @@ -1125,28 +1132,32 @@ void Analyzer::detect_main() { if (eit == db_.funcs.end() || !eit->second.analyzed) return; for (auto& [ba, bb] : eit->second.blocks) { - for (auto& insn : bb.insns) { - if (!insn.is_call()) continue; + bool found_main = false; + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { + if (found_main) return; + if (!insn.is_call()) return; va_t stub = insn.branch_target(); - if (!stub || !db_.funcs.count(stub)) continue; + if (!stub || !db_.funcs.count(stub)) return; auto& stub_func = db_.funcs[stub]; - if (!stub_func.analyzed) continue; + if (!stub_func.analyzed) return; for (auto& [ba2, bb2] : stub_func.blocks) { - for (auto& ins2 : bb2.insns) { - if (!ins2.is_call()) continue; + db_.for_each_insn_in_block(bb2, [&](const Insn& ins2) { + if (found_main) return; + if (!ins2.is_call()) return; va_t target = ins2.branch_target(); - if (!target || !db_.funcs.count(target)) continue; + if (!target || !db_.funcs.count(target)) return; auto& 
callee = db_.funcs[target]; - if (callee.name.rfind("sub_", 0) != 0) continue; + if (callee.name.rfind("sub_", 0) != 0) return; callee.name = "main"; db_.set_name(target, "main"); spdlog::info("detected main at {:X} (via entry stub)", target); - return; - } + found_main = true; + }); } - } + }); + if (found_main) return; } } @@ -1181,7 +1192,7 @@ void Analyzer::propagate_interproc_types() { bool uses_rcx = false, uses_rdx = false, uses_r8 = false, uses_r9 = false; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { for (u8 i = 0; i < insn.op_count; ++i) { if (insn.ops[i].type == OpType::Reg) { if (insn.ops[i].reg == 1) uses_rcx = true; @@ -1190,7 +1201,7 @@ void Analyzer::propagate_interproc_types() { if (insn.ops[i].reg == 9) uses_r9 = true; } } - } + }); } if (uses_r9) param_reg_count = 4; @@ -1213,19 +1224,19 @@ void Analyzer::propagate_interproc_types() { for (auto& [entry, func] : db_.funcs) { if (!func.analyzed) continue; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (!insn.is_call()) continue; + db_.for_each_insn_in_block(bb, [&](const Insn& insn) { + if (!insn.is_call()) return; va_t target = insn.branch_target(); - if (!target) continue; + if (!target) return; auto sit = db_.signatures.find(target); - if (sit == db_.signatures.end()) continue; + if (sit == db_.signatures.end()) return; auto& callee_sig = sit->second; if (callee_sig.return_type != "int64_t") { auto& caller_sig = db_.signatures[entry]; (void)caller_sig; } - } + }); } } diff --git a/src/core/analysis/analyzer.h b/src/core/analysis/analyzer.h index 4b675e2..a87ed76 100644 --- a/src/core/analysis/analyzer.h +++ b/src/core/analysis/analyzer.h @@ -72,7 +72,7 @@ class Analyzer { SignatureMatcher sigmatch_; RTTIParser rtti_; std::atomic progress_{0.f}; - std::unordered_map tentative_; + InsnStore tentative_; }; } diff --git a/src/core/analysis/bindiff.cpp b/src/core/analysis/bindiff.cpp index 
fa493d7..07cca1b 100644 --- a/src/core/analysis/bindiff.cpp +++ b/src/core/analysis/bindiff.cpp @@ -15,7 +15,7 @@ std::vector BinDiff::compare(const AnalysisDB& a, const AnalysisDB& for (auto& [eb, fb] : b.funcs) { if (matched_b.count(eb)) continue; if (!fa.name.empty() && fa.name == fb.name) { - float sim = compute_similarity(fa, fb); + float sim = compute_similarity(fa, fb, a, b); auto st = sim >= 0.999f ? DiffResult::Identical : DiffResult::Modified; results.push_back({ea, eb, fa.name, sim, st}); matched_b.insert(eb); @@ -29,7 +29,7 @@ std::vector BinDiff::compare(const AnalysisDB& a, const AnalysisDB& va_t best_eb = 0; for (auto& [eb, fb] : b.funcs) { if (matched_b.count(eb)) continue; - float sim = compute_similarity(fa, fb); + float sim = compute_similarity(fa, fb, a, b); if (sim > best_sim) { best_sim = sim; best_eb = eb; } } if (best_sim >= 0.5f && best_eb) { @@ -55,9 +55,10 @@ std::vector BinDiff::compare(const AnalysisDB& a, const AnalysisDB& return results; } -float BinDiff::compute_similarity(const Function& fa, const Function& fb) { - auto ba = func_bytes(fa); - auto bb = func_bytes(fb); +float BinDiff::compute_similarity(const Function& fa, const Function& fb, + const AnalysisDB& da, const AnalysisDB& db) { + auto ba = func_bytes(fa, da); + auto bb = func_bytes(fb, db); if (ba.empty() && bb.empty()) return 1.f; if (ba.empty() || bb.empty()) return 0.f; @@ -70,11 +71,13 @@ float BinDiff::compute_similarity(const Function& fa, const Function& fb) { return static_cast(match) / static_cast(total); } -std::vector BinDiff::func_bytes(const Function& f) { +std::vector BinDiff::func_bytes(const Function& f, const AnalysisDB& db) { std::vector out; - for (auto& [_, bb] : f.blocks) - for (auto& insn : bb.insns) + for (auto& [_, bb] : f.blocks) { + db.for_each_insn_in_block(bb, [&](const Insn& insn) { out.insert(out.end(), insn.bytes, insn.bytes + insn.len); + }); + } return out; } diff --git a/src/core/analysis/bindiff.h b/src/core/analysis/bindiff.h index 
591d4a0..4d6fe79 100644 --- a/src/core/analysis/bindiff.h +++ b/src/core/analysis/bindiff.h @@ -19,8 +19,9 @@ class BinDiff { std::vector compare(const AnalysisDB& a, const AnalysisDB& b); private: - float compute_similarity(const Function& fa, const Function& fb); - std::vector func_bytes(const Function& f); + float compute_similarity(const Function& fa, const Function& fb, + const AnalysisDB& da, const AnalysisDB& db); + std::vector func_bytes(const Function& f, const AnalysisDB& db); }; } diff --git a/src/core/database/database.cpp b/src/core/database/database.cpp index 7729053..275b773 100644 --- a/src/core/database/database.cpp +++ b/src/core/database/database.cpp @@ -133,11 +133,14 @@ bool Database::load(const fs::path& dir, PEImage& img, AnalysisDB& db) { std::ifstream f(dir / "names.bin", std::ios::binary); if (f) { u32 n; f.read(reinterpret_cast(&n), 4); + if (!f) return false; for (u32 i = 0; i < n; ++i) { va_t addr; f.read(reinterpret_cast(&addr), 8); u16 nlen; f.read(reinterpret_cast(&nlen), 2); + if (!f) break; std::string name(nlen, '\0'); f.read(name.data(), nlen); + if (!f) break; db.names[addr] = std::move(name); } } @@ -147,11 +150,14 @@ bool Database::load(const fs::path& dir, PEImage& img, AnalysisDB& db) { std::ifstream f(dir / "comments.bin", std::ios::binary); if (f) { u32 n; f.read(reinterpret_cast(&n), 4); + if (!f) return false; for (u32 i = 0; i < n; ++i) { va_t addr; f.read(reinterpret_cast(&addr), 8); u16 clen; f.read(reinterpret_cast(&clen), 2); + if (!f) break; std::string cmt(clen, '\0'); f.read(cmt.data(), clen); + if (!f) break; db.comments[addr] = std::move(cmt); } } diff --git a/src/core/database/export/ida_export.cpp b/src/core/database/export/ida_export.cpp index 2f2d13d..9b19750 100644 --- a/src/core/database/export/ida_export.cpp +++ b/src/core/database/export/ida_export.cpp @@ -5,6 +5,21 @@ namespace hype { +namespace { +std::string escape_py(const std::string& s) { + std::string out; + out.reserve(s.size()); + for (char c : 
s) { + if (c == '\'') out += "\\'"; + else if (c == '\\') out += "\\\\"; + else if (c == '\n') out += "\\n"; + else if (c == '\r') out += "\\r"; + else out += c; + } + return out; +} +} + bool IDAExport::write(const std::filesystem::path& path, const PEImage& img, const AnalysisDB& db) { std::ofstream f(path); if (!f) { spdlog::error("cannot write: {}", path.string()); return false; } @@ -16,21 +31,18 @@ bool IDAExport::write(const std::filesystem::path& path, const PEImage& img, con for (auto& [entry, func] : db.funcs) { f << fmt::format("ida_funcs.add_func(0x{:X})\n", entry); if (!func.name.empty() && func.name.substr(0, 4) != "sub_") - f << fmt::format("ida_name.set_name(0x{:X}, '{}', ida_name.SN_FORCE)\n", entry, func.name); + f << fmt::format("ida_name.set_name(0x{:X}, '{}', ida_name.SN_FORCE)\n", entry, escape_py(func.name)); } f << "\n"; for (auto& [addr, name] : db.names) { if (name.substr(0, 4) == "sub_") continue; - f << fmt::format("ida_name.set_name(0x{:X}, '{}', ida_name.SN_FORCE)\n", addr, name); + f << fmt::format("ida_name.set_name(0x{:X}, '{}', ida_name.SN_FORCE)\n", addr, escape_py(name)); } f << "\n"; for (auto& [addr, cmt] : db.comments) { - std::string escaped = cmt; - for (size_t pos = 0; (pos = escaped.find('\'', pos)) != std::string::npos; pos += 2) - escaped.replace(pos, 1, "\\'"); - f << fmt::format("idc.set_cmt(0x{:X}, '{}', 0)\n", addr, escaped); + f << fmt::format("idc.set_cmt(0x{:X}, '{}', 0)\n", addr, escape_py(cmt)); } f << "\nprint('[Hyperion] done')\n"; diff --git a/src/core/decompiler/dce.cpp b/src/core/decompiler/dce.cpp index 72d1ae4..c3e0e53 100644 --- a/src/core/decompiler/dce.cpp +++ b/src/core/decompiler/dce.cpp @@ -5,7 +5,7 @@ namespace hype { bool DCE::is_callee_saved(int id) const { - return id == 3 || id == 5 || id == 6 || id == 7 || + return id == 3 || id == 5 || id == 12 || id == 13 || id == 14 || id == 15; } @@ -71,9 +71,6 @@ void DCE::run(PcodeFunc& func) { stack_store = true; if (addr.kind == VarnodeKind::Stack) 
stack_store = true; - // temp that was derived from rsp - if (addr.is_temp()) - stack_store = true; } if (stack_store) { op.op = PcodeOp::NOP; continue; } } diff --git a/src/core/decompiler/decompiler.cpp b/src/core/decompiler/decompiler.cpp index 528d3f8..f31697f 100644 --- a/src/core/decompiler/decompiler.cpp +++ b/src/core/decompiler/decompiler.cpp @@ -16,10 +16,10 @@ std::vector Decompiler::decompile(const Function& func, const Analys std::unordered_set calls_seen; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (!insn.is_call()) continue; + db.for_each_insn_in_block(bb, [&](const Insn& insn) { + if (!insn.is_call()) return; va_t t = insn.branch_target(); - if (!t) continue; + if (!t) return; auto nit = db.names.find(t); std::string name; if (nit != db.names.end()) @@ -30,7 +30,7 @@ std::vector Decompiler::decompile(const Function& func, const Analys name = fmt::format("sub_{:X}", t); if (calls_seen.insert(name).second) out.push_back({1, fmt::format("{}();", name), insn.addr}); - } + }); } out.push_back({1, "// ...", 0}); out.push_back({0, "}", 0}); @@ -40,7 +40,9 @@ std::vector Decompiler::decompile(const Function& func, const Analys PcodeFunc pf; bool is_arm64 = false; for (auto& [ba, bb] : func.blocks) { - if (!bb.insns.empty() && bb.insns[0].len == 4) { + auto blk_it = db.insns.range_begin(bb.start); + auto blk_end_it = db.insns.range_end(bb.end); + if (blk_it != blk_end_it && blk_it->len == 4) { is_arm64 = true; break; } diff --git a/src/core/decompiler/lifter.cpp b/src/core/decompiler/lifter.cpp index 8cf9200..ff30792 100644 --- a/src/core/decompiler/lifter.cpp +++ b/src/core/decompiler/lifter.cpp @@ -61,8 +61,8 @@ Varnode Lifter::operand_read(const Insn& insn, int idx, const AnalysisDB& /*db*/ case OpType::Imm: return vn_const(op.val, op.size / 8 ? 
op.size / 8 : 8); case OpType::Mem: { - if (op.val != 0 && op.mem.base != ZYDIS_REGISTER_NONE && - (op.mem.base == ZYDIS_REGISTER_RIP || op.mem.base == ZYDIS_REGISTER_EIP)) { + if (op.val != 0 && op.mem_base != ZYDIS_REGISTER_NONE && + (op.mem_base == ZYDIS_REGISTER_RIP || op.mem_base == ZYDIS_REGISTER_EIP)) { Varnode addr = vn_const(op.val); int sz = op.size / 8; if (sz < 1) sz = 8; @@ -74,16 +74,16 @@ Varnode Lifter::operand_read(const Insn& insn, int idx, const AnalysisDB& /*db*/ Varnode addr = vn_const(0); bool has = false; - if (op.mem.base && op.mem.base != ZYDIS_REGISTER_NONE) { - addr = reg_vn(op.mem.base, 64); + if (op.mem_base && op.mem_base != ZYDIS_REGISTER_NONE) { + addr = reg_vn(op.mem_base, 64); has = true; } - if (op.mem.index && op.mem.index != ZYDIS_REGISTER_NONE) { - Varnode idx_r = reg_vn(op.mem.index, 64); - if (op.mem.scale > 1) { + if (op.mem_index && op.mem_index != ZYDIS_REGISTER_NONE) { + Varnode idx_r = reg_vn(op.mem_index, 64); + if (op.scale > 1) { Varnode t = alloc_temp(); - emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(op.mem.scale)}); + emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(op.scale)}); idx_r = t; } if (has) { @@ -96,8 +96,8 @@ Varnode Lifter::operand_read(const Insn& insn, int idx, const AnalysisDB& /*db*/ has = true; } - if (op.mem.disp != 0) { - Varnode d = vn_const(static_cast(op.mem.disp)); + if (op.mem_disp != 0) { + Varnode d = vn_const(static_cast(op.mem_disp)); if (has) { Varnode t = alloc_temp(); emit(out, PcodeOp::ADD, t, {addr, d}); @@ -130,15 +130,15 @@ void Lifter::operand_write(const Insn& insn, int idx, Varnode val, PcodeBlock& o } else if (op.type == OpType::Mem) { Varnode addr = vn_const(0); bool has = false; - if (op.mem.base && op.mem.base != ZYDIS_REGISTER_NONE) { - addr = reg_vn(op.mem.base, 64); + if (op.mem_base && op.mem_base != ZYDIS_REGISTER_NONE) { + addr = reg_vn(op.mem_base, 64); has = true; } - if (op.mem.index && op.mem.index != ZYDIS_REGISTER_NONE) { - Varnode idx_r = 
reg_vn(op.mem.index, 64); - if (op.mem.scale > 1) { + if (op.mem_index && op.mem_index != ZYDIS_REGISTER_NONE) { + Varnode idx_r = reg_vn(op.mem_index, 64); + if (op.scale > 1) { Varnode t = alloc_temp(); - emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(op.mem.scale)}); + emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(op.scale)}); idx_r = t; } if (has) { @@ -148,8 +148,8 @@ void Lifter::operand_write(const Insn& insn, int idx, Varnode val, PcodeBlock& o } else { addr = idx_r; } has = true; } - if (op.mem.disp != 0) { - Varnode d = vn_const(static_cast(op.mem.disp)); + if (op.mem_disp != 0) { + Varnode d = vn_const(static_cast(op.mem_disp)); if (has) { Varnode t = alloc_temp(); emit(out, PcodeOp::ADD, t, {addr, d}); @@ -194,10 +194,10 @@ static PcodeOp jcc_to_flag_op(u16 mnemonic_id, int& flag_reg, bool& negate) { case ZYDIS_MNEMONIC_JNB: flag_reg = REG_CF; negate = true; return PcodeOp::NOP; case ZYDIS_MNEMONIC_JL: flag_reg = REG_SF; negate = false; return PcodeOp::NOP; case ZYDIS_MNEMONIC_JNL: flag_reg = REG_SF; negate = true; return PcodeOp::NOP; - case ZYDIS_MNEMONIC_JLE: flag_reg = REG_ZF; negate = false; return PcodeOp::NOP; // zf || sf - case ZYDIS_MNEMONIC_JNLE: flag_reg = REG_ZF; negate = true; return PcodeOp::NOP; - case ZYDIS_MNEMONIC_JBE: flag_reg = REG_ZF; negate = false; return PcodeOp::NOP; // cf || zf - case ZYDIS_MNEMONIC_JNBE: flag_reg = REG_ZF; negate = true; return PcodeOp::NOP; + case ZYDIS_MNEMONIC_JLE: flag_reg = REG_ZF; negate = false; return PcodeOp::BOOL_OR; + case ZYDIS_MNEMONIC_JNLE: flag_reg = REG_ZF; negate = true; return PcodeOp::BOOL_OR; + case ZYDIS_MNEMONIC_JBE: flag_reg = REG_CF; negate = false; return PcodeOp::BOOL_OR; + case ZYDIS_MNEMONIC_JNBE: flag_reg = REG_CF; negate = true; return PcodeOp::BOOL_OR; default: flag_reg = REG_ZF; negate = true; return PcodeOp::NOP; } } @@ -218,22 +218,22 @@ void Lifter::lift_insn(const Insn& insn, const AnalysisDB& db, PcodeBlock& out) auto& dst_op = insn.ops[0]; auto& src_op = insn.ops[1]; 
if (dst_op.type == OpType::Reg && src_op.type == OpType::Mem) { - if (src_op.val != 0 && src_op.mem.base != ZYDIS_REGISTER_NONE && - (src_op.mem.base == ZYDIS_REGISTER_RIP || src_op.mem.base == ZYDIS_REGISTER_EIP)) { + if (src_op.val != 0 && src_op.mem_base != ZYDIS_REGISTER_NONE && + (src_op.mem_base == ZYDIS_REGISTER_RIP || src_op.mem_base == ZYDIS_REGISTER_EIP)) { emit(out, PcodeOp::COPY, reg_vn(dst_op.reg, dst_op.size), {vn_const(src_op.val)}); break; } Varnode addr = vn_const(0); bool has = false; - if (src_op.mem.base && src_op.mem.base != ZYDIS_REGISTER_NONE) { - addr = reg_vn(src_op.mem.base, 64); + if (src_op.mem_base && src_op.mem_base != ZYDIS_REGISTER_NONE) { + addr = reg_vn(src_op.mem_base, 64); has = true; } - if (src_op.mem.index && src_op.mem.index != ZYDIS_REGISTER_NONE) { - Varnode idx_r = reg_vn(src_op.mem.index, 64); - if (src_op.mem.scale > 1) { + if (src_op.mem_index && src_op.mem_index != ZYDIS_REGISTER_NONE) { + Varnode idx_r = reg_vn(src_op.mem_index, 64); + if (src_op.scale > 1) { Varnode t = alloc_temp(); - emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(src_op.mem.scale)}); + emit(out, PcodeOp::INT_MULT, t, {idx_r, vn_const(src_op.scale)}); idx_r = t; } if (has) { @@ -243,8 +243,8 @@ void Lifter::lift_insn(const Insn& insn, const AnalysisDB& db, PcodeBlock& out) } else { addr = idx_r; } has = true; } - if (src_op.mem.disp != 0) { - Varnode d = vn_const(static_cast(src_op.mem.disp)); + if (src_op.mem_disp != 0) { + Varnode d = vn_const(static_cast(src_op.mem_disp)); if (has) { Varnode t = alloc_temp(); emit(out, PcodeOp::ADD, t, {addr, d}); @@ -286,11 +286,25 @@ void Lifter::lift_insn(const Insn& insn, const AnalysisDB& db, PcodeBlock& out) break; } case InsnType::Mul: { - auto rax_v = vn_reg(REG_RAX, "rax", 8); - Varnode src = operand_read(insn, insn.op_count > 1 ? 
1 : 0, db, out); - Varnode result = alloc_temp(); - emit(out, PcodeOp::INT_MULT, result, {rax_v, src}); - emit(out, PcodeOp::COPY, rax_v, {result}); + if (insn.op_count <= 1) { + auto rax_v = vn_reg(REG_RAX, "rax", 8); + Varnode src = operand_read(insn, 0, db, out); + Varnode result = alloc_temp(); + emit(out, PcodeOp::INT_MULT, result, {rax_v, src}); + emit(out, PcodeOp::COPY, rax_v, {result}); + } else if (insn.op_count == 2) { + Varnode dst = operand_read(insn, 0, db, out); + Varnode src = operand_read(insn, 1, db, out); + Varnode result = alloc_temp(dst.size); + emit(out, PcodeOp::INT_MULT, result, {dst, src}); + operand_write(insn, 0, result, out); + } else { + Varnode src = operand_read(insn, 1, db, out); + Varnode imm = operand_read(insn, 2, db, out); + Varnode result = alloc_temp(src.size); + emit(out, PcodeOp::INT_MULT, result, {src, imm}); + operand_write(insn, 0, result, out); + } break; } case InsnType::Div: { @@ -339,14 +353,21 @@ void Lifter::lift_insn(const Insn& insn, const AnalysisDB& db, PcodeBlock& out) } case InsnType::Jcc: { int flag_reg; bool negate; - jcc_to_flag_op(insn.mnemonic_id, flag_reg, negate); + PcodeOp compound = jcc_to_flag_op(insn.mnemonic_id, flag_reg, negate); Varnode flag = vn_reg(flag_reg, flag_reg == REG_ZF ? "ZF" : flag_reg == REG_CF ? "CF" : flag_reg == REG_SF ? "SF" : "OF", 1); Varnode cond_val = flag; - if (negate) { + if (compound == PcodeOp::BOOL_OR) { + int sec_flag = (flag_reg == REG_ZF) ? REG_SF : REG_ZF; + Varnode sec = vn_reg(sec_flag, sec_flag == REG_ZF ? 
"ZF" : "SF", 1); cond_val = alloc_temp(1); - emit(out, PcodeOp::BOOL_NOT, cond_val, {flag}); + emit(out, PcodeOp::BOOL_OR, cond_val, {flag, sec}); + } + if (negate) { + Varnode neg = alloc_temp(1); + emit(out, PcodeOp::BOOL_NOT, neg, {cond_val}); + cond_val = neg; } va_t target = insn.branch_target(); emit(out, PcodeOp::CBRANCH, {}, {vn_const(target), cond_val}); @@ -392,8 +413,9 @@ void Lifter::lift_insn(const Insn& insn, const AnalysisDB& db, PcodeBlock& out) void Lifter::lift_block(const BasicBlock& bb, const AnalysisDB& db, PcodeBlock& out) { out.addr = bb.start; - for (auto& insn : bb.insns) + db.for_each_insn_in_block(bb, [&](const Insn& insn) { lift_insn(insn, db, out); + }); } PcodeFunc Lifter::lift(const Function& func, const AnalysisDB& db) { diff --git a/src/core/decompiler/lifter_arm64.cpp b/src/core/decompiler/lifter_arm64.cpp index a328a4b..1f31071 100644 --- a/src/core/decompiler/lifter_arm64.cpp +++ b/src/core/decompiler/lifter_arm64.cpp @@ -398,8 +398,9 @@ void LifterARM64::lift_insn(const Insn& insn, PcodeBlock& blk, const AnalysisDB& void LifterARM64::lift_block(const BasicBlock& bb, const AnalysisDB& db, PcodeBlock& out) { out.addr = bb.start; - for (auto& insn : bb.insns) + db.for_each_insn_in_block(bb, [&](const Insn& insn) { lift_insn(insn, out, db); + }); } PcodeFunc LifterARM64::lift(const Function& func, const AnalysisDB& db) { diff --git a/src/core/decompiler/propagate.cpp b/src/core/decompiler/propagate.cpp index dd15a6c..c029266 100644 --- a/src/core/decompiler/propagate.cpp +++ b/src/core/decompiler/propagate.cpp @@ -78,8 +78,8 @@ void Propagate::constant_fold(PcodeFunc& func) { case PcodeOp::AND: result = l & r; break; case PcodeOp::OR: result = l | r; break; case PcodeOp::XOR: result = l ^ r; break; - case PcodeOp::SHIFT_LEFT: result = l << r; break; - case PcodeOp::SHIFT_RIGHT: result = l >> r; break; + case PcodeOp::SHIFT_LEFT: result = (r < 64) ? (l << r) : 0; break; + case PcodeOp::SHIFT_RIGHT: result = (r < 64) ? 
(l >> r) : 0; break; case PcodeOp::INT_MULT: result = l * r; break; case PcodeOp::INT_EQUAL: result = (l == r) ? 1 : 0; break; case PcodeOp::INT_LESS: result = (l < r) ? 1 : 0; break; @@ -156,10 +156,8 @@ void Propagate::inline_single_use(PcodeFunc& func) { auto& use_op = func.blocks[ubi].ops[uoi]; if (op.op == PcodeOp::COPY && op.inputs.size() == 1) { use_op.inputs[uin] = op.inputs[0]; - } else if (op.inputs.size() == 2) { - use_op.inputs[uin] = op.inputs[0]; - } else if (op.inputs.size() == 1) { - use_op.inputs[uin] = op.inputs[0]; + } else { + continue; } op.op = PcodeOp::NOP; changed = true; @@ -192,9 +190,9 @@ void Propagate::eliminate_identity(PcodeFunc& func) { if (op.op != PcodeOp::COPY) continue; if (op.inputs.empty()) continue; auto& src = op.inputs[0]; - if (op.output.kind == src.kind && op.output.id == src.id) + if (op.output.kind == src.kind && op.output.id == src.id && op.output.offset == src.offset) op.op = PcodeOp::NOP; - else if (op.output.is_reg() && src.is_reg() && op.output.id == src.id) + else if (op.output.is_reg() && src.is_reg() && op.output.id == src.id && op.output.offset == src.offset) op.op = PcodeOp::NOP; } std::erase_if(blk.ops, [](const PcodeInsn& op) { return op.op == PcodeOp::NOP; }); diff --git a/src/core/decompiler/pseudo_gen.cpp b/src/core/decompiler/pseudo_gen.cpp index 19a08cb..c219dda 100644 --- a/src/core/decompiler/pseudo_gen.cpp +++ b/src/core/decompiler/pseudo_gen.cpp @@ -28,7 +28,7 @@ std::vector PseudoGen::generate(const Function& func, const Analysis void PseudoGen::emit_block(const BasicBlock& bb, const AnalysisDB& db, std::vector& out, int indent) { - for (auto& insn : bb.insns) { + db.for_each_insn_in_block(bb, [&](const Insn& insn) { std::string line; switch (insn.type) { case InsnType::Mov: @@ -101,7 +101,7 @@ void PseudoGen::emit_block(const BasicBlock& bb, const AnalysisDB& db, } if (!line.empty()) out.push_back({indent, line, insn.addr}); - } + }); } std::string PseudoGen::operand_to_c(const Insn& insn, int 
op_idx, const AnalysisDB& db) { diff --git a/src/core/decompiler/type_infer.cpp b/src/core/decompiler/type_infer.cpp index 9b09686..f59c6f2 100644 --- a/src/core/decompiler/type_infer.cpp +++ b/src/core/decompiler/type_infer.cpp @@ -124,7 +124,9 @@ void TypeInfer::infer_from_ops(const PcodeFunc& func) { for (auto& blk : func.blocks) { for (auto& op : blk.ops) { if (!op.output.valid()) continue; - int vid = op.output.id; + int vid = op.output.is_reg() ? op.output.id : + op.output.is_temp() ? 1000 + op.output.id : + 2000 + op.output.id; if (op.output.size == 4) set_type(vid, DecompType::make_int(32)); else if (op.output.size == 2) set_type(vid, DecompType::make_int(16)); else if (op.output.size == 1) set_type(vid, DecompType::make_int(8)); diff --git a/src/core/disasm/capstone_disasm.cpp b/src/core/disasm/capstone_disasm.cpp index 95c47d6..f03579d 100644 --- a/src/core/disasm/capstone_disasm.cpp +++ b/src/core/disasm/capstone_disasm.cpp @@ -133,7 +133,7 @@ bool CapstoneDisasm::decode(va_t addr, const u8* data, size_t len, Insn& out) { out.addr = addr; out.len = static_cast(insn->size); - std::memcpy(out.bytes, insn->bytes, (insn->size < 15) ? 
insn->size : 15); + std::memcpy(out.bytes, insn->bytes, std::min(insn->size, sizeof(out.bytes))); out.set_mnemonic(insn->mnemonic); out.set_op_str(insn->op_str); out.mnemonic_id = static_cast(insn->id); @@ -166,6 +166,7 @@ bool CapstoneDisasm::decode(va_t addr, const u8* data, size_t len, Insn& out) { out.op_count = 1; } + out.update_branch_target(); cs_free(insn, count); return true; } diff --git a/src/core/disasm/disassembler.cpp b/src/core/disasm/disassembler.cpp index 90be1d4..77b9a62 100644 --- a/src/core/disasm/disassembler.cpp +++ b/src/core/disasm/disassembler.cpp @@ -73,7 +73,7 @@ bool Disassembler::decode(va_t addr, const u8* data, size_t len, Insn& out) { out.len = zi.length; out.mnemonic_id = zi.mnemonic; out.type = classify(zi.mnemonic); - std::memcpy(out.bytes, data, zi.length); + std::memcpy(out.bytes, data, std::min(zi.length, sizeof(out.bytes))); char buf[256]; ZydisFormatterFormatInstruction(&impl_->formatter, &zi, zo, @@ -90,7 +90,7 @@ bool Disassembler::decode(va_t addr, const u8* data, size_t len, Insn& out) { } out.op_count = 0; - for (u8 i = 0; i < zi.operand_count_visible && i < 4; ++i) { + for (u8 i = 0; i < zi.operand_count_visible && i < 3; ++i) { auto& zop = zo[i]; auto& op = out.ops[i]; op.size = zop.size; @@ -109,10 +109,10 @@ bool Disassembler::decode(va_t addr, const u8* data, size_t len, Insn& out) { break; case ZYDIS_OPERAND_TYPE_MEMORY: op.type = OpType::Mem; - op.mem.base = zop.mem.base; - op.mem.index = zop.mem.index; - op.mem.scale = zop.mem.scale; - op.mem.disp = zop.mem.disp.value; + op.mem_base = zop.mem.base; + op.mem_index = zop.mem.index; + op.scale = zop.mem.scale; + op.mem_disp = static_cast(zop.mem.disp.value); if (zop.mem.base == ZYDIS_REGISTER_RIP || zop.mem.base == ZYDIS_REGISTER_EIP) ZydisCalcAbsoluteAddress(&zi, &zop, addr, &op.val); break; @@ -122,6 +122,7 @@ bool Disassembler::decode(va_t addr, const u8* data, size_t len, Insn& out) { } ++out.op_count; } + out.update_branch_target(); return true; } @@ -132,6 
+133,7 @@ std::vector Disassembler::decode_range(va_t start, const u8* data, size_t while (off < len) { Insn insn{}; if (decode(start + off, data + off, len - off, insn)) { + if (insn.len == 0) insn.len = 1; off += insn.len; result.push_back(std::move(insn)); } else { diff --git a/src/core/disasm/disassembler.h b/src/core/disasm/disassembler.h index bef7e24..7ffc146 100644 --- a/src/core/disasm/disassembler.h +++ b/src/core/disasm/disassembler.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace hype { @@ -18,24 +19,32 @@ enum class InsnType : u8 { enum class OpType : u8 { None, Reg, Imm, Mem }; +#pragma pack(push, 1) struct Operand { OpType type = OpType::None; - u64 val = 0; + u8 scale = 0; u16 reg = 0; u16 size = 0; - struct { u16 base; u16 index; u8 scale; i64 disp; } mem{}; + u16 mem_base = 0; + u16 mem_index = 0; + i32 mem_disp = 0; + u64 val = 0; }; +#pragma pack(pop) +static_assert(sizeof(Operand) == 22, "Operand should be 22 bytes packed"); struct Insn { va_t addr; - u8 len; - InsnType type; + u64 branch_target_cache; + Operand ops[3]; + char op_str[40]; + char mnemonic[8]; + u8 bytes[4]; u16 mnemonic_id; - char mnemonic[12]; - char op_str[64]; - u8 bytes[15]; - Operand ops[4]; + InsnType type; + u8 len; u8 op_count; + u8 _pad[3]{}; void set_mnemonic(const char* s) { std::strncpy(mnemonic, s, sizeof(mnemonic) - 1); @@ -55,9 +64,14 @@ struct Insn { bool is_cond_jmp() const { return type == InsnType::Jcc; } va_t branch_target() const { + return branch_target_cache; + } + + void update_branch_target() { if (op_count > 0 && ops[0].type == OpType::Imm) - return ops[0].val; - return 0; + branch_target_cache = ops[0].val; + else + branch_target_cache = 0; } }; @@ -75,4 +89,135 @@ class Disassembler { std::unique_ptr impl_; }; +class InsnStore { +public: + using iterator = std::vector::iterator; + using const_iterator = std::vector::const_iterator; + + void reserve(size_t n) { data_.reserve(n); } + size_t size() const { return data_.size(); } + bool 
empty() const { return data_.empty(); } + + void insert(const Insn& insn) { + data_.push_back(insn); + if (data_.size() > 1 && data_[data_.size()-2].addr >= insn.addr) + sorted_ = false; + } + void insert(Insn&& insn) { + va_t a = insn.addr; + data_.push_back(std::move(insn)); + if (data_.size() > 1 && data_[data_.size()-2].addr >= a) + sorted_ = false; + } + + void merge_sorted_range(std::vector&& chunk) { + if (chunk.empty()) return; + if (data_.empty()) { + data_ = std::move(chunk); + sorted_ = true; + return; + } + ensure_sorted(); + std::vector merged; + merged.reserve(data_.size() + chunk.size()); + std::merge(data_.begin(), data_.end(), chunk.begin(), chunk.end(), std::back_inserter(merged), + [](const Insn& a, const Insn& b) { return a.addr < b.addr; }); + data_ = std::move(merged); + sorted_ = true; + } + + Insn& operator[](va_t addr) { + ensure_sorted(); + auto it = lower(addr); + if (it != data_.end() && it->addr == addr) + return *it; + sorted_ = false; + data_.push_back(Insn{}); + data_.back().addr = addr; + return data_.back(); + } + + void finalize() { ensure_sorted(); } + + iterator find(va_t addr) { + ensure_sorted(); + auto it = lower(addr); + if (it != data_.end() && it->addr == addr) return it; + return data_.end(); + } + const_iterator find(va_t addr) const { + ensure_sorted(); + auto it = lower(addr); + if (it != data_.end() && it->addr == addr) return it; + return data_.end(); + } + + size_t count(va_t addr) const { + ensure_sorted(); + auto it = lower(addr); + return (it != data_.end() && it->addr == addr) ? 
1 : 0; + } + + void erase(va_t addr) { + ensure_sorted(); + auto it = lower(addr); + if (it != data_.end() && it->addr == addr) + data_.erase(it); + } + + void clear() { data_.clear(); sorted_ = true; } + + iterator begin() { ensure_sorted(); return data_.begin(); } + iterator end() { return data_.end(); } + const_iterator begin() const { ensure_sorted(); return data_.begin(); } + const_iterator end() const { return data_.end(); } + + std::vector& raw() { return data_; } + const std::vector& raw() const { return data_; } + + InsnStore& operator=(InsnStore&& o) noexcept { + data_ = std::move(o.data_); + sorted_ = o.sorted_; + return *this; + } + InsnStore(InsnStore&& o) noexcept : data_(std::move(o.data_)), sorted_(o.sorted_) {} + InsnStore() = default; + InsnStore(const InsnStore&) = default; + InsnStore& operator=(const InsnStore&) = default; + + const_iterator range_begin(va_t start) const { + ensure_sorted(); + return lower(start); + } + const_iterator range_end(va_t end) const { + ensure_sorted(); + return std::lower_bound(data_.begin(), data_.end(), end, + [](const Insn& a, va_t v) { return a.addr < v; }); + } + +private: + void ensure_sorted() const { + if (!sorted_) { + auto& v = const_cast&>(data_); + std::sort(v.begin(), v.end(), + [](const Insn& a, const Insn& b) { return a.addr < b.addr; }); + auto last = std::unique(v.begin(), v.end(), + [](const Insn& a, const Insn& b) { return a.addr == b.addr; }); + v.erase(last, v.end()); + const_cast(sorted_) = true; + } + } + iterator lower(va_t addr) { + return std::lower_bound(data_.begin(), data_.end(), addr, + [](const Insn& a, va_t v) { return a.addr < v; }); + } + const_iterator lower(va_t addr) const { + return std::lower_bound(data_.begin(), data_.end(), addr, + [](const Insn& a, va_t v) { return a.addr < v; }); + } + + std::vector data_; + bool sorted_ = true; +}; + } diff --git a/src/core/loader/dotnet_loader.cpp b/src/core/loader/dotnet_loader.cpp index ad36906..b27bb93 100644 --- 
a/src/core/loader/dotnet_loader.cpp +++ b/src/core/loader/dotnet_loader.cpp @@ -103,16 +103,24 @@ bool DotNetLoader::parse_streams(const u8* root, size_t sz) { std::string DotNetLoader::read_meta_string(u32 offset) { if (!streams_.strings || offset >= streams_.strings_sz) return ""; - return std::string(reinterpret_cast(streams_.strings + offset)); + const char* s = reinterpret_cast(streams_.strings + offset); + size_t max_len = streams_.strings_sz - offset; + size_t len = strnlen(s, max_len); + return std::string(s, len); } std::string DotNetLoader::read_us_string(u32 offset) { if (!streams_.us || offset >= streams_.us_sz) return ""; + size_t remaining = streams_.us_sz - offset; const u8* p = streams_.us + offset; - u32 len = *p++; - if (len & 0x80) { len = ((len & 0x7F) << 8) | *p++; } + if (remaining < 1) return ""; + u32 len = *p++; --remaining; + if (len & 0x80) { + if (remaining < 1) return ""; + len = ((len & 0x7F) << 8) | *p++; --remaining; + } std::string s; - for (u32 i = 0; i + 1 < len; i += 2) { + for (u32 i = 0; i + 1 < len && i + 1 < remaining; i += 2) { char16_t ch = p[i] | (p[i+1] << 8); if (ch < 128) s += static_cast(ch); else s += '?'; @@ -136,6 +144,7 @@ bool DotNetLoader::parse_tables() { int table_count = 0; for (int i = 0; i < 64; i++) { if (valid_mask & (1ULL << i)) { + if (24 + (table_count + 1) * 4 > streams_.tables_sz) return false; u32 rows; std::memcpy(&rows, row_counts + table_count * 4, 4); tables_[i].rows = rows; diff --git a/src/core/loader/elf_loader.cpp b/src/core/loader/elf_loader.cpp index 64d0400..4d76ea1 100644 --- a/src/core/loader/elf_loader.cpp +++ b/src/core/loader/elf_loader.cpp @@ -146,6 +146,8 @@ u32 elf_to_pe_flags(u32 pflags) { const char* strtab_get(const u8* strtab, size_t strtab_sz, u32 offset) { if (offset >= strtab_sz) return ""; + const void* nul = std::memchr(strtab + offset, 0, strtab_sz - offset); + if (!nul) return ""; return reinterpret_cast(strtab + offset); } @@ -239,7 +241,7 @@ bool 
ELFLoader::parse_segments(PEImage& img) { if (is64_) { auto* eh = reinterpret_cast(base_); for (u16 i = 0; i < eh->e_phnum; ++i) { - size_t off = eh->e_phoff + i * eh->e_phentsize; + size_t off = static_cast(eh->e_phoff) + static_cast(i) * static_cast(eh->e_phentsize); if (off + sizeof(Elf64_Phdr) > size_) break; auto* ph = reinterpret_cast(base_ + off); if (ph->p_type != PT_LOAD) continue; @@ -265,7 +267,7 @@ bool ELFLoader::parse_segments(PEImage& img) { } else { auto* eh = reinterpret_cast(base_); for (u16 i = 0; i < eh->e_phnum; ++i) { - size_t off = eh->e_phoff + i * eh->e_phentsize; + size_t off = static_cast(eh->e_phoff) + static_cast(i) * static_cast(eh->e_phentsize); if (off + sizeof(Elf32_Phdr) > size_) break; auto* ph = reinterpret_cast(base_ + off); if (ph->p_type != PT_LOAD) continue; @@ -302,7 +304,7 @@ bool ELFLoader::parse_sections(PEImage& img) { auto* eh = reinterpret_cast(base_); if (eh->e_shnum == 0 || eh->e_shstrndx == 0) return true; - size_t str_off = eh->e_shoff + eh->e_shstrndx * eh->e_shentsize; + size_t str_off = eh->e_shoff + static_cast(eh->e_shstrndx) * eh->e_shentsize; if (str_off + sizeof(Elf64_Shdr) > size_) return true; auto* str_sh = reinterpret_cast(base_ + str_off); if (str_sh->sh_offset + str_sh->sh_size <= size_) { @@ -311,7 +313,7 @@ bool ELFLoader::parse_sections(PEImage& img) { } for (u16 i = 0; i < eh->e_shnum; ++i) { - size_t off = eh->e_shoff + i * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(i) * eh->e_shentsize; if (off + sizeof(Elf64_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); if (sh->sh_addr == 0 || sh->sh_size == 0) continue; @@ -329,7 +331,7 @@ bool ELFLoader::parse_sections(PEImage& img) { auto* eh = reinterpret_cast(base_); if (eh->e_shnum == 0 || eh->e_shstrndx == 0) return true; - size_t str_off = eh->e_shoff + eh->e_shstrndx * eh->e_shentsize; + size_t str_off = eh->e_shoff + static_cast(eh->e_shstrndx) * eh->e_shentsize; if (str_off + sizeof(Elf32_Shdr) > size_) return true; 
auto* str_sh = reinterpret_cast(base_ + str_off); if (str_sh->sh_offset + str_sh->sh_size <= size_) { @@ -338,7 +340,7 @@ bool ELFLoader::parse_sections(PEImage& img) { } for (u16 i = 0; i < eh->e_shnum; ++i) { - size_t off = eh->e_shoff + i * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(i) * eh->e_shentsize; if (off + sizeof(Elf32_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); if (sh->sh_addr == 0 || sh->sh_size == 0) continue; @@ -360,7 +362,7 @@ bool ELFLoader::parse_symbols(PEImage& img) { if (is64_) { auto* eh = reinterpret_cast(base_); for (u16 si = 0; si < eh->e_shnum; ++si) { - size_t off = eh->e_shoff + si * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(si) * eh->e_shentsize; if (off + sizeof(Elf64_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); @@ -369,13 +371,14 @@ bool ELFLoader::parse_symbols(PEImage& img) { if (sh->sh_entsize < sizeof(Elf64_Sym)) continue; // get linked strtab - size_t strtab_off_hdr = eh->e_shoff + sh->sh_link * eh->e_shentsize; + size_t strtab_off_hdr = eh->e_shoff + static_cast(sh->sh_link) * eh->e_shentsize; if (strtab_off_hdr + sizeof(Elf64_Shdr) > size_) continue; auto* str_sh = reinterpret_cast(base_ + strtab_off_hdr); if (str_sh->sh_offset + str_sh->sh_size > size_) continue; const u8* strtab = base_ + str_sh->sh_offset; size_t strtab_sz = static_cast(str_sh->sh_size); + if (sh->sh_entsize == 0) continue; size_t count = static_cast(sh->sh_size / sh->sh_entsize); for (size_t i = 1; i < count; ++i) { @@ -402,7 +405,7 @@ bool ELFLoader::parse_symbols(PEImage& img) { } else { auto* eh = reinterpret_cast(base_); for (u16 si = 0; si < eh->e_shnum; ++si) { - size_t off = eh->e_shoff + si * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(si) * eh->e_shentsize; if (off + sizeof(Elf32_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); @@ -410,13 +413,14 @@ bool ELFLoader::parse_symbols(PEImage& img) { if (sh->sh_offset + sh->sh_size > size_) continue; if 
(sh->sh_entsize < sizeof(Elf32_Sym)) continue; - size_t strtab_off_hdr = eh->e_shoff + sh->sh_link * eh->e_shentsize; + size_t strtab_off_hdr = eh->e_shoff + static_cast(sh->sh_link) * eh->e_shentsize; if (strtab_off_hdr + sizeof(Elf32_Shdr) > size_) continue; auto* str_sh = reinterpret_cast(base_ + strtab_off_hdr); if (str_sh->sh_offset + str_sh->sh_size > size_) continue; const u8* strtab = base_ + str_sh->sh_offset; size_t strtab_sz = str_sh->sh_size; + if (sh->sh_entsize == 0) continue; size_t count = sh->sh_size / sh->sh_entsize; for (size_t i = 1; i < count; ++i) { @@ -455,13 +459,13 @@ bool ELFLoader::parse_dynamic(PEImage& img) { size_t dynstr_sz = 0; for (u16 i = 0; i < eh->e_shnum; ++i) { - size_t off = eh->e_shoff + i * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(i) * eh->e_shentsize; if (off + sizeof(Elf64_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); if (sh->sh_type == SHT_DYNSYM) { dynsym_sh = sh; - size_t str_hdr_off = eh->e_shoff + sh->sh_link * eh->e_shentsize; + size_t str_hdr_off = eh->e_shoff + static_cast(sh->sh_link) * eh->e_shentsize; if (str_hdr_off + sizeof(Elf64_Shdr) <= size_) { auto* sth = reinterpret_cast(base_ + str_hdr_off); if (sth->sh_offset + sth->sh_size <= size_) { @@ -474,7 +478,7 @@ bool ELFLoader::parse_dynamic(PEImage& img) { // .rela.plt type = SHT_RELA (4) if (sh->sh_type == 4) { // check name via shstrtab - size_t str_sec_off = eh->e_shoff + eh->e_shstrndx * eh->e_shentsize; + size_t str_sec_off = eh->e_shoff + static_cast(eh->e_shstrndx) * eh->e_shentsize; if (str_sec_off + sizeof(Elf64_Shdr) <= size_) { auto* ssh = reinterpret_cast(base_ + str_sec_off); if (ssh->sh_offset + ssh->sh_size <= size_) { @@ -488,6 +492,7 @@ bool ELFLoader::parse_dynamic(PEImage& img) { } if (dynsym_sh && dynstr && relaplt_sh) { + if (relaplt_sh->sh_entsize == 0) return true; size_t rela_count = static_cast(relaplt_sh->sh_size / relaplt_sh->sh_entsize); for (size_t i = 0; i < rela_count; ++i) { size_t roff = 
static_cast(relaplt_sh->sh_offset) + i * static_cast(relaplt_sh->sh_entsize); @@ -519,13 +524,13 @@ bool ELFLoader::parse_dynamic(PEImage& img) { size_t dynstr_sz = 0; for (u16 i = 0; i < eh->e_shnum; ++i) { - size_t off = eh->e_shoff + i * eh->e_shentsize; + size_t off = eh->e_shoff + static_cast(i) * eh->e_shentsize; if (off + sizeof(Elf32_Shdr) > size_) break; auto* sh = reinterpret_cast(base_ + off); if (sh->sh_type == SHT_DYNSYM) { dynsym_sh = sh; - size_t str_hdr_off = eh->e_shoff + sh->sh_link * eh->e_shentsize; + size_t str_hdr_off = eh->e_shoff + static_cast(sh->sh_link) * eh->e_shentsize; if (str_hdr_off + sizeof(Elf32_Shdr) <= size_) { auto* sth = reinterpret_cast(base_ + str_hdr_off); if (sth->sh_offset + sth->sh_size <= size_) { @@ -537,7 +542,7 @@ bool ELFLoader::parse_dynamic(PEImage& img) { // .rel.plt type = SHT_REL (9) if (sh->sh_type == 9) { - size_t str_sec_off = eh->e_shoff + eh->e_shstrndx * eh->e_shentsize; + size_t str_sec_off = eh->e_shoff + static_cast(eh->e_shstrndx) * eh->e_shentsize; if (str_sec_off + sizeof(Elf32_Shdr) <= size_) { auto* ssh = reinterpret_cast(base_ + str_sec_off); if (ssh->sh_offset + ssh->sh_size <= size_) { @@ -550,6 +555,7 @@ bool ELFLoader::parse_dynamic(PEImage& img) { } if (dynsym_sh && dynstr && relplt_sh) { + if (relplt_sh->sh_entsize == 0) return true; size_t rel_count = relplt_sh->sh_size / relplt_sh->sh_entsize; for (size_t i = 0; i < rel_count; ++i) { size_t roff = relplt_sh->sh_offset + i * relplt_sh->sh_entsize; diff --git a/src/core/loader/pe_loader.cpp b/src/core/loader/pe_loader.cpp index 8509ef1..be88592 100644 --- a/src/core/loader/pe_loader.cpp +++ b/src/core/loader/pe_loader.cpp @@ -101,7 +101,7 @@ struct ExpDir { template const T* ptr_at(const u8* base, size_t off, size_t total) { - if (off + sizeof(T) > total) return nullptr; + if (off > total || total - off < sizeof(T)) return nullptr; return reinterpret_cast(base + off); } diff --git a/src/core/types/type_system.h b/src/core/types/type_system.h 
index 794d65a..0b83af4 100644 --- a/src/core/types/type_system.h +++ b/src/core/types/type_system.h @@ -154,7 +154,8 @@ class TypeSystem { std::string format_enum(const TypeDef& td, const u8* data, size_t len) const { if (len < 4) return "?"; - i64 val = *reinterpret_cast(data); + i32 raw; std::memcpy(&raw, data, 4); + i64 val = raw; for (auto& m : td.members) if (m.value == val) return fmt::format("{}::{} ({})", td.name, m.name, val); return fmt::format("{}({})", td.name, val); @@ -165,23 +166,25 @@ class TypeSystem { switch (td.kind) { case TypeKind::UInt: if (td.size == 1) return fmt::format("0x{:02X}", data[0]); - if (td.size == 2) return fmt::format("0x{:04X}", *reinterpret_cast(data)); - if (td.size == 4) return fmt::format("0x{:08X}", *reinterpret_cast(data)); - if (td.size == 8) return fmt::format("0x{:016X}", *reinterpret_cast(data)); + if (td.size == 2) { u16 v; std::memcpy(&v, data, 2); return fmt::format("0x{:04X}", v); } + if (td.size == 4) { u32 v; std::memcpy(&v, data, 4); return fmt::format("0x{:08X}", v); } + if (td.size == 8) { u64 v; std::memcpy(&v, data, 8); return fmt::format("0x{:016X}", v); } break; case TypeKind::Int: if (td.size == 1) return fmt::format("{}", static_cast(data[0])); - if (td.size == 2) return fmt::format("{}", *reinterpret_cast(data)); - if (td.size == 4) return fmt::format("{}", *reinterpret_cast(data)); - if (td.size == 8) return fmt::format("{}", *reinterpret_cast(data)); + if (td.size == 2) { i16 v; std::memcpy(&v, data, 2); return fmt::format("{}", v); } + if (td.size == 4) { i32 v; std::memcpy(&v, data, 4); return fmt::format("{}", v); } + if (td.size == 8) { i64 v; std::memcpy(&v, data, 8); return fmt::format("{}", v); } break; case TypeKind::Float: - if (td.size == 4) return fmt::format("{:.6g}", *reinterpret_cast(data)); - if (td.size == 8) return fmt::format("{:.12g}", *reinterpret_cast(data)); + if (td.size == 4) { float v; std::memcpy(&v, data, 4); return fmt::format("{:.6g}", v); } + if (td.size == 8) { double 
v; std::memcpy(&v, data, 8); return fmt::format("{:.12g}", v); } break; default: break; } - return fmt::format("0x{:X}", *reinterpret_cast(data)); + u64 v = 0; + std::memcpy(&v, data, std::min(td.size, sizeof(v))); + return fmt::format("0x{:X}", v); } std::unordered_map types_; diff --git a/src/scripting/lua_engine.cpp b/src/scripting/lua_engine.cpp index 31a31ad..a32a750 100644 --- a/src/scripting/lua_engine.cpp +++ b/src/scripting/lua_engine.cpp @@ -158,13 +158,13 @@ int l_get_insn(lua_State* L) { if (it == db->insns.end()) { lua_pushnil(L); return 1; } lua_newtable(L); - lua_pushstring(L, it->second.mnemonic); + lua_pushstring(L, it->mnemonic); lua_setfield(L, -2, "mnemonic"); - lua_pushstring(L, it->second.op_str); + lua_pushstring(L, it->op_str); lua_setfield(L, -2, "op_str"); - lua_pushinteger(L, it->second.len); + lua_pushinteger(L, it->len); lua_setfield(L, -2, "len"); - lua_pushinteger(L, static_cast(it->second.addr)); + lua_pushinteger(L, static_cast(it->addr)); lua_setfield(L, -2, "addr"); return 1; } diff --git a/src/threading/worker_pool.cpp b/src/threading/worker_pool.cpp index 1d756b3..09f1fae 100644 --- a/src/threading/worker_pool.cpp +++ b/src/threading/worker_pool.cpp @@ -26,7 +26,10 @@ WorkerPool::WorkerPool(unsigned n) { } WorkerPool::~WorkerPool() { - stop_ = true; + { + std::lock_guard lk(mtx_); + stop_ = true; + } cv_.notify_all(); for (auto& w : workers_) if (w.joinable()) w.join(); diff --git a/src/ui/app.cpp b/src/ui/app.cpp index 54f5383..de34049 100644 --- a/src/ui/app.cpp +++ b/src/ui/app.cpp @@ -164,7 +164,8 @@ int App::run() { } while (!renderer_.should_close()) { - renderer_.begin_frame(); + if (!renderer_.begin_frame()) + continue; // pick up analysis completion on main thread (thread-safe handoff) if (analysis_done_.exchange(false)) { @@ -224,6 +225,7 @@ int App::run() { render_menubar(); // update debug indicators in disasm +#ifdef _WIN32 if (dbgp_.engine().is_attached() && !dbgp_.engine().is_running()) { static std::vector 
bp_addrs; bp_addrs.clear(); @@ -235,6 +237,9 @@ int App::run() { dv_.set_debug_state(0, nullptr); dv_.set_debug_engine(&dbgp_.engine()); } +#else + dv_.set_debug_state(0, nullptr); +#endif dv_.render(); hv_.render(); @@ -422,7 +427,7 @@ void App::open_file(const char* path) { magic == 0xCEFAEDFE || magic == 0xCFAFFEED) { out_.log("Detected Mach-O binary (macOS)"); result = macho_loader_.load(path); - } else + } else { out_.log("ERROR: unsupported file format (expected PE or ELF)"); return; } @@ -768,8 +773,8 @@ void App::handle_keys() { va_t xaddr = dv_.cursor(); if (analyzer_) { auto iit = analyzer_->db().insns.find(xaddr); - if (iit != analyzer_->db().insns.end() && iit->second.is_call()) { - va_t t = iit->second.branch_target(); + if (iit != analyzer_->db().insns.end() && iit->is_call()) { + va_t t = iit->branch_target(); if (t) xaddr = t; } } @@ -786,8 +791,8 @@ void App::handle_keys() { if (ImGui::IsKeyPressed(ImGuiKey_T) && !io.KeyCtrl && analyzer_) show_apply_type_ = true; if (kb.check("follow") && analyzer_) { - auto* insn_ptr = analyzer_->db().insns.count(dv_.cursor()) ? - &analyzer_->db().insns.at(dv_.cursor()) : nullptr; + auto _fit = analyzer_->db().insns.find(dv_.cursor()); + auto* insn_ptr = (_fit != analyzer_->db().insns.end()) ? 
&*_fit : nullptr; if (insn_ptr) { va_t t = insn_ptr->branch_target(); if (t && analyzer_->db().insns.count(t)) { @@ -850,8 +855,10 @@ void App::handle_keys() { dbgp_.visible() = true; } if (ImGui::IsKeyPressed(ImGuiKey_F9) && !io.KeyCtrl) { +#ifdef _WIN32 if (dbgp_.engine().is_attached()) dbgp_.on_run(); +#endif dbgp_.visible() = true; } @@ -985,12 +992,9 @@ void App::rebase(va_t new_base) { exp.addr = static_cast(static_cast(exp.addr) + delta); auto& db = analyzer_->db(); - std::unordered_map new_insns; - for (auto& [addr, insn] : db.insns) { + for (auto& insn : db.insns.raw()) insn.addr = static_cast(static_cast(insn.addr) + delta); - new_insns[insn.addr] = std::move(insn); - } - db.insns = std::move(new_insns); + db.insns.finalize(); std::unordered_map new_funcs; for (auto& [entry, func] : db.funcs) { @@ -1001,7 +1005,6 @@ void App::rebase(va_t new_base) { bb.end = static_cast(static_cast(bb.end) + delta); for (auto& s : bb.succs) s = static_cast(static_cast(s) + delta); for (auto& p : bb.preds) p = static_cast(static_cast(p) + delta); - for (auto& i : bb.insns) i.addr = static_cast(static_cast(i.addr) + delta); new_blocks[bb.start] = std::move(bb); } func.blocks = std::move(new_blocks); @@ -1600,7 +1603,8 @@ void App::render_nav_band() { }; for (int px = 0; px < w && px < (int)nav_band_data_.size(); ++px) { - ImU32 col = type_colors[nav_band_data_[px]]; + u8 idx = nav_band_data_[px]; + ImU32 col = type_colors[idx < NB_Entropy + 1 ? 
idx : 0]; dl->AddLine(ImVec2(pos.x + px, pos.y), ImVec2(pos.x + px, pos.y + height), col); } @@ -1715,7 +1719,10 @@ void App::export_asm() { if (ba != entry) out << fmt::format("loc_{:X}:\n", ba); - for (auto& insn : bb.insns) { + auto blk_it = db.insns.range_begin(bb.start); + auto blk_end_it = db.insns.range_end(bb.end); + for (; blk_it != blk_end_it; ++blk_it) { + auto& insn = *blk_it; std::string line = fmt::format(" {:016X} ", insn.addr); line += insn.mnemonic; if (insn.op_str[0]) { diff --git a/src/ui/widgets/callgraph_view.cpp b/src/ui/widgets/callgraph_view.cpp index 5196ebb..87622eb 100644 --- a/src/ui/widgets/callgraph_view.cpp +++ b/src/ui/widgets/callgraph_view.cpp @@ -48,19 +48,19 @@ void CallGraphView::show_function(va_t entry) { if (db_->funcs.count(entry)) { auto& func = db_->funcs.at(entry); for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { - if (!insn.is_call()) continue; + db_->for_each_insn_in_block(bb, [&](const Insn& insn) { + if (!insn.is_call()) return; va_t t = insn.branch_target(); - if (!t) continue; + if (!t) return; bool dup = false; for (auto& e : callees_) if (e.addr == t) { dup = true; break; } - if (dup) continue; + if (dup) return; std::string n = fmt::format("sub_{:X}", t); auto nit = db_->names.find(t); if (nit != db_->names.end()) n = nit->second; callees_.push_back({t, n}); - } + }); } } } diff --git a/src/ui/widgets/debugger_panel.cpp b/src/ui/widgets/debugger_panel.cpp index 57aa44b..83bb1af 100644 --- a/src/ui/widgets/debugger_panel.cpp +++ b/src/ui/widgets/debugger_panel.cpp @@ -1,4 +1,7 @@ #include "debugger_panel.h" + +#ifdef _WIN32 + #include "ui/theme.h" #include #include @@ -367,3 +370,5 @@ bool DebuggerPanel::has_breakpoint(va_t addr) const { } } + +#endif diff --git a/src/ui/widgets/debugger_panel.h b/src/ui/widgets/debugger_panel.h index dc57e5c..711ab1f 100644 --- a/src/ui/widgets/debugger_panel.h +++ b/src/ui/widgets/debugger_panel.h @@ -1,13 +1,19 @@ #pragma once -#include 
"debugger/debug_engine.h" -#include "debugger/process_list.h" #include #include #include #include +#include "core/types.h" + +#ifdef _WIN32 +#include "debugger/debug_engine.h" +#include "debugger/process_list.h" +#endif namespace hype { +#ifdef _WIN32 + class DebuggerPanel { public: using NavCB = std::function; @@ -60,4 +66,33 @@ class DebuggerPanel { bool log_scroll_ = true; }; +#else + +class DebuggerPanel { +public: + using NavCB = std::function; + using OpenCB = std::function; + + void set_nav(NavCB) {} + void set_open_cb(OpenCB) {} + void render() {} + bool& visible() { return visible_; } + + void show_attach_dialog() {} + void toggle_breakpoint(va_t) {} + void on_step_into() {} + void on_step_over() {} + void on_step_out() {} + void on_run() {} + void on_pause() {} + + va_t current_rip() const { return 0; } + bool has_breakpoint(va_t) const { return false; } + +private: + bool visible_ = false; +}; + +#endif + } diff --git a/src/ui/widgets/diff_view.cpp b/src/ui/widgets/diff_view.cpp index e3dedb1..c1152cf 100644 --- a/src/ui/widgets/diff_view.cpp +++ b/src/ui/widgets/diff_view.cpp @@ -40,7 +40,8 @@ void DiffView::render() { ImGui::TableHeadersRow(); std::string filt(filter_); - std::transform(filt.begin(), filt.end(), filt.begin(), ::tolower); + std::transform(filt.begin(), filt.end(), filt.begin(), + [](unsigned char c) { return std::tolower(c); }); for (auto& r : results_) { if (r.status == DiffResult::Identical && !show_identical_) continue; @@ -50,7 +51,8 @@ void DiffView::render() { if (!filt.empty()) { std::string ln = r.name; - std::transform(ln.begin(), ln.end(), ln.begin(), ::tolower); + std::transform(ln.begin(), ln.end(), ln.begin(), + [](unsigned char c) { return std::tolower(c); }); if (ln.find(filt) == std::string::npos) continue; } diff --git a/src/ui/widgets/disasm_view.cpp b/src/ui/widgets/disasm_view.cpp index d39f45e..b7437d7 100644 --- a/src/ui/widgets/disasm_view.cpp +++ b/src/ui/widgets/disasm_view.cpp @@ -1,13 +1,36 @@ #include 
"disasm_view.h" #include "stack_frame_view.h" +#ifdef _WIN32 #include "debugger/debug_engine.h" +#endif #include "ui/theme.h" #include "ui/fonts.h" #include #include +#include namespace hype { +static void fmt_addr(char* buf, va_t addr) { + static const char hex[] = "0123456789ABCDEF"; + for (int i = 15; i >= 0; --i) { buf[i] = hex[addr & 0xF]; addr >>= 4; } + buf[16] = '\0'; +} + +static void fmt_hex_bytes(char* buf, const u8* bytes, int len, int max_bytes = 4) { + static const char hex[] = "0123456789ABCDEF"; + int nb = len < max_bytes ? len : max_bytes; + nb = nb < 6 ? nb : 6; + char* p = buf; + for (int b = 0; b < nb; ++b) { + *p++ = hex[bytes[b] >> 4]; + *p++ = hex[bytes[b] & 0xF]; + *p++ = ' '; + } + if (len > 6) { *p++ = '.'; *p++ = '.'; } + *p = '\0'; +} + void DisasmView::goto_addr(va_t addr) { scroll_to_ = addr; scroll_pending_ = true; @@ -22,21 +45,20 @@ void DisasmView::rebuild() { addrs_.reserve(db_->insns.size() + db_->data_items.size()); if (beautify_) { - // only show instructions inside functions + important data for (auto& [entry, func] : db_->funcs) { for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) + db_->for_each_insn_in_block(bb, [&](const Insn& insn) { addrs_.push_back(insn.addr); + }); } } - // include IAT entries, strings, pointers for (auto& [a, item] : db_->data_items) { if (item.style == DataStyle::Import || item.style == DataStyle::String || item.style == DataStyle::Pointer) addrs_.push_back(a); } } else { - for (auto& [a, _] : db_->insns) - addrs_.push_back(a); + for (auto& insn : db_->insns) + addrs_.push_back(insn.addr); for (auto& [a, _] : db_->data_items) addrs_.push_back(a); } @@ -48,9 +70,222 @@ void DisasmView::rebuild() { for (auto& [addr, str] : db_->strings) str_map_[addr] = &str; + rebuild_data_cache(); + rebuild_insn_cache(); + + seg_hdr_cache_.clear(); + if (img_) { + for (auto& seg : img_->segments) + seg_hdr_cache_[seg.va] = fmt::format("; === {} ===", seg.name); + } + dirty_ = false; } +void 
DisasmView::rebuild_data_cache() { + data_cache_.clear(); + data_cache_.reserve(db_->data_items.size()); + + for (auto& [a, item] : db_->data_items) { + CachedDataLine c; + auto nit = db_->names.find(item.addr); + + switch (item.style) { + case DataStyle::Import: { + std::string func_name; + if (nit != db_->names.end()) { + auto sep = nit->second.find('!'); + func_name = (sep != std::string::npos) ? nit->second.substr(sep + 1) : nit->second; + } else { + func_name = fmt::format("unk_{:X}", item.addr); + } + const char* sz_kw = (item.size == DataSize::Qword) ? "qword" : "dword"; + c.label = fmt::format("extrn {}:{}", func_name, sz_kw); + c.color = IM_COL32(80, 210, 230, 255); + break; + } + case DataStyle::String: { + std::string val = "\""; + if (img_) { + for (auto& seg : img_->segments) { + if (!seg.contains(item.addr)) continue; + size_t off = static_cast(item.addr - seg.va); + for (size_t i = off; i < seg.data.size() && seg.data[i]; ++i) + val += static_cast(seg.data[i]); + break; + } + } + val += "\", 0"; + c.label = fmt::format("db {}", val); + c.color = col::str(); + break; + } + case DataStyle::Pointer: { + u64 val = 0; + if (img_) { + for (auto& seg : img_->segments) { + if (!seg.contains(item.addr)) continue; + size_t off = static_cast(item.addr - seg.va); + size_t sz = static_cast(item.size); + if (off + sz <= seg.data.size()) + memcpy(&val, seg.data.data() + off, sz); + break; + } + } + const char* dir = (item.size == DataSize::Qword) ? 
"dq" : "dd"; + std::string target_name; + auto tit = db_->names.find(static_cast(val)); + if (tit != db_->names.end()) + target_name = tit->second; + else + target_name = fmt::format("sub_{:X}", val); + c.label = fmt::format("{} offset {}", dir, target_name); + c.color = IM_COL32(100, 160, 255, 255); + break; + } + case DataStyle::Align: { + size_t run_bytes = static_cast(item.size); + if (img_) { + for (auto& seg : img_->segments) { + if (!seg.contains(item.addr)) continue; + size_t off = static_cast(item.addr - seg.va); + size_t ptr_sz = static_cast(item.size); + size_t j = off; + while (j + ptr_sz <= seg.data.size()) { + va_t zv = 0; + memcpy(&zv, seg.data.data() + j, ptr_sz); + if (zv != 0) break; + j += ptr_sz; + } + run_bytes = j - off; + break; + } + } + c.label = fmt::format("align {:X}h", run_bytes); + c.color = IM_COL32(100, 100, 110, 255); + break; + } + case DataStyle::Raw: + default: { + const char* dir = "db"; + switch (item.size) { + case DataSize::Word: dir = "dw"; break; + case DataSize::Dword: dir = "dd"; break; + case DataSize::Qword: dir = "dq"; break; + default: break; + } + u64 val = 0; + if (img_) { + for (auto& seg : img_->segments) { + if (!seg.contains(item.addr)) continue; + size_t off = static_cast(item.addr - seg.va); + size_t sz = static_cast(item.size); + if (off + sz <= seg.data.size()) + memcpy(&val, seg.data.data() + off, sz); + break; + } + } + c.label = fmt::format("{} 0x{:X}", dir, val); + c.color = col::imm(); + break; + } + } + + if (nit != db_->names.end() && item.style != DataStyle::Import) + c.name_comment = fmt::format("; {}", nit->second); + + data_cache_[a] = std::move(c); + } +} + +void DisasmView::rebuild_insn_cache() { + insn_cache_.clear(); + insn_cache_.reserve(db_->insns.size()); + + for (auto& insn : db_->insns) { + CachedInsnLine c; + fmt_hex_bytes(c.hex, insn.bytes, insn.len); + + // pre-build annotation + std::string annotation; + for (int k = 0; k < insn.op_count && annotation.empty(); ++k) { + va_t ref = 0; + 
if (insn.ops[k].type == OpType::Imm && insn.ops[k].val > 0x10000) ref = insn.ops[k].val; + else if (insn.ops[k].type == OpType::Mem && insn.ops[k].val) ref = insn.ops[k].val; + if (!ref) continue; + + auto sit = str_map_.find(ref); + if (sit != str_map_.end()) { + auto& s = *sit->second; + annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); + } + if (annotation.empty()) { + for (int off = 1; off <= 4 && annotation.empty(); ++off) { + auto sit2 = str_map_.find(ref - off); + if (sit2 != str_map_.end()) { + auto& s = *sit2->second; + if ((size_t)off < s.size()) + annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); + } + } + } + if (annotation.empty()) { + auto nit = db_->names.find(ref); + if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) + annotation = nit->second; + } + } + + if (annotation.empty()) { + std::string ops(insn.op_str); + size_t scan_pos = 0; + while (scan_pos < ops.size() && annotation.empty()) { + if (scan_pos + 2 < ops.size() && ops[scan_pos] == '0' && (ops[scan_pos+1] == 'x' || ops[scan_pos+1] == 'X')) { + char* end = nullptr; + va_t val = std::strtoull(ops.c_str() + scan_pos, &end, 16); + if (val > 0x10000 && end > ops.c_str() + scan_pos + 4) { + auto sit = str_map_.find(val); + if (sit != str_map_.end()) { + auto& s = *sit->second; + annotation = s.size() > 36 ? 
("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); + } else { + auto nit = db_->names.find(val); + if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) + annotation = nit->second; + } + scan_pos = static_cast(end - ops.c_str()); + } else { + scan_pos += 2; + } + } else if (std::isxdigit((unsigned char)ops[scan_pos]) && scan_pos + 6 < ops.size()) { + char* end = nullptr; + va_t val = std::strtoull(ops.c_str() + scan_pos, &end, 16); + size_t len = static_cast(end - (ops.c_str() + scan_pos)); + if (val > 0x10000 && len >= 6) { + auto sit = str_map_.find(val); + if (sit != str_map_.end()) { + auto& s = *sit->second; + annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); + } else { + auto nit = db_->names.find(val); + if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) + annotation = nit->second; + } + scan_pos += len; + } else { + scan_pos++; + } + } else { + scan_pos++; + } + } + } + + c.annotation = std::move(annotation); + insn_cache_[insn.addr] = std::move(c); + } +} + void DisasmView::cmd_define_data() { if (!db_) return; va_t addr = cursor_; @@ -59,7 +294,7 @@ void DisasmView::cmd_define_data() { DataItem old_item = had_data ? it->second : DataItem{}; bool had_insn = db_->insns.count(addr) > 0; Insn old_insn{}; - if (had_insn) old_insn = db_->insns.at(addr); + if (had_insn) { auto iit = db_->insns.find(addr); old_insn = *iit; } DataSize next = DataSize::Byte; if (had_data) { @@ -96,7 +331,7 @@ void DisasmView::cmd_define_string() { DataItem old_item = had_data ? db_->data_items.at(addr) : DataItem{}; bool had_insn = db_->insns.count(addr) > 0; Insn old_insn{}; - if (had_insn) old_insn = db_->insns.at(addr); + if (had_insn) { auto iit = db_->insns.find(addr); old_insn = *iit; } db_->define_string(addr); dirty_ = true; @@ -123,7 +358,7 @@ void DisasmView::cmd_undefine() { DataItem old_item = had_data ? 
db_->data_items.at(addr) : DataItem{}; bool had_insn = db_->insns.count(addr) > 0; Insn old_insn{}; - if (had_insn) old_insn = db_->insns.at(addr); + if (had_insn) { auto iit = db_->insns.find(addr); old_insn = *iit; } db_->undefine(addr); dirty_ = true; @@ -180,7 +415,7 @@ void DisasmView::cmd_nop() { va_t addr = cursor_; auto it = db_->insns.find(addr); if (it == db_->insns.end()) return; - u8 len = it->second.len; + u8 len = it->len; std::vector old_bytes(len); for (auto& seg : img_->segments) { @@ -195,8 +430,17 @@ void DisasmView::cmd_nop() { for (auto& seg : img_->segments) { if (!seg.contains(addr)) continue; size_t off = static_cast(addr - seg.va); - for (u8 i = 0; i < len && off + i < seg.data.size(); ++i) - seg.data[off + i] = 0x90; + if (img_->arch == Arch::ARM64) { + for (u8 i = 0; i + 3 < len && off + i + 3 < seg.data.size(); i += 4) { + seg.data[off + i] = 0x1F; + seg.data[off + i + 1] = 0x20; + seg.data[off + i + 2] = 0x03; + seg.data[off + i + 3] = 0xD5; + } + } else { + for (u8 i = 0; i < len && off + i < seg.data.size(); ++i) + seg.data[off + i] = 0x90; + } break; } cmd_force_code(); @@ -251,7 +495,7 @@ void DisasmView::update_reg_highlight() { auto& func = db_->funcs.at(func_entry); for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { + db_->for_each_insn_in_block(bb, [&](const Insn& insn) { for (int k = 0; k < insn.op_count; ++k) { auto& op = insn.ops[k]; if (op.type == OpType::Reg && op.reg == highlighted_reg_) { @@ -259,12 +503,12 @@ void DisasmView::update_reg_highlight() { break; } if (op.type == OpType::Mem && - (op.mem.base == highlighted_reg_ || op.mem.index == highlighted_reg_)) { + (op.mem_base == highlighted_reg_ || op.mem_index == highlighted_reg_)) { reg_highlight_addrs_.insert(insn.addr); break; } } - } + }); } } @@ -272,6 +516,7 @@ void DisasmView::render() { ImGui::Begin("Disassembly"); if (!db_ || !img_) { ImGui::TextDisabled("No binary loaded"); ImGui::End(); return; } +#ifdef _WIN32 if (dbg_eng_ && 
dbg_eng_->is_attached() && !dbg_eng_->is_running()) { if (!live_mode_ || scroll_pending_) { va_t center = scroll_pending_ ? scroll_to_ : (live_base_ ? live_base_ : debug_rip_); @@ -298,6 +543,7 @@ void DisasmView::render() { ImGui::End(); return; } +#endif live_mode_ = false; @@ -349,8 +595,9 @@ void DisasmView::render() { ImVec2 sp = ImGui::GetCursorScreenPos(); float aw = ImGui::GetContentRegionAvail().x; if (aw < 100) aw = 1200; - auto hdr = fmt::format("; === {} ===", cur_seg->name); - ImGui::GetWindowDrawList()->AddText(ImVec2(sp.x + 4, sp.y), col::comment(), hdr.c_str()); + auto hit = seg_hdr_cache_.find(cur_seg->va); + if (hit != seg_hdr_cache_.end()) + ImGui::GetWindowDrawList()->AddText(ImVec2(sp.x + 4, sp.y), col::comment(), hit->second.c_str()); ImGui::Dummy(ImVec2(aw, lh)); } } @@ -362,7 +609,7 @@ void DisasmView::render() { } auto it = db_->insns.find(a); if (it != db_->insns.end()) - render_line(i, it->second, lh); + render_line(i, *it, lh); } } @@ -386,117 +633,19 @@ void DisasmView::render_data_line(const DataItem& item, float lh) { float x = pos.x + 4; float y = pos.y; - auto addr_s = fmt::format("{:016X}", item.addr); - dl->AddText(ImVec2(x, y), is_cursor ? IM_COL32(130, 190, 255, 255) : col::addr(), addr_s.c_str()); + char addr_buf[17]; + fmt_addr(addr_buf, item.addr); + dl->AddText(ImVec2(x, y), is_cursor ? IM_COL32(130, 190, 255, 255) : col::addr(), addr_buf); x += ImGui::CalcTextSize("0000000000000000").x + 14; - auto nit = db_->names.find(item.addr); - - switch (item.style) { - case DataStyle::Import: { - std::string func_name; - if (nit != db_->names.end()) { - auto sep = nit->second.find('!'); - func_name = (sep != std::string::npos) ? nit->second.substr(sep + 1) : nit->second; - } else { - func_name = fmt::format("unk_{:X}", item.addr); - } - const char* sz_kw = (item.size == DataSize::Qword) ? 
"qword" : "dword"; - auto lbl = fmt::format("extrn {}:{}", func_name, sz_kw); - dl->AddText(ImVec2(x, y), IM_COL32(80, 210, 230, 255), lbl.c_str()); - break; - } - case DataStyle::String: { - std::string val = "\""; - if (img_) { - for (auto& seg : img_->segments) { - if (!seg.contains(item.addr)) continue; - size_t off = static_cast(item.addr - seg.va); - for (size_t i = off; i < seg.data.size() && seg.data[i]; ++i) - val += static_cast(seg.data[i]); - break; - } - } - val += "\", 0"; - auto lbl = fmt::format("db {}", val); - dl->AddText(ImVec2(x, y), col::str(), lbl.c_str()); - break; - } - case DataStyle::Pointer: { - u64 val = 0; - if (img_) { - for (auto& seg : img_->segments) { - if (!seg.contains(item.addr)) continue; - size_t off = static_cast(item.addr - seg.va); - size_t sz = static_cast(item.size); - if (off + sz <= seg.data.size()) - memcpy(&val, seg.data.data() + off, sz); - break; - } - } - const char* dir = (item.size == DataSize::Qword) ? "dq" : "dd"; - std::string target_name; - auto tit = db_->names.find(static_cast(val)); - if (tit != db_->names.end()) - target_name = tit->second; - else - target_name = fmt::format("sub_{:X}", val); - auto lbl = fmt::format("{} offset {}", dir, target_name); - dl->AddText(ImVec2(x, y), IM_COL32(100, 160, 255, 255), lbl.c_str()); - break; - } - case DataStyle::Align: { - size_t run_bytes = static_cast(item.size); - if (img_) { - for (auto& seg : img_->segments) { - if (!seg.contains(item.addr)) continue; - size_t off = static_cast(item.addr - seg.va); - size_t ptr_sz = static_cast(item.size); - size_t j = off; - while (j + ptr_sz <= seg.data.size()) { - va_t zv = 0; - memcpy(&zv, seg.data.data() + j, ptr_sz); - if (zv != 0) break; - j += ptr_sz; - } - run_bytes = j - off; - break; - } - } - auto lbl = fmt::format("align {:X}h", run_bytes); - dl->AddText(ImVec2(x, y), IM_COL32(100, 100, 110, 255), lbl.c_str()); - break; - } - case DataStyle::Raw: - default: { - const char* dir = "db"; - switch (item.size) { - case 
DataSize::Word: dir = "dw"; break; - case DataSize::Dword: dir = "dd"; break; - case DataSize::Qword: dir = "dq"; break; - default: break; - } - u64 val = 0; - if (img_) { - for (auto& seg : img_->segments) { - if (!seg.contains(item.addr)) continue; - size_t off = static_cast(item.addr - seg.va); - size_t sz = static_cast(item.size); - if (off + sz <= seg.data.size()) - memcpy(&val, seg.data.data() + off, sz); - break; - } + auto cit = data_cache_.find(item.addr); + if (cit != data_cache_.end()) { + dl->AddText(ImVec2(x, y), cit->second.color, cit->second.label.c_str()); + if (!cit->second.name_comment.empty()) { + float nx = pos.x + avail_w - ImGui::CalcTextSize(cit->second.name_comment.c_str()).x - 16; + if (nx > x + 200) + dl->AddText(ImVec2(nx, y), col::comment(), cit->second.name_comment.c_str()); } - auto lbl = fmt::format("{} 0x{:X}", dir, val); - dl->AddText(ImVec2(x, y), col::imm(), lbl.c_str()); - break; - } - } - - if (nit != db_->names.end() && item.style != DataStyle::Import) { - float nx = pos.x + avail_w - ImGui::CalcTextSize(nit->second.c_str()).x - 16; - if (nx > x + 200) - dl->AddText(ImVec2(nx, y), col::comment(), fmt::format("; {}", nit->second).c_str()); } ImGui::InvisibleButton("##dl", ImVec2(avail_w, lh)); @@ -517,10 +666,9 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { ImVec2 hpos = ImGui::GetCursorScreenPos(); dl->AddRectFilled(hpos, ImVec2(hpos.x + avail_w, hpos.y + lh), IM_COL32(20, 24, 32, 255)); dl->AddLine(ImVec2(hpos.x, hpos.y), ImVec2(hpos.x + avail_w, hpos.y), IM_COL32(60, 70, 90, 255)); - auto header = fmt::format(" {} ", fn.name); - float tw = ImGui::CalcTextSize(header.c_str()).x; + float tw = ImGui::CalcTextSize(fn.name.c_str()).x + ImGui::CalcTextSize(" ").x; float cx = hpos.x + (avail_w - tw) * 0.5f; - dl->AddText(ImVec2(cx, hpos.y + 1), col::func(), header.c_str()); + dl->AddText(ImVec2(cx, hpos.y + 1), col::func(), fn.name.c_str()); ImGui::Dummy(ImVec2(avail_w, lh)); } @@ -550,15 +698,14 @@ void 
DisasmView::render_line(int idx, const Insn& insn, float lh) { float x = pos.x + 4; float y = pos.y; - auto addr_s = fmt::format("{:016X}", insn.addr); - dl->AddText(ImVec2(x, y), is_cursor ? IM_COL32(130, 190, 255, 255) : col::addr(), addr_s.c_str()); + char addr_buf[17]; + fmt_addr(addr_buf, insn.addr); + dl->AddText(ImVec2(x, y), is_cursor ? IM_COL32(130, 190, 255, 255) : col::addr(), addr_buf); x += ImGui::CalcTextSize("0000000000000000").x + 14; - std::string hex; - int nb = std::min(insn.len, 6); - for (int b = 0; b < nb; ++b) hex += fmt::format("{:02X} ", insn.bytes[b]); - if (insn.len > 6) hex += ".."; - dl->AddText(ImVec2(x, y), IM_COL32(85, 85, 95, 255), hex.c_str()); + auto cache_it = insn_cache_.find(insn.addr); + const char* hex_str = cache_it != insn_cache_.end() ? cache_it->second.hex : ""; + dl->AddText(ImVec2(x, y), IM_COL32(85, 85, 95, 255), hex_str); x += ImGui::CalcTextSize("00 00 00 00 00 00 ..").x + 10; ImU32 mc = col::mnem(); @@ -577,91 +724,22 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { dl->AddText(ImVec2(x, y), mc, insn.mnemonic); x += 68; - std::string annotation; + const std::string* annotation_ptr = nullptr; + if (cache_it != insn_cache_.end() && !cache_it->second.annotation.empty()) + annotation_ptr = &cache_it->second.annotation; + if (insn.op_str[0]) { bool use_hex = db_->hex_display.count(insn.addr) == 0; va_t target = insn.branch_target(); const StackFrame* frame = sfv_ ? sfv_->current_frame() : nullptr; - std::string display_ops = frame ? 
format_operand_with_vars(insn, frame) : std::string(insn.op_str); - - // find reference annotation from structured operands - for (int k = 0; k < insn.op_count && annotation.empty(); ++k) { - va_t ref = 0; - if (insn.ops[k].type == OpType::Imm && insn.ops[k].val > 0x10000) ref = insn.ops[k].val; - else if (insn.ops[k].type == OpType::Mem && insn.ops[k].val) ref = insn.ops[k].val; - if (!ref) continue; - - // exact match - auto sit = str_map_.find(ref); - if (sit != str_map_.end()) { - auto& s = *sit->second; - annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); - } - // check if ref points INTO a string (offset within first 4 bytes) - if (annotation.empty()) { - for (int off = 1; off <= 4 && annotation.empty(); ++off) { - auto sit2 = str_map_.find(ref - off); - if (sit2 != str_map_.end()) { - auto& s = *sit2->second; - if ((size_t)off < s.size()) - annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); - } - } - } - if (annotation.empty()) { - auto nit = db_->names.find(ref); - if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) - annotation = nit->second; - } - } - - // fallback: scan op_str for hex addresses matching strings/names - if (annotation.empty()) { - std::string ops(insn.op_str); - size_t scan_pos = 0; - while (scan_pos < ops.size() && annotation.empty()) { - // find hex patterns: 0x... or raw hex sequences 7+ chars - if (scan_pos + 2 < ops.size() && ops[scan_pos] == '0' && (ops[scan_pos+1] == 'x' || ops[scan_pos+1] == 'X')) { - char* end = nullptr; - va_t val = std::strtoull(ops.c_str() + scan_pos, &end, 16); - if (val > 0x10000 && end > ops.c_str() + scan_pos + 4) { - auto sit = str_map_.find(val); - if (sit != str_map_.end()) { - auto& s = *sit->second; - annotation = s.size() > 36 ? 
("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); - } else { - auto nit = db_->names.find(val); - if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) - annotation = nit->second; - } - scan_pos = static_cast(end - ops.c_str()); - } else { - scan_pos += 2; - } - } else if (std::isxdigit((unsigned char)ops[scan_pos]) && scan_pos + 6 < ops.size()) { - // try parsing as raw hex (like in Intel syntax: 7FFD1234h) - char* end = nullptr; - va_t val = std::strtoull(ops.c_str() + scan_pos, &end, 16); - size_t len = static_cast(end - (ops.c_str() + scan_pos)); - if (val > 0x10000 && len >= 6) { - auto sit = str_map_.find(val); - if (sit != str_map_.end()) { - auto& s = *sit->second; - annotation = s.size() > 36 ? ("\"" + s.substr(0, 36) + "...\"") : ("\"" + s + "\""); - } else { - auto nit = db_->names.find(val); - if (nit != db_->names.end() && !insn.is_call() && !insn.is_branch()) - annotation = nit->second; - } - scan_pos += len; - } else { - scan_pos++; - } - } else { - scan_pos++; - } - } + std::string display_ops_buf; + const char* display_ops; + if (frame) { + display_ops_buf = format_operand_with_vars(insn, frame); + display_ops = display_ops_buf.c_str(); + } else { + display_ops = insn.op_str; } if ((insn.is_call() || insn.is_branch()) && target) { @@ -669,43 +747,49 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { if (nit != db_->names.end()) dl->AddText(ImVec2(x, y), IM_COL32(120, 190, 255, 255), nit->second.c_str()); else - dl->AddText(ImVec2(x, y), col::imm(), display_ops.c_str()); + dl->AddText(ImVec2(x, y), col::imm(), display_ops); } else { if (!use_hex) { + if (display_ops_buf.empty()) display_ops_buf = insn.op_str; for (int k = 0; k < insn.op_count; ++k) { if (insn.ops[k].type == OpType::Imm) { auto hx = fmt::format("0x{:X}", insn.ops[k].val); auto dec = fmt::format("{}", insn.ops[k].val); - auto p = display_ops.find(hx); - if (p != std::string::npos) display_ops.replace(p, hx.size(), dec); + auto p = 
display_ops_buf.find(hx); + if (p != std::string::npos) display_ops_buf.replace(p, hx.size(), dec); } } + dl->AddText(ImVec2(x, y), col::reg(), display_ops_buf.c_str()); + } else { + dl->AddText(ImVec2(x, y), col::reg(), display_ops); } - dl->AddText(ImVec2(x, y), col::reg(), display_ops.c_str()); } } x += 220; auto xit = db_->xrefs_to.find(insn.addr); if (xit != db_->xrefs_to.end() && !xit->second.empty()) { - auto xr_s = fmt::format("[{}x]", xit->second.size()); - dl->AddText(ImVec2(x, y), col::xref(), xr_s.c_str()); + char xr_buf[16]; + snprintf(xr_buf, sizeof(xr_buf), "[%dx]", (int)xit->second.size()); + dl->AddText(ImVec2(x, y), col::xref(), xr_buf); } x += 44; - // comment column: user comment > annotation > auto-comment - auto cit = db_->comments.find(insn.addr); - if (cit != db_->comments.end()) { - auto ct = fmt::format("; {}", cit->second); - dl->AddText(ImVec2(x, y), IM_COL32(90, 140, 90, 255), ct.c_str()); - } else if (!annotation.empty()) { - auto ct = fmt::format("; {}", annotation); - dl->AddText(ImVec2(x, y), col::str(), ct.c_str()); + auto comment_it = db_->comments.find(insn.addr); + if (comment_it != db_->comments.end()) { + char ct_buf[256]; + snprintf(ct_buf, sizeof(ct_buf), "; %s", comment_it->second.c_str()); + dl->AddText(ImVec2(x, y), IM_COL32(90, 140, 90, 255), ct_buf); + } else if (annotation_ptr) { + char ct_buf[256]; + snprintf(ct_buf, sizeof(ct_buf), "; %s", annotation_ptr->c_str()); + dl->AddText(ImVec2(x, y), col::str(), ct_buf); } else if ((insn.is_call() || insn.is_branch()) && insn.branch_target()) { auto nit = db_->names.find(insn.branch_target()); if (nit != db_->names.end()) { - auto ct = fmt::format("; {}", nit->second); - dl->AddText(ImVec2(x, y), IM_COL32(65, 85, 65, 255), ct.c_str()); + char ct_buf[256]; + snprintf(ct_buf, sizeof(ct_buf), "; %s", nit->second.c_str()); + dl->AddText(ImVec2(x, y), IM_COL32(65, 85, 65, 255), ct_buf); } } @@ -722,8 +806,8 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { 
highlighted_reg_ = insn.ops[k].reg; break; } - if (insn.ops[k].type == OpType::Mem && insn.ops[k].mem.base) { - highlighted_reg_ = insn.ops[k].mem.base; + if (insn.ops[k].type == OpType::Mem && insn.ops[k].mem_base) { + highlighted_reg_ = insn.ops[k].mem_base; break; } } @@ -761,19 +845,20 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { if (ImGui::MenuItem("NOP out")) cmd_nop(); ImGui::Separator(); if (ImGui::MenuItem("Copy address")) { - ImGui::SetClipboardText(addr_s.c_str()); + ImGui::SetClipboardText(addr_buf); } if (ImGui::MenuItem("Generate Signature", "Ctrl+Shift+S")) { if (sig_cb_) sig_cb_(insn.addr); } if (ImGui::MenuItem("Copy line")) { - auto full = fmt::format("{} {} {}", addr_s, insn.mnemonic, insn.op_str); + auto full = fmt::format("{} {} {}", addr_buf, insn.mnemonic, insn.op_str); ImGui::SetClipboardText(full.c_str()); } if (ImGui::BeginMenu("Copy as")) { if (ImGui::MenuItem("C array")) { std::string c = "unsigned char data[] = { "; - for (u8 b = 0; b < insn.len; ++b) { + u8 copy_len = std::min(insn.len, sizeof(insn.bytes)); + for (u8 b = 0; b < copy_len; ++b) { if (b > 0) c += ", "; c += fmt::format("0x{:02X}", insn.bytes[b]); } @@ -782,14 +867,16 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { } if (ImGui::MenuItem("Python bytes")) { std::string py = "b'"; - for (u8 b = 0; b < insn.len; ++b) + u8 copy_len = std::min(insn.len, sizeof(insn.bytes)); + for (u8 b = 0; b < copy_len; ++b) py += fmt::format("\\x{:02x}", insn.bytes[b]); py += "'"; ImGui::SetClipboardText(py.c_str()); } if (ImGui::MenuItem("YARA hex string")) { std::string yara = "{ "; - for (u8 b = 0; b < insn.len; ++b) { + u8 copy_len = std::min(insn.len, sizeof(insn.bytes)); + for (u8 b = 0; b < copy_len; ++b) { if (b > 0) yara += " "; if (b >= 2 && insn.op_count > 0 && (insn.ops[0].type == OpType::Imm || insn.ops[0].type == OpType::Mem)) @@ -808,6 +895,7 @@ void DisasmView::render_line(int idx, const Insn& insn, float lh) { ImGui::PopID(); } 
+#ifdef _WIN32 void DisasmView::render_live(float lh) { ImGui::BeginChild("##dasm_live", ImVec2(0, 0), false, ImGuiWindowFlags_HorizontalScrollbar); @@ -902,7 +990,8 @@ void DisasmView::render_live_line(int idx, const Insn& insn, float lh) { x += ImGui::CalcTextSize("0000000000000000").x + 14; std::string hex; - int nb = std::min(insn.len, 6); + int nb = std::min(insn.len, (int)sizeof(insn.bytes)); + nb = std::min(nb, 6); for (int b = 0; b < nb; ++b) hex += fmt::format("{:02X} ", insn.bytes[b]); if (insn.len > 6) hex += ".."; dl->AddText(ImVec2(x, y), IM_COL32(85, 85, 95, 255), hex.c_str()); @@ -942,5 +1031,6 @@ void DisasmView::render_live_line(int idx, const Insn& insn, float lh) { ImGui::PopID(); } +#endif } diff --git a/src/ui/widgets/disasm_view.h b/src/ui/widgets/disasm_view.h index b9e7cfd..82eb4ab 100644 --- a/src/ui/widgets/disasm_view.h +++ b/src/ui/widgets/disasm_view.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,18 @@ namespace hype { class StackFrameView; class DebugEngine; +struct CachedDataLine { + std::string label; + ImU32 color; + std::string name_comment; +}; + +struct CachedInsnLine { + char hex[22]; + std::string label; + std::string annotation; +}; + class DisasmView { public: using NavCB = std::function; @@ -40,6 +53,8 @@ class DisasmView { private: void rebuild(); + void rebuild_data_cache(); + void rebuild_insn_cache(); void render_line(int idx, const Insn& insn, float lh); void render_live_line(int idx, const Insn& insn, float lh); void render_data_line(const DataItem& item, float lh); @@ -68,6 +83,10 @@ class DisasmView { bool live_mode_ = false; std::vector live_insns_; va_t live_base_ = 0; + + std::unordered_map data_cache_; + std::unordered_map insn_cache_; + std::unordered_map seg_hdr_cache_; }; } diff --git a/src/ui/widgets/entropy_view.cpp b/src/ui/widgets/entropy_view.cpp index 1b1eafc..84f6e68 100644 --- a/src/ui/widgets/entropy_view.cpp +++ b/src/ui/widgets/entropy_view.cpp @@ -1,9 +1,31 @@ 
#include "entropy_view.h" #include #include +#include +#include namespace hype { +EntropyView::~EntropyView() { + destroy_texture(); +} + +void EntropyView::destroy_texture() { + if (tex_id_) { + GLuint id = static_cast(tex_id_); + glDeleteTextures(1, &id); + tex_id_ = 0; + } +} + +static ImU32 entropy_color(float t) { + t = std::clamp(t, 0.0f, 1.0f); + u8 r = static_cast(t * 255); + u8 b = static_cast((1.0f - t) * 255); + u8 g = static_cast((t < 0.5f ? t * 2 : (1.0f - t) * 2) * 120); + return IM_COL32(r, g, b, 255); +} + void EntropyView::rebuild() { blocks_.clear(); if (!img_) return; @@ -24,6 +46,73 @@ void EntropyView::rebuild() { } } dirty_ = false; + last_avail_w_ = 0; + last_avail_h_ = 0; +} + +void EntropyView::rebuild_texture(int width, int height) { + if (width <= 0 || height <= 0 || blocks_.empty()) return; + + tex_w_ = width; + tex_h_ = height; + + std::vector pixels(width * height * 4); + + float blocks_per_col = static_cast(blocks_.size()) / width; + + for (int x = 0; x < width; ++x) { + int b_start = static_cast(x * blocks_per_col); + int b_end = static_cast((x + 1) * blocks_per_col); + b_end = std::max(b_end, b_start + 1); + b_end = std::min(b_end, static_cast(blocks_.size())); + + float max_ent = 0.0f; + for (int b = b_start; b < b_end; ++b) + max_ent = std::max(max_ent, blocks_[b].entropy); + + float t = std::clamp(max_ent / 8.0f, 0.0f, 1.0f); + float bar_h = t * height; + int bar_px = static_cast(bar_h + 0.5f); + + ImU32 col = entropy_color(t); + u8 cr = (col >> 0) & 0xFF; + u8 cg = (col >> 8) & 0xFF; + u8 cb = (col >> 16) & 0xFF; + + for (int y = 0; y < height; ++y) { + int row_from_bottom = height - 1 - y; + int off = (y * width + x) * 4; + if (row_from_bottom < bar_px) { + pixels[off + 0] = cr; + pixels[off + 1] = cg; + pixels[off + 2] = cb; + pixels[off + 3] = 255; + } else { + pixels[off + 0] = 0; + pixels[off + 1] = 0; + pixels[off + 2] = 0; + pixels[off + 3] = 0; + } + } + } + + if (!tex_id_) { + GLuint id; + glGenTextures(1, &id); + 
tex_id_ = id; + } + + glBindTexture(GL_TEXTURE_2D, static_cast(tex_id_)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, pixels.data()); + glBindTexture(GL_TEXTURE_2D, 0); + + last_avail_w_ = width; + last_avail_h_ = height; } void EntropyView::render() { @@ -33,37 +122,43 @@ void EntropyView::render() { if (blocks_.empty()) { ImGui::End(); return; } ImVec2 avail = ImGui::GetContentRegionAvail(); - float bar_w = avail.x / static_cast(blocks_.size()); - if (bar_w < 1.0f) bar_w = 1.0f; - float h = avail.y - 20; + int w = static_cast(avail.x); + int h = static_cast(avail.y - 20); if (h < 40) h = 40; + if (w < 1) w = 1; - ImVec2 origin = ImGui::GetCursorScreenPos(); - ImDrawList* dl = ImGui::GetWindowDrawList(); + if (w != last_avail_w_ || h != last_avail_h_) + rebuild_texture(w, h); - for (size_t i = 0; i < blocks_.size(); ++i) { - float t = blocks_[i].entropy / 8.0f; - t = std::clamp(t, 0.0f, 1.0f); + if (tex_id_) { + ImVec2 origin = ImGui::GetCursorScreenPos(); + ImGui::Image(static_cast(tex_id_), + ImVec2(static_cast(tex_w_), static_cast(tex_h_))); - u8 r = static_cast(t * 255); - u8 b = static_cast((1.0f - t) * 255); - u8 g = static_cast((t < 0.5f ? 
t * 2 : (1.0f - t) * 2) * 120); - ImU32 col = IM_COL32(r, g, b, 255); + if (ImGui::IsItemHovered()) { + float mx = ImGui::GetIO().MousePos.x - origin.x; + float blocks_per_col = static_cast(blocks_.size()) / tex_w_; + int col = static_cast(mx); + col = std::clamp(col, 0, tex_w_ - 1); - float x = origin.x + i * bar_w; - float bar_h = t * h; - dl->AddRectFilled(ImVec2(x, origin.y + h - bar_h), ImVec2(x + bar_w, origin.y + h), col); - } + int b_start = static_cast(col * blocks_per_col); + int b_end = static_cast((col + 1) * blocks_per_col); + b_end = std::max(b_end, b_start + 1); + b_end = std::min(b_end, static_cast(blocks_.size())); - ImGui::InvisibleButton("##ent", ImVec2(avail.x, h)); - if (ImGui::IsItemHovered()) { - float mx = ImGui::GetIO().MousePos.x - origin.x; - int idx = static_cast(mx / bar_w); - idx = std::clamp(idx, 0, static_cast(blocks_.size()) - 1); - ImGui::SetTooltip("0x%llX entropy: %.2f", (unsigned long long)blocks_[idx].addr, blocks_[idx].entropy); + int mid = (b_start + b_end) / 2; + mid = std::clamp(mid, 0, static_cast(blocks_.size()) - 1); - if (ImGui::IsMouseClicked(ImGuiMouseButton_Left)) - if (nav_) nav_(blocks_[idx].addr); + float max_ent = 0.0f; + for (int b = b_start; b < b_end; ++b) + max_ent = std::max(max_ent, blocks_[b].entropy); + + ImGui::SetTooltip("0x%llX entropy: %.2f", + (unsigned long long)blocks_[mid].addr, max_ent); + + if (ImGui::IsMouseClicked(ImGuiMouseButton_Left)) + if (nav_) nav_(blocks_[mid].addr); + } } ImGui::End(); diff --git a/src/ui/widgets/entropy_view.h b/src/ui/widgets/entropy_view.h index d78fdd1..33c0930 100644 --- a/src/ui/widgets/entropy_view.h +++ b/src/ui/widgets/entropy_view.h @@ -12,9 +12,12 @@ class EntropyView { void set_data(const PEImage* img) { img_ = img; dirty_ = true; } void set_nav(NavCB cb) { nav_ = std::move(cb); } void render(); + ~EntropyView(); private: void rebuild(); + void rebuild_texture(int width, int height); + void destroy_texture(); const PEImage* img_ = nullptr; NavCB nav_; @@ 
-23,6 +26,12 @@ class EntropyView { struct Block { va_t addr; float entropy; }; std::vector blocks_; static constexpr size_t BLOCK_SZ = 256; + + u32 tex_id_ = 0; + int tex_w_ = 0; + int tex_h_ = 0; + int last_avail_w_ = 0; + int last_avail_h_ = 0; }; } diff --git a/src/ui/widgets/functions_panel.cpp b/src/ui/widgets/functions_panel.cpp index 3e38ff8..c6f0647 100644 --- a/src/ui/widgets/functions_panel.cpp +++ b/src/ui/widgets/functions_panel.cpp @@ -2,15 +2,43 @@ #include "ui/theme.h" #include #include -#include namespace hype { +void FunctionsPanel::rebuild_cache() { + cache_.clear(); + if (!db_) return; + + std::string filt(filter_); + last_filter_ = filt; + dirty_ = false; + + std::vector sorted; + sorted.reserve(db_->funcs.size()); + for (auto& [_, f] : db_->funcs) + if (filt.empty() || f.name.find(filt) != std::string::npos || + fmt::format("{:X}", f.entry).find(filt) != std::string::npos) + sorted.push_back(&f); + std::sort(sorted.begin(), sorted.end(), [](auto* a, auto* b) { return a->entry < b->entry; }); + + cache_.reserve(sorted.size()); + for (int i = 0; i < (int)sorted.size(); ++i) { + auto* f = sorted[i]; + CachedRow row; + row.entry = f->entry; + row.label = fmt::format("{:016X}##f{}", f->entry, i); + row.name = f->name; + row.block_count = static_cast(f->blocks.size()); + cache_.push_back(std::move(row)); + } +} + void FunctionsPanel::render() { ImGui::Begin("Functions"); if (!db_) { ImGui::TextDisabled("No analysis data"); ImGui::End(); return; } - ImGui::InputTextWithHint("##ff", "Filter (name or address)...", filter_, sizeof(filter_)); + if (ImGui::InputTextWithHint("##ff", "Filter (name or address)...", filter_, sizeof(filter_))) + dirty_ = true; ImGui::SameLine(); ImGui::TextDisabled("(%d)", (int)db_->funcs.size()); @@ -23,14 +51,8 @@ void FunctionsPanel::render() { } ImGui::Separator(); - std::string filt(filter_); - std::vector sorted; - sorted.reserve(db_->funcs.size()); - for (auto& [_, f] : db_->funcs) - if (filt.empty() || 
f.name.find(filt) != std::string::npos || - fmt::format("{:X}", f.entry).find(filt) != std::string::npos) - sorted.push_back(&f); - std::sort(sorted.begin(), sorted.end(), [](auto* a, auto* b) { return a->entry < b->entry; }); + if (dirty_ || std::string(filter_) != last_filter_) + rebuild_cache(); if (ImGui::BeginTable("##ft", 3, ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable | ImGuiTableFlags_RowBg | @@ -43,24 +65,23 @@ void FunctionsPanel::render() { ImGui::TableHeadersRow(); ImGuiListClipper clip; - clip.Begin(static_cast(sorted.size())); + clip.Begin(static_cast(cache_.size())); while (clip.Step()) { for (int i = clip.DisplayStart; i < clip.DisplayEnd; ++i) { - auto* f = sorted[i]; + auto& row = cache_[i]; ImGui::TableNextRow(); ImGui::TableNextColumn(); - auto lbl = fmt::format("{:016X}##f{}", f->entry, i); - if (ImGui::Selectable(lbl.c_str(), false, + if (ImGui::Selectable(row.label.c_str(), false, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowDoubleClick)) { - if (nav_) nav_(f->entry); + if (nav_) nav_(row.entry); } ImGui::TableNextColumn(); - ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::func()), "%s", f->name.c_str()); + ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::func()), "%s", row.name.c_str()); ImGui::TableNextColumn(); - ImGui::Text("%d", static_cast(f->blocks.size())); + ImGui::Text("%d", row.block_count); } } ImGui::EndTable(); diff --git a/src/ui/widgets/functions_panel.h b/src/ui/widgets/functions_panel.h index c63b8c7..afa6931 100644 --- a/src/ui/widgets/functions_panel.h +++ b/src/ui/widgets/functions_panel.h @@ -2,6 +2,8 @@ #include "core/analysis/analysis_db.h" #include #include +#include +#include namespace hype { @@ -9,14 +11,27 @@ class FunctionsPanel { public: using NavCB = std::function; - void set_data(const AnalysisDB* db) { db_ = db; } + void set_data(const AnalysisDB* db) { db_ = db; invalidate(); } void set_nav(NavCB cb) { nav_ = std::move(cb); } void render(); + void invalidate() { dirty_ = 
true; } private: + void rebuild_cache(); + + struct CachedRow { + va_t entry; + std::string label; // "ADDR##fN" + std::string name; + int block_count; + }; + const AnalysisDB* db_ = nullptr; NavCB nav_; char filter_[256] = {}; + std::string last_filter_; + bool dirty_ = true; + std::vector cache_; }; } diff --git a/src/ui/widgets/graph_view.cpp b/src/ui/widgets/graph_view.cpp index 8e3dc1b..9bde63e 100644 --- a/src/ui/widgets/graph_view.cpp +++ b/src/ui/widgets/graph_view.cpp @@ -52,8 +52,12 @@ void GraphView::layout() { for (size_t i = 0; i < addrs.size(); ++i) { auto bit = func.blocks.find(addrs[i]); float h = 50; - if (bit != func.blocks.end()) - h = std::max(50.f, static_cast(bit->second.insns.size()) * 15.f + 30.f); + if (bit != func.blocks.end()) { + auto ri = db_->insns.range_begin(bit->second.start); + auto re = db_->insns.range_end(bit->second.end); + auto cnt = static_cast(std::distance(ri, re)); + h = std::max(50.f, static_cast(cnt) * 15.f + 30.f); + } h = std::min(h, 300.f); mh = std::max(mh, h); nodes_.push_back({addrs[i], xs + i * (nw + gap), y, nw, h, l}); @@ -191,7 +195,10 @@ void GraphView::render() { if (bit != func.blocks.end()) { int max_lines = static_cast((br.y - ty) / (font_sz + 1)); int shown = 0; - for (auto& insn : bit->second.insns) { + auto ri = db_->insns.range_begin(bit->second.start); + auto re = db_->insns.range_end(bit->second.end); + for (; ri != re; ++ri) { + auto& insn = *ri; if (shown >= max_lines - 1) { dl->AddText(nullptr, font_sz, ImVec2(tx, ty), col::comment(), "..."); break; diff --git a/src/ui/widgets/imports_panel.cpp b/src/ui/widgets/imports_panel.cpp index aa8aaca..6d287f7 100644 --- a/src/ui/widgets/imports_panel.cpp +++ b/src/ui/widgets/imports_panel.cpp @@ -7,40 +7,78 @@ namespace hype { va_t ImportsPanel::find_caller(va_t iat_addr) { if (!db_) return 0; - // priority 1: find a CALL instruction whose memory operand resolves to this IAT addr auto xit = db_->xrefs_to.find(iat_addr); if (xit != db_->xrefs_to.end()) { 
for (auto& xr : xit->second) { auto iit = db_->insns.find(xr.from); - if (iit != db_->insns.end() && iit->second.is_call()) + if (iit != db_->insns.end() && iit->is_call()) return xr.from; } - // priority 2: any code xref (not just calls) for (auto& xr : xit->second) { if (db_->insns.count(xr.from)) return xr.from; } } - // priority 3: brute force scan for call with matching operand - for (auto& [addr, insn] : db_->insns) { + for (auto& insn : db_->insns) { if (!insn.is_call()) continue; for (int k = 0; k < insn.op_count; ++k) { if (insn.ops[k].type == OpType::Mem && insn.ops[k].val == iat_addr) - return addr; + return insn.addr; if (insn.ops[k].type == OpType::Imm && insn.ops[k].val == iat_addr) - return addr; + return insn.addr; } } return 0; } +void ImportsPanel::rebuild_cache() { + import_cache_.clear(); + export_cache_.clear(); + if (!img_) return; + + std::string filt(filter_); + last_filter_ = filt; + dirty_ = false; + + import_cache_.reserve(img_->imports.size()); + for (int i = 0; i < (int)img_->imports.size(); ++i) { + auto& imp = img_->imports[i]; + if (!filt.empty() && imp.name.find(filt) == std::string::npos && imp.dll.find(filt) == std::string::npos) + continue; + ImportRow row; + row.src_index = i; + row.iat_addr = imp.iat_addr; + row.label = fmt::format("{:016X}##i{}", imp.iat_addr, i); + row.dll = imp.dll; + row.name = imp.name; + import_cache_.push_back(std::move(row)); + } + + export_cache_.reserve(img_->exports.size()); + for (int i = 0; i < (int)img_->exports.size(); ++i) { + auto& exp = img_->exports[i]; + if (!filt.empty() && exp.name.find(filt) == std::string::npos) + continue; + ExportRow row; + row.src_index = i; + row.addr = exp.addr; + row.label = fmt::format("{:016X}##e{}", exp.addr, i); + row.ordinal = exp.ordinal; + row.name = exp.name; + export_cache_.push_back(std::move(row)); + } +} + void ImportsPanel::render() { ImGui::Begin("Imports / Exports"); if (!img_) { ImGui::End(); return; } - ImGui::InputTextWithHint("##if", "Filter...", 
filter_, sizeof(filter_)); - std::string filt(filter_); + if (ImGui::InputTextWithHint("##if", "Filter...", filter_, sizeof(filter_))) + dirty_ = true; + + if (dirty_ || std::string(filter_) != last_filter_) + rebuild_cache(); if (ImGui::BeginTabBar("##ietab")) { if (ImGui::BeginTabItem("Imports")) { @@ -48,25 +86,28 @@ void ImportsPanel::render() { ImGui::TableSetupColumn("IAT", ImGuiTableColumnFlags_WidthFixed, 140); ImGui::TableSetupColumn("DLL", ImGuiTableColumnFlags_WidthFixed, 120); ImGui::TableSetupColumn("Name"); + ImGui::TableSetupScrollFreeze(0, 1); ImGui::TableHeadersRow(); - for (int i = 0; i < (int)img_->imports.size(); ++i) { - auto& imp = img_->imports[i]; - if (!filt.empty() && imp.name.find(filt) == std::string::npos && imp.dll.find(filt) == std::string::npos) - continue; - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - auto lbl = fmt::format("{:016X}##i{}", imp.iat_addr, i); - if (ImGui::Selectable(lbl.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) { - if (nav_) { - va_t caller = find_caller(imp.iat_addr); - if (caller) - nav_(caller); + + ImGuiListClipper clip; + clip.Begin(static_cast(import_cache_.size())); + while (clip.Step()) { + for (int i = clip.DisplayStart; i < clip.DisplayEnd; ++i) { + auto& row = import_cache_[i]; + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + if (ImGui::Selectable(row.label.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) { + if (nav_) { + va_t caller = find_caller(row.iat_addr); + if (caller) + nav_(caller); + } } + ImGui::TableNextColumn(); + ImGui::Text("%s", row.dll.c_str()); + ImGui::TableNextColumn(); + ImGui::Text("%s", row.name.c_str()); } - ImGui::TableNextColumn(); - ImGui::Text("%s", imp.dll.c_str()); - ImGui::TableNextColumn(); - ImGui::Text("%s", imp.name.c_str()); } ImGui::EndTable(); } @@ -77,19 +118,23 @@ void ImportsPanel::render() { ImGui::TableSetupColumn("Address", ImGuiTableColumnFlags_WidthFixed, 140); ImGui::TableSetupColumn("Ord", ImGuiTableColumnFlags_WidthFixed, 50); 
ImGui::TableSetupColumn("Name"); + ImGui::TableSetupScrollFreeze(0, 1); ImGui::TableHeadersRow(); - for (int i = 0; i < (int)img_->exports.size(); ++i) { - auto& exp = img_->exports[i]; - if (!filt.empty() && exp.name.find(filt) == std::string::npos) continue; - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - auto lbl = fmt::format("{:016X}##e{}", exp.addr, i); - if (ImGui::Selectable(lbl.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) - if (nav_) nav_(exp.addr); - ImGui::TableNextColumn(); - ImGui::Text("%d", exp.ordinal); - ImGui::TableNextColumn(); - ImGui::Text("%s", exp.name.c_str()); + + ImGuiListClipper clip; + clip.Begin(static_cast(export_cache_.size())); + while (clip.Step()) { + for (int i = clip.DisplayStart; i < clip.DisplayEnd; ++i) { + auto& row = export_cache_[i]; + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + if (ImGui::Selectable(row.label.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) + if (nav_) nav_(row.addr); + ImGui::TableNextColumn(); + ImGui::Text("%d", row.ordinal); + ImGui::TableNextColumn(); + ImGui::Text("%s", row.name.c_str()); + } } ImGui::EndTable(); } diff --git a/src/ui/widgets/imports_panel.h b/src/ui/widgets/imports_panel.h index 41ecb81..e8a2fcb 100644 --- a/src/ui/widgets/imports_panel.h +++ b/src/ui/widgets/imports_panel.h @@ -3,24 +3,47 @@ #include "core/analysis/analysis_db.h" #include #include +#include +#include namespace hype { class ImportsPanel { public: using NavCB = std::function; - void set_data(const PEImage* img) { img_ = img; } - void set_db(const AnalysisDB* db) { db_ = db; } + void set_data(const PEImage* img) { img_ = img; invalidate(); } + void set_db(const AnalysisDB* db) { db_ = db; invalidate(); } void set_nav(NavCB cb) { nav_ = std::move(cb); } void render(); + void invalidate() { dirty_ = true; } private: va_t find_caller(va_t iat_addr); + void rebuild_cache(); + + struct ImportRow { + int src_index; + va_t iat_addr; + std::string label; // "ADDR##iN" + std::string dll; + 
std::string name; + }; + struct ExportRow { + int src_index; + va_t addr; + std::string label; // "ADDR##eN" + int ordinal; + std::string name; + }; const PEImage* img_ = nullptr; const AnalysisDB* db_ = nullptr; NavCB nav_; char filter_[256] = {}; + std::string last_filter_; + bool dirty_ = true; + std::vector import_cache_; + std::vector export_cache_; }; } diff --git a/src/ui/widgets/output_panel.cpp b/src/ui/widgets/output_panel.cpp index a9bd57c..9976692 100644 --- a/src/ui/widgets/output_panel.cpp +++ b/src/ui/widgets/output_panel.cpp @@ -15,13 +15,20 @@ void OutputPanel::render() { ImGui::Separator(); ImGui::BeginChild("##log"); - std::lock_guard lk(mtx_); + std::deque snapshot; + bool do_scroll; + { + std::lock_guard lk(mtx_); + snapshot = lines_; + do_scroll = scroll_; + scroll_ = false; + } ImGuiListClipper clip; - clip.Begin(static_cast(lines_.size())); + clip.Begin(static_cast(snapshot.size())); while (clip.Step()) for (int i = clip.DisplayStart; i < clip.DisplayEnd; ++i) - ImGui::TextUnformatted(lines_[i].c_str()); - if (scroll_) { ImGui::SetScrollHereY(1.f); scroll_ = false; } + ImGui::TextUnformatted(snapshot[i].c_str()); + if (do_scroll) ImGui::SetScrollHereY(1.f); ImGui::EndChild(); ImGui::End(); } diff --git a/src/ui/widgets/pe_header_view.cpp b/src/ui/widgets/pe_header_view.cpp index 52d6dd6..89af7bd 100644 --- a/src/ui/widgets/pe_header_view.cpp +++ b/src/ui/widgets/pe_header_view.cpp @@ -7,6 +7,9 @@ namespace hype { namespace { +template +T rd(const u8* p) { T v; std::memcpy(&v, p, sizeof(T)); return v; } + const char* machine_str(u16 machine) { switch (machine) { case 0x014C: return "i386"; @@ -81,8 +84,8 @@ void PEHeaderView::render_dos_header() { if (img_->raw.size() < 64) return; const u8* d = img_->raw.data(); - u16 e_magic = *reinterpret_cast(d); - u32 e_lfanew = *reinterpret_cast(d + 0x3C); + u16 e_magic = rd(d); + u32 e_lfanew = rd(d + 0x3C); if (ImGui::BeginTable("##dos", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { 
ImGui::TableSetupColumn("Field", ImGuiTableColumnFlags_WidthFixed, 180); @@ -104,16 +107,16 @@ void PEHeaderView::render_dos_header() { void PEHeaderView::render_nt_headers() { if (img_->raw.size() < 64) return; const u8* d = img_->raw.data(); - u32 pe_off = *reinterpret_cast(d + 0x3C); + u32 pe_off = rd(d + 0x3C); if (pe_off + 24 > img_->raw.size()) return; const u8* nt = d + pe_off; - u32 sig = *reinterpret_cast(nt); - u16 machine = *reinterpret_cast(nt + 4); - u16 num_sections = *reinterpret_cast(nt + 6); - u32 timestamp = *reinterpret_cast(nt + 8); - u16 opt_hdr_sz = *reinterpret_cast(nt + 20); - u16 characteristics = *reinterpret_cast(nt + 22); + u32 sig = rd(nt); + u16 machine = rd(nt + 4); + u16 num_sections = rd(nt + 6); + u32 timestamp = rd(nt + 8); + u16 opt_hdr_sz = rd(nt + 20); + u16 characteristics = rd(nt + 22); if (ImGui::BeginTable("##nth", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { ImGui::TableSetupColumn("Field", ImGuiTableColumnFlags_WidthFixed, 200); @@ -139,13 +142,13 @@ void PEHeaderView::render_nt_headers() { void PEHeaderView::render_optional_header() { if (img_->raw.size() < 64) return; const u8* d = img_->raw.data(); - u32 pe_off = *reinterpret_cast(d + 0x3C); + u32 pe_off = rd(d + 0x3C); if (pe_off + 24 > img_->raw.size()) return; const u8* opt = d + pe_off + 24; size_t opt_avail = img_->raw.size() - (pe_off + 24); if (opt_avail < 72) return; - u16 magic = *reinterpret_cast(opt); + u16 magic = rd(opt); bool pe32plus = (magic == 0x020B); size_t min_opt = pe32plus ? 112 : 96; if (opt_avail < min_opt) return; @@ -162,19 +165,19 @@ void PEHeaderView::render_optional_header() { }; row("Magic", fmt::format("0x{:04X} ({})", magic, pe32plus ? 
"PE32+" : "PE32")); - row("AddressOfEntryPoint", fmt::format("0x{:X}", *reinterpret_cast(opt + 16))); + row("AddressOfEntryPoint", fmt::format("0x{:X}", rd(opt + 16))); if (pe32plus) { - row("ImageBase", fmt::format("0x{:X}", *reinterpret_cast(opt + 24))); + row("ImageBase", fmt::format("0x{:X}", rd(opt + 24))); } else { - row("ImageBase", fmt::format("0x{:X}", *reinterpret_cast(opt + 28))); + row("ImageBase", fmt::format("0x{:X}", rd(opt + 28))); } - row("SectionAlignment", fmt::format("0x{:X}", *reinterpret_cast(opt + 32))); - row("FileAlignment", fmt::format("0x{:X}", *reinterpret_cast(opt + 36))); - row("SizeOfImage", fmt::format("0x{:X}", *reinterpret_cast(opt + 56))); - row("SizeOfHeaders", fmt::format("0x{:X}", *reinterpret_cast(opt + 60))); - row("Subsystem", fmt::format("{}", *reinterpret_cast(opt + 68))); + row("SectionAlignment", fmt::format("0x{:X}", rd(opt + 32))); + row("FileAlignment", fmt::format("0x{:X}", rd(opt + 36))); + row("SizeOfImage", fmt::format("0x{:X}", rd(opt + 56))); + row("SizeOfHeaders", fmt::format("0x{:X}", rd(opt + 60))); + row("Subsystem", fmt::format("{}", rd(opt + 68))); row("NumberOfRvaAndSizes", fmt::format("{}", - *reinterpret_cast(opt + (pe32plus ? 108 : 92)))); + rd(opt + (pe32plus ? 108 : 92)))); ImGui::EndTable(); } } @@ -182,17 +185,17 @@ void PEHeaderView::render_optional_header() { void PEHeaderView::render_data_directories() { if (img_->raw.size() < 64) return; const u8* d = img_->raw.data(); - u32 pe_off = *reinterpret_cast(d + 0x3C); + u32 pe_off = rd(d + 0x3C); if (pe_off + 24 > img_->raw.size()) return; const u8* opt = d + pe_off + 24; size_t opt_avail = img_->raw.size() - (pe_off + 24); if (opt_avail < 96) return; - u16 magic = *reinterpret_cast(opt); + u16 magic = rd(opt); bool pe32plus = (magic == 0x020B); u32 dd_offset = pe32plus ? 112 : 96; size_t min_needed = pe32plus ? 112 : 96; if (opt_avail < min_needed) return; - u32 num_dd = *reinterpret_cast(opt + (pe32plus ? 
108 : 92)); + u32 num_dd = rd(opt + (pe32plus ? 108 : 92)); if (num_dd > 16) num_dd = 16; const u8* dd_base = opt + dd_offset; @@ -205,8 +208,8 @@ void PEHeaderView::render_data_directories() { ImGui::TableHeadersRow(); for (u32 i = 0; i < num_dd; ++i) { - u32 rva = *reinterpret_cast(dd_base + i * 8); - u32 size = *reinterpret_cast(dd_base + i * 8 + 4); + u32 rva = rd(dd_base + i * 8); + u32 size = rd(dd_base + i * 8 + 4); ImGui::TableNextRow(); ImGui::TableNextColumn(); ImGui::TextUnformatted(data_dir_names[i]); ImGui::TableNextColumn(); ImGui::Text("0x%08X", rva); diff --git a/src/ui/widgets/script_console.cpp b/src/ui/widgets/script_console.cpp index a3ed566..17c2dcc 100644 --- a/src/ui/widgets/script_console.cpp +++ b/src/ui/widgets/script_console.cpp @@ -61,17 +61,7 @@ void ScriptConsole::render() { } } - // Ctrl+Enter also executes - if (ImGui::GetIO().KeyCtrl && ImGui::IsKeyPressed(ImGuiKey_Enter) && engine_ && input_[0]) { - std::string code(input_); - output_.push_back("> " + code); - history_.push_front(code); - hist_idx_ = -1; - std::string result = engine_->execute(code); - if (!result.empty()) output_.push_back(result); - scroll_bottom_ = true; - input_[0] = 0; - } + // Ctrl+Enter handled by InputText EnterReturnsTrue above ImGui::End(); } diff --git a/src/ui/widgets/search_panel.cpp b/src/ui/widgets/search_panel.cpp index 3efae83..7b510ea 100644 --- a/src/ui/widgets/search_panel.cpp +++ b/src/ui/widgets/search_panel.cpp @@ -121,13 +121,14 @@ done:; void SearchPanel::search_immediate() { results_.clear(); - u64 val = strtoull(imm_buf_, nullptr, 16); - if (!val) return; + char* end = nullptr; + u64 val = strtoull(imm_buf_, &end, 16); + if (end == imm_buf_ || !imm_buf_[0]) return; - for (auto& [addr, insn] : db_->insns) { + for (auto& insn : db_->insns) { for (int k = 0; k < insn.op_count; ++k) { if (insn.ops[k].type == OpType::Imm && insn.ops[k].val == val) { - results_.push_back({addr, fmt::format("{} {}", insn.mnemonic, insn.op_str)}); + 
results_.push_back({insn.addr, fmt::format("{} {}", insn.mnemonic, insn.op_str)}); break; } } diff --git a/src/ui/widgets/settings_panel.cpp b/src/ui/widgets/settings_panel.cpp index 58842ed..951478c 100644 --- a/src/ui/widgets/settings_panel.cpp +++ b/src/ui/widgets/settings_panel.cpp @@ -232,6 +232,7 @@ void Settings::load(const std::filesystem::path& path) { auto v = trim(line.substr(eq + 1)); if (section == "appearance") { + try { if (k == "font_size") font_size = std::stof(v); else if (k == "theme") theme_index = std::stoi(v); else if (k == "accent_r") accent_color[0] = std::stof(v); @@ -257,17 +258,22 @@ void Settings::load(const std::filesystem::path& path) { else if (k == "border_radius") border_radius = std::stof(v); else if (k == "scrollbar_width") scrollbar_width = std::stof(v); else if (k == "font_index") font_index = std::stoi(v); + } catch (...) {} } else if (section == "editor") { + try { if (k == "show_bytes") show_bytes = (v == "true" || v == "1"); else if (k == "max_bytes") max_bytes = std::stoi(v); else if (k == "address_width") address_width = std::stoi(v); else if (k == "tab_size") tab_size = std::stoi(v); else if (k == "auto_beautify") auto_beautify = (v == "true" || v == "1"); else if (k == "mono_font_path") std::strncpy(mono_font_path, v.c_str(), sizeof(mono_font_path) - 1); + } catch (...) {} } else if (section == "advanced") { + try { if (k == "threads") max_threads = std::stoi(v); else if (k == "autosave_interval") autosave_interval = std::stoi(v); else if (k == "max_decompiler_blocks") max_decompiler_blocks = std::stoi(v); + } catch (...) 
{} } } } diff --git a/src/ui/widgets/stack_frame_view.cpp b/src/ui/widgets/stack_frame_view.cpp index 909f25a..44767f8 100644 --- a/src/ui/widgets/stack_frame_view.cpp +++ b/src/ui/widgets/stack_frame_view.cpp @@ -5,22 +5,22 @@ namespace hype { -void StackFrame::analyze(const Function& func) { +void StackFrame::analyze(const Function& func, const AnalysisDB& db) { func_entry = func.entry; vars.clear(); frame_size = 0; for (auto& [ba, bb] : func.blocks) { - for (auto& insn : bb.insns) { + db.for_each_insn_in_block(bb, [&](const Insn& insn) { for (u8 k = 0; k < insn.op_count; ++k) { auto& op = insn.ops[k]; if (op.type != OpType::Mem) continue; - bool is_rsp = op.mem.base == ZYDIS_REGISTER_RSP || op.mem.base == ZYDIS_REGISTER_ESP; - bool is_rbp = op.mem.base == ZYDIS_REGISTER_RBP || op.mem.base == ZYDIS_REGISTER_EBP; + bool is_rsp = op.mem_base == ZYDIS_REGISTER_RSP || op.mem_base == ZYDIS_REGISTER_ESP; + bool is_rbp = op.mem_base == ZYDIS_REGISTER_RBP || op.mem_base == ZYDIS_REGISTER_EBP; if (!is_rsp && !is_rbp) continue; - i64 disp = op.mem.disp; + i64 disp = op.mem_disp; if (vars.count(disp)) continue; u16 sz = op.size ? 
op.size / 8 : 8; @@ -41,7 +41,7 @@ void StackFrame::analyze(const Function& func) { vars[disp] = {disp, name, sz, 0}; } - } + }); } i64 min_off = 0; @@ -73,7 +73,7 @@ void StackFrameView::set_function(va_t entry) { auto fit = db_->funcs.find(entry); if (fit != db_->funcs.end()) { StackFrame sf; - sf.analyze(fit->second); + sf.analyze(fit->second, *db_); frames_[entry] = std::move(sf); } } @@ -179,16 +179,16 @@ std::string format_operand_with_vars(const Insn& insn, const StackFrame* frame) auto& op = insn.ops[k]; if (op.type != OpType::Mem) continue; - bool is_rsp = op.mem.base == ZYDIS_REGISTER_RSP || op.mem.base == ZYDIS_REGISTER_ESP; - bool is_rbp = op.mem.base == ZYDIS_REGISTER_RBP || op.mem.base == ZYDIS_REGISTER_EBP; + bool is_rsp = op.mem_base == ZYDIS_REGISTER_RSP || op.mem_base == ZYDIS_REGISTER_ESP; + bool is_rbp = op.mem_base == ZYDIS_REGISTER_RBP || op.mem_base == ZYDIS_REGISTER_EBP; if (!is_rsp && !is_rbp) continue; - i64 disp = op.mem.disp; + i64 disp = op.mem_disp; auto* var = frame->find(disp); if (!var) continue; - const char* base_name = is_rsp ? (op.mem.base == ZYDIS_REGISTER_ESP ? "esp" : "rsp") - : (op.mem.base == ZYDIS_REGISTER_EBP ? "ebp" : "rbp"); + const char* base_name = is_rsp ? (op.mem_base == ZYDIS_REGISTER_ESP ? "esp" : "rsp") + : (op.mem_base == ZYDIS_REGISTER_EBP ? 
"ebp" : "rbp"); if (disp > 0) { auto hex_pat = fmt::format("[{}+0x{:X}]", base_name, disp); diff --git a/src/ui/widgets/stack_frame_view.h b/src/ui/widgets/stack_frame_view.h index dd9c651..f317c24 100644 --- a/src/ui/widgets/stack_frame_view.h +++ b/src/ui/widgets/stack_frame_view.h @@ -17,7 +17,7 @@ struct StackFrame { std::map vars; i64 frame_size = 0; - void analyze(const Function& func); + void analyze(const Function& func, const AnalysisDB& db); void rename(i64 offset, std::string new_name); const StackVar* find(i64 offset) const; }; diff --git a/src/ui/widgets/strings_panel.cpp b/src/ui/widgets/strings_panel.cpp index cec460e..382f4b1 100644 --- a/src/ui/widgets/strings_panel.cpp +++ b/src/ui/widgets/strings_panel.cpp @@ -4,11 +4,40 @@ namespace hype { +void StringsPanel::rebuild_cache() { + cache_.clear(); + if (!db_) return; + + std::string filt(filter_); + last_filter_ = filt; + dirty_ = false; + + cache_.reserve(db_->strings.size()); + for (int i = 0; i < (int)db_->strings.size(); ++i) { + auto& [addr, str] = db_->strings[i]; + if (!filt.empty() && str.find(filt) == std::string::npos) continue; + + CachedRow row; + row.src_index = i; + row.addr = addr; + row.value = str; + + auto xit = db_->xrefs_to.find(addr); + row.xref_count = xit != db_->xrefs_to.end() ? static_cast(xit->second.size()) : 0; + row.nav_target = (row.xref_count > 0) ? 
xit->second[0].from : 0; + row.label = fmt::format("{:016X}##s{}", addr, i); + + cache_.push_back(std::move(row)); + } +} + void StringsPanel::render() { ImGui::Begin("Strings"); if (!db_) { ImGui::End(); return; } - ImGui::InputTextWithHint("##sf", "Filter...", filter_, sizeof(filter_)); + if (ImGui::InputTextWithHint("##sf", "Filter...", filter_, sizeof(filter_))) + dirty_ = true; + ImGui::SameLine(); if (ImGui::SmallButton("Copy")) { std::string out; @@ -17,44 +46,41 @@ void StringsPanel::render() { ImGui::SetClipboardText(out.c_str()); } ImGui::Separator(); - std::string filt(filter_); + + if (dirty_ || std::string(filter_) != last_filter_) + rebuild_cache(); if (ImGui::BeginTable("##st", 3, ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable | ImGuiTableFlags_RowBg)) { ImGui::TableSetupColumn("Address", ImGuiTableColumnFlags_WidthFixed, 140); ImGui::TableSetupColumn("Xrefs", ImGuiTableColumnFlags_WidthFixed, 50); ImGui::TableSetupColumn("String"); + ImGui::TableSetupScrollFreeze(0, 1); ImGui::TableHeadersRow(); - for (int i = 0; i < (int)db_->strings.size(); ++i) { - auto& [addr, str] = db_->strings[i]; - if (!filt.empty() && str.find(filt) == std::string::npos) continue; - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - - auto xit = db_->xrefs_to.find(addr); - int cnt = xit != db_->xrefs_to.end() ? 
static_cast(xit->second.size()) : 0; - - // navigate to the instruction that references this string - va_t nav_target = 0; - if (cnt > 0) - nav_target = xit->second[0].from; + ImGuiListClipper clip; + clip.Begin(static_cast(cache_.size())); + while (clip.Step()) { + for (int i = clip.DisplayStart; i < clip.DisplayEnd; ++i) { + auto& row = cache_[i]; + ImGui::TableNextRow(); + ImGui::TableNextColumn(); - auto lbl = fmt::format("{:016X}##s{}", addr, i); - if (ImGui::Selectable(lbl.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) { - if (nav_) { - if (nav_target) - nav_(nav_target); - else - nav_(addr); // will go to hex view via navigate_to fallback + if (ImGui::Selectable(row.label.c_str(), false, ImGuiSelectableFlags_SpanAllColumns)) { + if (nav_) { + if (row.nav_target) + nav_(row.nav_target); + else + nav_(row.addr); + } } + ImGui::TableNextColumn(); + if (row.xref_count > 0) + ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::xref()), "%d", row.xref_count); + else + ImGui::TextDisabled("-"); + ImGui::TableNextColumn(); + ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::str()), "\"%s\"", row.value.c_str()); } - ImGui::TableNextColumn(); - if (cnt > 0) - ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::xref()), "%d", cnt); - else - ImGui::TextDisabled("-"); - ImGui::TableNextColumn(); - ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(col::str()), "\"%s\"", str.c_str()); } ImGui::EndTable(); } diff --git a/src/ui/widgets/strings_panel.h b/src/ui/widgets/strings_panel.h index 6bcbfd1..9d8c6a1 100644 --- a/src/ui/widgets/strings_panel.h +++ b/src/ui/widgets/strings_panel.h @@ -2,20 +2,37 @@ #include "core/analysis/analysis_db.h" #include #include +#include +#include namespace hype { class StringsPanel { public: using NavCB = std::function; - void set_data(const AnalysisDB* db) { db_ = db; } + void set_data(const AnalysisDB* db) { db_ = db; invalidate(); } void set_nav(NavCB cb) { nav_ = std::move(cb); } void render(); + void invalidate() { 
dirty_ = true; } private: + void rebuild_cache(); + + struct CachedRow { + int src_index; + va_t addr; + va_t nav_target; + int xref_count; + std::string label; // "ADDR##sN" + std::string value; // the string content + }; + const AnalysisDB* db_ = nullptr; NavCB nav_; char filter_[256] = {}; + std::string last_filter_; + bool dirty_ = true; + std::vector cache_; }; } diff --git a/src/ui/widgets/xrefs_panel.cpp b/src/ui/widgets/xrefs_panel.cpp index 833fc2b..3cbec72 100644 --- a/src/ui/widgets/xrefs_panel.cpp +++ b/src/ui/widgets/xrefs_panel.cpp @@ -31,7 +31,7 @@ std::string XrefsPanel::insn_text(va_t addr) const { if (!db_) return {}; auto it = db_->insns.find(addr); if (it == db_->insns.end()) return {}; - auto& insn = it->second; + auto& insn = *it; if (!insn.op_str[0]) return std::string(insn.mnemonic); return fmt::format("{} {}", insn.mnemonic, insn.op_str); } diff --git a/vcpkg.json b/vcpkg.json index 4d4ddc7..4392ea8 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,6 +1,6 @@ { "name": "hyperion", - "version": "0.1.0", + "version": "0.1.7", "dependencies": [ { "name": "imgui", From fd5a0391d7428962ae4dcd5b2aeb154b967b0b4e Mon Sep 17 00:00:00 2001 From: Your Name <88072010+Razshy@users.noreply.github.com> Date: Sun, 17 May 2026 07:21:49 -0500 Subject: [PATCH 2/3] docs: add MIT license and update README --- LICENSE | 21 +++++++++++++++++++++ README.md | 31 ++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8700fbe --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 - present Siden Technologies Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 5447bb4..5a49e38 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,16 @@ A native multi-architecture disassembler and binary analysis tool. Supports PE, ELF, Mach-O, and .NET binaries across x86, x64, ARM, ARM64, MIPS, and PPC. Built from scratch in C++20 with ImGui. [![Discord](https://img.shields.io/badge/Discord-Join-5865F2?logo=discord)](https://discord.gg/yjym2b7A) -[![GitHub](https://img.shields.io/github/stars/mylovereturns/hyperion-disassembler?style=flat&label=Stars)](https://github.com/mylovereturns/hyperion-disassembler) +[![GitHub](https://img.shields.io/github/stars/Sidenai/hyperion-disassembler?style=flat&label=Stars)](https://github.com/Sidenai/hyperion-disassembler) -Single statically-linked executable. No installer, no runtime dependencies. Under 3MB. +Single statically-linked executable. No installer, no runtime dependencies. Under 4MB. 
image ## Community - [Discord](https://discord.gg/yjym2b7A) -- [GitHub](https://github.com/mylovereturns/hyperion-disassembler) +- [GitHub](https://github.com/Sidenai/hyperion-disassembler) ## Supported Formats & Architectures @@ -93,12 +93,14 @@ Disassembly: Zydis (x86/x64) + Capstone (ARM, ARM64, MIPS, PPC) - Copy as C array / Python / YARA **Stability** -- PE loader hardened against malformed binaries +- All loaders hardened against malformed/malicious binaries (bounds checks, null-termination, overflow guards) - Thread-safe analysis (atomic handoff to UI) -- Memory optimized (fixed-size instruction buffers, section limits) +- Memory optimized (compact instruction storage, flat sorted vectors, zero per-frame allocations) - Full undo/redo for all operations - Auto-save every 60 seconds - Crash-free on minimize/unfocus +- GPU-accelerated entropy heatmap (single texture draw call) +- Resilient settings/database loading (graceful handling of corrupt files) ## Keybinds @@ -133,7 +135,7 @@ All keybinds are customizable via Settings. Requires CMake 3.25+, vcpkg, C++20 compiler (MSVC 2022+, GCC 13+, or Clang 16+). -``` +```bash git clone https://github.com/Sidenai/hyperion-disassembler cd hyperion-disassembler cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=path/to/vcpkg/scripts/buildsystems/vcpkg.cmake @@ -141,11 +143,19 @@ cmake --build build --config Release ``` For static linking (single exe, no DLLs): -``` +```bash cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=path/to/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static cmake --build build --config Release ``` +On macOS with Ninja (recommended): +```bash +brew install cmake ninja pkg-config +git clone https://github.com/microsoft/vcpkg.git ~/.vcpkg && ~/.vcpkg/bootstrap-vcpkg.sh +cmake -B build -S . 
-DCMAKE_TOOLCHAIN_FILE=$HOME/.vcpkg/scripts/buildsystems/vcpkg.cmake -G Ninja +cmake --build build --config Release +``` + Dependencies (pulled via vcpkg): imgui (docking), glfw, zydis, capstone, spdlog, fmt, zlib, lua, stb. ## Platforms @@ -161,10 +171,13 @@ Dependencies (pulled via vcpkg): imgui (docking), glfw, zydis, capstone, spdlog, Active development. Functional for static analysis across all supported formats. Decompiler produces readable C output for x86/x64 and ARM64. RTTI class recovery works on unobfuscated C++ binaries. Roadmap: -- Debugger integration (attach, breakpoints, anti-detection) +- Debugger integration (attach, breakpoints, anti-detection) — Windows only, in progress - Collaborative analysis - More decompiler improvements +- RISC-V support ## License -MIT +MIT — see [LICENSE](LICENSE) + +Copyright (c) 2026 - present Siden Technologies Inc. From 032eeebe1e269a22b073a26a95e8e5c516ad122d Mon Sep 17 00:00:00 2001 From: mylovereturns <283260869+mylovereturns@users.noreply.github.com> Date: Sun, 17 May 2026 22:51:49 +0300 Subject: [PATCH 3/3] Potential fix for pull request finding 'CodeQL / Multiplication result converted to larger type' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/ui/widgets/entropy_view.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ui/widgets/entropy_view.cpp b/src/ui/widgets/entropy_view.cpp index 84f6e68..836d8d3 100644 --- a/src/ui/widgets/entropy_view.cpp +++ b/src/ui/widgets/entropy_view.cpp @@ -56,7 +56,8 @@ void EntropyView::rebuild_texture(int width, int height) { tex_w_ = width; tex_h_ = height; - std::vector pixels(width * height * 4); + std::vector pixels(static_cast(width) * + static_cast(height) * 4u); float blocks_per_col = static_cast(blocks_.size()) / width;