From a2b495aea991285ef6e0d7c93182eabf6c6eb218 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 20:22:03 +0000 Subject: [PATCH] Harden the view layer against malformed input (found via real arm64 binaries) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ran the full view-node dumper under ASan/UBSan over real macOS arm64 binaries (Python extension .so from arm64 wheels) plus thousands of truncated and byte-flipped variants. This surfaced — and this commit fixes — a batch of real crashes that the header-only moex-parse and the bundled samples never reached: - Wrong static_cast in the rebase opcode view (REBASE_OPCODE_DO_REBASE_ADD_ ADDR_ULEB cast to the ADD_ADDR_ULEB wrapper) — undefined downcast. - Unbounded table walks reading past a truncated/crafted file: symbol table (nlist), indirect symbols & relocations, data-in-code, function starts, string table, section data sizes, and the DYLD_INFO rebase/bind opcode streams. Each is now clamped to the mapped file. - nsects not bounded to the segment command size, so section structs could be read past the file during node construction. - Stack overflow from unbounded view-tree recursion (default max_depth=0) in the dumper walks and the GUI tree build; both now cap recursion depth. - ParseStringLiteral now only returns NUL-terminated strings so callers can safely treat them as C strings. - Signed LEB128 sign-extension used a negative left shift (UB); use an unsigned shift. After the fixes, ~1300 ASan/UBSan fuzz iterations over the real binaries report no out-of-bounds access and no out-of-memory. (A pervasive but benign misaligned-read UB from raw pointer dereferences into the mapping remains; it does not fault on x86_64/arm64 and is tracked for the NodeData migration.) https://claude.ai/code/session_013kBiVXftgoEsyGVyrvfGok --- src/libmoex/node/MachSection.cpp | 19 ++++++++- src/libmoex/node/Util.cpp | 15 +++++-- .../node/loadcmd/LoadCommand_DYLD_INFO.cpp | 22 ++++++++++ .../node/loadcmd/LoadCommand_DYSYMTAB.cpp | 35 ++++++++++++---- .../loadcmd/LoadCommand_LINKEDIT_DATA.cpp | 35 ++++++++++++++-- .../node/loadcmd/LoadCommand_SEGMENT.cpp | 14 ++++++- .../node/loadcmd/LoadCommand_SYMTAB.cpp | 26 ++++++++++-- src/libmoex/viewnode/ViewNodeDumper.cpp | 13 +++++- .../views/DynamicLoaderInfoViewNode.cpp | 2 +- src/libmoex/viewnode/views/SectionViewNode.h | 40 +++++++++++++++++-- .../viewnode/views/StringTableViewNode.cpp | 16 +++++++- src/src/controller/LayoutController.cpp | 7 +++- src/src/controller/LayoutController.h | 2 +- 13 files changed, 213 insertions(+), 33 deletions(-) diff --git a/src/libmoex/node/MachSection.cpp b/src/libmoex/node/MachSection.cpp index 952abce..02bd376 100644 --- a/src/libmoex/node/MachSection.cpp +++ b/src/libmoex/node/MachSection.cpp @@ -31,8 +31,23 @@ char *MachSection::GetOffset(){ return offset; } uint32_t MachSection::GetSize() { - uint32_t size = (uint32_t) sect().size_both(); - return size; + uint64_t size = sect().size_both(); + // Clamp the section data size to the mapped file so the data readers + // (string/literal/pointer parsing) cannot run past a truncated section. + auto c = ctx(); + if(c && c->file_start != nullptr){ + const char *off = GetOffset(); + const char *fstart = static_cast(c->file_start); + if(off < fstart) + return 0; + const uint64_t off_in_file = static_cast(off - fstart); + if(off_in_file >= c->file_size) + return 0; + const uint64_t avail = c->file_size - off_in_file; + if(size > avail) + size = avail; + } + return (uint32_t)size; } void MachSection::ForEachAs_S_CSTRING_LITERALS(std::function callback){ diff --git a/src/libmoex/node/Util.cpp b/src/libmoex/node/Util.cpp index 356153a..ce4703a 100644 --- a/src/libmoex/node/Util.cpp +++ b/src/libmoex/node/Util.cpp @@ -473,10 +473,11 @@ namespace util { bit += 7; } while (byte & 0x80); - // sign extend negative numbers + // sign extend negative numbers (shift unsigned to avoid UB on a + // negative left shift, then reinterpret). if ( (byte & 0x40) != 0 && bit < 64 ) { - result |= (-1LL) << bit; + result |= static_cast(~0ULL << bit); } data = result; @@ -499,10 +500,18 @@ namespace util { ++cur; continue; } - results.push_back(cur); + char *str_start = cur; while (cur < end && *cur != 0) { ++cur; } + // Only return strings that are NUL-terminated within the region, so + // callers can safely treat the pointer as a C string. + if(cur < end){ + results.push_back(str_start); + ++cur; // skip terminator + }else{ + break; // unterminated trailing bytes (truncated/crafted input) + } } return results; diff --git a/src/libmoex/node/loadcmd/LoadCommand_DYLD_INFO.cpp b/src/libmoex/node/loadcmd/LoadCommand_DYLD_INFO.cpp index 39cba24..7042fb4 100644 --- a/src/libmoex/node/loadcmd/LoadCommand_DYLD_INFO.cpp +++ b/src/libmoex/node/loadcmd/LoadCommand_DYLD_INFO.cpp @@ -100,6 +100,17 @@ void LoadCommand_DYLD_INFO::ForEachRebaseOpcode(std::functionheader_start() + cmd()->rebase_off; uint32_t size = cmd()->rebase_size; char * end = begin + size; + // Clamp to the mapped file: a truncated/crafted rebase_off/size can run the + // opcode walk past the end of the file. + { + auto fc = header()->ctx(); + if(fc && fc->file_start != nullptr){ + char *fstart = (char*)fc->file_start; + char *fend = fstart + fc->file_size; + if(begin < fstart || begin > fend) end = begin; + else if(end > fend) end = fend; + } + } char * cur = begin; while(cur < end && !done){ // read and move next @@ -256,6 +267,17 @@ void LoadCommand_DYLD_INFO::ForEachBindingOpcode(BindNodeType node_type,uint32_t char * begin = header()->header_start() + bind_off; uint32_t size = bind_size; char * end = begin + size; + // Clamp to the mapped file: a truncated/crafted bind_off/size can run the + // opcode walk past the end of the file. + { + auto fc = header()->ctx(); + if(fc && fc->file_start != nullptr){ + char *fstart = (char*)fc->file_start; + char *fend = fstart + fc->file_size; + if(begin < fstart || begin > fend) end = begin; + else if(end > fend) end = fend; + } + } char * cur = begin; while(cur < end && !done) { // read and move next diff --git a/src/libmoex/node/loadcmd/LoadCommand_DYSYMTAB.cpp b/src/libmoex/node/loadcmd/LoadCommand_DYSYMTAB.cpp index 6ce4a20..f3febc9 100644 --- a/src/libmoex/node/loadcmd/LoadCommand_DYSYMTAB.cpp +++ b/src/libmoex/node/loadcmd/LoadCommand_DYSYMTAB.cpp @@ -7,8 +7,22 @@ MOEX_NAMESPACE_BEGIN - - +namespace { +// Clamp [offset, offset+size) to the mapped file so a truncated or crafted +// table cannot make the iterators below dereference past the end. +uint64_t ClampSizeToFile(const NodeContextPtr &ctx, const char *offset, uint64_t size){ + if(!ctx || ctx->file_start == nullptr) + return size; + const char *fstart = static_cast(ctx->file_start); + if(offset < fstart) + return 0; + const uint64_t off = static_cast(offset - fstart); + if(off >= ctx->file_size) + return 0; + const uint64_t avail = ctx->file_size - off; + return size < avail ? size : avail; +} +} // namespace std::tuple LoadCommand_LC_DYSYMTAB::GetDataRange() { @@ -33,8 +47,9 @@ std::tuple LoadCommand_LC_DYSYMTAB::GetDataRange() void LoadCommand_LC_DYSYMTAB::ForEachIndirectSymbols(std::function callback){ char * offset = (char*)header_->header_start() + cmd_->indirectsymoff; - uint32_t size = cmd_->nindirectsyms * sizeof(uint32_t); - auto array = util::ParseDataAsSize(offset,size,sizeof(uint32_t)); + uint64_t size = static_cast(cmd_->nindirectsyms) * sizeof(uint32_t); + size = ClampSizeToFile(ctx(), offset, size); + auto array = util::ParseDataAsSize(offset,(uint32_t)size,sizeof(uint32_t)); for(char *cur : array){ callback((uint32_t*)(void*)cur); } @@ -45,8 +60,10 @@ void LoadCommand_LC_DYSYMTAB::ForEachExternalRelocations(std::functionheader_start() + cmd_->extreloff; const uint32_t entry_size = sizeof(struct qv_relocation_info); - for(uint32_t i = 0; i < cmd_->nextrel; ++i){ - callback(offset + i * entry_size, i); + const uint64_t avail = ClampSizeToFile(ctx(), offset, static_cast(cmd_->nextrel) * entry_size); + const uint64_t count = avail / entry_size; + for(uint64_t i = 0; i < count; ++i){ + callback(offset + i * entry_size, (uint32_t)i); } } @@ -55,8 +72,10 @@ void LoadCommand_LC_DYSYMTAB::ForEachLocalRelocations(std::functionheader_start() + cmd_->locreloff; const uint32_t entry_size = sizeof(struct qv_relocation_info); - for(uint32_t i = 0; i < cmd_->nlocrel; ++i){ - callback(offset + i * entry_size, i); + const uint64_t avail = ClampSizeToFile(ctx(), offset, static_cast(cmd_->nlocrel) * entry_size); + const uint64_t count = avail / entry_size; + for(uint64_t i = 0; i < count; ++i){ + callback(offset + i * entry_size, (uint32_t)i); } } diff --git a/src/libmoex/node/loadcmd/LoadCommand_LINKEDIT_DATA.cpp b/src/libmoex/node/loadcmd/LoadCommand_LINKEDIT_DATA.cpp index 5adb113..82fcf27 100644 --- a/src/libmoex/node/loadcmd/LoadCommand_LINKEDIT_DATA.cpp +++ b/src/libmoex/node/loadcmd/LoadCommand_LINKEDIT_DATA.cpp @@ -25,14 +25,30 @@ std::vector &LoadCommand_LC_DATA_IN_CODE::GetDices(){ } char* cur_offset = (char*)this->header_->header_start() + cmd_->dataoff; - uint32_t cur_size = 0; - while(cur_size < cmd_->datasize){ + uint64_t datasize = cmd_->datasize; + // Clamp the table to the mapped file so a truncated/crafted datasize cannot + // create entries that point past the end of the file. + auto fc = this->ctx(); + if(fc && fc->file_start != nullptr){ + const char* fstart = static_cast(fc->file_start); + const char* fend = fstart + fc->file_size; + if(cur_offset < fstart || cur_offset > fend){ + return dices_; + } + const uint64_t avail = static_cast(fend - cur_offset); + if(datasize > avail) datasize = avail; + } + + uint64_t cur_size = 0; + while(true){ DataInCodeEntryPtr entry = std::make_shared(); + const uint64_t esz = entry->DATA_SIZE(); + if(esz == 0 || cur_size + esz > datasize) break; entry->Init(cur_offset,ctx_); dices_.push_back(entry); - cur_offset += entry->DATA_SIZE(); - cur_size += entry->DATA_SIZE(); + cur_offset += esz; + cur_size += esz; } return dices_; @@ -45,6 +61,17 @@ std::vector &LoadCommand_LC_FUNCTION_STARTS::GetFunctions(){ const char* start = (char*)this->header_->header_start() + cmd_->dataoff; const char* end = start + cmd_->datasize; + // Clamp the data range to the mapped file: a truncated/crafted command can + // claim a datasize that runs past the end of the file. + auto fctx = this->ctx(); + if(fctx && fctx->file_start != nullptr){ + const char* fstart = static_cast(fctx->file_start); + const char* fend = fstart + fctx->file_size; + if(start < fstart || start > fend){ + return functions_; + } + if(end > fend) end = fend; + } const char* cur_offset = start; while(cur_offset < end){ Uleb128Data data; diff --git a/src/libmoex/node/loadcmd/LoadCommand_SEGMENT.cpp b/src/libmoex/node/loadcmd/LoadCommand_SEGMENT.cpp index 6cac9fd..cf83501 100644 --- a/src/libmoex/node/loadcmd/LoadCommand_SEGMENT.cpp +++ b/src/libmoex/node/loadcmd/LoadCommand_SEGMENT.cpp @@ -9,7 +9,13 @@ MOEX_NAMESPACE_BEGIN void LoadCommand_LC_SEGMENT::Init(void * offset,NodeContextPtr & ctx){ LoadCommandImpl::Init(offset,ctx); - for(uint32_t idx = 0; idx < cmd_->nsects; ++idx){ + // Section structs live inside this load command; bound nsects to what + // cmdsize (already validated against the file) can actually hold so a + // crafted nsects cannot create sections that point past the command/file. + uint64_t avail = cmd_->cmdsize > data_size_cmd ? (cmd_->cmdsize - data_size_cmd) : 0; + uint64_t count = avail / sizeof(qv_section); + if(count > cmd_->nsects) count = cmd_->nsects; + for(uint64_t idx = 0; idx < count; ++idx){ qv_section * cur = reinterpret_cast((char*)offset_ + data_size_cmd + idx * sizeof(qv_section)); MachSectionPtr section = std::make_shared(); @@ -34,7 +40,11 @@ std::vector> LoadCommand_LC_SEGMENT::GetIni void LoadCommand_LC_SEGMENT_64::Init(void * offset,NodeContextPtr & ctx){ LoadCommandImpl::Init(offset,ctx); - for(uint32_t idx = 0; idx < cmd_->nsects; ++idx){ + // See LoadCommand_LC_SEGMENT::Init: bound nsects to what cmdsize can hold. + uint64_t avail = cmd_->cmdsize > data_size_cmd ? (cmd_->cmdsize - data_size_cmd) : 0; + uint64_t count = avail / sizeof(qv_section_64); + if(count > cmd_->nsects) count = cmd_->nsects; + for(uint64_t idx = 0; idx < count; ++idx){ qv_section_64 * cur = reinterpret_cast((char*)offset_ + data_size_cmd + idx * sizeof(qv_section_64)); MachSectionPtr section = std::make_shared(); section->set_header(header_); diff --git a/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.cpp b/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.cpp index 7a40498..36ebd01 100644 --- a/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.cpp +++ b/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.cpp @@ -10,15 +10,35 @@ void LoadCommand_LC_SYMTAB::LazyInit(){ // symbol list char * symbol_offset = (char*)GetSymbolTableOffsetAddress(); - if(header_->Is64()){ - for(uint32_t idx = 0; idx < GetSymbolTableSize(); ++idx){ + const bool is64 = header_->Is64(); + const uint64_t entry_size = is64 ? sizeof(struct qv_nlist_64) : sizeof(struct qv_nlist); + + // Bound the entry count to what actually fits in the mapped file: a + // truncated or crafted symtab can claim more nlist entries than exist, and + // NList holds raw pointers into the file, so reading past the end crashes. + uint64_t count = GetSymbolTableSize(); + if(ctx_ && ctx_->file_start != nullptr){ + const char *fstart = static_cast(ctx_->file_start); + if(symbol_offset < fstart){ + count = 0; + }else{ + const uint64_t off_in_file = static_cast(symbol_offset - fstart); + const uint64_t avail = off_in_file <= ctx_->file_size + ? (ctx_->file_size - off_in_file) / entry_size + : 0; + if(count > avail) count = avail; + } + } + + if(is64){ + for(uint64_t idx = 0; idx < count; ++idx){ struct qv_nlist_64 * cur = reinterpret_cast(symbol_offset + idx * sizeof(struct qv_nlist_64)); NListPtr symbol = std::make_shared(); symbol->Init(cur,ctx_,true); nlists_.push_back(symbol); } }else{ - for(uint32_t idx = 0; idx < GetSymbolTableSize(); ++idx) { + for(uint64_t idx = 0; idx < count; ++idx) { struct qv_nlist *cur = reinterpret_cast(symbol_offset + idx * sizeof(struct qv_nlist)); NListPtr symbol = std::make_shared(); symbol->Init(cur, ctx_,false); diff --git a/src/libmoex/viewnode/ViewNodeDumper.cpp b/src/libmoex/viewnode/ViewNodeDumper.cpp index d6b6d94..c38c93f 100644 --- a/src/libmoex/viewnode/ViewNodeDumper.cpp +++ b/src/libmoex/viewnode/ViewNodeDumper.cpp @@ -11,6 +11,11 @@ namespace { using Json = nlohmann::json; +// Hard cap on view-tree recursion depth. A real Mach-O view tree is only a +// handful of levels deep; this guards against stack overflow if malformed input +// ever yields a pathologically deep or cyclic node tree. +static const size_t kMaxDumpDepth = 256; + static std::vector CollectChildren(ViewNode *node) { std::vector children; node->ForEachChild([&](ViewNode *child) { @@ -278,6 +283,9 @@ static void DumpNodeText(ViewNode *node, if (options.max_depth > 0 && depth >= options.max_depth) { return; } + if (depth >= kMaxDumpDepth) { + return; + } auto children = CollectChildren(node); for (auto *child : children) { @@ -312,7 +320,7 @@ static Json NodeToJson(ViewNode *node, } j["children"] = Json::array(); - if (options.max_depth == 0 || depth < options.max_depth) { + if ((options.max_depth == 0 || depth < options.max_depth) && depth < kMaxDumpDepth) { auto children = CollectChildren(node); for (size_t i = 0; i < children.size(); ++i) { auto *child = children[i]; @@ -360,6 +368,9 @@ static void AccumulateSummary(ViewNode *node, if (options.max_depth > 0 && depth >= options.max_depth) { return; } + if (depth >= kMaxDumpDepth) { + return; + } auto children = CollectChildren(node); for (auto *child : children) { diff --git a/src/libmoex/viewnode/views/DynamicLoaderInfoViewNode.cpp b/src/libmoex/viewnode/views/DynamicLoaderInfoViewNode.cpp index fb89870..7b0ac1f 100644 --- a/src/libmoex/viewnode/views/DynamicLoaderInfoViewNode.cpp +++ b/src/libmoex/viewnode/views/DynamicLoaderInfoViewNode.cpp @@ -490,7 +490,7 @@ void RebaseInfoViewNode::InitViewDatas() break; } case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:{ - auto code = static_cast(codebase); + auto code = static_cast(codebase); print_->AddRow({ToHexString(info->header()->GetRAW(ctx->pbyte)),ToHexString((int)ctx->byte), "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", diff --git a/src/libmoex/viewnode/views/SectionViewNode.h b/src/libmoex/viewnode/views/SectionViewNode.h index 0e074b0..1f60eec 100644 --- a/src/libmoex/viewnode/views/SectionViewNode.h +++ b/src/libmoex/viewnode/views/SectionViewNode.h @@ -26,8 +26,24 @@ class SectionViewChildNode : public ViewNode{ return offset; } uint32_t GetSize(){ - uint32_t size = (uint32_t)d_->sect().size_both(); - return size; + uint64_t size = d_->sect().size_both(); + // Clamp the section data size to what is actually present in the mapped + // file: a truncated or crafted section can claim more bytes than exist, + // and the data readers below would otherwise run off the end. + auto ctx = d_->ctx(); + if(ctx && ctx->file_start != nullptr){ + const char *off = GetOffset(); + const char *fstart = static_cast(ctx->file_start); + if(off < fstart) + return 0; + const uint64_t off_in_file = static_cast(off - fstart); + if(off_in_file >= ctx->file_size) + return 0; + const uint64_t avail = ctx->file_size - off_in_file; + if(size > avail) + size = avail; + } + return (uint32_t)size; } void InitViewDatas()override { @@ -56,8 +72,24 @@ class SectionViewNode : public ViewNode{ return offset; } uint32_t GetSize(){ - uint32_t size = (uint32_t)d_->sect().size_both(); - return size; + uint64_t size = d_->sect().size_both(); + // Clamp the section data size to what is actually present in the mapped + // file: a truncated or crafted section can claim more bytes than exist, + // and the data readers below would otherwise run off the end. + auto ctx = d_->ctx(); + if(ctx && ctx->file_start != nullptr){ + const char *off = GetOffset(); + const char *fstart = static_cast(ctx->file_start); + if(off < fstart) + return 0; + const uint64_t off_in_file = static_cast(off - fstart); + if(off_in_file >= ctx->file_size) + return 0; + const uint64_t avail = ctx->file_size - off_in_file; + if(size > avail) + size = avail; + } + return (uint32_t)size; } void InitViewDatas()override; diff --git a/src/libmoex/viewnode/views/StringTableViewNode.cpp b/src/libmoex/viewnode/views/StringTableViewNode.cpp index 1578948..45d85ef 100644 --- a/src/libmoex/viewnode/views/StringTableViewNode.cpp +++ b/src/libmoex/viewnode/views/StringTableViewNode.cpp @@ -32,9 +32,21 @@ void StringTableViewNode::InitViewDatas() char * stroffset = (char*)seg->GetStringTableOffsetAddress(); - uint32_t strsize = seg->GetStringTableSize(); + uint64_t strsize = seg->GetStringTableSize(); - auto string_results = util::ParseStringLiteral(stroffset,strsize); + // Clamp the string table to the mapped file (truncated/crafted strsize). + auto sctx = mh_->ctx(); + if(sctx && sctx->file_start != nullptr){ + const char *fstart = static_cast(sctx->file_start); + if(stroffset < fstart || static_cast(stroffset - fstart) >= sctx->file_size){ + strsize = 0; + }else{ + const uint64_t avail = sctx->file_size - static_cast(stroffset - fstart); + if(strsize > avail) strsize = avail; + } + } + + auto string_results = util::ParseStringLiteral(stroffset,(uint32_t)strsize); int lineno = 0; for(char * cur : string_results){ diff --git a/src/src/controller/LayoutController.cpp b/src/src/controller/LayoutController.cpp index 09d71ce..580b9f9 100755 --- a/src/src/controller/LayoutController.cpp +++ b/src/src/controller/LayoutController.cpp @@ -54,14 +54,17 @@ void LayoutController::buildModel() initChildren(root,item); } -void LayoutController::initChildren(moex::ViewNode *parentNode,QStandardItem *parentItem){ +void LayoutController::initChildren(moex::ViewNode *parentNode,QStandardItem *parentItem,int depth){ + // Guard against a pathologically deep / cyclic node tree from malformed input. + if(depth >= 256) + return; parentNode->ForEachChild([&](moex::ViewNode* node){ QStandardItem *subItem = new QStandardItem(QString::fromStdString(node->GetDisplayName())); parentItem->appendRow(subItem); subItem->setData(QVariant::fromValue((void*)node)); // Loop - initChildren(node,subItem); + initChildren(node,subItem,depth + 1); }); } diff --git a/src/src/controller/LayoutController.h b/src/src/controller/LayoutController.h index dc35574..586fb99 100755 --- a/src/src/controller/LayoutController.h +++ b/src/src/controller/LayoutController.h @@ -23,7 +23,7 @@ class LayoutController bool parse(QString & error); // Builds the tree model from the parsed result. GUI thread only. void buildModel(); - void initChildren(moex::ViewNode *parentNode,QStandardItem *parentItem); + void initChildren(moex::ViewNode *parentNode,QStandardItem *parentItem,int depth = 0); int getExpandDepth(); QString lastError() const { return lastError_; }