diff --git a/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.h b/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.h
index dfdb8a1..aaaecc8 100644
--- a/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.h
+++ b/src/libmoex/node/loadcmd/LoadCommand_SYMTAB.h
@@ -8,6 +8,7 @@
 #include "libmoex/node/LoadCommand.h"
 #include "libmoex/node/Common.h"
 #include "libmoex/node/MachHeader.h"
+#include <cstring>
 
 MOEX_NAMESPACE_BEGIN
 
@@ -95,9 +96,34 @@ class LoadCommand_LC_SYMTAB : public LoadCommandImpl{
     }
 
     std::string GetStringByStrX(uint32_t strx){
-        char * stroffset = (char*)GetStringTableOffsetAddress();
-        std::string name(stroffset + strx);
-        return name;
+        char * table = (char*)GetStringTableOffsetAddress();
+        const uint32_t strsize = cmd_->strsize;
+        if (strx >= strsize) {
+            return std::string();
+        }
+        const char * start = table + strx;
+
+        // Never read past the string table extent or the mapped file: a
+        // truncated or crafted string table may lack a terminating NUL.
+        std::size_t max_len = static_cast<std::size_t>(strsize - strx);
+        auto ctx = header_->ctx();
+        if (ctx) {
+            const char * file_start = static_cast<const char *>(ctx->file_start);
+            const char * file_end = file_start + ctx->file_size;
+            if (start < file_start || start >= file_end) {
+                return std::string();
+            }
+            const std::size_t max_in_file = static_cast<std::size_t>(file_end - start);
+            if (max_in_file < max_len) {
+                max_len = max_in_file;
+            }
+        }
+
+        const char * nul = static_cast<const char *>(memchr(start, '\0', max_len));
+        if (nul != nullptr) {
+            return std::string(start, nul);
+        }
+        return std::string(start, start + max_len);
     }
 
 public:
diff --git a/src/libmoex/viewnode/ViewNodeDumper.cpp b/src/libmoex/viewnode/ViewNodeDumper.cpp
index 9aef6ef..d6b6d94 100644
--- a/src/libmoex/viewnode/ViewNodeDumper.cpp
+++ b/src/libmoex/viewnode/ViewNodeDumper.cpp
@@ -57,6 +57,47 @@ static std::string SanitizeCell(const std::string &input) {
     return out;
 }
 
+// Returns a valid UTF-8 copy of the input, replacing any malformed byte with
+// '?'. Cell values can hold raw bytes from malformed/truncated binaries, and
+// the bundled nlohmann::json (3.1.2) throws on invalid UTF-8 during dump().
+static std::string ToUtf8Safe(const std::string &in) {
+    std::string out;
+    out.reserve(in.size());
+    const std::size_t n = in.size();
+    std::size_t i = 0;
+    while (i < n) {
+        const unsigned char c = static_cast<unsigned char>(in[i]);
+        if (c < 0x80) {
+            out += static_cast<char>(c);
+            ++i;
+            continue;
+        }
+        std::size_t len;
+        uint32_t min_cp;
+        if ((c & 0xE0) == 0xC0) { len = 2; min_cp = 0x80; }
+        else if ((c & 0xF0) == 0xE0) { len = 3; min_cp = 0x800; }
+        else if ((c & 0xF8) == 0xF0) { len = 4; min_cp = 0x10000; }
+        else { out += '?'; ++i; continue; }
+
+        if (i + len > n) { out += '?'; ++i; continue; }
+        uint32_t cp = c & (0x7Fu >> len);
+        bool ok = true;
+        for (std::size_t k = 1; k < len; ++k) {
+            const unsigned char cc = static_cast<unsigned char>(in[i + k]);
+            if ((cc & 0xC0) != 0x80) { ok = false; break; }
+            cp = (cp << 6) | (cc & 0x3Fu);
+        }
+        if (!ok || cp < min_cp || cp > 0x10FFFFu || (cp >= 0xD800u && cp <= 0xDFFFu)) {
+            out += '?';
+            ++i;
+            continue;
+        }
+        out.append(in, i, len);
+        i += len;
+    }
+    return out;
+}
+
 static bool NodeHasImmediateContent(ViewNode *node) {
     const auto &table = node->table();
     const auto &binary = node->binary();
@@ -172,7 +213,7 @@ static Json TableToJson(const TableViewDataPtr &table, const ViewNodeDumpOptions
     if (IncludeTableHeaders(options)) {
         j["headers"] = Json::array();
         for (const auto &header : table->headers) {
-            j["headers"].push_back(header->data);
+            j["headers"].push_back(ToUtf8Safe(header->data));
         }
     }
 
@@ -194,13 +235,13 @@ static Json TableToJson(const TableViewDataPtr &table, const ViewNodeDumpOptions
         r["values"] = Json::array();
         r["cells"] = Json::object();
         for (const auto &item : row->items) {
-            r["values"].push_back(item->data);
+            r["values"].push_back(ToUtf8Safe(item->data));
         }
         for (size_t col = 0; col < row->items.size(); ++col) {
             const std::string key = col < table->headers.size()
-                ? table->headers[col]->data
+                ? ToUtf8Safe(table->headers[col]->data)
                 : ("column" + std::to_string(col));
-            r["cells"][key] = row->items[col]->data;
+            r["cells"][key] = ToUtf8Safe(row->items[col]->data);
         }
         if (row->size > 0) {
             r["byteLength"] = row->size;
@@ -251,9 +292,9 @@ static Json NodeToJson(ViewNode *node,
                        size_t child_index = 0) {
     node->Init();
     Json j;
-    j["name"] = node->GetDisplayName();
+    j["name"] = ToUtf8Safe(node->GetDisplayName());
     j["depth"] = depth;
-    j["path"] = JoinPath(path_segments);
+    j["path"] = ToUtf8Safe(JoinPath(path_segments));
     j["kind"] = ClassifyNodeKind(node);
     j["childIndex"] = child_index;
 
diff --git a/tests/regression/run_crash_regression.sh b/tests/regression/run_crash_regression.sh
index 014f28e..eb5d63a 100755
--- a/tests/regression/run_crash_regression.sh
+++ b/tests/regression/run_crash_regression.sh
@@ -10,6 +10,19 @@ if [[ ! -x "${PARSER_BIN}" ]]; then
   exit 2
 fi
 
+# Optional full app binary. When present we also run the complete view-node
+# dumper (--cli) over the malformed corpus, which exercises the rich parsing
+# layer (load commands, code signature, ObjC/Swift metadata, dyld info, ...)
+# that moex-parse alone does not reach.
+APP_BUNDLE_BIN="${ROOT_DIR}/build/MachOExplorer.app/Contents/MacOS/MachOExplorer"
+PLAIN_APP_BIN="${ROOT_DIR}/build/MachOExplorer"
+APP_BIN=""
+if [[ -x "${APP_BUNDLE_BIN}" ]]; then
+  APP_BIN="${APP_BUNDLE_BIN}"
+elif [[ -x "${PLAIN_APP_BIN}" ]]; then
+  APP_BIN="${PLAIN_APP_BIN}"
+fi
+
 TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/moex-crash-reg.XXXXXX")"
 trap 'rm -rf "${TMP_DIR}"' EXIT
 
@@ -117,9 +130,36 @@ else
   echo "[ok] valid FAT64 parsed successfully: $(basename "${valid_fat64}")"
 fi
 
+# Deep pass: run the full view-node dumper over the same corpus so the rich
+# parsing layer is also crash-tested against malformed input.
+CLI_TOTAL=0
+CLI_CRASHED=0
+if [[ -n "${APP_BIN}" ]]; then
+  for f in "${TMP_DIR}"/*; do
+    for fmt in text json; do
+      CLI_TOTAL=$((CLI_TOTAL + 1))
+      set +e
+      "${APP_BIN}" --cli --format "${fmt}" "${f}" >/dev/null 2>&1
+      cli_rc=$?
+      set -e
+      if [[ "${cli_rc}" -ge 128 ]]; then
+        echo "[fail] view-node dumper crashed (signal exit=${cli_rc}, format=${fmt}) for: ${f}"
+        CLI_CRASHED=$((CLI_CRASHED + 1))
+        FAIL=1
+      fi
+    done
+  done
+  # Only report success when no run was killed by a signal.
+  if [[ "${CLI_CRASHED}" -eq 0 ]]; then
+    echo "[ok] view-node dumper handled corpus safely (runs=${CLI_TOTAL})"
+  fi
+else
+  echo "[skip] full app binary not built; view-node dumper crash pass skipped"
+fi
+
 if [[ "${FAIL}" -ne 0 ]]; then
   echo "crash-regression: failed"
   exit 1
 fi
 
-echo "crash-regression: passed (total=${TOTAL} rejected=${REJECTED} accepted=${ACCEPTED})"
+echo "crash-regression: passed (total=${TOTAL} rejected=${REJECTED} accepted=${ACCEPTED} cli-runs=${CLI_TOTAL})"