Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions src/libmoex/node/loadcmd/LoadCommand_SYMTAB.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "libmoex/node/LoadCommand.h"
#include "libmoex/node/Common.h"
#include "libmoex/node/MachHeader.h"
#include <cstring>

MOEX_NAMESPACE_BEGIN

Expand Down Expand Up @@ -95,9 +96,34 @@ class LoadCommand_LC_SYMTAB : public LoadCommandImpl<qv_symtab_command>{
}

// Returns the string stored at byte offset `strx` of the symbol string table.
//
// The lookup is bounded so a truncated or crafted string table without a
// terminating NUL cannot cause a read past the table or the file:
//   - `strx` at or beyond `cmd_->strsize` yields an empty string;
//   - when the header exposes a context, an address outside
//     [file_start, file_start + file_size) yields an empty string, and the
//     scan is additionally clamped to the end of that range;
//   - if no NUL is found within the allowed span, the bytes up to the limit
//     are returned as the string.
std::string GetStringByStrX(uint32_t strx){
    // C-style cast kept as in the original code: the return type of
    // GetStringTableOffsetAddress() is declared elsewhere in the class.
    const char * table = (const char*)GetStringTableOffsetAddress();
    const uint32_t strsize = cmd_->strsize;
    if (strx >= strsize) {
        return std::string();
    }
    const char * start = table + strx;

    // Upper bound imposed by the string table itself (strx < strsize here,
    // so the subtraction cannot wrap).
    std::size_t max_len = static_cast<std::size_t>(strsize - strx);

    // Tighten the bound to the file extent when it is known.
    auto ctx = header_->ctx();
    if (ctx) {
        const char * file_start = static_cast<const char*>(ctx->file_start);
        const char * file_end = file_start + ctx->file_size;
        if (start < file_start || start >= file_end) {
            return std::string();
        }
        const std::size_t max_in_file = static_cast<std::size_t>(file_end - start);
        if (max_in_file < max_len) {
            max_len = max_in_file;
        }
    }

    // Bounded search for the terminator instead of an unbounded C-string read.
    const char * nul = static_cast<const char*>(std::memchr(start, '\0', max_len));
    if (nul != nullptr) {
        return std::string(start, nul);
    }
    return std::string(start, start + max_len);
}

public:
Expand Down
53 changes: 47 additions & 6 deletions src/libmoex/viewnode/ViewNodeDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,47 @@ static std::string SanitizeCell(const std::string &input) {
return out;
}

// Produces a copy of `in` that is well-formed UTF-8. Every byte that cannot
// start (or complete) a valid sequence is replaced by a single '?' and
// scanning resumes at the very next byte. Rejected forms: stray continuation
// bytes, invalid lead bytes, truncated sequences, overlong encodings, UTF-16
// surrogates and code points above U+10FFFF.
// Needed because cell values can hold raw bytes from malformed/truncated
// binaries, and the bundled nlohmann::json (3.1.2) throws on invalid UTF-8
// during dump().
static std::string ToUtf8Safe(const std::string &in) {
    const std::size_t total = in.size();
    std::string result;
    result.reserve(total);

    std::size_t pos = 0;
    while (pos < total) {
        const unsigned char lead = static_cast<unsigned char>(in[pos]);

        // ASCII is copied through unchanged.
        if (lead < 0x80) {
            result.push_back(static_cast<char>(lead));
            ++pos;
            continue;
        }

        // Classify the lead byte: expected sequence length and the smallest
        // code point that legitimately needs that many bytes.
        std::size_t seq_len = 0;
        uint32_t smallest = 0;
        if ((lead & 0xE0) == 0xC0) {
            seq_len = 2;
            smallest = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            seq_len = 3;
            smallest = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            seq_len = 4;
            smallest = 0x10000;
        }

        // A sequence is a candidate only if the lead byte is recognised and
        // all of its bytes are present in the input.
        bool valid = (seq_len != 0) && (pos + seq_len <= total);
        if (valid) {
            // Payload bits of the lead byte: 5, 4 or 3 for lengths 2, 3, 4.
            uint32_t code = lead & (0x7Fu >> seq_len);
            for (std::size_t offset = 1; offset < seq_len; ++offset) {
                const unsigned char trail = static_cast<unsigned char>(in[pos + offset]);
                if ((trail & 0xC0) != 0x80) {
                    valid = false;
                    break;
                }
                code = (code << 6) | (trail & 0x3Fu);
            }
            if (valid) {
                const bool overlong = code < smallest;
                const bool beyond_unicode = code > 0x10FFFFu;
                const bool surrogate = (code >= 0xD800u) && (code <= 0xDFFFu);
                valid = !(overlong || beyond_unicode || surrogate);
            }
        }

        if (valid) {
            result.append(in, pos, seq_len);
            pos += seq_len;
        } else {
            // Replace only the offending lead byte; following bytes are
            // re-examined on their own.
            result.push_back('?');
            ++pos;
        }
    }
    return result;
}

static bool NodeHasImmediateContent(ViewNode *node) {
const auto &table = node->table();
const auto &binary = node->binary();
Expand Down Expand Up @@ -172,7 +213,7 @@ static Json TableToJson(const TableViewDataPtr &table, const ViewNodeDumpOptions
if (IncludeTableHeaders(options)) {
j["headers"] = Json::array();
for (const auto &header : table->headers) {
j["headers"].push_back(header->data);
j["headers"].push_back(ToUtf8Safe(header->data));
}
}

Expand All @@ -194,13 +235,13 @@ static Json TableToJson(const TableViewDataPtr &table, const ViewNodeDumpOptions
r["values"] = Json::array();
r["cells"] = Json::object();
for (const auto &item : row->items) {
r["values"].push_back(item->data);
r["values"].push_back(ToUtf8Safe(item->data));
}
for (size_t col = 0; col < row->items.size(); ++col) {
const std::string key = col < table->headers.size()
? table->headers[col]->data
? ToUtf8Safe(table->headers[col]->data)
: ("column" + std::to_string(col));
r["cells"][key] = row->items[col]->data;
r["cells"][key] = ToUtf8Safe(row->items[col]->data);
}
if (row->size > 0) {
r["byteLength"] = row->size;
Expand Down Expand Up @@ -251,9 +292,9 @@ static Json NodeToJson(ViewNode *node,
size_t child_index = 0) {
node->Init();
Json j;
j["name"] = node->GetDisplayName();
j["name"] = ToUtf8Safe(node->GetDisplayName());
j["depth"] = depth;
j["path"] = JoinPath(path_segments);
j["path"] = ToUtf8Safe(JoinPath(path_segments));
j["kind"] = ClassifyNodeKind(node);
j["childIndex"] = child_index;

Expand Down
37 changes: 36 additions & 1 deletion tests/regression/run_crash_regression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@ if [[ ! -x "${PARSER_BIN}" ]]; then
exit 2
fi

# Locate the optional full application binary. If one is found, the complete
# view-node dumper (--cli) is also run over the malformed corpus, which
# exercises the rich parsing layer (load commands, code signature, ObjC/Swift
# metadata, dyld info, ...) that moex-parse alone does not reach.
# APP_BIN stays empty when neither candidate is an executable file.
APP_BUNDLE_BIN="${ROOT_DIR}/build/MachOExplorer.app/Contents/MacOS/MachOExplorer"
PLAIN_APP_BIN="${ROOT_DIR}/build/MachOExplorer"
APP_BIN=""
# Checked in reverse priority order so the app-bundle binary, when present,
# overrides the plain one.
if [[ -x "${PLAIN_APP_BIN}" ]]; then
    APP_BIN="${PLAIN_APP_BIN}"
fi
if [[ -x "${APP_BUNDLE_BIN}" ]]; then
    APP_BIN="${APP_BUNDLE_BIN}"
fi

TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/moex-crash-reg.XXXXXX")"
trap 'rm -rf "${TMP_DIR}"' EXIT

Expand Down Expand Up @@ -117,9 +130,31 @@ else
echo "[ok] valid FAT64 parsed successfully: $(basename "${valid_fat64}")"
fi

# Deep pass: run the full view-node dumper over the same corpus so the rich
# parsing layer is also crash-tested against malformed input.
# CLI_TOTAL counts dumper invocations (one per file per output format).
# Only signal-style exits count as failures here; an ordinary non-zero exit
# (the dumper rejecting a malformed file) is acceptable.
CLI_TOTAL=0
if [[ -n "${APP_BIN}" ]]; then
    CLI_CRASHES=0
    for f in "${TMP_DIR}"/*; do
        # Without nullglob an unmatched pattern is passed through literally;
        # skip it instead of feeding a non-existent path to the app.
        [[ -e "${f}" ]] || continue
        for fmt in text json; do
            CLI_TOTAL=$((CLI_TOTAL + 1))
            # Temporarily drop errexit so a failing run does not abort the script.
            set +e
            "${APP_BIN}" --cli --format "${fmt}" "${f}" >/dev/null 2>&1
            cli_rc=$?
            set -e
            # Exit statuses of 128 and above are treated as death by signal.
            if [[ "${cli_rc}" -ge 128 ]]; then
                echo "[fail] view-node dumper crashed (signal exit=${cli_rc}, format=${fmt}) for: ${f}"
                FAIL=1
                CLI_CRASHES=$((CLI_CRASHES + 1))
            fi
        done
    done
    # Report success only when no run crashed, so the summary never
    # contradicts the [fail] lines above.
    if [[ "${CLI_CRASHES}" -eq 0 ]]; then
        echo "[ok] view-node dumper handled corpus safely (runs=${CLI_TOTAL})"
    else
        echo "[fail] view-node dumper crashed in ${CLI_CRASHES} of ${CLI_TOTAL} runs"
    fi
else
    echo "[skip] full app binary not built; view-node dumper crash pass skipped"
fi

if [[ "${FAIL}" -ne 0 ]]; then
echo "crash-regression: failed"
exit 1
fi

echo "crash-regression: passed (total=${TOTAL} rejected=${REJECTED} accepted=${ACCEPTED})"
echo "crash-regression: passed (total=${TOTAL} rejected=${REJECTED} accepted=${ACCEPTED} cli-runs=${CLI_TOTAL})"
Loading