From 5e25a61f76b4dd00d76406fe416503fe5292f02a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 20 May 2026 16:37:15 +0000 Subject: [PATCH] fix(picklescanner): handle GET/BINGET to missing memo without aborting scan PickleUnsafeOpScan currently raises KeyError when STACK_GLOBAL analysis encounters a GET/BINGET/LONG_BINGET opcode referencing a memo index that was never PUT. The unhandled exception propagates up and the scanner returns zero issues for the whole file, even when unsafe globals appear earlier in the byte stream. This is one instance of the EDP (Exception-Directed Programming) class discussed in arXiv:2508.19774 ("Hide and Seek"; GHSA-9gvj-pp9x-gcfr) and the "Invalid Memo Reference" category in #338: a malformed-but-parseable pickle whose only function is to crash the scanner before the malicious payload is reported. Fix: when GET/BINGET references a missing memo index, treat the resolved value as "unknown" (matching the existing convention at the next branch of the same conditional for unrecognized opcodes) and continue scanning the remainder of the stream. Test: tests/test_modelscan.py::test_scan_picklescanner_resilient_to_invalid_memo_binget generates a minimal pickle that calls os.system via GLOBAL+REDUCE and follows it with STACK_GLOBAL referencing memo indices 7 and 8 (never PUT). Before this fix, modelscan returns 0 issues. After, it reports the os.system CRITICAL. Refs #338. --- modelscan/tools/picklescanner.py | 12 +++++++- tests/test_modelscan.py | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/modelscan/tools/picklescanner.py b/modelscan/tools/picklescanner.py index 44c4e2a0..6b5530f3 100644 --- a/modelscan/tools/picklescanner.py +++ b/modelscan/tools/picklescanner.py @@ -93,7 +93,17 @@ def _list_globals( ]: continue if ops[n - offset][0].name in ["GET", "BINGET", "LONG_BINGET"]: - values.append(memo[int(ops[n - offset][1])]) + memo_key = int(ops[n - offset][1]) + if memo_key in memo: + values.append(memo[memo_key]) + else: + logger.debug( + "GET/BINGET at position %s references missing memo " + "index %s; categorizing as unknown.", + n - offset, + memo_key, + ) + values.append("unknown") elif ops[n - offset][0].name not in [ "SHORT_BINUNICODE", "UNICODE", diff --git a/tests/test_modelscan.py b/tests/test_modelscan.py index a82e4ecc..d5545e5b 100644 --- a/tests/test_modelscan.py +++ b/tests/test_modelscan.py @@ -1644,3 +1644,56 @@ def test_main_defaultgroup(file_path: Any) -> None: pass finally: sys.argv = argv + + +def edp_invalid_memo_binget_gen() -> bytes: + """Construct a pickle that exercises the EDP / invalid-memo-reference class. + + The stream first invokes ``os.system`` via ``GLOBAL`` + ``REDUCE`` (the + unsafe payload), then follows it with a ``STACK_GLOBAL`` whose operands + resolve via ``BINGET`` into memo slots that were never PUT. A scanner + that aborts on the resulting ``KeyError`` during ``STACK_GLOBAL`` + analysis would miss the ``os.system`` call entirely, even though that + call appears earlier in the byte stream. + + The demo command is the benign string ``"echo pwned"`` to keep the + fixture self-contained and non-destructive. + + Refs: Issue #338 (Invalid Memo Reference category); GHSA-9gvj-pp9x-gcfr. + """ + p = pickle.PROTO + b"\x05" + p += pickle.GLOBAL + b"os\nsystem\n" + p += pickle.UNICODE + b"echo pwned\n" + p += pickle.TUPLE1 + p += pickle.REDUCE + # Two BINGET opcodes referencing memo slots that were never PUT; + # the subsequent STACK_GLOBAL would try to resolve them. + p += pickle.BINGET + b"\x07" + p += pickle.BINGET + b"\x08" + p += pickle.STACK_GLOBAL + p += pickle.STOP + return p + + +def test_scan_picklescanner_resilient_to_invalid_memo_binget(tmp_path: Any) -> None: + """The pickle scanner must report unsafe globals seen before a malformed + BINGET/GET memo reference, rather than aborting analysis on the + KeyError. + + Regression for the EDP / Invalid Memo Reference class + (refs Issue #338; GHSA-9gvj-pp9x-gcfr). The fixture contains an + ``os.system`` call followed by a ``STACK_GLOBAL`` whose operands point + at memo slots that were never PUT; without the fix in + ``picklescanner._list_globals``, the scanner aborts before flagging + the ``os.system`` global. + """ + test_file = tmp_path / "edp_invalid_memo_binget.pkl" + test_file.write_bytes(edp_invalid_memo_binget_gen()) + + ms = ModelScan() + ms.scan(str(test_file)) + + assert len(ms.issues.all_issues) > 0, ( + "modelscan must flag the os.system unsafe global appearing before the " + "malformed BINGET, even when STACK_GLOBAL resolution fails." + )