From b639d2deb7ef0458a68c753ccc01952baddd57fd Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 16 Mar 2026 05:39:49 -0400 Subject: [PATCH 1/3] fix(security): bound pickle metadata reads to prevent DoS --- modelaudit/scanners/pickle_scanner.py | 14 +++++++++++++- tests/test_metadata_extractor.py | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/modelaudit/scanners/pickle_scanner.py b/modelaudit/scanners/pickle_scanner.py index e0ee2b2e..e76d98fa 100644 --- a/modelaudit/scanners/pickle_scanner.py +++ b/modelaudit/scanners/pickle_scanner.py @@ -6126,14 +6126,26 @@ def extract_metadata(self, file_path: str) -> dict[str, Any]: metadata = super().extract_metadata(file_path) allow_deserialization = bool(self.config.get("allow_metadata_deserialization")) + max_metadata_read_size = int(self.config.get("max_metadata_pickle_read_size", 10 * 1024 * 1024)) try: import pickle import pickletools from io import BytesIO + file_size = self.get_file_size(file_path) + if max_metadata_read_size > 0 and file_size > max_metadata_read_size: + raise ValueError( + f"Pickle metadata read limit exceeded: {file_size} bytes (max: {max_metadata_read_size})" + ) + with open(file_path, "rb") as f: - pickle_data = f.read() + pickle_data = f.read(max_metadata_read_size + 1 if max_metadata_read_size > 0 else -1) + + if max_metadata_read_size > 0 and len(pickle_data) > max_metadata_read_size: + raise ValueError( + f"Pickle metadata read limit exceeded: {len(pickle_data)} bytes (max: {max_metadata_read_size})" + ) # Analyze pickle structure metadata.update( diff --git a/tests/test_metadata_extractor.py b/tests/test_metadata_extractor.py index 6ca38ddb..bb75440a 100644 --- a/tests/test_metadata_extractor.py +++ b/tests/test_metadata_extractor.py @@ -466,6 +466,19 @@ def __reduce__(self): assert "REDUCE" in metadata.get("dangerous_opcodes", []) assert metadata.get("has_dangerous_opcodes") is True + def test_pickle_metadata_enforces_read_limit(self, tmp_path: Path) -> None: + """Ensure pickle metadata extraction rejects files over the metadata read limit.""" + from modelaudit.scanners.pickle_scanner import PickleScanner + + pkl_file = tmp_path / "oversized.pkl" + pkl_file.write_bytes(b"x" * 128) + + scanner = PickleScanner({"max_metadata_pickle_read_size": 64}) + metadata = scanner.extract_metadata(str(pkl_file)) + + assert "extraction_error" in metadata + assert "read limit exceeded" in metadata["extraction_error"] + def test_pickle_safe_data_no_dangerous_opcodes(self, tmp_path: Path) -> None: """Ensure simple data structures don't trigger dangerous opcode detection.""" from modelaudit.scanners.pickle_scanner import PickleScanner From d3ff55770dc6479b37517aae44fe3adec9dd4f0e Mon Sep 17 00:00:00 2001 From: mldangelo Date: Mon, 16 Mar 2026 03:42:30 -0700 Subject: [PATCH 2/3] fix: reject invalid pickle metadata read limits --- modelaudit/scanners/pickle_scanner.py | 11 ++++++++--- tests/test_metadata_extractor.py | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/modelaudit/scanners/pickle_scanner.py b/modelaudit/scanners/pickle_scanner.py index e76d98fa..2723233b 100644 --- a/modelaudit/scanners/pickle_scanner.py +++ b/modelaudit/scanners/pickle_scanner.py @@ -6133,16 +6133,21 @@ def extract_metadata(self, file_path: str) -> dict[str, Any]: import pickletools from io import BytesIO + if max_metadata_read_size <= 0: + raise ValueError( + f"Invalid pickle metadata read limit: {max_metadata_read_size} (must be greater than 0)" + ) + file_size = self.get_file_size(file_path) - if max_metadata_read_size > 0 and file_size > max_metadata_read_size: + if file_size > max_metadata_read_size: raise ValueError( f"Pickle metadata read limit exceeded: {file_size} bytes (max: {max_metadata_read_size})" ) with open(file_path, "rb") as f: - pickle_data = f.read(max_metadata_read_size + 1 if max_metadata_read_size > 0 else -1) + pickle_data = f.read(max_metadata_read_size + 1) - if max_metadata_read_size > 0 and len(pickle_data) > max_metadata_read_size: + if len(pickle_data) > max_metadata_read_size: raise ValueError( f"Pickle metadata read limit exceeded: {len(pickle_data)} bytes (max: {max_metadata_read_size})" ) diff --git a/tests/test_metadata_extractor.py b/tests/test_metadata_extractor.py index bb75440a..5a98843d 100644 --- a/tests/test_metadata_extractor.py +++ b/tests/test_metadata_extractor.py @@ -466,18 +466,26 @@ def __reduce__(self): assert "REDUCE" in metadata.get("dangerous_opcodes", []) assert metadata.get("has_dangerous_opcodes") is True - def test_pickle_metadata_enforces_read_limit(self, tmp_path: Path) -> None: - """Ensure pickle metadata extraction rejects files over the metadata read limit.""" + @pytest.mark.parametrize( + ("limit", "expected_error"), + [ + (64, "read limit exceeded"), + (0, "must be greater than 0"), + (-1, "must be greater than 0"), + ], + ) + def test_pickle_metadata_enforces_read_limit(self, tmp_path: Path, limit: int, expected_error: str) -> None: + """Ensure pickle metadata extraction rejects oversized and invalid read limits.""" from modelaudit.scanners.pickle_scanner import PickleScanner pkl_file = tmp_path / "oversized.pkl" pkl_file.write_bytes(b"x" * 128) - scanner = PickleScanner({"max_metadata_pickle_read_size": 64}) + scanner = PickleScanner({"max_metadata_pickle_read_size": limit}) metadata = scanner.extract_metadata(str(pkl_file)) assert "extraction_error" in metadata - assert "read limit exceeded" in metadata["extraction_error"] + assert expected_error in metadata["extraction_error"] def test_pickle_safe_data_no_dangerous_opcodes(self, tmp_path: Path) -> None: """Ensure simple data structures don't trigger dangerous opcode detection.""" From 7cb61a301ba5ec13c480a41dedc38702d56d38af Mon Sep 17 00:00:00 2001 From: Michael D'Angelo Date: Thu, 19 Mar 2026 22:32:44 -0700 Subject: [PATCH 3/3] fix: enforce pickle metadata hard cap Clamp caller-supplied metadata read limits to 10 MiB, keep malformed limit parsing inside extract_metadata error handling, and cover the success and hard-cap paths in regression tests. Co-authored-by: Codex --- modelaudit/scanners/pickle_scanner.py | 5 ++++- tests/test_metadata_extractor.py | 29 +++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/modelaudit/scanners/pickle_scanner.py b/modelaudit/scanners/pickle_scanner.py index 7e21e46b..59238a41 100644 --- a/modelaudit/scanners/pickle_scanner.py +++ b/modelaudit/scanners/pickle_scanner.py @@ -6882,17 +6882,20 @@ def extract_metadata(self, file_path: str) -> dict[str, Any]: metadata = super().extract_metadata(file_path) allow_deserialization = bool(self.config.get("allow_metadata_deserialization")) - max_metadata_read_size = int(self.config.get("max_metadata_pickle_read_size", 10 * 1024 * 1024)) + metadata_read_cap = 10 * 1024 * 1024 try: import pickle import pickletools from io import BytesIO + max_metadata_read_size = int(self.config.get("max_metadata_pickle_read_size", metadata_read_cap)) + if max_metadata_read_size <= 0: raise ValueError( f"Invalid pickle metadata read limit: {max_metadata_read_size} (must be greater than 0)" ) + max_metadata_read_size = min(max_metadata_read_size, metadata_read_cap) file_size = self.get_file_size(file_path) if file_size > max_metadata_read_size: diff --git a/tests/test_metadata_extractor.py b/tests/test_metadata_extractor.py index 5a98843d..fe322f13 100644 --- a/tests/test_metadata_extractor.py +++ b/tests/test_metadata_extractor.py @@ -469,12 +469,18 @@ def __reduce__(self): @pytest.mark.parametrize( ("limit", "expected_error"), [ + (256, None), (64, "read limit exceeded"), (0, "must be greater than 0"), (-1, "must be greater than 0"), ], ) - def test_pickle_metadata_enforces_read_limit(self, tmp_path: Path, limit: int, expected_error: str) -> None: + def test_pickle_metadata_enforces_read_limit( + self, + tmp_path: Path, + limit: int, + expected_error: str | None, + ) -> None: """Ensure pickle metadata extraction rejects oversized and invalid read limits.""" from modelaudit.scanners.pickle_scanner import PickleScanner @@ -484,8 +490,27 @@ def test_pickle_metadata_enforces_read_limit(self, tmp_path: Path, limit: int, e scanner = PickleScanner({"max_metadata_pickle_read_size": limit}) metadata = scanner.extract_metadata(str(pkl_file)) + if expected_error is None: + assert "extraction_error" not in metadata + assert metadata["pickle_size"] == 128 + else: + assert "extraction_error" in metadata + assert expected_error in metadata["extraction_error"] + + def test_pickle_metadata_caps_configured_read_limit_at_10_mib(self, tmp_path: Path) -> None: + """Oversized caller-supplied limits should still be clamped to 10 MiB.""" + from modelaudit.scanners.pickle_scanner import PickleScanner + + pkl_file = tmp_path / "oversized-cap.pkl" + with pkl_file.open("wb") as handle: + handle.seek(10 * 1024 * 1024) + handle.write(b"x") + + scanner = PickleScanner({"max_metadata_pickle_read_size": 20 * 1024 * 1024}) + metadata = scanner.extract_metadata(str(pkl_file)) + assert "extraction_error" in metadata - assert expected_error in metadata["extraction_error"] + assert "max: 10485760" in metadata["extraction_error"] def test_pickle_safe_data_no_dangerous_opcodes(self, tmp_path: Path) -> None: """Ensure simple data structures don't trigger dangerous opcode detection."""