Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b7cb604
fix: recurse into numpy object pickle payloads
mldangelo Mar 13, 2026
407652a
test: type annotate numpy recursion regressions
mldangelo Mar 13, 2026
56782ee
fix(numpy): preserve npz member check context
mldangelo Mar 13, 2026
fa0abda
Merge remote-tracking branch 'refs/remotes/origin/feat/numpy-object-p…
mldangelo Mar 13, 2026
f42f854
test: format asset extraction regressions
mldangelo Mar 13, 2026
d45df12
Merge origin/main into feat/numpy-object-pickle-recursion
mldangelo Mar 14, 2026
432c71e
Merge remote-tracking branch 'origin/feat/numpy-object-pickle-recursi…
mldangelo Mar 14, 2026
d42949a
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 14, 2026
2e16362
Merge remote-tracking branch 'origin/feat/numpy-object-pickle-recursi…
mldangelo Mar 14, 2026
5a114b2
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 14, 2026
95ae02c
test: type annotate numpy trailing-bytes regression
mldangelo Mar 14, 2026
644d00a
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 14, 2026
2098e6c
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 15, 2026
0e141f4
fix: harden numpy recursion follow-up checks
mldangelo Mar 15, 2026
e146683
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 15, 2026
74e1117
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 15, 2026
fb19fc1
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 16, 2026
9535dac
Merge remote-tracking branch 'origin/main' into feat/numpy-object-pic…
mldangelo Mar 16, 2026
81229c0
fix: harden numpy recursion and local streaming
mldangelo Mar 16, 2026
383c543
Merge branch 'main' into review-pr-699
mldangelo Mar 17, 2026
827d186
Merge remote-tracking branch 'origin/main' into audit-pr699-mainmerge
mldangelo Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- **cli:** preserve original local files during `--stream` directory scans instead of unlinking them after analysis
- **security:** recurse into object-dtype `.npy` payloads and `.npz` object members with the pickle scanner while preserving CVE-2019-6446 warnings and archive-member context
- **security:** remove `dill.load` / `dill.loads` from the pickle safe-global allowlist so recursive dill deserializers stay flagged as dangerous loader entry points
- **security:** add exact dangerous helper coverage for validated torch and NumPy refs such as `numpy.f2py.crackfortran.getlincoef`, `torch._dynamo.guards.GuardBuilder.get`, and `torch.utils.collect_env.run`
- **security:** add exact dangerous-global coverage for `numpy.load`, `site.main`, `_io.FileIO`, `test.support.script_helper.assert_python_ok`, `_osx_support._read_output`, `_aix_support._read_cmd_output`, `_pyrepl.pager.pipe_pager`, `torch.serialization.load`, and `torch._inductor.codecache.compile_file` (9 PickleScan-only loader and execution primitives)
Expand Down
5 changes: 3 additions & 2 deletions modelaudit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1683,11 +1683,12 @@ def enhanced_progress_callback(message, percentage):
# Create file iterator
file_generator = iterate_files_streaming(actual_path)

# Scan with streaming mode - propagate all config
# Scan with streaming mode - propagate all config.
# Local files already live on disk, so preserve the originals.
streaming_result = scan_model_streaming(
file_generator=file_generator,
timeout=final_timeout,
delete_after_scan=True, # Delete files after scanning in streaming mode
delete_after_scan=False,
progress_callback=progress_callback,
blacklist_patterns=list(blacklist) if blacklist else None,
max_file_size=final_max_file_size,
Expand Down
77 changes: 42 additions & 35 deletions modelaudit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,35 @@

logger = logging.getLogger("modelaudit.core")

OPERATIONAL_ERROR_INDICATORS = (
"Error during scan",
"Error checking file size",
"Error scanning file",
"Scanner crashed",
"Scan timeout",
"Path does not exist",
"Path is not readable",
"Permission denied",
"File not found",
"not installed, cannot scan",
"Missing dependency",
"Import error",
"Module not found",
"not a valid",
"Invalid file format",
"Corrupted file",
"Bad file signature",
"Unable to parse",
"Out of memory",
"Disk space",
"Too many open files",
)


def _has_operational_error_message(message: Any) -> bool:
"""Return True when an issue message reflects an operational scan failure."""
return isinstance(message, str) and any(indicator in message for indicator in OPERATIONAL_ERROR_INDICATORS)


def _to_telemetry_severity(severity: Any) -> str:
"""Normalize severity values to stable telemetry strings."""
Expand Down Expand Up @@ -272,8 +301,12 @@ def _group_checks_by_asset(checks_list: list[Any]) -> dict[tuple[str, str], list
check_name = check.get("name", "Unknown Check")
location = check.get("location", "")
primary_asset = _extract_primary_asset_from_location(location)
details = check.get("details")
zip_entry = details.get("zip_entry") if isinstance(details, dict) else None

group_key = (check_name, primary_asset)
asset_group = f"{primary_asset}:{zip_entry}" if isinstance(zip_entry, str) and zip_entry else primary_asset

group_key = (check_name, asset_group)
check_groups[group_key].append(check)

return check_groups
Expand Down Expand Up @@ -1029,39 +1062,10 @@ def scan_model_directory_or_file(
# Determine if there were operational scan errors vs security findings
# has_errors should only be True for operational errors (scanner crashes,
# file not found, etc.) not for security findings detected in models
operational_error_indicators = [
# Scanner execution errors
"Error during scan",
"Error checking file size",
"Error scanning file",
"Scanner crashed",
"Scan timeout",
# File system errors
"Path does not exist",
"Path is not readable",
"Permission denied",
"File not found",
# Dependency/environment errors
"not installed, cannot scan",
"Missing dependency",
"Import error",
"Module not found",
# File format/corruption errors
"not a valid",
"Invalid file format",
"Corrupted file",
"Bad file signature",
"Unable to parse",
# Resource/system errors
"Out of memory",
"Disk space",
"Too many open files",
]

# Check for operational errors in issues
results.has_errors = (
any(
any(indicator in issue.message for indicator in operational_error_indicators)
_has_operational_error_message(issue.message)
for issue in results.issues
if issue.severity in {IssueSeverity.WARNING, IssueSeverity.CRITICAL}
)
Expand Down Expand Up @@ -1591,6 +1595,9 @@ def scan_model_streaming(
if scan_result:
metadata_dict = dict(scan_result.metadata or {})
metadata_dict.setdefault("file_size", file_path.stat().st_size)
operational_scan_failure = any(
_has_operational_error_message(issue.message) for issue in (scan_result.issues or [])
)

existing_hashes = metadata_dict.get("file_hashes")
if isinstance(existing_hashes, dict):
Expand All @@ -1602,10 +1609,10 @@ def scan_model_streaming(
scan_result_dict = {
"bytes_scanned": scan_result.bytes_scanned,
"files_scanned": 1, # Each scan_result represents one file
# ScanResult.has_errors means "critical findings", but
# ModelAuditResultModel.has_errors is reserved for
# operational scan failures.
"has_errors": not scan_result.success,
# Preserve the main scan semantics: success=False does not
# imply an operational error when the scanner completed
# and only reported informational integrity findings.
"has_errors": operational_scan_failure,
"success": scan_result.success,
"issues": [issue.__dict__ for issue in (scan_result.issues or [])],
"checks": [check.__dict__ for check in (scan_result.checks or [])],
Expand Down
71 changes: 69 additions & 2 deletions modelaudit/scanners/numpy_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import sys
import warnings
from typing import TYPE_CHECKING, Any, ClassVar
from typing import TYPE_CHECKING, Any, BinaryIO, ClassVar

from .base import BaseScanner, IssueSeverity, ScanResult
from .pickle_scanner import PickleScanner

# Import NumPy with compatibility handling
try:
Expand Down Expand Up @@ -88,6 +89,17 @@ def _validate_array_dimensions(self, shape: tuple[int, ...]) -> None:
CVE_2019_6446_CVSS = 9.8
CVE_2019_6446_CWE = "CWE-502"

def _scan_embedded_pickle_payload(
    self,
    file_obj: BinaryIO,
    payload_size: int,
    context_path: str,
) -> ScanResult:
    """Delegate analysis of an object-dtype NumPy payload to the pickle scanner.

    Object arrays are serialized as a pickle stream, so the dedicated
    pickle scanner performs the actual opcode analysis; ``context_path``
    is attached so findings report the NumPy file, not a temp handle.
    """
    delegate = PickleScanner(config=self.config)
    delegate.current_file_path = context_path
    # NOTE(review): relies on the pickle scanner's private byte-stream
    # entry point; keep in sync with PickleScanner._scan_pickle_bytes.
    return delegate._scan_pickle_bytes(file_obj, payload_size)

def _validate_dtype(self, dtype: Any) -> None:
"""Validate numpy dtype for security"""
# Check for problematic data types
Expand Down Expand Up @@ -256,7 +268,8 @@ def scan(self, path: str) -> ScanResult:
# enabling arbitrary code execution.
# dtype.hasobject catches structured dtypes with
# object fields; kind=="O" catches plain object arrays.
if dtype.kind == "O" or bool(getattr(dtype, "hasobject", False)):
has_object_dtype = dtype.kind == "O" or bool(getattr(dtype, "hasobject", False))
if has_object_dtype:
result.add_check(
name=f"{self.CVE_2019_6446_ID}: Object Dtype Pickle Deserialization",
passed=False,
Expand Down Expand Up @@ -299,6 +312,60 @@ def scan(self, path: str) -> ScanResult:
),
)

f.seek(data_offset)
embedded_result = self._scan_embedded_pickle_payload(
f,
file_size - data_offset,
path,
)
result.issues.extend(embedded_result.issues)
result.checks.extend(embedded_result.checks)

pickle_end_offset = embedded_result.metadata.get("first_pickle_end_pos")
if isinstance(pickle_end_offset, int) and pickle_end_offset < file_size:
trailing_bytes = file_size - pickle_end_offset
result.add_check(
name="File Integrity Check",
passed=False,
message=(
"Object-dtype payload contains trailing bytes after the embedded pickle stream"
),
severity=IssueSeverity.INFO,
location=path,
rule_code="S902",
details={
"expected_pickle_end": pickle_end_offset,
"actual_size": file_size,
"trailing_bytes": trailing_bytes,
"dtype": str(dtype),
},
)
result.finish(success=False)
return result

# Object-dtype .npy payloads are stored as a pickle stream rather than
# fixed-width element data, so the numeric dtype/size validation path
# is not applicable after we recurse into the embedded pickle payload.
result.add_check(
name="Data Type Safety Check",
passed=True,
message=f"Object dtype '{dtype}' handled via recursive pickle analysis",
location=path,
rule_code=None,
details={
"dtype": str(dtype),
"dtype_kind": dtype.kind,
"handled_via": "embedded_pickle_scan",
"cve_id": self.CVE_2019_6446_ID,
},
)
result.bytes_scanned = file_size
result.metadata.update(
{"shape": shape, "dtype": str(dtype), "fortran_order": fortran},
)
result.finish(success=True)
return result

self._validate_dtype(dtype)
result.add_check(
name="Data Type Safety Check",
Expand Down
12 changes: 8 additions & 4 deletions modelaudit/scanners/pickle_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4428,12 +4428,14 @@ def _scan_pickle_bytes(self, file_obj: BinaryIO, file_size: int) -> ScanResult:
suspicious_count = 0

# For large files, use chunked reading to avoid memory issues
MAX_MEMORY_READ = 50 * 1024 * 1024 # 50MB max in memory at once
MAX_MEMORY_READ = 10 * 1024 * 1024 # 10MB max in memory at once

current_pos = file_obj.tell()

# Read file data - either all at once for small files or first chunk for large files
# For large files, read first 50MB for pattern analysis (critical malicious code is usually at the beginning)
# Read file data - either all at once for small files or first chunk for large files.
# For large files, read only the first 10MB for pattern analysis to cap
# embedded-pickle memory usage while still inspecting the most security-
# relevant prefix.
file_data = file_obj.read() if file_size <= MAX_MEMORY_READ else file_obj.read(MAX_MEMORY_READ)

file_obj.seek(current_pos) # Reset position
Expand Down Expand Up @@ -4629,7 +4631,9 @@ def _scan_pickle_bytes(self, file_obj: BinaryIO, file_size: int) -> ScanResult:
elif opcode.name == "STOP":
current_stack_depth = 0
if first_pickle_end_pos is None:
first_pickle_end_pos = start_pos + pos + 1
# pickletools reports absolute positions even when parsing
# starts from a non-zero file offset.
first_pickle_end_pos = pos + 1

# Store stack depth warnings for ML-context-aware processing later
if current_stack_depth > base_stack_depth_limit:
Expand Down
70 changes: 40 additions & 30 deletions modelaudit/scanners/zip_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,44 @@ def scan(self, path: str) -> ScanResult:
result.metadata["file_size"] = os.path.getsize(path)
return result

def _rewrite_nested_result_context(
self, scan_result: ScanResult, tmp_path: str, archive_path: str, entry_name: str
) -> None:
"""Rewrite nested result locations so archive members, not temp files, are reported."""
archive_location = f"{archive_path}:{entry_name}"

for issue in scan_result.issues:
if issue.location:
if issue.location.startswith(tmp_path):
issue.location = issue.location.replace(tmp_path, archive_location, 1)
else:
issue.location = f"{archive_location} {issue.location}"
else:
issue.location = archive_location

existing_issue_entry = issue.details.get("zip_entry")
issue.details["zip_entry"] = (
f"{entry_name}:{existing_issue_entry}"
if isinstance(existing_issue_entry, str) and existing_issue_entry
else entry_name
)

for check in scan_result.checks:
if check.location:
if check.location.startswith(tmp_path):
check.location = check.location.replace(tmp_path, archive_location, 1)
else:
check.location = f"{archive_location} {check.location}"
else:
check.location = archive_location

existing_check_entry = check.details.get("zip_entry")
check.details["zip_entry"] = (
f"{entry_name}:{existing_check_entry}"
if isinstance(existing_check_entry, str) and existing_check_entry
else entry_name
)

def _scan_zip_file(self, path: str, depth: int = 0) -> ScanResult:
"""Recursively scan a ZIP file and its contents"""
result = ScanResult(scanner_name=self.name)
Expand Down Expand Up @@ -319,16 +357,7 @@ def _scan_zip_file(self, path: str, depth: int = 0) -> ScanResult:
if name.lower().endswith(".zip"):
try:
nested_result = self._scan_zip_file(tmp_path, depth + 1)
# Update locations in nested results
for issue in nested_result.issues:
if issue.location and issue.location.startswith(
tmp_path,
):
issue.location = issue.location.replace(
tmp_path,
f"{path}:{name}",
1,
)
self._rewrite_nested_result_context(nested_result, tmp_path, path, name)
result.merge(nested_result)

asset_entry = asset_from_scan_result(
Expand All @@ -355,26 +384,7 @@ def _scan_zip_file(self, path: str, depth: int = 0) -> ScanResult:

# Use core.scan_file to scan with appropriate scanner
file_result = core.scan_file(tmp_path, self.config)

# Update locations in file results
for issue in file_result.issues:
if issue.location:
if issue.location.startswith(tmp_path):
issue.location = issue.location.replace(
tmp_path,
f"{path}:{name}",
1,
)
else:
issue.location = f"{path}:{name} {issue.location}"
else:
issue.location = f"{path}:{name}"

# Add zip entry name to details
if issue.details:
issue.details["zip_entry"] = name
else:
issue.details = {"zip_entry": name}
self._rewrite_nested_result_context(file_result, tmp_path, path, name)

result.merge(file_result)

Expand Down
Loading
Loading