@@ -2160,6 +2160,207 @@ def _raise_memory_error(*args: object, **kwargs: object) -> object:
21602160 assert not any (issue .severity in {IssueSeverity .WARNING , IssueSeverity .CRITICAL } for issue in result .issues )
21612161
21622162
def test_scan_dill_memory_error_without_dill_globals_not_downgraded(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Dangerous-looking dill prefixes must not qualify for the INFO downgrade."""
    # A .dill file whose prefix references dill.loads looks dangerous, even
    # though the parsed-globals set (patched below) comes back empty.
    model_path = tmp_path / "suspicious.dill"
    model_path.write_bytes(b"\x80\x04cdill\nloads\nq\x00." + b"dill" + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    # Force the opcode parser to hit a memory limit, and report no globals.
    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: set(),
    )

    result = PickleScanner().scan(str(model_path))

    # No INFO-level resource-limit downgrade may appear for this file.
    assert all(check.name != "Pickle Parse Resource Limit" for check in result.checks)
    format_checks = [check for check in result.checks if check.name == "Pickle Format Validation"]
    assert len(format_checks) == 1
    (format_check,) = format_checks
    assert format_check.status == CheckStatus.FAILED
    assert format_check.severity == IssueSeverity.WARNING
    assert format_check.details["exception_type"] == "MemoryError"
2188+
2189+
def test_scan_joblib_memory_error_requires_joblib_globals(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Only .joblib files with parsed framework globals should downgrade to INFO."""
    model_path = tmp_path / "legitimate.joblib"
    model_path.write_bytes(b"\x80\x04cjoblib.numpy_pickle\nNumpyArrayWrapper\nq\x00." + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    # Simulate opcode-parser memory exhaustion while the globals extractor
    # reports a genuine joblib framework global.
    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: {
            ("joblib.numpy_pickle", "NumpyArrayWrapper", "GLOBAL")
        },
    )

    result = PickleScanner().scan(str(model_path))

    limit_checks = [check for check in result.checks if check.name == "Pickle Parse Resource Limit"]
    assert len(limit_checks) == 1
    (limit_check,) = limit_checks
    assert limit_check.status == CheckStatus.FAILED
    assert limit_check.severity == IssueSeverity.INFO
    assert limit_check.details["reason"] == "memory_limit_on_legitimate_model"
    assert limit_check.details["exception_type"] == "MemoryError"
    # The parse failure must not surface as a WARNING/CRITICAL parse issue.
    escalated = [
        issue
        for issue in result.issues
        if issue.severity in {IssueSeverity.WARNING, IssueSeverity.CRITICAL}
        and "Unable to parse pickle file" in issue.message
    ]
    assert not escalated
2221+
2222+
def test_scan_joblib_memory_error_without_joblib_globals_not_downgraded(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Marker bytes alone must not qualify a .joblib file for INFO downgrade."""
    # Embed "joblib"/"sklearn" marker bytes but provide no parsed globals.
    model_path = tmp_path / "suspicious.joblib"
    model_path.write_bytes(b"\x80\x04joblibsklearn" + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: set(),
    )

    result = PickleScanner().scan(str(model_path))

    # Without framework globals the downgrade path must stay closed.
    assert all(check.name != "Pickle Parse Resource Limit" for check in result.checks)
    format_checks = [check for check in result.checks if check.name == "Pickle Format Validation"]
    assert len(format_checks) == 1
    (format_check,) = format_checks
    assert format_check.status == CheckStatus.FAILED
    assert format_check.severity == IssueSeverity.WARNING
    assert format_check.details["exception_type"] == "MemoryError"
2248+
2249+
def test_scan_dill_memory_error_with_dill_globals_is_informational(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Legitimate dill globals should still allow the scanner-limitation downgrade."""
    model_path = tmp_path / "legitimate.dill"
    model_path.write_bytes(b"\x80\x04" + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    # Parser hits a memory limit, but the globals extractor sees only dill's
    # own machinery — a scanner limitation, not a threat.
    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: {("dill", "dump", "GLOBAL")},
    )

    result = PickleScanner().scan(str(model_path))

    limit_checks = [check for check in result.checks if check.name == "Pickle Parse Resource Limit"]
    assert len(limit_checks) == 1
    (limit_check,) = limit_checks
    assert limit_check.status == CheckStatus.FAILED
    assert limit_check.severity == IssueSeverity.INFO
    assert limit_check.details["reason"] == "memory_limit_on_legitimate_model"
    assert limit_check.details["exception_type"] == "MemoryError"
    assert all(
        issue.severity not in {IssueSeverity.WARNING, IssueSeverity.CRITICAL} for issue in result.issues
    )
2277+
2278+
def test_scan_plain_dill_memory_error_without_globals_is_informational(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Plain-object dill files should keep the scanner-limitation downgrade path."""
    # A genuine dill payload of a plain list — no suspicious content at all.
    model_path = tmp_path / "plain.dill"
    model_path.write_bytes(dill.dumps([1, 2, 3]))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: set(),
    )

    result = PickleScanner().scan(str(model_path))

    limit_checks = [check for check in result.checks if check.name == "Pickle Parse Resource Limit"]
    assert len(limit_checks) == 1
    (limit_check,) = limit_checks
    assert limit_check.status == CheckStatus.FAILED
    assert limit_check.severity == IssueSeverity.INFO
    assert limit_check.details["reason"] == "memory_limit_on_legitimate_model"
    assert limit_check.details["exception_type"] == "MemoryError"
    assert all(
        issue.severity not in {IssueSeverity.WARNING, IssueSeverity.CRITICAL} for issue in result.issues
    )
2306+
2307+
def test_scan_dill_memory_error_with_internal_dill_globals_is_informational(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Internal dill globals should qualify for the scanner-limitation downgrade."""
    model_path = tmp_path / "legitimate.dill"
    model_path.write_bytes(b"\x80\x04" + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    # dill's private implementation module `_dill` must be treated the same
    # as the public `dill` module for the downgrade decision.
    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: {("_dill", "dump", "GLOBAL")},
    )

    result = PickleScanner().scan(str(model_path))

    limit_checks = [check for check in result.checks if check.name == "Pickle Parse Resource Limit"]
    assert len(limit_checks) == 1
    (limit_check,) = limit_checks
    assert limit_check.status == CheckStatus.FAILED
    assert limit_check.severity == IssueSeverity.INFO
    assert limit_check.details["reason"] == "memory_limit_on_legitimate_model"
    assert limit_check.details["exception_type"] == "MemoryError"
    assert all(
        issue.severity not in {IssueSeverity.WARNING, IssueSeverity.CRITICAL} for issue in result.issues
    )
2335+
2336+
def test_scan_joblib_memory_error_with_dangerous_prefix_not_downgraded(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Marker bytes must not hide a dangerous pickle prefix in .joblib files."""
    # A builtins.eval GLOBAL prefix followed by framework marker bytes: the
    # markers alone must not mask the dangerous prefix.
    model_path = tmp_path / "suspicious.joblib"
    model_path.write_bytes(b"\x80\x02cbuiltins\neval\nq\x00." + b"joblibsklearnnumpy" + b"\x00" * (256 * 1024))

    def _simulate_memory_limit(*args: object, **kwargs: object) -> object:
        raise MemoryError("simulated parser memory limit")

    monkeypatch.setattr("modelaudit.scanners.pickle_scanner.pickletools.genops", _simulate_memory_limit)
    monkeypatch.setattr(
        PickleScanner,
        "_extract_globals_advanced",
        lambda self, file_obj, multiple_pickles=True, scan_start_time=None: set(),
    )

    result = PickleScanner().scan(str(model_path))

    assert all(check.name != "Pickle Parse Resource Limit" for check in result.checks)
    format_checks = [check for check in result.checks if check.name == "Pickle Format Validation"]
    assert len(format_checks) == 1
    (format_check,) = format_checks
    assert format_check.status == CheckStatus.FAILED
    assert format_check.severity == IssueSeverity.WARNING
    assert format_check.details["exception_type"] == "MemoryError"
2362+
2363+
21632364def test_scan_memory_error_with_dangerous_globals_not_downgraded (
21642365 tmp_path : Path , monkeypatch : pytest .MonkeyPatch
21652366) -> None :
@@ -2179,7 +2380,7 @@ def _raise_memory_error(*args: object, **kwargs: object) -> object:
21792380 monkeypatch .setattr (
21802381 PickleScanner ,
21812382 "_extract_globals_advanced" ,
2182- lambda self , file_obj , multiple_pickles = True : {("builtins" , "eval" , "GLOBAL" )},
2383+ lambda self , file_obj , multiple_pickles = True , scan_start_time = None : {("builtins" , "eval" , "GLOBAL" )},
21832384 )
21842385
21852386 result = PickleScanner ().scan (str (model_path ))
0 commit comments