22
33import base64
44import os
5+ import tempfile
56import time
67import uuid
78from typing import Any
9+ from multiprocessing import Process , Queue
810
911from fastapi import FastAPI
1012from pydantic import BaseModel , Field
@@ -295,6 +297,169 @@ def _run_winrm(*, script: str) -> str:
295297 return stdout .strip () or stderr .strip ()
296298
297299
300+ def _extract_suspicious_api_calls (report : dict [str , Any ]) -> dict [str , Any ]:
301+ suspicious_markers = {
302+ "CreateRemoteThread" ,
303+ "CreateRemoteThreadEx" ,
304+ "VirtualAllocEx" ,
305+ "WriteProcessMemory" ,
306+ "ReadProcessMemory" ,
307+ "QueueUserAPC" ,
308+ "SetWindowsHookEx" ,
309+ "NtCreateThreadEx" ,
310+ "URLDownloadToFile" ,
311+ "InternetOpenUrl" ,
312+ "WinHttpOpen" ,
313+ "WinHttpConnect" ,
314+ "WinHttpOpenRequest" ,
315+ "WinHttpSendRequest" ,
316+ "WinHttpReceiveResponse" ,
317+ "CreateService" ,
318+ "StartService" ,
319+ "RegSetValue" ,
320+ "RegSetValueEx" ,
321+ "ShellExecute" ,
322+ "ShellExecuteEx" ,
323+ "WinExec" ,
324+ "CreateProcess" ,
325+ "CreateProcessW" ,
326+ }
327+
328+ calls : list [str ] = []
329+ suspicious_hits : list [str ] = []
330+
331+ api_calls = report .get ("api_calls" )
332+ if isinstance (api_calls , list ):
333+ for c in api_calls :
334+ if not isinstance (c , dict ):
335+ continue
336+ api = c .get ("api" ) or c .get ("name" )
337+ if isinstance (api , str ) and api :
338+ calls .append (api )
339+
340+ # Some report variants nest calls under "modules" -> "api_calls"
341+ if not calls :
342+ modules = report .get ("modules" )
343+ if isinstance (modules , list ):
344+ for m in modules :
345+ if not isinstance (m , dict ):
346+ continue
347+ m_calls = m .get ("api_calls" )
348+ if isinstance (m_calls , list ):
349+ for c in m_calls :
350+ if not isinstance (c , dict ):
351+ continue
352+ api = c .get ("api" ) or c .get ("name" )
353+ if isinstance (api , str ) and api :
354+ calls .append (api )
355+
356+ for api in calls :
357+ base = api .split ("!" )[- 1 ]
358+ if base in suspicious_markers :
359+ suspicious_hits .append (base )
360+
361+ return {
362+ "api_calls_count" : len (calls ),
363+ "suspicious_api_hits" : sorted (set (suspicious_hits )),
364+ "suspicious_api_hits_count" : len (set (suspicious_hits )),
365+ }
366+
367+
368+ def _speakeasy_worker (* , sample_path : str , result_q : Queue ) -> None :
369+ try :
370+ from speakeasy import Speakeasy # type: ignore
371+
372+ se = Speakeasy ()
373+ module = se .load_module (sample_path )
374+ se .run_module (module )
375+ report = se .get_report ()
376+ if not isinstance (report , dict ):
377+ result_q .put ({"ok" : False , "reason" : "invalid speakeasy report" })
378+ return
379+ result_q .put ({"ok" : True , "report" : report })
380+ except Exception as e :
381+ result_q .put ({"ok" : False , "reason" : f"emulation failed: { e } " })
382+
383+
def _run_emulate(*, raw: bytes, filename: str, timeout_sec: int) -> dict[str, Any]:
    """Emulate a sample in a child process and derive a verdict from its APIs.

    The sample bytes are written into a fresh temporary directory, emulated by
    ``_speakeasy_worker`` in a separate process, and the resulting report is
    summarised with ``_extract_suspicious_api_calls``.

    Args:
        raw: Sample contents.
        filename: Client-supplied name; only its basename is used on disk.
        timeout_sec: Emulation budget in seconds (a minimum of 5 is enforced).

    Returns:
        On failure: ``{"ok": False, "run_id", "reason", "elapsed_sec"}``.
        On success: ``{"ok": True, "run_id", "reason": "emulate",
        "elapsed_sec", "verdict", "score", "detections", "emulation"}`` where
        the verdict is "malicious" (score 85) for two or more distinct
        suspicious APIs, "suspicious" (score 45) for exactly one or for more
        than 50 API calls, and "unknown" (score 0) otherwise.
    """
    # multiprocessing.Queue.get(timeout=...) raises queue.Empty on expiry.
    from queue import Empty

    run_id = str(uuid.uuid4())
    budget_sec = max(5, int(timeout_sec))

    with tempfile.TemporaryDirectory(prefix="provity-sandbox-") as td:
        safe_name = os.path.basename(filename)
        # "." / ".." would resolve to a directory and make open() fail.
        if safe_name in ("", ".", ".."):
            safe_name = "sample.bin"
        sample_path = os.path.join(td, safe_name)
        with open(sample_path, "wb") as f:
            f.write(raw)

        q: Queue = Queue(maxsize=1)
        p = Process(
            target=_speakeasy_worker,
            kwargs={"sample_path": sample_path, "result_q": q},
        )
        start = time.monotonic()
        p.start()

        def _failure(reason: str) -> dict[str, Any]:
            return {
                "ok": False,
                "run_id": run_id,
                "reason": reason,
                "elapsed_sec": int(time.monotonic() - start),
            }

        # Read the result BEFORE joining: a child that has queued a large
        # report cannot exit until the parent drains the pipe, so joining
        # first would stall until the timeout and misreport a success.
        deadline = start + budget_sec
        res: Any = None
        received = False
        timed_out = False
        while not received:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                timed_out = True
                break
            # Sample liveness before waiting so a result flushed just before
            # the child exited is still picked up by the get() below.
            child_done = not p.is_alive()
            try:
                res = q.get(timeout=min(0.25, remaining))
                received = True
            except Empty:
                if child_done:
                    break

        # Reap the child; escalate if it does not stop on its own.
        p.join(timeout=2 if received else 0)
        if p.is_alive():
            p.terminate()
            p.join(timeout=2)
            if p.is_alive():
                p.kill()
                p.join(timeout=2)

        if timed_out:
            return _failure("emulation timeout")
        if not received:
            return _failure("emulation produced no result")

        if not isinstance(res, dict) or res.get("ok") is not True:
            reason = res.get("reason") if isinstance(res, dict) else None
            return _failure(str(reason or "emulation failed"))

        report = res.get("report")
        if not isinstance(report, dict):
            return _failure("invalid emulation report")

        summary = _extract_suspicious_api_calls(report)
        hits = int(summary.get("suspicious_api_hits_count") or 0)
        api_calls_count = int(summary.get("api_calls_count") or 0)

        verdict = "unknown"
        score = 0
        if hits >= 2:
            verdict = "malicious"
            score = 85
        elif hits == 1 or api_calls_count > 50:
            verdict = "suspicious"
            score = 45

        return {
            "ok": True,
            "run_id": run_id,
            "reason": "emulate",
            "elapsed_sec": int(time.monotonic() - start),
            "verdict": verdict,
            "score": score,
            "detections": [],
            "emulation": {
                **summary,
                "notes": ["SANDBOX_MODE=emulate", "verdict derived from suspicious API usage"],
            },
        }
461+
462+
298463@app .post ("/scan" )
299464def scan (req : ScanRequest ) -> dict [str , Any ]:
300465 # Allow a mock mode for wiring/testing without a VM.
@@ -319,6 +484,9 @@ def scan(req: ScanRequest) -> dict[str, Any]:
319484 "notes" : ["SANDBOX_MODE=mock" ],
320485 }
321486
487+ if mode == "emulate" :
488+ return _run_emulate (raw = raw , filename = req .filename , timeout_sec = req .timeout_sec )
489+
322490 start = time .time ()
323491 try :
324492 ps = _build_powershell_script (b64 = req .file_b64 , filename = req .filename , timeout_sec = req .timeout_sec )
0 commit comments