Skip to content

Commit 916c8b7

Browse files
committed
Add emulate mode for docker-only dynamic verdict
1 parent 89b317a commit 916c8b7

2 files changed

Lines changed: 169 additions & 0 deletions

File tree

sandbox_controller/app.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
import base64
44
import os
5+
import tempfile
56
import time
67
import uuid
78
from typing import Any
9+
from multiprocessing import Process, Queue
810

911
from fastapi import FastAPI
1012
from pydantic import BaseModel, Field
@@ -295,6 +297,169 @@ def _run_winrm(*, script: str) -> str:
295297
return stdout.strip() or stderr.strip()
296298

297299

300+
# Canonical (suffix-less) names of Windows APIs treated as suspicious.
# ANSI/Unicode variants ("...A" / "...W") are folded onto these names by
# _match_suspicious_api, so they are not listed separately.
_SUSPICIOUS_API_MARKERS = frozenset(
    {
        "CreateRemoteThread",
        "CreateRemoteThreadEx",
        "VirtualAllocEx",
        "WriteProcessMemory",
        "ReadProcessMemory",
        "QueueUserAPC",
        "SetWindowsHookEx",
        "NtCreateThreadEx",
        "URLDownloadToFile",
        "InternetOpenUrl",
        "WinHttpOpen",
        "WinHttpConnect",
        "WinHttpOpenRequest",
        "WinHttpSendRequest",
        "WinHttpReceiveResponse",
        "CreateService",
        "StartService",
        "RegSetValue",
        "RegSetValueEx",
        "ShellExecute",
        "ShellExecuteEx",
        "WinExec",
        "CreateProcess",
    }
)


def _collect_api_names(entries: Any) -> list[str]:
    """Return the API name of every well-formed call record in *entries*."""
    if not isinstance(entries, list):
        return []
    names: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        api = entry.get("api") or entry.get("name") or entry.get("api_name")
        if isinstance(api, str) and api:
            names.append(api)
    return names


def _match_suspicious_api(api: str) -> str:
    """Return the canonical suspicious marker for *api*, or "" if none matches."""
    # Accept both "module!Func" and "MODULE.Func" spellings.
    base = api.replace("!", ".").rsplit(".", 1)[-1]
    if base in _SUSPICIOUS_API_MARKERS:
        return base
    # Fold ANSI/Unicode variants (CreateProcessA / CreateProcessW) together.
    if base[-1:] in ("A", "W") and base[:-1] in _SUSPICIOUS_API_MARKERS:
        return base[:-1]
    return ""


def _extract_suspicious_api_calls(report: dict[str, Any]) -> dict[str, Any]:
    """Summarise the API calls in an emulation report.

    Call records are looked up, in order, under:

    1. ``report["api_calls"]``
    2. ``report["modules"][*]["api_calls"]``
    3. ``report["entry_points"][*]["apis"]`` (the layout described in the
       Speakeasy README -- NOTE(review): confirm against the pinned version)

    A later location is only consulted when the earlier ones yielded no
    calls. Each record's name is read from its ``api``, ``name`` or
    ``api_name`` key; malformed records are ignored.

    Args:
        report: Parsed emulation report. A non-dict value is treated as empty.

    Returns:
        A dict with ``api_calls_count`` (number of named call records),
        ``suspicious_api_hits`` (sorted distinct canonical marker names) and
        ``suspicious_api_hits_count`` (length of that list).
    """
    if not isinstance(report, dict):
        report = {}

    calls = _collect_api_names(report.get("api_calls"))

    # Some report variants nest calls under "modules" -> "api_calls".
    if not calls:
        modules = report.get("modules")
        if isinstance(modules, list):
            for module in modules:
                if isinstance(module, dict):
                    calls.extend(_collect_api_names(module.get("api_calls")))

    # Others nest them under "entry_points" -> "apis".
    if not calls:
        entry_points = report.get("entry_points")
        if isinstance(entry_points, list):
            for entry_point in entry_points:
                if isinstance(entry_point, dict):
                    calls.extend(_collect_api_names(entry_point.get("apis")))

    hits = sorted({hit for hit in map(_match_suspicious_api, calls) if hit})

    return {
        "api_calls_count": len(calls),
        "suspicious_api_hits": hits,
        "suspicious_api_hits_count": len(hits),
    }
366+
367+
368+
def _speakeasy_worker(*, sample_path: str, result_q: Queue) -> None:
    """Emulate *sample_path* with Speakeasy and post one result dict on *result_q*.

    The posted dict is ``{"ok": True, "report": <dict>}`` when emulation
    yields a dict report, and ``{"ok": False, "reason": <str>}`` otherwise
    (non-dict report, or any exception raised along the way, including a
    failed import of ``speakeasy``).
    """
    try:
        # Imported here so a missing package is reported through the queue.
        from speakeasy import Speakeasy  # type: ignore

        emulator = Speakeasy()
        emulator.run_module(emulator.load_module(sample_path))
        report = emulator.get_report()

        outcome = (
            {"ok": True, "report": report}
            if isinstance(report, dict)
            else {"ok": False, "reason": "invalid speakeasy report"}
        )
        result_q.put(outcome)
    except Exception as exc:
        result_q.put({"ok": False, "reason": f"emulation failed: {exc}"})
382+
383+
384+
def _run_emulate(*, raw: bytes, filename: str, timeout_sec: int) -> dict[str, Any]:
    """Emulate a sample in a child process and derive a verdict from its API usage.

    The sample bytes are written into a temporary directory,
    ``_speakeasy_worker`` is run on that file in a separate process, and the
    resulting report is summarised with ``_extract_suspicious_api_calls``.

    Args:
        raw: Sample contents.
        filename: Client-supplied name; only its final path component is used
            for the on-disk sample, falling back to ``sample.bin``.
        timeout_sec: Emulation budget in seconds; values below 5 are raised to 5.

    Returns:
        A dict that always carries ``ok``, ``run_id``, ``reason`` and
        ``elapsed_sec``. On success it additionally carries ``verdict``
        (``"unknown"``, ``"suspicious"`` or ``"malicious"``), ``score``,
        ``detections`` and an ``emulation`` summary.
    """
    # multiprocessing queues raise queue.Empty when get() times out.
    from queue import Empty

    run_id = str(uuid.uuid4())
    budget = max(5, int(timeout_sec))

    with tempfile.TemporaryDirectory(prefix="provity-sandbox-") as td:
        # Keep only the last path component (either separator style) and
        # reject names that cannot be written as a regular file.
        safe_name = os.path.basename(str(filename or "").replace("\\", "/"))
        safe_name = safe_name.replace("\x00", "")
        if safe_name in ("", ".", ".."):
            safe_name = "sample.bin"
        sample_path = os.path.join(td, safe_name)
        with open(sample_path, "wb") as f:
            f.write(raw)

        q: Queue = Queue(maxsize=1)
        p = Process(target=_speakeasy_worker, kwargs={"sample_path": sample_path, "result_q": q})
        start = time.monotonic()
        p.start()

        res: Any = None
        got_result = False
        timed_out = False
        try:
            # Drain the queue BEFORE joining: a child that has queued a large
            # report cannot exit until the parent reads it, so join-then-get
            # would report a finished run as a timeout.
            deadline = start + budget
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    timed_out = True
                    break
                try:
                    res = q.get(timeout=min(0.2, remaining))
                    got_result = True
                    break
                except Empty:
                    if p.is_alive():
                        continue
                    # Worker is gone; anything it posted was flushed before
                    # it exited, so one last non-blocking read is enough.
                    try:
                        res = q.get_nowait()
                        got_result = True
                    except Empty:
                        pass
                    break
        finally:
            # Reap the worker while the sample file still exists, escalating
            # to kill() if terminate() is not enough.
            p.join(timeout=2 if got_result else 0)
            if p.is_alive():
                p.terminate()
                p.join(timeout=2)
                if p.is_alive():
                    p.kill()
                    p.join(timeout=2)
            q.close()

    def _failure(reason: str) -> dict[str, Any]:
        return {
            "ok": False,
            "run_id": run_id,
            "reason": reason,
            "elapsed_sec": int(time.monotonic() - start),
        }

    if timed_out:
        return _failure("emulation timeout")

    if not got_result:
        # Worker exited (e.g. crashed) without posting anything.
        return _failure("emulation produced no result")

    if not isinstance(res, dict) or res.get("ok") is not True:
        reason = res.get("reason") if isinstance(res, dict) else None
        return _failure(str(reason or "emulation failed"))

    report = res.get("report")
    if not isinstance(report, dict):
        return _failure("invalid emulation report")

    summary = _extract_suspicious_api_calls(report)
    hits = int(summary.get("suspicious_api_hits_count") or 0)
    api_calls_count = int(summary.get("api_calls_count") or 0)

    # Heuristic verdict: two or more distinct suspicious APIs is treated as
    # malicious; a single hit or a high call volume as suspicious.
    verdict = "unknown"
    score = 0
    if hits >= 2:
        verdict = "malicious"
        score = 85
    elif hits == 1 or api_calls_count > 50:
        verdict = "suspicious"
        score = 45

    return {
        "ok": True,
        "run_id": run_id,
        "reason": "emulate",
        "elapsed_sec": int(time.monotonic() - start),
        "verdict": verdict,
        "score": score,
        "detections": [],
        "emulation": {
            **summary,
            "notes": ["SANDBOX_MODE=emulate", "verdict derived from suspicious API usage"],
        },
    }
461+
462+
298463
@app.post("/scan")
299464
def scan(req: ScanRequest) -> dict[str, Any]:
300465
# Allow a mock mode for wiring/testing without a VM.
@@ -319,6 +484,9 @@ def scan(req: ScanRequest) -> dict[str, Any]:
319484
"notes": ["SANDBOX_MODE=mock"],
320485
}
321486

487+
if mode == "emulate":
488+
return _run_emulate(raw=raw, filename=req.filename, timeout_sec=req.timeout_sec)
489+
322490
start = time.time()
323491
try:
324492
ps = _build_powershell_script(b64=req.file_b64, filename=req.filename, timeout_sec=req.timeout_sec)

sandbox_controller/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ fastapi>=0.110
22
uvicorn[standard]>=0.27
33
pywinrm>=0.4.3
44
pydantic>=2.6
5+
speakeasy-emulator

0 commit comments

Comments
 (0)