usestrix · Ahmex000 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/README.md b/README.md
@@ -177,6 +177,39 @@ strix --target api.your-app.com --instruction "Focus on business logic flaws and
 strix --target api.your-app.com --instruction-file ./instruction.md
 ```
 
+### Resuming Interrupted Scans
+
+Long scans can be interrupted by Ctrl+C, crashes, power loss, or Docker issues.
+Strix automatically saves a checkpoint after every agent iteration so you can resume exactly where you left off.
+
+```bash
+# First run — starts fresh, saves checkpoint automatically
+strix --target https://example.com --run-name my-scan
+
+# If interrupted, run the same command again — auto-resumes from checkpoint
+strix --target https://example.com --run-name my-scan
+
+# Explicit resume flag (same effect, makes intent clear)
+strix --target https://example.com --run-name my-scan --resume
+
+# Force a completely fresh scan (deletes existing checkpoint)
+strix --target https://example.com --run-name my-scan --new
+```
+
+**What is restored on resume:**
+- Full LLM conversation history (the agent remembers everything it did)
+- Discovered vulnerabilities and findings
+- Iteration counter — the agent continues from exactly where it stopped
+- The Docker sandbox itself is not restored: a fresh one is always created (old containers may be gone)
+
+**Checkpoint location:** `strix_runs/<run-name>/checkpoint.json`
+Checkpoints are deleted automatically when a scan completes successfully.
+
+> **Tip:** `--run-name` is optional. If omitted, Strix auto-generates a name like `example-com_a1b2`.
+> Auto-resume only works when you re-use the same `--run-name`.
+
+---
+
 ### Headless Mode
 
 Run Strix programmatically without interactive UI using the `-n/--non-interactive` flag—perfect for servers and automated jobs. The CLI prints real-time vulnerability findings, and the final report before exiting. Exits with non-zero code when vulnerabilities are found.

diff --git a/VANGUARD9_SCAN_PROMPTS.md b/VANGUARD9_SCAN_PROMPTS.md
diff --git a/strix/agents/StrixAgent/strix_agent.py b/strix/agents/StrixAgent/strix_agent.py
@@ -5,7 +5,16 @@
 
 
 class StrixAgent(BaseAgent):
-    max_iterations = 300
+    # Default iterations per scan mode.  Deep mode gets a large budget so the
+    # phase gate system can run 4 full phases without hitting the iteration cap.
+    max_iterations = 10000
+
+    # Map scan-mode names to iteration budgets and phase counts.
+    _SCAN_MODE_CONFIGS: dict[str, dict] = {
+        "quick":    {"max_iterations": 1000,  "max_phases": 2},
+        "standard": {"max_iterations": 5000,  "max_phases": 3},
+        "deep":     {"max_iterations": 10000, "max_phases": 4},
+    }
 
     def __init__(self, config: dict[str, Any]):
         default_skills = []
@@ -16,8 +25,19 @@ def __init__(self, config: dict[str, Any]):
 
         self.default_llm_config = LLMConfig(skills=default_skills)
 
+        # Apply scan-mode budget before super().__init__ reads self.max_iterations
+        scan_mode = config.get("scan_mode", "deep")
+        mode_cfg = self._SCAN_MODE_CONFIGS.get(scan_mode, self._SCAN_MODE_CONFIGS["deep"])
+        if "max_iterations" not in config:
+            self.max_iterations = mode_cfg["max_iterations"]
+
         super().__init__(config)
 
+        # Configure phase count on the state after BaseAgent sets it up.
+        # Only root agents use phases (sub-agents complete on first finish).
+        if self.state.parent_id is None:
+            self.state.max_phases = mode_cfg["max_phases"]
+
     async def execute_scan(self, scan_config: dict[str, Any]) -> dict[str, Any]:  # noqa: PLR0912
         user_instructions = scan_config.get("user_instructions", "")
         targets = scan_config.get("targets", [])

diff --git a/strix/agents/StrixAgent/system_prompt.jinja b/strix/agents/StrixAgent/system_prompt.jinja
diff --git a/strix/agents/base_agent.py b/strix/agents/base_agent.py
@@ -78,6 +78,14 @@ def __init__(self, config: dict[str, Any]):
             self.state.waiting_timeout = 0
         self.llm = LLM(self.llm_config, agent_name=self.agent_name)
 
+        # Added for Resume Feature - optional, zero impact when absent
+        self._checkpoint_manager = config.get("checkpoint_manager")
+        self._scan_config: dict[str, Any] = config.get("scan_config", {})
+        self._target_hash: str = config.get("target_hash", "")
+        # True when this agent (root OR sub) is being restored from a checkpoint.
+        # Prevents _initialize_sandbox_and_state from adding a duplicate task message.
+        self._is_resumed: bool = bool(config.get("is_resumed", False))
+
         with contextlib.suppress(Exception):
             self.llm.set_agent_identity(self.state.agent_name, self.state.agent_id)
         self._current_task: asyncio.Task[Any] | None = None
@@ -182,30 +190,56 @@ async def agent_loop(self, task: str) -> dict[str, Any]:  # noqa: PLR0912, PLR09
 
             self.state.increment_iteration()
 
+            # ------------------------------------------------------------------
+            # Coverage heartbeat — every 30 iterations inject a status pulse
+            # showing the agent how far it is and encouraging continued testing.
+            # Only injected for root agents (parent_id is None).
+            # ------------------------------------------------------------------
+            if (
+                self.state.parent_id is None
+                and self.state.iteration > 0
+                and self.state.iteration % 30 == 0
+            ):
+                self._inject_coverage_heartbeat(tracer)
+
+            # ------------------------------------------------------------------
+            # Stagnation detection — if the last 15 tool calls are all the same
+            # tool, the agent is spinning.  Kick it in a new direction.
+            # Only for root agents; only if we have enough history.
+            # ------------------------------------------------------------------
+            if (
+                self.state.parent_id is None
+                and len(self.state.actions_taken) >= 15
+            ):
+                self._check_and_break_stagnation()
+
+            # ------------------------------------------------------------------
+            # Approaching-max warning — pushed to 97% so it fires very late.
+            # Framed as "you still have time" rather than "finish now".
+            # ------------------------------------------------------------------
             if (
                 self.state.is_approaching_max_iterations()
                 and not self.state.max_iterations_warning_sent
             ):
                 self.state.max_iterations_warning_sent = True
                 remaining = self.state.max_iterations - self.state.iteration
+                current_phase = getattr(self.state, "current_phase", 0)
+                max_phases = getattr(self.state, "max_phases", 4)
                 warning_msg = (
-                    f"URGENT: You are approaching the maximum iteration limit. "
-                    f"Current: {self.state.iteration}/{self.state.max_iterations} "
+                    f"NOTICE: You are at iteration {self.state.iteration}/{self.state.max_iterations} "
                     f"({remaining} iterations remaining). "
-                    f"Please prioritize completing your required task(s) and calling "
-                    f"the appropriate finish tool (finish_scan for root agent, "
-                    f"agent_finish for sub-agents) as soon as possible."
+                    f"Current phase: {current_phase + 1}/{max_phases}. "
+                    f"Use remaining iterations to complete all untested endpoints and UI sections. "
+                    f"Only call finish_scan when you have completed Phase {max_phases}/{max_phases} "
+                    f"and have tested everything. Do NOT rush to finish — exhaustive coverage matters."
                 )
                 self.state.add_message("user", warning_msg)
 
             if self.state.iteration == self.state.max_iterations - 3:
                 final_warning_msg = (
-                    "CRITICAL: You have only 3 iterations left! "
-                    "Your next message MUST be the tool call to the appropriate "
-                    "finish tool: finish_scan if you are the root agent, or "
-                    "agent_finish if you are a sub-agent. "
-                    "No other actions should be taken except finishing your work "
-                    "immediately."
+                    "CRITICAL: Only 3 iterations left. Call your finish tool NOW "  # any agent gets here
+                    "(finish_scan if you are the root agent, agent_finish if you are a sub-agent) "
+                    "with your complete findings, including all vulnerabilities discovered across all phases."
                 )
                 self.state.add_message("user", final_warning_msg)
 
@@ -215,6 +249,18 @@ async def agent_loop(self, task: str) -> dict[str, Any]:  # noqa: PLR0912, PLR09
                 should_finish = await iteration_task
                 self._current_task = None
 
+                # Added for Resume Feature — save checkpoint after every successful
+                # iteration.  Non-fatal: any error is caught inside save().
+                # Only root agents checkpoint (parent_id is None).
+                if self._checkpoint_manager and self.state.parent_id is None:
+                    self._checkpoint_manager.save(
+                        self.state,
+                        tracer,
+                        self._scan_config,
+                        self._target_hash,
+                        self.max_iterations,
+                    )
+
                 if should_finish is None and self.interactive:
                     await self._enter_waiting_state(tracer, text_response=True)
                     continue
@@ -224,6 +270,9 @@ async def agent_loop(self, task: str) -> dict[str, Any]:  # noqa: PLR0912, PLR09
                         self.state.set_completed({"success": True})
                         if tracer:
                             tracer.update_agent_status(self.state.agent_id, "completed")
+                        # Resume feature: on clean completion of the root agent, remove its checkpoint.
+                        if self._checkpoint_manager and self.state.parent_id is None:
+                            self._checkpoint_manager.delete()
                         return self.state.final_result or {}
                     await self._enter_waiting_state(tracer, task_completed=True)
                     continue
@@ -362,7 +411,11 @@ async def _initialize_sandbox_and_state(self, task: str) -> None:
         if not self.state.task:
             self.state.task = task
 
-        self.state.add_message("user", task)
+        # Skip adding the task message when this agent (root or sub) is being
+        # restored from a checkpoint — the full message history including the
+        # task is already present.  Fresh agents always get the task message.
+        if not self._is_resumed:
+            self.state.add_message("user", task)
 
     async def _process_iteration(self, tracer: Optional["Tracer"]) -> bool | None:
         final_response = None
@@ -611,6 +664,83 @@ async def _handle_iteration_error(
             tracer.update_agent_status(self.state.agent_id, "error")
         return True
 
+    def _inject_coverage_heartbeat(self, tracer: Optional["Tracer"]) -> None:
+        """Append a progress "heartbeat" user message to the agent state.
+
+        The message reports phase, iteration and (when a tracer is given)
+        vulnerability / tool-execution counts.  Best-effort: any exception
+        raised while building or adding the message is suppressed.
+        """
+        with contextlib.suppress(Exception):  # heartbeat is non-fatal
+            state = self.state
+            # Both counts stay at 0 when no tracer is attached.
+            found = len(getattr(tracer, "vulnerability_reports", [])) if tracer else 0
+            executed = len(getattr(tracer, "tool_executions", {})) if tracer else 0
+
+            # The phase index is reported 1-based in the message.
+            phase_no = getattr(state, "current_phase", 0) + 1
+            phase_total = getattr(state, "max_phases", 4)
+            in_phase = state.iteration - getattr(state, "phase_iteration_start", 0)
+            left = state.max_iterations - state.iteration
+
+            # Adjacent literals are concatenated into a single message string.
+            message = (
+                f"[SCAN HEARTBEAT — iteration {state.iteration}]\n"
+                f"Phase: {phase_no}/{phase_total}\n"
+                f"Iterations in this phase: {in_phase}\n"
+                f"Iterations remaining: {left}\n"
+                f"Vulnerabilities reported so far: {found}\n"
+                f"Total tool executions: {executed}\n\n"
+                "STATUS CHECK — before continuing, answer these mentally:\n"
+                "• Have you tested EVERY discovered endpoint for auth bypass?\n"
+                "• Have you tested EVERY form input for injection?\n"
+                "• Have you opened and tested EVERY UI section/page/modal?\n"
+                "• Have you tested privilege escalation across all user roles?\n"
+                "If the answer to ANY of the above is 'no', keep testing. "
+                "Do NOT call finish_scan until this phase's objectives are complete."
+            )
+            state.add_message("user", message)
+
+    def _check_and_break_stagnation(self) -> None:
+        """Redirect the agent when one tool dominates its recent actions.
+
+        Fires when >= 80% of the named tools in the last 15 actions (at least 10
+        named) are identical, then stays quiet for the next 15 iterations.
+        """
+        try:
+            now = self.state.iteration
+            if now - getattr(self, "_stagnation_redirect_at", float("-inf")) < 15:
+                return  # this window was already reported; do not repeat the redirect
+            tool_names = []
+            for entry in self.state.actions_taken[-15:]:
+                action = entry.get("action") if isinstance(entry, dict) else None
+                if not isinstance(action, dict):
+                    continue  # a malformed entry must not disable the whole check
+                func = action.get("function")
+                nested = func.get("name", "") if isinstance(func, dict) else ""
+                if name := action.get("name") or action.get("tool") or nested:
+                    tool_names.append(name)
+            if len(tool_names) < 10:
+                return
+            most_common = max(set(tool_names), key=tool_names.count)
+            hits = tool_names.count(most_common)
+            if hits / len(tool_names) >= 0.8:
+                redirect = (
+                    f"[STAGNATION DETECTED] You have called '{most_common}' {hits} times in the last "
+                    f"{len(tool_names)} actions. You are stuck in a loop.\n\n"
+                    "STOP what you are doing and switch to a completely different attack surface:\n"
+                    "• If you were fuzzing parameters → switch to UI navigation and click new pages\n"
+                    "• If you were browsing the UI → switch to API endpoint testing\n"
+                    "• If you were testing one endpoint → move to a different endpoint\n"
+                    "• If you were running automated tools → try manual testing instead\n\n"
+                    "Pick a new area you have NOT yet tested and start there immediately."
+                )
+                self.state.add_message("user", redirect)
+                self._stagnation_redirect_at = now  # start the cooldown
+        except Exception:  # noqa: BLE001
+            pass  # stagnation check is non-fatal
+
     def cancel_current_execution(self) -> None:
         self._force_stop = True
         if self._current_task and not self._current_task.done():

diff --git a/strix/agents/state.py b/strix/agents/state.py
@@ -29,6 +29,12 @@ class AgentState(BaseModel):
     final_result: dict[str, Any] | None = None
     max_iterations_warning_sent: bool = False
 
+    # Deep Phases system — finish_scan is intercepted per-phase and only
+    # completes on the final phase.  0-indexed: phases 0..max_phases-1.
+    current_phase: int = 0
+    max_phases: int = 4
+    phase_iteration_start: int = 0
+
     messages: list[dict[str, Any]] = Field(default_factory=list)
     context: dict[str, Any] = Field(default_factory=dict)
 
@@ -113,7 +119,7 @@ def resume_from_waiting(self, new_task: str | None = None) -> None:
     def has_reached_max_iterations(self) -> bool:
         return self.iteration >= self.max_iterations
 
-    def is_approaching_max_iterations(self, threshold: float = 0.85) -> bool:
+    def is_approaching_max_iterations(self, threshold: float = 0.97) -> bool:
         return self.iteration >= int(self.max_iterations * threshold)
 
     def has_waiting_timeout(self) -> bool:

diff --git a/strix/config/config.py b/strix/config/config.py
@@ -139,17 +139,14 @@ def apply_saved(cls, force: bool = False) -> dict[str, str]:
                 env_vars.pop(var_name, None)
             if cls._config_file_override is None:
                 cls.save({"env": env_vars})
-        if cls._llm_env_changed(env_vars):
-            for var_name in cls._llm_env_vars():
-                env_vars.pop(var_name, None)
-            if cls._config_file_override is None:
-                cls.save({"env": env_vars})
         applied = {}
 
         for var_name, var_value in env_vars.items():
-            if var_name in cls.tracked_vars() and (force or var_name not in os.environ):
-                os.environ[var_name] = var_value
-                applied[var_name] = var_value
+            if var_name in cls.tracked_vars():
+                # Shell env wins unless --force or the var is not set in shell.
+                if force or var_name not in os.environ:
+                    os.environ[var_name] = var_value
+                    applied[var_name] = var_value
 
         return applied