NVIDIA
diff --git a/‎.github/workflows/docker-build.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/docker-build.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/e2e-test.yml‎
Lines changed: 9 additions & 2 deletions b/‎.github/workflows/e2e-test.yml‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎.github/workflows/release-dev.yml‎
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/release-dev.yml‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎.github/workflows/release-tag.yml‎
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/release-tag.yml‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎.github/workflows/rust-native-build.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/rust-native-build.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎architecture/security-policy.md‎
Lines changed: 59 additions & 9 deletions b/‎architecture/security-policy.md‎
Lines changed: 59 additions & 9 deletions
diff --git a/‎crates/openshell-cli/src/main.rs‎
Lines changed: 73 additions & 0 deletions b/‎crates/openshell-cli/src/main.rs‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎crates/openshell-cli/src/run.rs‎
Lines changed: 57 additions & 1 deletion b/‎crates/openshell-cli/src/run.rs‎
Lines changed: 57 additions & 1 deletion
@@ -162,7 +162,7 @@ jobs:
       cargo-version: ${{ inputs['cargo-version'] }}
       image-tag: ${{ needs.resolve.outputs.image_tag_base }}
       checkout-ref: ${{ inputs['checkout-ref'] }}
-      features: openshell-core/dev-settings
+      features: ${{ inputs.component == 'gateway' && 'openshell-core/dev-settings bundled-z3' || 'openshell-core/dev-settings' }}
       artifact-name: ${{ needs.resolve.outputs.artifact_prefix }}-linux-${{ matrix.arch }}
     secrets: inherit
 
 
@@ -89,6 +89,10 @@ jobs:
           chown openshell-test: "/run/user/$(id -u openshell-test)"
           chmod 700 "/run/user/$(id -u openshell-test)"
           chown -R openshell-test: .
+          mkdir -p /home/openshell-test/.cache/mise /home/openshell-test/.cargo /home/openshell-test/.local/state/mise
+          chown -R openshell-test: /home/openshell-test/.cache /home/openshell-test/.cargo /home/openshell-test/.local
+          install -m 0755 "$(command -v mise)" /usr/local/bin/mise
+          chmod a+x /root /root/.local /root/.local/bin
           for dir in /root/.cargo /root/.rustup /root/.local/share/mise /opt/mise; do
             [ -d "$dir" ] && chmod -R a+rX "$dir"
           done
@@ -107,9 +111,12 @@ jobs:
             runuser -u openshell-test -- env \
               XDG_RUNTIME_DIR="/run/user/${TESTUID}" \
               HOME="/home/openshell-test" \
-              PATH="/root/.cargo/bin:/opt/mise/shims:/opt/mise/bin:${PATH}" \
-              CARGO_HOME="/root/.cargo" \
+              PATH="/usr/local/bin:/root/.cargo/bin:/opt/mise/shims:/root/.local/bin:${PATH}" \
+              CARGO_HOME="/home/openshell-test/.cargo" \
               RUSTUP_HOME="/root/.rustup" \
+              MISE_DATA_DIR="/opt/mise" \
+              MISE_CACHE_DIR="/home/openshell-test/.cache/mise" \
+              MISE_STATE_DIR="/home/openshell-test/.local/state/mise" \
               OPENSHELL_SUPERVISOR_IMAGE="${OPENSHELL_SUPERVISOR_IMAGE}" \
               OPENSHELL_REGISTRY="${OPENSHELL_REGISTRY}" \
               OPENSHELL_REGISTRY_HOST="${OPENSHELL_REGISTRY_HOST}" \
 
@@ -435,7 +435,7 @@ jobs:
         run: |
           set -euo pipefail
           mise x -- rustup target add ${{ matrix.target }}
-          mise x -- cargo zigbuild --release --target ${{ matrix.zig_target }} -p openshell-server --bin openshell-gateway
+          mise x -- cargo zigbuild --release --target ${{ matrix.zig_target }} -p openshell-server --bin openshell-gateway --features bundled-z3
           mkdir -p artifacts/bin
           install -m 0755 target/${{ matrix.target }}/release/openshell-gateway artifacts/bin/openshell-gateway
 
@@ -445,6 +445,11 @@ jobs:
           OUTPUT="$(artifacts/bin/openshell-gateway --version)"
           echo "$OUTPUT"
           grep -q '^openshell-gateway ' <<<"$OUTPUT"
+          # Capture ldd output once and match against the captured text. Piping
+          # ldd straight into `grep -q` lets grep exit on the first match, which
+          # can SIGPIPE ldd; under `pipefail` the pipeline would then be
+          # non-zero and this check would pass even though libz3 is linked.
+          LDD_OUTPUT="$(ldd artifacts/bin/openshell-gateway || true)"
+          echo "$LDD_OUTPUT"
+          if grep -q 'libz3' <<<"$LDD_OUTPUT"; then
+            echo "gateway binary must not depend on shared libz3; build with bundled-z3" >&2
+            exit 1
+          fi
 
       - name: Verify glibc symbol floor
         run: tasks/scripts/verify-glibc-symbols.sh 2.31 artifacts/bin/openshell-gateway
 
@@ -469,7 +469,7 @@ jobs:
         run: |
           set -euo pipefail
           mise x -- rustup target add ${{ matrix.target }}
-          mise x -- cargo zigbuild --release --target ${{ matrix.zig_target }} -p openshell-server --bin openshell-gateway
+          mise x -- cargo zigbuild --release --target ${{ matrix.zig_target }} -p openshell-server --bin openshell-gateway --features bundled-z3
           mkdir -p artifacts/bin
           install -m 0755 target/${{ matrix.target }}/release/openshell-gateway artifacts/bin/openshell-gateway
 
@@ -479,6 +479,11 @@ jobs:
           OUTPUT="$(artifacts/bin/openshell-gateway --version)"
           echo "$OUTPUT"
           grep -q '^openshell-gateway ' <<<"$OUTPUT"
+          # Capture ldd output once and match against the captured text. Piping
+          # ldd straight into `grep -q` lets grep exit on the first match, which
+          # can SIGPIPE ldd; under `pipefail` the pipeline would then be
+          # non-zero and this check would pass even though libz3 is linked.
+          LDD_OUTPUT="$(ldd artifacts/bin/openshell-gateway || true)"
+          echo "$LDD_OUTPUT"
+          if grep -q 'libz3' <<<"$LDD_OUTPUT"; then
+            echo "gateway binary must not depend on shared libz3; build with bundled-z3" >&2
+            exit 1
+          fi
 
       - name: Verify glibc symbol floor
         run: tasks/scripts/verify-glibc-symbols.sh 2.31 artifacts/bin/openshell-gateway
 
@@ -245,6 +245,10 @@ jobs:
           # Record linkage so image runtime drift is visible in logs.
           ldd --version
           ldd "$BIN" || true
+          # Match against captured ldd output rather than an `ldd | grep -q`
+          # pipeline: grep exiting early can SIGPIPE ldd, and under `pipefail`
+          # that non-zero status would make this guard pass silently.
+          if [[ "${{ inputs.component }}" == "gateway" ]] && grep -q 'libz3' <<<"$(ldd "$BIN" || true)"; then
+            echo "gateway binary must not depend on shared libz3; enable bundled-z3 for image artifacts" >&2
+            exit 1
+          fi
 
       - name: Verify glibc symbol floor
         if: inputs.component == 'gateway'
 
@@ -89,21 +89,71 @@ because it changes the effective access model for every sandbox on the gateway.
 ## Policy Advisor
 
 The policy advisor pipeline turns observed denials into draft policy
-recommendations:
-
-1. The sandbox aggregates denied network events.
-2. A mechanistic mapper proposes minimal endpoint, binary, or rule additions.
-3. The gateway validates and stores draft recommendations.
-4. A human or admin workflow approves or rejects drafts.
-5. Approved drafts merge into the target sandbox policy.
+recommendations. There are two proposers — the sandbox-side mechanistic mapper
+and the agent itself, which authors proposals via `policy.local` — and the
+gateway is the single referee.
+When enabled, L7 `policy_denied` responses include both structured
+`next_steps` and a short `agent_guidance` string so generic agents can continue
+through the proposal loop instead of treating the denial as terminal.
+
+1. **Submit.** Both proposers POST through the same `SubmitPolicyAnalysis`
+   path. Each chunk is persisted with its `analysis_mode` for audit provenance.
+2. **Validate.** The gateway runs the prover (`openshell-prover`) on every
+   chunk regardless of mode. The prover builds a Z3 model from the merged
+   policy plus the sandbox's attached-provider credential set, then computes
+   the delta of findings between the current baseline and the merged policy.
+3. **Auto-approval gate (proposer-agnostic, opt-in).** Auto-approval fires
+   when *both* (a) the prover delta is empty (`prover: no new findings`) AND
+   (b) the `proposal_approval_mode` setting resolves to `"auto"` — a
+   gateway-scoped value wins, a sandbox-scoped value applies when no gateway
+   value is set, and the default is `"manual"`. When both (a) and (b) hold,
+   the gateway internally invokes the approve
+   path with actor identity `system:auto`. The audit event uses
+   `CONFIG:APPROVED` and carries `auto=true`, `source=<mode>`,
+   `prover_delta=empty`, and `resolved_from=<gateway|sandbox>` as unmapped
+   fields, with message text `"auto-approved: no new prover findings"` —
+   never `safe`. The opt-in gate preserves OpenShell's default-deny
+   posture: with no setting at either scope, every proposal lands in
+   `pending` for human review, even when the prover sees no findings.
+4. **Implicit supersede.** On any successful submission, the gateway scans
+   the sandbox's pending chunks for matches on `(host, port, binary)` and
+   auto-rejects the older ones with reason `"superseded by chunk X"`. This
+   gives the agent a refinement path (broad mechanistic L4 → narrow agent
+   L7) without an explicit `supersedes_chunk_id` field.
+5. **Escalation.** Anything else lands in `pending` for human review.
+
+## What the prover decides
+
+The prover answers four formal questions about each proposed policy
+change. Each "yes" answer becomes its own categorical finding — there is
+no severity grade. Any finding (of any category) blocks auto-approval.
+The categories are intended to be (mostly) mutually exclusive per
+underlying change: the gateway suppresses `capability_expansion` paths
+whose `(binary, host, port)` is also in the `credential_reach_expansion`
+delta, so a brand-new credentialed reach surfaces as one finding rather
+than one reach + N method findings.
+
+| Category | The prover detects… |
+|---|---|
+| `link_local_reach` | The proposal grants reach to a host in `169.254.0.0/16`, `fe80::/10`, or a known metadata hostname such as `metadata.google.internal`. Unconditional — cloud-metadata endpoints serve credentials regardless of sandbox state. |
+| `l7_bypass_credentialed` | The proposal lets a binary using a non-HTTP wire protocol (`git-remote-https`, `ssh`, `nc`) reach a host where a sandbox credential is in scope. The L7 proxy cannot inspect the wire protocol; the reviewer decides whether to trust the binary with the credential. |
+| `credential_reach_expansion` | A binary gained credentialed reach to a (host, port) it could not reach before. New authenticated reach is a stated intent change; the reviewer confirms the binary should authenticate to the host at all. |
+| `capability_expansion` | On a (binary, host, port) that already had credentialed reach, the policy adds a new HTTP method. The reviewer sees exactly which method was added (e.g., PUT) and decides if it's part of the agent's task. |
+
+"Credential in scope" is sandbox-coarse, not binary-fine: a credential is
+considered in scope if the sandbox has a provider attached whose
+`target_hosts` include the proposed endpoint's host, including runtime-like
+first-label wildcard coverage such as `*.github.com` covering
+`api.github.com`. v1 does not model credential scopes (read-only vs write);
+presence is enough.
 
 Proposals intentionally omit `allowed_ips`. If a proposed rule targets a host
 that resolves to a private IP, the proxy's runtime SSRF classification blocks
 the connection. The operator must then add an explicit `allowed_ips` entry to
 permit it — a two-step flow that keeps SSRF protection on by default.
 
-The advisor should propose narrow additions and preserve explicit-deny behavior.
-It is a workflow aid, not an automatic permission grant.
+The advisor proposes narrow additions and preserves explicit-deny behavior.
+Auto-approval is gated on the deterministic prover's verdict, not on human
+judgment; an LLM-based contextual reviewer is a deliberate future addition
+layered on top of the deterministic prover gate.
 
 ## Security Logging
 
 
@@ -1148,6 +1148,11 @@ enum DoctorCommands {
 }
 
 #[derive(Subcommand, Debug)]
+// `Create` carries enough optional fields to be ~3x larger than the next
+// variant. Boxing it would complicate the clap derive only to shrink an
+// enum value that is constructed once per parse, which isn't worth the
+// readability cost.
+#[allow(clippy::large_enum_variant)]
 enum SandboxCommands {
     /// Create a sandbox.
     #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")]
@@ -1256,6 +1261,18 @@ enum SandboxCommands {
         #[arg(long = "label")]
         labels: Vec<String>,
 
+        /// Approval mode for agent-authored policy proposals.
+        ///
+        /// `manual` (default): every proposal lands in the draft inbox for
+        /// human review, regardless of the prover verdict.
+        ///
+        /// `auto`: proposals whose prover delta is empty are approved
+        /// automatically; proposals with findings still require human
+        /// approval. Auto mode is an explicit opt-in — `OpenShell`'s
+        /// default-deny posture is preserved unless you choose otherwise.
+        #[arg(long, value_parser = ["manual", "auto"], default_value = "manual")]
+        approval_mode: String,
+
         /// Command to run after "--" (defaults to an interactive shell).
         #[arg(last = true, allow_hyphen_values = true)]
         command: Vec<String>,
@@ -2526,6 +2543,7 @@ async fn main() -> Result<()> {
                     auto_providers,
                     no_auto_providers,
                     labels,
+                    approval_mode,
                     command,
                 } => {
                     // Resolve --tty / --no-tty into an Option<bool> override.
@@ -2594,6 +2612,7 @@ async fn main() -> Result<()> {
                         tty_override,
                         auto_providers_override,
                         &labels_map,
+                        &approval_mode,
                         &tls,
                     ))
                     .await?;
@@ -4134,6 +4153,60 @@ mod tests {
         }
     }
 
+    /// `sandbox create` defaults `--approval-mode` to `"manual"`. clap fills
+    /// in the default, so the parsed value is always an explicit string and
+    /// never empty. Note that `sandbox_create` only writes the sandbox-scoped
+    /// `proposal_approval_mode` setting for non-`manual` values; `manual` is
+    /// treated as the implicit default and is not sent.
+    #[test]
+    fn sandbox_create_approval_mode_defaults_to_manual() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create"])
+            .expect("sandbox create with no flags should parse");
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { approval_mode, .. }),
+                ..
+            }) => {
+                assert_eq!(approval_mode, "manual");
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    /// Passing `--approval-mode auto` yields `"auto"` as the parsed value.
+    #[test]
+    fn sandbox_create_approval_mode_accepts_auto() {
+        let argv = ["openshell", "sandbox", "create", "--approval-mode", "auto"];
+        let cli = Cli::try_parse_from(argv).expect("--approval-mode auto should parse");
+        // Pull the parsed mode out first, then assert on it separately.
+        let parsed_mode = match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { approval_mode, .. }),
+                ..
+            }) => approval_mode,
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        };
+        assert_eq!(parsed_mode, "auto");
+    }
+
+    /// An unrecognized `--approval-mode` value fails clap's value parser, so
+    /// the CLI cannot forward a mode that is absent from the accepted list
+    /// (`manual`, `auto`).
+    #[test]
+    fn sandbox_create_approval_mode_rejects_unknown_value() {
+        let argv = ["openshell", "sandbox", "create", "--approval-mode", "auto_on_low_risk"];
+        let parse_outcome = Cli::try_parse_from(argv);
+        assert!(
+            parse_outcome.is_err(),
+            "--approval-mode auto_on_low_risk should be rejected until added to the value parser"
+        );
+    }
+
     #[test]
     fn sandbox_create_resource_flags_parse() {
         let cli = Cli::try_parse_from([
 
@@ -1693,6 +1693,7 @@ pub async fn sandbox_create(
     tty_override: Option<bool>,
     auto_providers_override: Option<bool>,
     labels: &HashMap<String, String>,
+    approval_mode: &str,
     tls: &TlsOptions,
 ) -> Result<()> {
     if editor.is_some() && !command.is_empty() {
@@ -1806,6 +1807,38 @@ pub async fn sandbox_create(
         let _ = save_last_sandbox(gateway, &sandbox_name);
     }
 
+    // Persist `--approval-mode` as a sandbox-scoped setting now that the
+    // sandbox exists. `manual` is the implicit default (no setting needed);
+    // any other value is written so it survives sandbox restarts and can be
+    // flipped later via `openshell settings set <name> proposal_approval_mode`.
+    // If the write fails the sandbox still runs in default `manual` — surface
+    // the recovery command so the user can retry.
+    if approval_mode != "manual" {
+        let setting = parse_cli_setting_value(settings::PROPOSAL_APPROVAL_MODE_KEY, approval_mode)?;
+        match client
+            .update_config(UpdateConfigRequest {
+                name: sandbox_name.clone(),
+                policy: None,
+                setting_key: settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(),
+                setting_value: Some(setting),
+                delete_setting: false,
+                global: false,
+                merge_operations: vec![],
+                expected_resource_version: 0,
+            })
+            .await
+        {
+            Ok(_) => {}
+            Err(status) => {
+                eprintln!(
+                    "{} failed to set approval mode '{approval_mode}' on sandbox '{sandbox_name}': {}\n  retry with: openshell settings set {sandbox_name} proposal_approval_mode {approval_mode}",
+                    "warning:".yellow().bold(),
+                    status.message(),
+                );
+            }
+        }
+    }
+
     // Set up display — interactive terminals get a step-based checklist with
     // spinners; non-interactive (pipes / CI) get timestamped lines.
     let mut display = if interactive {
@@ -5519,7 +5552,23 @@ fn parse_cli_setting_value(key: &str, raw_value: &str) -> Result<SettingValue> {
     })?;
 
     let value = match setting.kind {
-        SettingValueKind::String => setting_value::Value::StringValue(raw_value.to_string()),
+        SettingValueKind::String => {
+            // Reject typos client-side so `openshell settings set ...
+            // proposal_approval_mode autom` errors immediately instead of
+            // round-tripping through the server. The server enforces the
+            // same check independently for non-CLI callers.
+            setting
+                .validate_string_value(raw_value)
+                .map_err(|allowed| {
+                    miette::miette!(
+                        "invalid value '{}' for key '{}'; expected one of: {}",
+                        raw_value,
+                        key,
+                        allowed.join(", ")
+                    )
+                })?;
+            setting_value::Value::StringValue(raw_value.to_string())
+        }
         SettingValueKind::Int => {
             let parsed = raw_value.trim().parse::<i64>().map_err(|_| {
                 miette::miette!(
@@ -6739,6 +6788,13 @@ pub async fn sandbox_draft_get(
                 chunk.security_notes.yellow()
             );
         }
+        if !chunk.validation_result.is_empty() {
+            println!(
+                "  {} {}",
+                "Validation:".dimmed(),
+                chunk.validation_result.cyan()
+            );
+        }
 
         if let Some(ref rule) = chunk.proposed_rule {
             println!("  {} {}", "Endpoints:".dimmed(), format_endpoints(rule));