fix(proxy): return 403 for non-CONNECT requests, add deny logging, and revise error messages (#79)

johntmyers · web-flow · commit c06117eb1524 · 2026-03-03T13:06:17.000-08:00
Closes NVIDIA#42

- Change non-CONNECT proxy response from 405 to 403 to align with how CONNECT denials are surfaced
- Add structured deny logging for non-CONNECT requests, with hostname extraction from absolute-form URIs
- Revise 7 user-facing error messages across proxy.rs and dev-sandbox-policy.rego to follow a consistent principle: generic policy-deny messages for non-inference requests, descriptive messages for recognized inference endpoints
- Update E2E test assertion to match new error message
- Update architecture docs to reflect new behavior

Co-authored-by: John Myers <johntmyers@users.noreply.github.com>
diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md
@@ -142,7 +142,7 @@ File mode does not spawn a refresh task -- routes are static for the sandbox lif
 Both route source modes degrade gracefully when routes are unavailable:
 
 - **Empty routes in file mode**: If `routes: []` in the file, `build_inference_context()` returns `None` and inference routing is disabled. This is confirmed by the `build_inference_context_empty_route_file_returns_none` test.
-- **Empty routes in cluster mode**: If the initial cluster bundle has zero routes, the sandbox still creates `InferenceContext` with an empty cache and starts background refresh. Intercepted inference requests return `503` (`{"error": "no inference routes configured"}`) until a later refresh provides routes.
+- **Empty routes in cluster mode**: If the initial cluster bundle has zero routes, the sandbox still creates `InferenceContext` with an empty cache and starts background refresh. Intercepted inference requests return `503` (`{"error": "inference endpoint detected without matching inference route"}`) until a later refresh provides routes.
 - **Cluster mode errors**: `PermissionDenied` or `NotFound` errors (detected via string matching on the gRPC error message) indicate no inference policy is configured for this sandbox. The sandbox logs this and proceeds without inference routing. Other gRPC errors also result in graceful degradation: inference routing is disabled, but the sandbox starts normally.
 - **File mode errors**: Parse failures or missing files in standalone mode are fatal -- `build_inference_context()` propagates the error and the sandbox refuses to start. Only an empty-but-valid routes list is gracefully disabled.
 
@@ -253,11 +253,11 @@ Built at sandbox startup in `crates/navigator-sandbox/src/lib.rs` by `build_infe
    - If `detect_inference_pattern()` matches:
      - Strip credential and framing/hop-by-hop headers (`Authorization`, `x-api-key`, `host`, `content-length`, and all hop-by-hop headers)
      - Acquire a read lock on the route cache
-     - If routes are empty, return `503` JSON: `{"error": "no inference routes configured"}`
+      - If routes are empty, return `503` JSON: `{"error": "inference endpoint detected without matching inference route"}`
      - Call `Router::proxy_with_candidates()` to select a route and forward the request locally
      - Return the backend's response to the client (response hop-by-hop and framing headers are stripped before formatting)
    - If no pattern matches:
-     - Return a `403` JSON error: `{"error": "only inference API calls are allowed on this connection"}`
+      - Return a `403` JSON error: `{"error": "connection not allowed by policy"}`
    - If the router call fails:
      - Map the `RouterError` to an HTTP status via `router_error_to_http()` and return a JSON error
 
@@ -634,8 +634,8 @@ The inference routing migration is a breaking protocol change. The `ProxyInferen
 | `InferenceContext` missing | Error: "InspectForInference requires inference context (router + routes)" |
 | TLS state not configured | Error: "InspectForInference requires TLS state for client termination" |
 | Request exceeds 10 MiB buffer | `413` Payload Too Large response to client |
-| Non-inference request on intercepted connection | `403` JSON error: `{"error": "only inference API calls are allowed on this connection"}` |
-| No routes in cache | `503` JSON error: `{"error": "no inference routes configured"}` |
+| Non-inference request on intercepted connection | `403` JSON error: `{"error": "connection not allowed by policy"}` |
+| No routes in cache | `503` JSON error: `{"error": "inference endpoint detected without matching inference route"}` |
 | Router returns `NoCompatibleRoute` | `400` JSON error |
 | Backend timeout or connection failure | `503` JSON error |
 | Backend protocol error or internal error | `502` JSON error |
diff --git a/architecture/sandbox.md b/architecture/sandbox.md
@@ -270,7 +270,7 @@ Uses the same input JSON shape as `evaluate_network()`. Evaluates the `data.navi
 
 - `"allow"` -- endpoint + binary explicitly matched in a network policy
 - `"inspect_for_inference"` -- no policy match but `inference.allowed_routes` is non-empty
-- `"deny"` -- no matching policy and no inference routing configured
+- `"deny"` -- network connections not allowed by policy
 
 The Rego logic:
 1. If `network_policy_for_request` exists (endpoint + binary match), return `"allow"`
@@ -582,7 +582,7 @@ Startup steps:
 
 ### Request parsing
 
-The proxy reads up to 8192 bytes (`MAX_HEADER_BYTES`) looking for `\r\n\r\n`. It validates the method is `CONNECT` (returning 405 for anything else) and parses the `host:port` target.
+The proxy reads up to 8192 bytes (`MAX_HEADER_BYTES`) looking for `\r\n\r\n`. It validates that the method is `CONNECT`; any other method is rejected with `403 Forbidden` and a structured deny log. It then parses the `host:port` target.
 
 ### Control-plane bypass
 
@@ -632,7 +632,7 @@ The `action` field carries the matched policy name (for `Allow` and `InspectForI
 
 Every CONNECT request produces an `info!()` log line with all context: source/destination addresses, binary path, PID, ancestor chain, cmdline paths, action (`allow`, `inspect_for_inference`, or `deny`), engine, matched policy, and deny reason.
 
-For `InspectForInference` connections, the initial log records `action=inspect_for_inference`. If the subsequent inference interception fails (TLS handshake failure, client disconnect, non-inference request, payload too large, missing context, or I/O error), a second `CONNECT` log is emitted with `action=deny` and a `reason` describing the failure. Successfully routed connections produce no second log. This two-log pattern gives operators visibility into why an `inspect_for_inference` decision ultimately resulted in a denial.
+For `InspectForInference` connections, the initial log records `action=inspect_for_inference`. If the subsequent inference interception fails (TLS handshake failure, client disconnect, request not allowed by policy, payload too large, missing context, or I/O error), a second `CONNECT` log is emitted with `action=deny` and a `reason` describing the failure. Successfully routed connections produce no second log. This two-log pattern gives operators visibility into why an `inspect_for_inference` decision ultimately resulted in a denial.
 
 ### SSRF protection (internal IP rejection)
 
@@ -651,7 +651,7 @@ enum InferenceOutcome {
 }
 ```
 
-Every exit path in `handle_inference_interception` produces an explicit outcome. The `Denied` variant carries a human-readable reason describing the failure. At the call site in `handle_tcp_connection`, `Denied` outcomes (and `Err` results) trigger a structured CONNECT deny log with the same fields as the initial decision log (see [Unified logging](#unified-logging)). The `route_inference_request` helper returns `Result<bool>` where `true` means the request was routed and `false` means it was a non-inference request that was denied inline.
+Every exit path in `handle_inference_interception` produces an explicit outcome. The `Denied` variant carries a human-readable reason describing the failure. At the call site in `handle_tcp_connection`, `Denied` outcomes (and `Err` results) trigger a structured CONNECT deny log with the same fields as the initial decision log (see [Unified logging](#unified-logging)). The `route_inference_request` helper returns `Result<bool>` where `true` means the request was routed and `false` means the request was not allowed by policy and was denied inline.
 
 The interception steps:
 
@@ -677,10 +677,10 @@ The interception steps:
 6. **Response handling**:
    - On success: the router's response (status code, headers, body) is formatted as an HTTP/1.1 response and sent back to the client after stripping response framing/hop-by-hop headers (`transfer-encoding`, `content-length`, `connection`, etc.)
    - On router failure: the error is mapped to an HTTP status code via `router_error_to_http()` and returned as a JSON error body (see error table below)
-   - Empty route cache: returns `503` JSON error (`{"error": "no inference routes configured"}`)
-   - Non-inference requests: returns `403 Forbidden` with a JSON error body (`{"error": "only inference API calls are allowed on this connection"}`)
+   - Empty route cache: returns `503` JSON error (`{"error": "inference endpoint detected without matching inference route"}`)
+   - Non-inference requests: returns `403 Forbidden` with a JSON error body (`{"error": "connection not allowed by policy"}`)
 
-7. **Connection lifecycle**: The handler loops to process multiple HTTP requests on the same connection (HTTP keep-alive). The loop ends when the client closes the connection or an unrecoverable error occurs. Once at least one request has been successfully routed (`routed_any` flag), subsequent failures (client disconnect, I/O error, payload too large, non-inference request) are treated as clean termination (`InferenceOutcome::Routed`) rather than denials.
+7. **Connection lifecycle**: The handler loops to process multiple HTTP requests on the same connection (HTTP keep-alive). The loop ends when the client closes the connection or an unrecoverable error occurs. Once at least one request has been successfully routed (`routed_any` flag), subsequent failures (client disconnect, I/O error, payload too large, request not allowed by policy) are treated as clean termination (`InferenceOutcome::Routed`) rather than denials.
 
 ### Router error to HTTP mapping
 
@@ -1118,8 +1118,8 @@ The sandbox uses `miette` for error reporting and `thiserror` for typed errors.
 | Inference interception: no compatible route | 400 Bad Request with JSON error body |
 | Inference interception: backend timeout/unavailable | 503 Service Unavailable with JSON error body |
 | Inference interception: backend protocol error | 502 Bad Gateway with JSON error body |
-| Inference interception: non-inference request (no prior routing) | 403 Forbidden with JSON error body + structured CONNECT deny log |
-| Inference interception: non-inference request (after prior routing) | 403 Forbidden with JSON error body (no deny log, connection counts as routed) |
+| Inference interception: request not allowed by policy (no prior routing) | 403 Forbidden with JSON error body + structured CONNECT deny log |
+| Inference interception: request not allowed by policy (after prior routing) | 403 Forbidden with JSON error body (no deny log, connection counts as routed) |
 | Log push gRPC connection fails | Task prints to stderr and exits; logs not pushed for sandbox lifetime |
 | Log push mpsc channel full (1024 lines) | Event dropped silently; logging never blocks |
 | Log push gRPC stream breaks | Push loop exits, flushes remaining batch |
diff --git a/crates/navigator-sandbox/src/proxy.rs b/crates/navigator-sandbox/src/proxy.rs
@@ -229,7 +229,13 @@ async fn handle_tcp_connection(
     let target = parts.next().unwrap_or("");
 
     if method != "CONNECT" {
-        respond(&mut client, b"HTTP/1.1 405 Method Not Allowed\r\n\r\n").await?;
+        let target_host = extract_host_from_uri(target);
+        info!(
+            method = %method,
+            target_host = %target_host,
+            "Non-CONNECT proxy request denied"
+        );
+        respond(&mut client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?;
         return Ok(());
     }
 
@@ -748,7 +754,7 @@ async fn handle_inference_interception(
 
     let Some(ctx) = inference_ctx else {
         return Ok(InferenceOutcome::Denied {
-            reason: "missing inference context".to_string(),
+            reason: "connection not allowed by policy".to_string(),
         });
     };
 
@@ -805,7 +811,7 @@ async fn handle_inference_interception(
                     routed_any = true;
                 } else if !routed_any {
                     return Ok(InferenceOutcome::Denied {
-                        reason: "non-inference request".to_string(),
+                        reason: "connection not allowed by policy".to_string(),
                     });
                 }
 
@@ -861,7 +867,7 @@ async fn route_inference_request(
         let routes = ctx.routes.read().await;
 
         if routes.is_empty() {
-            let body = serde_json::json!({"error": "no inference routes configured"});
+            let body = serde_json::json!({"error": "inference endpoint detected without matching inference route"});
             let body_bytes = body.to_string();
             let response = format_http_response(
                 503,
@@ -891,7 +897,7 @@ async fn route_inference_request(
                 write_all(tls_client, &response).await?;
             }
             Err(e) => {
-                warn!(error = %e, "Local inference routing failed");
+                warn!(error = %e, "inference endpoint detected but upstream service failed");
                 let (status, msg) = router_error_to_http(&e);
                 let body = serde_json::json!({"error": msg});
                 let body_bytes = body.to_string();
@@ -909,10 +915,9 @@ async fn route_inference_request(
         info!(
             method = %request.method,
             path = %request.path,
-            "Non-inference request denied (inference-only mode)"
+            "connection not allowed by policy"
         );
-        let body =
-            serde_json::json!({"error": "only inference API calls are allowed on this connection"});
+        let body = serde_json::json!({"error": "connection not allowed by policy"});
         let body_bytes = body.to_string();
         let response = format_http_response(
             403,
@@ -1226,6 +1231,33 @@ fn query_allowed_ips(
     }
 }
 
+/// Extract the hostname from an absolute-form URI used in plain HTTP proxy requests.
+///
+/// For example, `"http://example.com/path"` and `"http://example.com:8080/path"` both
+/// yield `"example.com"`; a bracketed IPv6 literal such as `"http://[::1]:8080/"` yields
+/// `"[::1]"` (brackets kept, port dropped).
+///
+/// Parsing rules:
+/// - `://` is honored as a scheme separator only when the text before it is a valid
+///   RFC 3986 scheme, so an origin-form target like `/x?u=http://h` is not misread as
+///   pointing at `h`.
+/// - The authority ends at the first `/`, `?`, or `#`.
+/// - Any `userinfo@` prefix is discarded, so credentials are not reported as the host.
+///
+/// Input without a scheme is treated as a bare authority (`"example.com:80"` yields
+/// `"example.com"`). Returns `"unknown"` when the resulting host is empty.
+fn extract_host_from_uri(uri: &str) -> String {
+    // RFC 3986 scheme: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+    let is_scheme = |s: &str| {
+        s.starts_with(|c: char| c.is_ascii_alphabetic())
+            && s.chars().all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
+    };
+    let after_scheme = match uri.split_once("://") {
+        Some((scheme, rest)) if is_scheme(scheme) => rest,
+        _ => uri,
+    };
+    // The authority (`[userinfo@]host[:port]`) stops at the first path, query, or
+    // fragment delimiter.
+    let authority = after_scheme
+        .split(|c: char| matches!(c, '/' | '?' | '#'))
+        .next()
+        .unwrap_or(after_scheme);
+    // Split on the last '@' so everything belonging to userinfo is dropped.
+    let host_port = authority.rsplit_once('@').map_or(authority, |(_, hp)| hp);
+    // Strip port if present (handle IPv6 bracket notation)
+    let host = if host_port.starts_with('[') {
+        // IPv6: [::1]:port -- keep everything through the closing bracket.
+        host_port.find(']').map_or(host_port, |end| &host_port[..=end])
+    } else {
+        host_port.split(':').next().unwrap_or(host_port)
+    };
+    if host.is_empty() {
+        "unknown".to_string()
+    } else {
+        host.to_string()
+    }
+}
+
 fn parse_target(target: &str) -> Result<(String, u16)> {
     let (host, port_str) = target
         .split_once(':')
@@ -1698,4 +1730,52 @@ mod tests {
             "expected 'not in allowed_ips' in error: {err}"
         );
     }
+
+    // --- extract_host_from_uri tests ---
+
+    /// A plain `http://` URI with a path yields only the hostname.
+    #[test]
+    fn test_extract_host_from_http_uri() {
+        let host = extract_host_from_uri("http://example.com/path");
+        assert_eq!(host, "example.com");
+    }
+
+    /// The scheme is not restricted to `http`; an `https://` URI is handled the same way.
+    #[test]
+    fn test_extract_host_from_https_uri() {
+        let host = extract_host_from_uri("https://api.openai.com/v1/chat/completions");
+        assert_eq!(host, "api.openai.com");
+    }
+
+    /// An explicit `:port` suffix is dropped from the returned host.
+    #[test]
+    fn test_extract_host_from_uri_with_port() {
+        let host = extract_host_from_uri("http://example.com:8080/path");
+        assert_eq!(host, "example.com");
+    }
+
+    /// A bracketed IPv6 literal keeps its brackets while the port is removed.
+    #[test]
+    fn test_extract_host_from_uri_ipv6() {
+        let host = extract_host_from_uri("http://[::1]:8080/path");
+        assert_eq!(host, "[::1]");
+    }
+
+    /// A URI that ends right after the authority (no `/path`) still yields the host.
+    #[test]
+    fn test_extract_host_from_uri_no_path() {
+        let host = extract_host_from_uri("http://example.com");
+        assert_eq!(host, "example.com");
+    }
+
+    /// Empty input falls back to the `"unknown"` placeholder.
+    #[test]
+    fn test_extract_host_from_uri_empty() {
+        let host = extract_host_from_uri("");
+        assert_eq!(host, "unknown");
+    }
+
+    /// Input with no scheme and no delimiters is treated as a bare authority and
+    /// returned unchanged.
+    #[test]
+    fn test_extract_host_from_uri_malformed() {
+        // Pin the exact value: the function never returns an empty string (it falls
+        // back to "unknown"), so a bare non-empty check could not fail.
+        assert_eq!(extract_host_from_uri("not-a-uri"), "not-a-uri");
+    }
 }
diff --git a/dev-sandbox-policy.rego b/dev-sandbox-policy.rego
@@ -54,7 +54,7 @@ deny_reason := reason if {
 	reason := concat("; ", all_reasons)
 }
 
-deny_reason := "no matching policy and no inference routing configured" if {
+deny_reason := "network connections not allowed by policy" if {
 	input.network
 	input.exec
 	not network_policy_for_request
diff --git a/e2e/python/test_inference_routing.py b/e2e/python/test_inference_routing.py
@@ -125,7 +125,10 @@ def call_chat_completions() -> str:
             assert initial.exit_code == 0, f"stderr: {initial.stderr}"
             initial_output = initial.stdout.strip()
             assert initial_output.startswith("http_error_503"), initial_output
-            assert "no inference routes configured" in initial_output, initial_output
+            assert (
+                "inference endpoint detected without matching inference route"
+                in initial_output
+            ), initial_output
 
             inference_client.create(
                 name=route_name,

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ deny_reason := reason if {`
`54`	`54`	`reason := concat("; ", all_reasons)`
`55`	`55`	`}`
`56`	`56`
`57`		`-deny_reason := "no matching policy and no inference routing configured" if {`
	`57`	`+deny_reason := "network connections not allowed by policy" if {`
`58`	`58`	`input.network`
`59`	`59`	`input.exec`
`60`	`60`	`not network_policy_for_request`