NVIDIA · maxamillion · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
@@ -145,8 +145,12 @@ not readable by other local users on shared hosts. The same restriction is
 reapplied to the `<db>-wal` and `<db>-shm` sidecars (created by SQLite's
 default WAL journal mode), which mirror the same sensitive contents.
 
-Persisted state includes sandboxes, providers, SSH sessions, policy revisions,
-settings, inference configuration, and deployment records.
+Persisted state includes sandboxes, providers, provider credential refresh
+state, SSH sessions, policy revisions, settings, inference configuration, and
+deployment records. Provider refresh material is stored as a separate object
+scoped to the provider instance through `objects.scope`; the provider record
+keeps only the current injectable credential values and optional per-credential
+expiry timestamps.
 
 ### Optimistic Concurrency (CAS)
 
@@ -239,6 +243,72 @@ config path. A gateway-global policy can override sandbox-scoped policy. The
 sandbox supervisor polls for config revisions and hot-reloads dynamic policy
 when the policy engine accepts the update.
 
+Provider credential expiry is enforced during gateway-to-sandbox credential
+resolution and again by the sandbox placeholder resolver. This keeps expired
+credentials from resolving even when a running sandbox still retains
+placeholder generations from an earlier provider credential snapshot.
+
+## Inference Resolution
+
+Cluster inference routes store only `provider_name`, `model_id`, and an optional
+timeout. The gateway resolves endpoint URLs, protocols, credentials, auth
+style, and route-shaping metadata from the provider record when supervisors call
+`GetInferenceBundle`. Supported provider types for cluster inference are
+`openai`, `anthropic`, `nvidia`, and `google-vertex-ai`.
+
+The bundle carries enough information for sandbox-local routers to construct
+upstream URLs without re-deriving provider-specific routing logic. Each resolved
+route may include:
+
+| Field | Meaning |
+|---|---|
+| `model_in_path` | When true, the model identifier is part of the upstream URL path, not only the request body. |
+| `request_path_override` | Path override or suffix. With `model_in_path=false`, replaces the protocol-derived path; with `model_in_path=true`, appended after the model ID. |
+
+For standard providers, these fields stay unset and the sandbox router uses default
+protocol paths. Vertex AI is model-aware: the gateway constructs the base URL
+from provider config (`VERTEX_AI_PROJECT_ID`, `VERTEX_AI_REGION`, optional
+`VERTEX_AI_PUBLISHER`) and emits route-shaping metadata so the sandbox router
+stays provider-agnostic.
+
+Host selection follows the configured region:
+
+| Region value | Vertex host |
+|---|---|
+| `global` | `aiplatform.googleapis.com` |
+| `us` or `eu` | `aiplatform.{region}.rep.googleapis.com` |
+| Any other (e.g. `us-central1`) | `{region}-aiplatform.googleapis.com` |
+
+Route shaping by publisher:
+
+- **Anthropic (Claude)** — `model_in_path=true`, base path under
+  `publishers/anthropic/models`, protocol `anthropic_messages` only. The gateway
+  resolves `request_path_override=:rawPredict`; the sandbox router keeps
+  `:rawPredict` for buffered requests and upgrades to `:streamRawPredict` only
+  for streaming proxy calls.
+- **All other models** (Gemini, third-party, unknown) — OpenAI-compatible
+  `.../endpoints/openapi` base with `request_path_override=/chat/completions`;
+  protocol `openai_chat_completions`.
+
+Callers may supply `GOOGLE_VERTEX_AI_BASE_URL` or `VERTEX_AI_BASE_URL` only for
+non-Anthropic routes. Anthropic base URL overrides are rejected because they
+cannot safely preserve model-path shaping and `anthropic_version` body
+adaptation. Overrides still pin `request_path_override=/chat/completions` and
+must use `https` with an official Vertex AI hostname (`aiplatform.googleapis.com`,
+`aiplatform.{us,eu}.rep.googleapis.com`, or `{region}-aiplatform.googleapis.com`).
+
+Header passthrough is protocol-dependent. Vertex Claude routes forward client
+`anthropic-beta` headers; `anthropic-version` is not forwarded because the
+sandbox router injects `anthropic_version` into the request body for Vertex
+rawPredict. Non-Anthropic Vertex routes do not inherit Anthropic passthrough
+headers.
+
+For `google-vertex-ai` providers created with the CLI flag `--from-gcloud-adc`,
+the CLI calls the gateway's `ConfigureProviderRefresh` with OAuth2 refresh material
+from gcloud ADC, then `RotateProviderCredential` to mint the first access token
+before reporting success. A successful create therefore yields an immediately
+usable provider; failures roll back the provider record.
+
 ## Supervisor Relay
 
 Sandbox workloads maintain an outbound supervisor session to the gateway. This

@@ -56,7 +56,6 @@ reqwest = { workspace = true }
 # Error handling
 miette = { workspace = true }
 thiserror = { workspace = true }
-anyhow = { workspace = true }
 
 # File archiving (tar-over-SSH sync)
 tar = "0.4"

@@ -716,7 +716,7 @@ impl From<CliEditor> for openshell_cli::ssh::Editor {
 #[derive(Subcommand, Debug)]
 enum ProviderCommands {
     /// Create a provider config.
-    #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")]
+    #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials", "from_gcloud_adc"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")]
     Create {
         /// Provider name.
         #[arg(long)]
@@ -727,17 +727,23 @@ enum ProviderCommands {
         provider_type: String,
 
         /// Load provider credentials/config from existing local state.
-        #[arg(long, conflicts_with = "credentials")]
+        #[arg(long, conflicts_with_all = ["credentials", "from_gcloud_adc"])]
         from_existing: bool,
 
         /// Provider credential pair (`KEY=VALUE`) or env lookup key (`KEY`).
         #[arg(
             long = "credential",
             value_name = "KEY[=VALUE]",
-            conflicts_with = "from_existing"
+            conflicts_with_all = ["from_existing", "from_gcloud_adc"]
         )]
         credentials: Vec<String>,
 
+        /// Configure credentials from gcloud Application Default Credentials
+        /// (`~/.config/gcloud/application_default_credentials.json`).
+        /// Only valid for google-vertex-ai providers.
+        #[arg(long, group = "cred_source", conflicts_with_all = ["from_existing", "credentials"])]
+        from_gcloud_adc: bool,
+
         /// Provider config key/value pair.
         #[arg(long = "config", value_name = "KEY=VALUE")]
         config: Vec<String>,
@@ -2767,6 +2773,7 @@ async fn main() -> Result<()> {
                     provider_type,
                     from_existing,
                     credentials,
+                    from_gcloud_adc,
                     config,
                 } => {
                     run::provider_create(
@@ -2775,6 +2782,7 @@ async fn main() -> Result<()> {
                         provider_type.as_str(),
                         from_existing,
                         &credentials,
+                        from_gcloud_adc,
                         &config,
                         &tls,
                     )
@@ -3807,6 +3815,47 @@ mod tests {
         }
     }
 
+    #[test]
+    fn provider_create_rejects_from_gcloud_adc_with_from_existing() {
+        let err = Cli::try_parse_from([
+            "openshell",
+            "provider",
+            "create",
+            "--name",
+            "vertex-local",
+            "--type",
+            "google-vertex-ai",
+            "--from-existing",
+            "--from-gcloud-adc",
+        ])
+        .expect_err("clap should reject conflicting credential sources");
+
+        let msg = err.to_string();
+        assert!(msg.contains("--from-existing"));
+        assert!(msg.contains("--from-gcloud-adc"));
+    }
+
+    #[test]
+    fn provider_create_rejects_from_gcloud_adc_with_credential() {
+        let err = Cli::try_parse_from([
+            "openshell",
+            "provider",
+            "create",
+            "--name",
+            "vertex-local",
+            "--type",
+            "google-vertex-ai",
+            "--from-gcloud-adc",
+            "--credential",
+            "GOOGLE_VERTEX_AI_TOKEN=token",
+        ])
+        .expect_err("clap should reject conflicting credential sources");
+
+        let msg = err.to_string();
+        assert!(msg.contains("--credential"));
+        assert!(msg.contains("--from-gcloud-adc"));
+    }
+
     #[test]
     fn provider_refresh_commands_parse() {
         let status = Cli::try_parse_from([