Skip to content

Commit 821b7a8

Browse files
authored
feat(inference): verify endpoints before saving routes (#291)
Closes #273. Verify inference endpoints synchronously on the server during set/update, expose a --no-verify escape hatch in the CLI and Python helper, and return actionable failures when validation does not pass.
1 parent 808f4ae commit 821b7a8

15 files changed

Lines changed: 810 additions & 118 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

architecture/inference-routing.md

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ The gateway implements the `Inference` gRPC service defined in `proto/inference.
6666
1. Validates that both fields are non-empty.
6767
2. Fetches the named provider record from the store.
6868
3. Validates the provider by resolving its route (checking that the provider type is supported and has a usable API key).
69-
4. Builds a managed route spec that stores only `provider_name` and `model_id`. The spec intentionally leaves `base_url`, `api_key`, and `protocols` empty -- these are resolved dynamically at bundle time from the provider record.
70-
5. Upserts the route with name `inference.local`. Version starts at 1 and increments monotonically on each update.
69+
4. By default, performs a lightweight provider-shaped probe against the resolved upstream endpoint (for example, a tiny chat/messages request with `max_tokens: 1`) to confirm the endpoint is reachable and accepts the expected auth/request shape. `--no-verify` disables this probe when the endpoint is not up yet.
70+
5. Builds a managed route spec that stores only `provider_name` and `model_id`. The spec intentionally leaves `base_url`, `api_key`, and `protocols` empty -- these are resolved dynamically at bundle time from the provider record.
71+
6. Upserts the route with name `inference.local`. Version starts at 1 and increments monotonically on each update.
7172

7273
`GetClusterInference` returns `provider_name`, `model_id`, and `version` for the managed route. Returns `NOT_FOUND` if cluster inference is not configured.
7374

@@ -91,7 +92,7 @@ File: `proto/inference.proto`
9192

9293
Key messages:
9394

94-
- `SetClusterInferenceRequest` -- `provider_name` + `model_id`
95+
- `SetClusterInferenceRequest` -- `provider_name` + `model_id` + optional `no_verify` override, with verification enabled by default
9596
- `SetClusterInferenceResponse` -- `provider_name` + `model_id` + `version`
9697
- `GetInferenceBundleResponse` -- `repeated ResolvedRoute routes` + `revision` + `generated_at_ms`
9798
- `ResolvedRoute` -- `name`, `base_url`, `protocols`, `api_key`, `model_id`, `provider_type`
@@ -296,13 +297,15 @@ The system route is stored as a separate `InferenceRoute` record in the gateway
296297

297298
Cluster inference commands:
298299

299-
- `openshell cluster inference set --provider <name> --model <id>` -- configures user-facing cluster inference
300-
- `openshell cluster inference set --system --provider <name> --model <id>` -- configures system inference
301-
- `openshell cluster inference get` -- displays both user and system inference configuration
302-
- `openshell cluster inference get --system` -- displays only the system inference configuration
300+
- `openshell inference set --provider <name> --model <id>` -- configures user-facing cluster inference
301+
- `openshell inference set --system --provider <name> --model <id>` -- configures system inference
302+
- `openshell inference get` -- displays both user and system inference configuration
303+
- `openshell inference get --system` -- displays only the system inference configuration
303304

304305
The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`).
305306

307+
Inference writes verify by default. `--no-verify` is the explicit opt-out for endpoints that are not up yet.
308+
306309
## Provider Discovery
307310

308311
Files:

crates/openshell-bootstrap/src/docker.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -526,9 +526,13 @@ pub async fn ensure_container(
526526
port_bindings: Some(port_bindings),
527527
binds: Some(vec![format!("{}:/var/lib/rancher/k3s", volume_name(name))]),
528528
network_mode: Some(network_name(name)),
529-
// Add host.docker.internal mapping for DNS resolution
530-
// This allows the entrypoint script to configure CoreDNS to use the host gateway
531-
extra_hosts: Some(vec!["host.docker.internal:host-gateway".to_string()]),
529+
// Add host gateway aliases for DNS resolution.
530+
// This allows both the entrypoint script and the running gateway
531+
// process to reach services on the Docker host.
532+
extra_hosts: Some(vec![
533+
"host.docker.internal:host-gateway".to_string(),
534+
"host.openshell.internal:host-gateway".to_string(),
535+
]),
532536
..Default::default()
533537
};
534538

crates/openshell-cli/src/main.rs

Lines changed: 55 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -906,9 +906,6 @@ enum InferenceCommands {
906906
system: bool,
907907

908908
/// Skip endpoint verification before saving the route.
909-
///
910-
/// Accepted now so scripts can opt out explicitly ahead of a future
911-
/// default switch to verification.
912909
#[arg(long)]
913910
no_verify: bool,
914911
},
@@ -929,9 +926,6 @@ enum InferenceCommands {
929926
system: bool,
930927

931928
/// Skip endpoint verification before saving the route.
932-
///
933-
/// Accepted now so scripts can opt out explicitly ahead of a future
934-
/// default switch to verification.
935929
#[arg(long)]
936930
no_verify: bool,
937931
},
@@ -1810,24 +1804,27 @@ async fn main() -> Result<()> {
18101804
provider,
18111805
model,
18121806
system,
1813-
no_verify: _,
1807+
no_verify,
18141808
} => {
18151809
let route_name = if system { "sandbox-system" } else { "" };
1816-
run::gateway_inference_set(endpoint, &provider, &model, route_name, &tls)
1817-
.await?;
1810+
run::gateway_inference_set(
1811+
endpoint, &provider, &model, route_name, no_verify, &tls,
1812+
)
1813+
.await?;
18181814
}
18191815
InferenceCommands::Update {
18201816
provider,
18211817
model,
18221818
system,
1823-
no_verify: _,
1819+
no_verify,
18241820
} => {
18251821
let route_name = if system { "sandbox-system" } else { "" };
18261822
run::gateway_inference_update(
18271823
endpoint,
18281824
provider.as_deref(),
18291825
model.as_deref(),
18301826
route_name,
1827+
no_verify,
18311828
&tls,
18321829
)
18331830
.await?;
@@ -2559,6 +2556,54 @@ mod tests {
25592556
));
25602557
}
25612558

2559+
#[test]
2560+
fn inference_set_accepts_no_verify_flag() {
2561+
let cli = Cli::try_parse_from([
2562+
"openshell",
2563+
"inference",
2564+
"set",
2565+
"--provider",
2566+
"openai-dev",
2567+
"--model",
2568+
"gpt-4.1",
2569+
"--no-verify",
2570+
])
2571+
.expect("inference set should parse --no-verify");
2572+
2573+
assert!(matches!(
2574+
cli.command,
2575+
Some(Commands::Inference {
2576+
command: Some(InferenceCommands::Set {
2577+
no_verify: true,
2578+
..
2579+
})
2580+
})
2581+
));
2582+
}
2583+
2584+
#[test]
2585+
fn inference_update_accepts_no_verify_flag() {
2586+
let cli = Cli::try_parse_from([
2587+
"openshell",
2588+
"inference",
2589+
"update",
2590+
"--provider",
2591+
"openai-dev",
2592+
"--no-verify",
2593+
])
2594+
.expect("inference update should parse --no-verify");
2595+
2596+
assert!(matches!(
2597+
cli.command,
2598+
Some(Commands::Inference {
2599+
command: Some(InferenceCommands::Update {
2600+
no_verify: true,
2601+
..
2602+
})
2603+
})
2604+
));
2605+
}
2606+
25622607
#[test]
25632608
fn completion_script_uses_openshell_command_name() {
25642609
let script = normalize_completion_script(
@@ -2747,52 +2792,4 @@ mod tests {
27472792
other => panic!("expected SshProxy, got: {other:?}"),
27482793
}
27492794
}
2750-
2751-
#[test]
2752-
fn inference_set_accepts_no_verify_flag() {
2753-
let cli = Cli::try_parse_from([
2754-
"openshell",
2755-
"inference",
2756-
"set",
2757-
"--provider",
2758-
"openai-dev",
2759-
"--model",
2760-
"gpt-4.1",
2761-
"--no-verify",
2762-
])
2763-
.expect("inference set should parse --no-verify");
2764-
2765-
assert!(matches!(
2766-
cli.command,
2767-
Some(Commands::Inference {
2768-
command: Some(InferenceCommands::Set {
2769-
no_verify: true,
2770-
..
2771-
})
2772-
})
2773-
));
2774-
}
2775-
2776-
#[test]
2777-
fn inference_update_accepts_no_verify_flag() {
2778-
let cli = Cli::try_parse_from([
2779-
"openshell",
2780-
"inference",
2781-
"update",
2782-
"--provider",
2783-
"openai-dev",
2784-
"--no-verify",
2785-
])
2786-
.expect("inference update should parse --no-verify");
2787-
2788-
assert!(matches!(
2789-
cli.command,
2790-
Some(Commands::Inference {
2791-
command: Some(InferenceCommands::Update {
2792-
no_verify: true,
2793-
..
2794-
})
2795-
})
2796-
));
2797-
}
27982795
}

crates/openshell-cli/src/run.rs

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use std::io::{IsTerminal, Write};
4040
use std::path::{Path, PathBuf};
4141
use std::process::Command;
4242
use std::time::{Duration, Instant};
43-
use tonic::Code;
43+
use tonic::{Code, Status};
4444

4545
// Re-export SSH functions for backward compatibility
4646
pub use crate::ssh::{Editor, print_ssh_config};
@@ -3390,17 +3390,38 @@ pub async fn gateway_inference_set(
33903390
provider_name: &str,
33913391
model_id: &str,
33923392
route_name: &str,
3393+
no_verify: bool,
33933394
tls: &TlsOptions,
33943395
) -> Result<()> {
3396+
let progress = if std::io::stdout().is_terminal() {
3397+
let spinner = ProgressBar::new_spinner();
3398+
spinner.set_style(
3399+
ProgressStyle::with_template("{spinner:.cyan} {msg} ({elapsed})")
3400+
.unwrap_or_else(|_| ProgressStyle::default_spinner()),
3401+
);
3402+
spinner.set_message("Configuring inference...");
3403+
spinner.enable_steady_tick(Duration::from_millis(120));
3404+
Some(spinner)
3405+
} else {
3406+
None
3407+
};
3408+
33953409
let mut client = grpc_inference_client(server, tls).await?;
33963410
let response = client
33973411
.set_cluster_inference(SetClusterInferenceRequest {
33983412
provider_name: provider_name.to_string(),
33993413
model_id: model_id.to_string(),
34003414
route_name: route_name.to_string(),
3415+
verify: false,
3416+
no_verify,
34013417
})
3402-
.await
3403-
.into_diagnostic()?;
3418+
.await;
3419+
3420+
if let Some(progress) = &progress {
3421+
progress.finish_and_clear();
3422+
}
3423+
3424+
let response = response.map_err(format_inference_status)?;
34043425

34053426
let configured = response.into_inner();
34063427
let label = if configured.route_name == "sandbox-system" {
@@ -3414,6 +3435,12 @@ pub async fn gateway_inference_set(
34143435
println!(" {} {}", "Provider:".dimmed(), configured.provider_name);
34153436
println!(" {} {}", "Model:".dimmed(), configured.model_id);
34163437
println!(" {} {}", "Version:".dimmed(), configured.version);
3438+
if configured.validation_performed {
3439+
println!(" {}", "Validated Endpoints:".dimmed());
3440+
for endpoint in configured.validated_endpoints {
3441+
println!(" - {} ({})", endpoint.url, endpoint.protocol);
3442+
}
3443+
}
34173444
Ok(())
34183445
}
34193446

@@ -3422,6 +3449,7 @@ pub async fn gateway_inference_update(
34223449
provider_name: Option<&str>,
34233450
model_id: Option<&str>,
34243451
route_name: &str,
3452+
no_verify: bool,
34253453
tls: &TlsOptions,
34263454
) -> Result<()> {
34273455
if provider_name.is_none() && model_id.is_none() {
@@ -3444,14 +3472,34 @@ pub async fn gateway_inference_update(
34443472
let provider = provider_name.unwrap_or(&current.provider_name);
34453473
let model = model_id.unwrap_or(&current.model_id);
34463474

3475+
let progress = if std::io::stdout().is_terminal() {
3476+
let spinner = ProgressBar::new_spinner();
3477+
spinner.set_style(
3478+
ProgressStyle::with_template("{spinner:.cyan} {msg} ({elapsed})")
3479+
.unwrap_or_else(|_| ProgressStyle::default_spinner()),
3480+
);
3481+
spinner.set_message("Configuring inference...");
3482+
spinner.enable_steady_tick(Duration::from_millis(120));
3483+
Some(spinner)
3484+
} else {
3485+
None
3486+
};
3487+
34473488
let response = client
34483489
.set_cluster_inference(SetClusterInferenceRequest {
34493490
provider_name: provider.to_string(),
34503491
model_id: model.to_string(),
34513492
route_name: route_name.to_string(),
3493+
verify: false,
3494+
no_verify,
34523495
})
3453-
.await
3454-
.into_diagnostic()?;
3496+
.await;
3497+
3498+
if let Some(progress) = &progress {
3499+
progress.finish_and_clear();
3500+
}
3501+
3502+
let response = response.map_err(format_inference_status)?;
34553503

34563504
let configured = response.into_inner();
34573505
let label = if configured.route_name == "sandbox-system" {
@@ -3465,6 +3513,12 @@ pub async fn gateway_inference_update(
34653513
println!(" {} {}", "Provider:".dimmed(), configured.provider_name);
34663514
println!(" {} {}", "Model:".dimmed(), configured.model_id);
34673515
println!(" {} {}", "Version:".dimmed(), configured.version);
3516+
if configured.validation_performed {
3517+
println!(" {}", "Validated Endpoints:".dimmed());
3518+
for endpoint in configured.validated_endpoints {
3519+
println!(" - {} ({})", endpoint.url, endpoint.protocol);
3520+
}
3521+
}
34683522
Ok(())
34693523
}
34703524

@@ -3536,6 +3590,16 @@ async fn print_inference_route(
35363590
}
35373591
}
35383592

3593+
fn format_inference_status(status: Status) -> miette::Report {
3594+
let message = status.message().trim();
3595+
3596+
if message.is_empty() {
3597+
return miette::miette!("inference configuration failed ({})", status.code());
3598+
}
3599+
3600+
miette::miette!("{message}")
3601+
}
3602+
35393603
pub fn git_repo_root(local_path: &Path) -> Result<PathBuf> {
35403604
let git_dir = if local_path.is_dir() {
35413605
local_path

0 commit comments

Comments
 (0)