diff --git a/src/openhuman/inference/provider/factory.rs b/src/openhuman/inference/provider/factory.rs index c206b0cc1..ae255c9e6 100644 --- a/src/openhuman/inference/provider/factory.rs +++ b/src/openhuman/inference/provider/factory.rs @@ -35,6 +35,12 @@ use crate::openhuman::inference::provider::ProviderRuntimeOptions; pub const PROVIDER_OPENHUMAN: &str = "openhuman"; /// Prefix for Ollama-local providers: `"ollama:"`. pub const OLLAMA_PROVIDER_PREFIX: &str = "ollama:"; +/// Sentinel returned when a user has expressed custom/BYOK inference intent +/// (via a non-openhuman `inference_url`) but no matching `cloud_providers` +/// entry was found. Passed through `provider_for_role` and caught early in +/// `create_chat_provider_from_string` to produce a clear configuration error +/// instead of silently routing through the managed OpenHuman backend. +pub const BYOK_INCOMPLETE_SENTINEL: &str = "__byok_incomplete__"; fn is_abstract_tier_model(model: &str) -> bool { use crate::openhuman::config::{ @@ -149,6 +155,24 @@ pub fn create_chat_provider_from_string( p ); + // Fail-closed: BYOK intent was detected upstream but no matching provider + // entry was found. Surface a clear configuration error instead of silently + // routing through the managed OpenHuman backend. + if p == BYOK_INCOMPLETE_SENTINEL { + let inference_url = config + .inference_url + .as_deref() + .filter(|s| !s.trim().is_empty()) + .unwrap_or(""); + anyhow::bail!( + "[chat-factory] BYOK_INCOMPLETE: inference_url is set to a custom/direct endpoint \ + ({inference_url}) but no matching cloud_providers entry was found for role '{role}'. \ + To complete BYOK setup add a cloud_providers entry whose endpoint matches \ + {inference_url} (or use a workload-specific route). \ + To use the OpenHuman managed backend instead, clear inference_url from config." + ); + } + // Empty / legacy "cloud" sentinel → primary cloud target. if p.is_empty() || p == "cloud" { let resolved = resolve_primary_cloud_provider_string(config); @@ -332,14 +356,80 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String { if let Some(legacy) = legacy_custom_inference_provider_string(config) { return legacy; } + // Primary is explicitly OpenHuman but inference_url points at a custom + // endpoint with no matching provider entry — this is a half-migrated BYOK + // config. Fail closed so the user sees an actionable error rather than + // silently routing through the managed backend. + if has_custom_inference_intent(config) { + log::debug!( + "[providers][chat-factory] BYOK intent detected (host={}) \ + but no matching cloud_providers entry found; returning fail-closed sentinel", + redact_inference_url(config.inference_url.as_deref()) + ); + return BYOK_INCOMPLETE_SENTINEL.to_string(); + } } if let Some(entry) = primary { return cloud_entry_provider_string(entry, config); } - legacy_custom_inference_provider_string(config) - .unwrap_or_else(|| PROVIDER_OPENHUMAN.to_string()) + // No explicit primary configured. If inference_url signals custom intent but + // no matching provider entry exists, fail closed instead of falling back to + // the managed backend. + legacy_custom_inference_provider_string(config).unwrap_or_else(|| { + if has_custom_inference_intent(config) { + log::debug!( + "[providers][chat-factory] BYOK intent detected (host={}) \ + with no primary_cloud and no matching provider entry; returning fail-closed sentinel", + redact_inference_url(config.inference_url.as_deref()) + ); + BYOK_INCOMPLETE_SENTINEL.to_string() + } else { + PROVIDER_OPENHUMAN.to_string() + } + }) +} + +/// Extract the host portion of an inference URL for safe logging. +/// +/// Returns the host (e.g. `"api.example.com"`) so log lines are grep-friendly +/// without exposing tokens or credentials that may appear in query-string or +/// path components of a bearer-auth URL (e.g. `"https://host/v1?key=…"`). +/// Falls back to `""` when the URL cannot be parsed or is absent. +fn redact_inference_url(url: Option<&str>) -> &str { + url.and_then(|u| { + // Minimal host extraction: find the authority after "://". + let after_scheme = u.find("://").map(|i| &u[i + 3..])?; + // Authority ends at '/', '?', '#', or end-of-string. + let host_end = after_scheme + .find(['/', '?', '#']) + .unwrap_or(after_scheme.len()); + let authority = &after_scheme[..host_end]; + // Strip optional "user:pass@" and port. + let host = authority + .rfind('@') + .map_or(authority, |i| &authority[i + 1..]); + let host = host.rfind(':').map_or(host, |i| &host[..i]); + if host.is_empty() { + None + } else { + Some(host) + } + }) + .unwrap_or("") +} + +/// Return `true` when the config contains a non-openhuman `inference_url`, +/// indicating the user intends custom/BYOK routing rather than the managed +/// backend. +fn has_custom_inference_intent(config: &Config) -> bool { + config + .inference_url + .as_deref() + .map(str::trim) + .filter(|url| !url.is_empty()) + .is_some_and(|url| !looks_like_openhuman_backend(url)) } fn legacy_custom_inference_provider_string(config: &Config) -> Option { diff --git a/src/openhuman/inference/provider/factory_test.rs b/src/openhuman/inference/provider/factory_test.rs index aef8420d8..e112e3483 100644 --- a/src/openhuman/inference/provider/factory_test.rs +++ b/src/openhuman/inference/provider/factory_test.rs @@ -458,7 +458,10 @@ fn legacy_inference_url_custom_provider_wins_over_openhuman_primary_for_unset_ro } #[test] -fn legacy_inference_url_without_matching_provider_stays_on_openhuman_primary() { +fn legacy_inference_url_without_matching_provider_returns_byok_sentinel() { + // BYOK intent: primary is OpenHuman but inference_url points at a custom + // endpoint with no matching cloud_providers entry. Must fail closed — do + // NOT silently route through the managed backend. let mut other = openai_entry("p_other", "other"); other.endpoint = "https://other.example.com/v1".to_string(); @@ -466,7 +469,10 @@ fn legacy_inference_url_without_matching_provider_stays_on_openhuman_primary() { config.primary_cloud = Some("p_oh".to_string()); config.inference_url = Some("https://api.example.com/v1".to_string()); - assert_eq!(provider_for_role("reasoning", &config), "openhuman"); + assert_eq!( + provider_for_role("reasoning", &config), + BYOK_INCOMPLETE_SENTINEL + ); } #[test] @@ -708,3 +714,87 @@ fn make_openhuman_backend_keeps_reasoning_quick() { let (_, model) = make_openhuman_backend(&config).expect("factory should succeed"); assert_eq!(model, "reasoning-quick-v1"); } + +// ── BYOK fail-closed tests ──────────────────────────────────────────────────── + +#[test] +fn byok_intent_no_primary_no_matching_entry_returns_sentinel() { + // No primary_cloud set, inference_url points at a non-openhuman host with + // no matching cloud_providers entry → must return the fail-closed sentinel. + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + assert_eq!( + provider_for_role("reasoning", &config), + BYOK_INCOMPLETE_SENTINEL + ); +} + +#[test] +fn byok_intent_with_matching_entry_resolves_correctly() { + // Matching cloud_providers entry exists → legacy lookup succeeds; no sentinel. + let mut custom = openai_entry("p_custom", "custom"); + custom.endpoint = "https://custom-api.example.com/v1".to_string(); + + let mut config = config_with_providers(vec![custom]); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + + // Legacy URL matches the custom entry → "custom:gpt-4o" + assert_eq!(provider_for_role("reasoning", &config), "custom:gpt-4o"); +} + +#[test] +fn openhuman_inference_url_never_triggers_sentinel() { + // inference_url pointing at the managed backend is not BYOK intent. + let mut config = Config::default(); + config.inference_url = Some("https://api.openhuman.ai/v1".to_string()); + assert_eq!(provider_for_role("reasoning", &config), "openhuman"); +} + +#[test] +fn explicit_workload_route_bypasses_byok_sentinel() { + // A per-role provider route set explicitly always wins over the BYOK check. + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + config.reasoning_provider = Some("openhuman".to_string()); + // Explicit "openhuman" route → goes straight to backend, no sentinel. + assert_eq!(provider_for_role("reasoning", &config), "openhuman"); +} + +#[test] +fn byok_sentinel_makes_provider_creation_error_with_clear_message() { + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + + // Use match instead of unwrap_err(): Box doesn't impl Debug. + let msg = match create_chat_provider_from_string("reasoning", BYOK_INCOMPLETE_SENTINEL, &config) + { + Ok(_) => panic!("sentinel must produce an error, not a provider"), + Err(e) => e.to_string(), + }; + assert!( + msg.contains("BYOK_INCOMPLETE"), + "error must name BYOK_INCOMPLETE; got: {msg}" + ); + assert!( + msg.contains("custom-api.example.com"), + "error must include the configured inference_url; got: {msg}" + ); +} + +#[test] +fn byok_sentinel_error_mentions_configuration_action() { + // The error message must tell the user how to fix the issue. + let mut config = Config::default(); + config.inference_url = Some("https://byok.example.com/v1".to_string()); + + // Use match instead of unwrap_err(): Box doesn't impl Debug. + let msg = match create_chat_provider_from_string("chat", BYOK_INCOMPLETE_SENTINEL, &config) { + Ok(_) => panic!("sentinel must produce an error"), + Err(e) => e.to_string(), + }; + // Must mention adding a cloud_providers entry or clearing inference_url. + assert!( + msg.contains("cloud_providers") || msg.contains("inference_url"), + "error must suggest a remediation; got: {msg}" + ); +} diff --git a/src/openhuman/inference/provider/mod.rs b/src/openhuman/inference/provider/mod.rs index f47f71e2d..e2b110a16 100644 --- a/src/openhuman/inference/provider/mod.rs +++ b/src/openhuman/inference/provider/mod.rs @@ -29,5 +29,5 @@ pub use traits::{ pub use billing_error::is_budget_exhausted_message; pub use config_rejection::is_provider_config_rejection_message; -pub use factory::{create_chat_provider, provider_for_role}; +pub use factory::{create_chat_provider, provider_for_role, BYOK_INCOMPLETE_SENTINEL}; pub use ops::*;