From b740f2a1a80dd517bd261afaea082ba5bd1dcd39 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 22 May 2026 18:10:00 +0530 Subject: [PATCH 1/2] fix(inference): fail closed when BYOK intent cannot resolve a provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When inference_url is set to a non-openhuman endpoint but no matching cloud_providers entry exists, the factory was silently falling back to the managed OpenHuman backend — violating the trust boundary for BYOK and custom-routing configs. Changes: - Add BYOK_INCOMPLETE_SENTINEL constant returned by resolve_primary_cloud_provider_string when BYOK intent is detected (inference_url points at a non-openhuman host) but no matching cloud_providers entry can be found. - has_custom_inference_intent() helper detects the BYOK signal. - create_chat_provider_from_string() catches the sentinel early and surfaces an actionable BYOK_INCOMPLETE error naming the inference_url and telling the user how to fix it (add a cloud_providers entry or clear inference_url). - Update legacy_inference_url_without_matching_provider test: now correctly expects the sentinel, not "openhuman". - Six new regression tests covering BYOK sentinel paths, explicit-route bypass, openhuman-URL non-triggering, and error message content. Closes #2483 --- src/openhuman/inference/provider/factory.rs | 65 ++++++++++++- .../inference/provider/factory_test.rs | 97 ++++++++++++++++++- src/openhuman/inference/provider/mod.rs | 2 +- 3 files changed, 159 insertions(+), 5 deletions(-) diff --git a/src/openhuman/inference/provider/factory.rs b/src/openhuman/inference/provider/factory.rs index c206b0cc1..d46bb0360 100644 --- a/src/openhuman/inference/provider/factory.rs +++ b/src/openhuman/inference/provider/factory.rs @@ -35,6 +35,12 @@ use crate::openhuman::inference::provider::ProviderRuntimeOptions; pub const PROVIDER_OPENHUMAN: &str = "openhuman"; /// Prefix for Ollama-local providers: `"ollama:"`. pub const OLLAMA_PROVIDER_PREFIX: &str = "ollama:"; +/// Sentinel returned when a user has expressed custom/BYOK inference intent +/// (via a non-openhuman `inference_url`) but no matching `cloud_providers` +/// entry was found. Passed through `provider_for_role` and caught early in +/// `create_chat_provider_from_string` to produce a clear configuration error +/// instead of silently routing through the managed OpenHuman backend. +pub const BYOK_INCOMPLETE_SENTINEL: &str = "__byok_incomplete__"; fn is_abstract_tier_model(model: &str) -> bool { use crate::openhuman::config::{ @@ -149,6 +155,24 @@ pub fn create_chat_provider_from_string( p ); + // Fail-closed: BYOK intent was detected upstream but no matching provider + // entry was found. Surface a clear configuration error instead of silently + // routing through the managed OpenHuman backend. + if p == BYOK_INCOMPLETE_SENTINEL { + let inference_url = config + .inference_url + .as_deref() + .filter(|s| !s.trim().is_empty()) + .unwrap_or(""); + anyhow::bail!( + "[chat-factory] BYOK_INCOMPLETE: inference_url is set to a custom/direct endpoint \ + ({inference_url}) but no matching cloud_providers entry was found for role '{role}'. \ + To complete BYOK setup add a cloud_providers entry whose endpoint matches \ + {inference_url} (or use a workload-specific route). \ + To use the OpenHuman managed backend instead, clear inference_url from config." + ); + } + // Empty / legacy "cloud" sentinel → primary cloud target. if p.is_empty() || p == "cloud" { let resolved = resolve_primary_cloud_provider_string(config); @@ -332,14 +356,51 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String { if let Some(legacy) = legacy_custom_inference_provider_string(config) { return legacy; } + // Primary is explicitly OpenHuman but inference_url points at a custom + // endpoint with no matching provider entry — this is a half-migrated BYOK + // config. Fail closed so the user sees an actionable error rather than + // silently routing through the managed backend. + if has_custom_inference_intent(config) { + log::warn!( + "[providers][chat-factory] BYOK intent detected (inference_url={:?}) \ + but no matching cloud_providers entry found; returning fail-closed sentinel", + config.inference_url + ); + return BYOK_INCOMPLETE_SENTINEL.to_string(); + } } if let Some(entry) = primary { return cloud_entry_provider_string(entry, config); } - legacy_custom_inference_provider_string(config) - .unwrap_or_else(|| PROVIDER_OPENHUMAN.to_string()) + // No explicit primary configured. If inference_url signals custom intent but + // no matching provider entry exists, fail closed instead of falling back to + // the managed backend. + legacy_custom_inference_provider_string(config).unwrap_or_else(|| { + if has_custom_inference_intent(config) { + log::warn!( + "[providers][chat-factory] BYOK intent detected (inference_url={:?}) \ + with no primary_cloud and no matching provider entry; returning fail-closed sentinel", + config.inference_url + ); + BYOK_INCOMPLETE_SENTINEL.to_string() + } else { + PROVIDER_OPENHUMAN.to_string() + } + }) +} + +/// Return `true` when the config contains a non-openhuman `inference_url`, +/// indicating the user intends custom/BYOK routing rather than the managed +/// backend. +fn has_custom_inference_intent(config: &Config) -> bool { + config + .inference_url + .as_deref() + .map(str::trim) + .filter(|url| !url.is_empty()) + .is_some_and(|url| !looks_like_openhuman_backend(url)) } fn legacy_custom_inference_provider_string(config: &Config) -> Option { diff --git a/src/openhuman/inference/provider/factory_test.rs b/src/openhuman/inference/provider/factory_test.rs index aef8420d8..a331a8e3c 100644 --- a/src/openhuman/inference/provider/factory_test.rs +++ b/src/openhuman/inference/provider/factory_test.rs @@ -458,7 +458,10 @@ fn legacy_inference_url_custom_provider_wins_over_openhuman_primary_for_unset_ro } #[test] -fn legacy_inference_url_without_matching_provider_stays_on_openhuman_primary() { +fn legacy_inference_url_without_matching_provider_returns_byok_sentinel() { + // BYOK intent: primary is OpenHuman but inference_url points at a custom + // endpoint with no matching cloud_providers entry. Must fail closed — do + // NOT silently route through the managed backend. let mut other = openai_entry("p_other", "other"); other.endpoint = "https://other.example.com/v1".to_string(); @@ -466,7 +469,10 @@ fn legacy_inference_url_without_matching_provider_stays_on_openhuman_primary() { config.primary_cloud = Some("p_oh".to_string()); config.inference_url = Some("https://api.example.com/v1".to_string()); - assert_eq!(provider_for_role("reasoning", &config), "openhuman"); + assert_eq!( + provider_for_role("reasoning", &config), + BYOK_INCOMPLETE_SENTINEL + ); } #[test] @@ -708,3 +714,90 @@ fn make_openhuman_backend_keeps_reasoning_quick() { let (_, model) = make_openhuman_backend(&config).expect("factory should succeed"); assert_eq!(model, "reasoning-quick-v1"); } + +// ── BYOK fail-closed tests ──────────────────────────────────────────────────── + +#[test] +fn byok_intent_no_primary_no_matching_entry_returns_sentinel() { + // No primary_cloud set, inference_url points at a non-openhuman host with + // no matching cloud_providers entry → must return the fail-closed sentinel. + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + assert_eq!( + provider_for_role("reasoning", &config), + BYOK_INCOMPLETE_SENTINEL + ); +} + +#[test] +fn byok_intent_with_matching_entry_resolves_correctly() { + // Matching cloud_providers entry exists → legacy lookup succeeds; no sentinel. + let mut custom = openai_entry("p_custom", "custom"); + custom.endpoint = "https://custom-api.example.com/v1".to_string(); + + let mut config = config_with_providers(vec![custom]); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + + // Legacy URL matches the custom entry → "custom:gpt-4o" + assert_ne!( + provider_for_role("reasoning", &config), + BYOK_INCOMPLETE_SENTINEL + ); +} + +#[test] +fn openhuman_inference_url_never_triggers_sentinel() { + // inference_url pointing at the managed backend is not BYOK intent. + let mut config = Config::default(); + config.inference_url = Some("https://api.openhuman.ai/v1".to_string()); + assert_eq!(provider_for_role("reasoning", &config), "openhuman"); +} + +#[test] +fn explicit_workload_route_bypasses_byok_sentinel() { + // A per-role provider route set explicitly always wins over the BYOK check. + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + config.reasoning_provider = Some("openhuman".to_string()); + // Explicit "openhuman" route → goes straight to backend, no sentinel. + assert_eq!(provider_for_role("reasoning", &config), "openhuman"); +} + +#[test] +fn byok_sentinel_makes_provider_creation_error_with_clear_message() { + let mut config = Config::default(); + config.inference_url = Some("https://custom-api.example.com/v1".to_string()); + + // Use match instead of unwrap_err(): Box doesn't impl Debug. + let msg = match create_chat_provider_from_string("reasoning", BYOK_INCOMPLETE_SENTINEL, &config) + { + Ok(_) => panic!("sentinel must produce an error, not a provider"), + Err(e) => e.to_string(), + }; + assert!( + msg.contains("BYOK_INCOMPLETE"), + "error must name BYOK_INCOMPLETE; got: {msg}" + ); + assert!( + msg.contains("custom-api.example.com"), + "error must include the configured inference_url; got: {msg}" + ); +} + +#[test] +fn byok_sentinel_error_mentions_configuration_action() { + // The error message must tell the user how to fix the issue. + let mut config = Config::default(); + config.inference_url = Some("https://byok.example.com/v1".to_string()); + + // Use match instead of unwrap_err(): Box doesn't impl Debug. + let msg = match create_chat_provider_from_string("chat", BYOK_INCOMPLETE_SENTINEL, &config) { + Ok(_) => panic!("sentinel must produce an error"), + Err(e) => e.to_string(), + }; + // Must mention adding a cloud_providers entry or clearing inference_url. + assert!( + msg.contains("cloud_providers") || msg.contains("inference_url"), + "error must suggest a remediation; got: {msg}" + ); +} diff --git a/src/openhuman/inference/provider/mod.rs b/src/openhuman/inference/provider/mod.rs index f47f71e2d..e2b110a16 100644 --- a/src/openhuman/inference/provider/mod.rs +++ b/src/openhuman/inference/provider/mod.rs @@ -29,5 +29,5 @@ pub use traits::{ pub use billing_error::is_budget_exhausted_message; pub use config_rejection::is_provider_config_rejection_message; -pub use factory::{create_chat_provider, provider_for_role}; +pub use factory::{create_chat_provider, provider_for_role, BYOK_INCOMPLETE_SENTINEL}; pub use ops::*; From 08473ee38f93b8c069c91f62727c8227cacee3b5 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 22 May 2026 18:52:15 +0530 Subject: [PATCH 2/2] fix(inference): address CodeRabbit review on BYOK log verbosity and test precision Downgrade BYOK sentinel log calls from warn to debug (these are configuration-in-progress states, not errors), redact the inference_url to only expose the host portion so credentials in query strings are never logged, and tighten the test assertion to lock the exact resolved provider string instead of just ruling out the sentinel. --- src/openhuman/inference/provider/factory.rs | 41 ++++++++++++++++--- .../inference/provider/factory_test.rs | 5 +-- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/openhuman/inference/provider/factory.rs b/src/openhuman/inference/provider/factory.rs index d46bb0360..ae255c9e6 100644 --- a/src/openhuman/inference/provider/factory.rs +++ b/src/openhuman/inference/provider/factory.rs @@ -361,10 +361,10 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String { // config. Fail closed so the user sees an actionable error rather than // silently routing through the managed backend. if has_custom_inference_intent(config) { - log::warn!( - "[providers][chat-factory] BYOK intent detected (inference_url={:?}) \ + log::debug!( + "[providers][chat-factory] BYOK intent detected (host={}) \ but no matching cloud_providers entry found; returning fail-closed sentinel", - config.inference_url + redact_inference_url(config.inference_url.as_deref()) ); return BYOK_INCOMPLETE_SENTINEL.to_string(); } @@ -379,10 +379,10 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String { // the managed backend. legacy_custom_inference_provider_string(config).unwrap_or_else(|| { if has_custom_inference_intent(config) { - log::warn!( - "[providers][chat-factory] BYOK intent detected (inference_url={:?}) \ + log::debug!( + "[providers][chat-factory] BYOK intent detected (host={}) \ with no primary_cloud and no matching provider entry; returning fail-closed sentinel", - config.inference_url + redact_inference_url(config.inference_url.as_deref()) ); BYOK_INCOMPLETE_SENTINEL.to_string() } else { @@ -391,6 +391,35 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String { }) } +/// Extract the host portion of an inference URL for safe logging. +/// +/// Returns the host (e.g. `"api.example.com"`) so log lines are grep-friendly +/// without exposing tokens or credentials that may appear in query-string or +/// path components of a bearer-auth URL (e.g. `"https://host/v1?key=…"`). +/// Falls back to `""` when the URL cannot be parsed or is absent. +fn redact_inference_url(url: Option<&str>) -> &str { + url.and_then(|u| { + // Minimal host extraction: find the authority after "://". + let after_scheme = u.find("://").map(|i| &u[i + 3..])?; + // Authority ends at '/', '?', '#', or end-of-string. + let host_end = after_scheme + .find(['/', '?', '#']) + .unwrap_or(after_scheme.len()); + let authority = &after_scheme[..host_end]; + // Strip optional "user:pass@" and port. + let host = authority + .rfind('@') + .map_or(authority, |i| &authority[i + 1..]); + let host = host.rfind(':').map_or(host, |i| &host[..i]); + if host.is_empty() { + None + } else { + Some(host) + } + }) + .unwrap_or("") +} + /// Return `true` when the config contains a non-openhuman `inference_url`, /// indicating the user intends custom/BYOK routing rather than the managed /// backend. diff --git a/src/openhuman/inference/provider/factory_test.rs b/src/openhuman/inference/provider/factory_test.rs index a331a8e3c..e112e3483 100644 --- a/src/openhuman/inference/provider/factory_test.rs +++ b/src/openhuman/inference/provider/factory_test.rs @@ -739,10 +739,7 @@ fn byok_intent_with_matching_entry_resolves_correctly() { config.inference_url = Some("https://custom-api.example.com/v1".to_string()); // Legacy URL matches the custom entry → "custom:gpt-4o" - assert_ne!( - provider_for_role("reasoning", &config), - BYOK_INCOMPLETE_SENTINEL - ); + assert_eq!(provider_for_role("reasoning", &config), "custom:gpt-4o"); } #[test]