Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 92 additions & 2 deletions src/openhuman/inference/provider/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ use crate::openhuman::inference::provider::ProviderRuntimeOptions;
pub const PROVIDER_OPENHUMAN: &str = "openhuman";
/// Prefix for Ollama-local providers: `"ollama:<model>"`.
pub const OLLAMA_PROVIDER_PREFIX: &str = "ollama:";
/// Sentinel returned when a user has expressed custom/BYOK inference intent
/// (via a non-openhuman `inference_url`) but no matching `cloud_providers`
/// entry was found. Passed through `provider_for_role` and caught early in
/// `create_chat_provider_from_string` to produce a clear configuration error
/// instead of silently routing through the managed OpenHuman backend.
pub const BYOK_INCOMPLETE_SENTINEL: &str = "__byok_incomplete__";

fn is_abstract_tier_model(model: &str) -> bool {
use crate::openhuman::config::{
Expand Down Expand Up @@ -149,6 +155,24 @@ pub fn create_chat_provider_from_string(
p
);

// Fail-closed: BYOK intent was detected upstream but no matching provider
// entry was found. Surface a clear configuration error instead of silently
// routing through the managed OpenHuman backend.
if p == BYOK_INCOMPLETE_SENTINEL {
let inference_url = config
.inference_url
.as_deref()
.filter(|s| !s.trim().is_empty())
.unwrap_or("<unset>");
anyhow::bail!(
"[chat-factory] BYOK_INCOMPLETE: inference_url is set to a custom/direct endpoint \
({inference_url}) but no matching cloud_providers entry was found for role '{role}'. \
To complete BYOK setup add a cloud_providers entry whose endpoint matches \
{inference_url} (or use a workload-specific route). \
To use the OpenHuman managed backend instead, clear inference_url from config."
);
}

// Empty / legacy "cloud" sentinel → primary cloud target.
if p.is_empty() || p == "cloud" {
let resolved = resolve_primary_cloud_provider_string(config);
Expand Down Expand Up @@ -332,14 +356,80 @@ fn resolve_primary_cloud_provider_string(config: &Config) -> String {
if let Some(legacy) = legacy_custom_inference_provider_string(config) {
return legacy;
}
// Primary is explicitly OpenHuman but inference_url points at a custom
// endpoint with no matching provider entry — this is a half-migrated BYOK
// config. Fail closed so the user sees an actionable error rather than
// silently routing through the managed backend.
if has_custom_inference_intent(config) {
log::debug!(
"[providers][chat-factory] BYOK intent detected (host={}) \
but no matching cloud_providers entry found; returning fail-closed sentinel",
redact_inference_url(config.inference_url.as_deref())
);
return BYOK_INCOMPLETE_SENTINEL.to_string();
}
}

if let Some(entry) = primary {
return cloud_entry_provider_string(entry, config);
}

legacy_custom_inference_provider_string(config)
.unwrap_or_else(|| PROVIDER_OPENHUMAN.to_string())
// No explicit primary configured. If inference_url signals custom intent but
// no matching provider entry exists, fail closed instead of falling back to
// the managed backend.
legacy_custom_inference_provider_string(config).unwrap_or_else(|| {
if has_custom_inference_intent(config) {
log::debug!(
"[providers][chat-factory] BYOK intent detected (host={}) \
with no primary_cloud and no matching provider entry; returning fail-closed sentinel",
redact_inference_url(config.inference_url.as_deref())
);
BYOK_INCOMPLETE_SENTINEL.to_string()
} else {
PROVIDER_OPENHUMAN.to_string()
}
})
}

/// Extract the host portion of an inference URL for safe logging.
///
/// Returns the host (e.g. `"api.example.com"`) so log lines are grep-friendly
/// without exposing tokens or credentials that may appear in query-string or
/// path components of a bearer-auth URL (e.g. `"https://host/v1?key=…"`).
/// Falls back to `"<redacted>"` when the URL cannot be parsed or is absent.
fn redact_inference_url(url: Option<&str>) -> &str {
url.and_then(|u| {
// Minimal host extraction: find the authority after "://".
let after_scheme = u.find("://").map(|i| &u[i + 3..])?;
// Authority ends at '/', '?', '#', or end-of-string.
let host_end = after_scheme
.find(['/', '?', '#'])
.unwrap_or(after_scheme.len());
let authority = &after_scheme[..host_end];
// Strip optional "user:pass@" and port.
let host = authority
.rfind('@')
.map_or(authority, |i| &authority[i + 1..]);
let host = host.rfind(':').map_or(host, |i| &host[..i]);
if host.is_empty() {
None
} else {
Some(host)
}
})
.unwrap_or("<redacted>")
}

/// Return `true` when the config contains a non-openhuman `inference_url`,
/// indicating the user intends custom/BYOK routing rather than the managed
/// backend.
fn has_custom_inference_intent(config: &Config) -> bool {
config
.inference_url
.as_deref()
.map(str::trim)
.filter(|url| !url.is_empty())
.is_some_and(|url| !looks_like_openhuman_backend(url))
}

fn legacy_custom_inference_provider_string(config: &Config) -> Option<String> {
Expand Down
94 changes: 92 additions & 2 deletions src/openhuman/inference/provider/factory_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,15 +458,21 @@ fn legacy_inference_url_custom_provider_wins_over_openhuman_primary_for_unset_ro
}

#[test]
fn legacy_inference_url_without_matching_provider_stays_on_openhuman_primary() {
fn legacy_inference_url_without_matching_provider_returns_byok_sentinel() {
// BYOK intent: primary is OpenHuman but inference_url points at a custom
// endpoint with no matching cloud_providers entry. Must fail closed — do
// NOT silently route through the managed backend.
let mut other = openai_entry("p_other", "other");
other.endpoint = "https://other.example.com/v1".to_string();

let mut config = config_with_providers(vec![oh_entry("p_oh"), other]);
config.primary_cloud = Some("p_oh".to_string());
config.inference_url = Some("https://api.example.com/v1".to_string());

assert_eq!(provider_for_role("reasoning", &config), "openhuman");
assert_eq!(
provider_for_role("reasoning", &config),
BYOK_INCOMPLETE_SENTINEL
);
}

#[test]
Expand Down Expand Up @@ -708,3 +714,87 @@ fn make_openhuman_backend_keeps_reasoning_quick() {
let (_, model) = make_openhuman_backend(&config).expect("factory should succeed");
assert_eq!(model, "reasoning-quick-v1");
}

// ── BYOK fail-closed tests ────────────────────────────────────────────────────

#[test]
fn byok_intent_no_primary_no_matching_entry_returns_sentinel() {
// No primary_cloud set, inference_url points at a non-openhuman host with
// no matching cloud_providers entry → must return the fail-closed sentinel.
let mut config = Config::default();
config.inference_url = Some("https://custom-api.example.com/v1".to_string());
assert_eq!(
provider_for_role("reasoning", &config),
BYOK_INCOMPLETE_SENTINEL
);
}

#[test]
fn byok_intent_with_matching_entry_resolves_correctly() {
// Matching cloud_providers entry exists → legacy lookup succeeds; no sentinel.
let mut custom = openai_entry("p_custom", "custom");
custom.endpoint = "https://custom-api.example.com/v1".to_string();

let mut config = config_with_providers(vec![custom]);
config.inference_url = Some("https://custom-api.example.com/v1".to_string());

// Legacy URL matches the custom entry → "custom:gpt-4o"
assert_eq!(provider_for_role("reasoning", &config), "custom:gpt-4o");
}

#[test]
fn openhuman_inference_url_never_triggers_sentinel() {
// inference_url pointing at the managed backend is not BYOK intent.
let mut config = Config::default();
config.inference_url = Some("https://api.openhuman.ai/v1".to_string());
assert_eq!(provider_for_role("reasoning", &config), "openhuman");
}

#[test]
fn explicit_workload_route_bypasses_byok_sentinel() {
// A per-role provider route set explicitly always wins over the BYOK check.
let mut config = Config::default();
config.inference_url = Some("https://custom-api.example.com/v1".to_string());
config.reasoning_provider = Some("openhuman".to_string());
// Explicit "openhuman" route → goes straight to backend, no sentinel.
assert_eq!(provider_for_role("reasoning", &config), "openhuman");
}

#[test]
fn byok_sentinel_makes_provider_creation_error_with_clear_message() {
let mut config = Config::default();
config.inference_url = Some("https://custom-api.example.com/v1".to_string());

// Use match instead of unwrap_err(): Box<dyn Provider> doesn't impl Debug.
let msg = match create_chat_provider_from_string("reasoning", BYOK_INCOMPLETE_SENTINEL, &config)
{
Ok(_) => panic!("sentinel must produce an error, not a provider"),
Err(e) => e.to_string(),
};
assert!(
msg.contains("BYOK_INCOMPLETE"),
"error must name BYOK_INCOMPLETE; got: {msg}"
);
assert!(
msg.contains("custom-api.example.com"),
"error must include the configured inference_url; got: {msg}"
);
}

#[test]
fn byok_sentinel_error_mentions_configuration_action() {
// The error message must tell the user how to fix the issue.
let mut config = Config::default();
config.inference_url = Some("https://byok.example.com/v1".to_string());

// Use match instead of unwrap_err(): Box<dyn Provider> doesn't impl Debug.
let msg = match create_chat_provider_from_string("chat", BYOK_INCOMPLETE_SENTINEL, &config) {
Ok(_) => panic!("sentinel must produce an error"),
Err(e) => e.to_string(),
};
// Must mention adding a cloud_providers entry or clearing inference_url.
assert!(
msg.contains("cloud_providers") || msg.contains("inference_url"),
"error must suggest a remediation; got: {msg}"
);
}
2 changes: 1 addition & 1 deletion src/openhuman/inference/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ pub use traits::{

pub use billing_error::is_budget_exhausted_message;
pub use config_rejection::is_provider_config_rejection_message;
pub use factory::{create_chat_provider, provider_for_role};
pub use factory::{create_chat_provider, provider_for_role, BYOK_INCOMPLETE_SENTINEL};
pub use ops::*;
Loading