diff --git a/app/src/lib/i18n/chunks/de-5.ts b/app/src/lib/i18n/chunks/de-5.ts index 5ffd167b19..b9b7309481 100644 --- a/app/src/lib/i18n/chunks/de-5.ts +++ b/app/src/lib/i18n/chunks/de-5.ts @@ -527,6 +527,10 @@ const de5: TranslationMap = { 'settings.mascot.colorGreen': 'Grün', 'settings.mascot.colorNavy': 'Marine', 'settings.mascot.colorYellow': 'Gelb', + 'settings.mascot.customGifError': + 'GIF konnte nicht geladen werden. Bitte überprüfe die URL und versuche es erneut.', + 'settings.mascot.customGifHeading': 'Benutzerdefinierter GIF-Avatar', + 'settings.mascot.customGifLabel': 'URL für benutzerdefinierten GIF-Avatar', 'settings.mascot.libraryUnavailable': 'OpenHuman Bibliothek nicht verfügbar', 'settings.mascot.title': 'OpenHuman', }; diff --git a/src/openhuman/agent/bus.rs b/src/openhuman/agent/bus.rs index 55e32685dc..70fa11e20d 100644 --- a/src/openhuman/agent/bus.rs +++ b/src/openhuman/agent/bus.rs @@ -260,6 +260,10 @@ pub fn register_agent_handlers() { // wired into the orchestrator session via Agent::turn, // not the bus dispatcher. None, + // Use the default (allow-all) tool policy. Custom + // policies can be wired in via AgentTurnRequest when + // per-channel policy configuration is added (#2134). 
+ &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await }) diff --git a/src/openhuman/agent/harness/bughunt_tests.rs b/src/openhuman/agent/harness/bughunt_tests.rs index 481b0350db..7255966917 100644 --- a/src/openhuman/agent/harness/bughunt_tests.rs +++ b/src/openhuman/agent/harness/bughunt_tests.rs @@ -101,6 +101,7 @@ async fn native_tool_call_decodes_json_encoded_arguments_string() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -162,6 +163,7 @@ async fn documents_silent_drop_of_non_json_arguments_string() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -218,6 +220,7 @@ async fn parallel_tool_calls_in_single_iteration_all_execute() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -260,6 +263,7 @@ async fn same_named_tool_in_registry_first_match_wins() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -312,6 +316,7 @@ async fn markdown_fenced_tool_call_block_is_parsed() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -365,6 +370,7 @@ async fn native_tool_calls_take_precedence_over_xml_in_text() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -424,6 +430,7 @@ async fn per_tool_max_result_size_caps_history_payload() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -475,6 +482,7 @@ async fn empty_response_with_no_tool_calls_terminates_with_empty_text() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -518,6 +526,7 @@ async fn progress_sink_emits_lifecycle_events_in_order() { &[], Some(tx), None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); diff --git a/src/openhuman/agent/harness/harness_gap_tests.rs 
b/src/openhuman/agent/harness/harness_gap_tests.rs index bd8731bde6..f93b00f54b 100644 --- a/src/openhuman/agent/harness/harness_gap_tests.rs +++ b/src/openhuman/agent/harness/harness_gap_tests.rs @@ -151,6 +151,7 @@ async fn full_turn_cycle_user_llm_tool_result_final() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("full turn cycle should succeed"); @@ -210,6 +211,7 @@ async fn max_iterations_exceeded_downcasts_to_typed_agent_error() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("loop must fail when iterations exhausted"); @@ -285,6 +287,7 @@ async fn visible_tool_names_rejects_tool_outside_whitelist() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should recover after whitelisted-out tool call"); @@ -342,6 +345,7 @@ async fn visible_tool_names_allows_tool_inside_whitelist() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("whitelisted tool should execute"); diff --git a/src/openhuman/agent/harness/test_support_test.rs b/src/openhuman/agent/harness/test_support_test.rs index e50bbceb03..118343eb03 100644 --- a/src/openhuman/agent/harness/test_support_test.rs +++ b/src/openhuman/agent/harness/test_support_test.rs @@ -402,6 +402,7 @@ async fn keyword_provider_drives_prompt_guided_tool_loop_to_completion() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should complete"); @@ -451,6 +452,7 @@ async fn keyword_provider_drives_native_tool_calls_path() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should complete"); @@ -506,6 +508,7 @@ async fn keyword_provider_chains_multiple_tools_across_iterations() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -623,6 +626,7 @@ async fn 
crypto_wallet_send_flow_sequences_wallet_tools_and_confirmation_gate() &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("crypto wallet flow should complete"); @@ -735,6 +739,7 @@ async fn crypto_wallet_send_flow_does_not_execute_when_confirmation_is_not_grant &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("declined flow should still complete"); @@ -795,6 +800,7 @@ async fn keyword_provider_uses_latest_tool_result_to_drive_the_next_tool_call() &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should complete"); @@ -868,6 +874,7 @@ async fn keyword_provider_executes_multiple_native_tool_calls_from_one_turn() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should complete"); @@ -916,6 +923,7 @@ async fn keyword_provider_unknown_tool_surfaces_error_and_loop_continues() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -965,6 +973,7 @@ async fn run_tool_call_loop_returns_max_iterations_error() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("should hit max iterations"); @@ -1034,6 +1043,7 @@ async fn agent_loop_refuses_clirpconly_tools() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1093,6 +1103,7 @@ async fn tool_error_result_is_surfaced_to_next_iteration() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1148,6 +1159,7 @@ async fn tool_anyhow_error_surfaces_in_history() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1192,6 +1204,7 @@ async fn visible_tool_names_whitelist_rejects_filtered_out_tools() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1237,6 +1250,7 @@ async fn 
extra_tools_are_invokable_alongside_registry() { &extras, None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1391,6 +1405,7 @@ async fn harness_invokes_composio_action_tool_against_fake_backend() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .unwrap(); @@ -1537,6 +1552,7 @@ impl Tool for TestDelegationTool { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await?; @@ -1679,6 +1695,7 @@ async fn orchestrator_prompt_drives_composio_call_via_delegation_chain() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("orchestrator loop should complete"); diff --git a/src/openhuman/agent/harness/tests.rs b/src/openhuman/agent/harness/tests.rs index 550800589b..1be2539f9f 100644 --- a/src/openhuman/agent/harness/tests.rs +++ b/src/openhuman/agent/harness/tests.rs @@ -128,6 +128,7 @@ async fn run_tool_call_loop_returns_structured_error_for_non_vision_provider() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("provider without vision support should fail"); @@ -173,6 +174,7 @@ async fn run_tool_call_loop_rejects_oversized_image_payload() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("oversized payload must fail"); @@ -212,6 +214,7 @@ async fn run_tool_call_loop_accepts_valid_multimodal_request_flow() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("valid multimodal payload should pass"); diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index c473fa1d26..c18cd0508f 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -6,6 +6,7 @@ use crate::openhuman::approval::{ApprovalManager, ApprovalRequest, ApprovalRespo use crate::openhuman::inference::provider::{ ChatMessage, ChatRequest, Provider, 
ProviderCapabilityError, ProviderDelta, }; +use crate::openhuman::tools::policy::{DefaultToolPolicy, PolicyDecision, ToolPolicy}; use crate::openhuman::tools::traits::ToolScope; use crate::openhuman::tools::Tool; use anyhow::Result; @@ -49,6 +50,7 @@ pub(crate) async fn agent_turn( max_tool_iterations: usize, payload_summarizer: Option<&dyn PayloadSummarizer>, ) -> Result { + let default_policy = DefaultToolPolicy; run_tool_call_loop( provider, history, @@ -66,6 +68,7 @@ pub(crate) async fn agent_turn( &[], None, payload_summarizer, + &default_policy, ) .await } @@ -117,6 +120,7 @@ pub(crate) async fn run_tool_call_loop( extra_tools: &[Box], on_progress: Option>, payload_summarizer: Option<&dyn PayloadSummarizer>, + tool_policy: &dyn ToolPolicy, ) -> Result { let max_iterations = if max_tool_iterations == 0 { DEFAULT_MAX_TOOL_ITERATIONS @@ -609,6 +613,30 @@ pub(crate) async fn run_tool_call_loop( } }; + // ── Tool policy check (#2131) ───────────────── + // Evaluate the pluggable ToolPolicy before any approval or + // execution. If the policy denies the call, skip everything + // (including approval side-effects) and return the denial + // reason as a tool error to the model. 
+ if let PolicyDecision::Deny(reason) = tool_policy.evaluate(&call.name, &call.arguments) + { + tracing::debug!( + iteration, + tool = call.name.as_str(), + reason = %reason, + "[agent_loop] tool policy denied tool call" + ); + let denied = format!("Tool '{}' denied by policy: {reason}", call.name); + emit_failed_completion(&denied).await; + individual_results.push(denied.clone()); + let _ = writeln!( + tool_results, + "<tool_result name=\"{}\">\n{denied}\n</tool_result>", + call.name + ); + continue; + } + // ── Approval hook ──────────────────────────────── if let Some(mgr) = approval { if mgr.needs_approval(&call.name) { diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs index 0324472527..42a60f6a75 100644 --- a/src/openhuman/agent/harness/tool_loop_tests.rs +++ b/src/openhuman/agent/harness/tool_loop_tests.rs @@ -226,6 +226,7 @@ async fn run_tool_call_loop_intercepts_oversized_tool_results_via_summarizer() { &[], None, Some(&summarizer), + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop with summarizer should succeed"); @@ -277,6 +278,7 @@ async fn run_tool_call_loop_rejects_vision_markers_for_non_vision_provider() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("vision markers should be rejected"); @@ -315,6 +317,7 @@ async fn run_tool_call_loop_streams_final_text_chunks() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("final text should succeed"); @@ -368,6 +371,7 @@ async fn run_tool_call_loop_blocks_cli_rpc_only_tools_in_prompt_mode() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should recover after denial"); @@ -424,6 +428,7 @@ async fn run_tool_call_loop_persists_native_tool_results_as_tool_messages() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("native tool flow should succeed"); @@ -481,6 +486,7 @@ async fn 
run_tool_call_loop_auto_approves_supervised_tools_on_non_cli_channels() &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("non-cli channels should auto-approve supervised tools"); @@ -531,6 +537,7 @@ async fn run_tool_call_loop_reports_unknown_tool_and_uses_default_max_iterations &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("default iteration fallback should still succeed"); @@ -587,6 +594,7 @@ async fn run_tool_call_loop_formats_tool_error_paths() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should recover after tool errors"); @@ -627,6 +635,7 @@ async fn run_tool_call_loop_propagates_provider_errors_and_max_iteration_failure &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("provider error path should fail"); @@ -660,6 +669,7 @@ async fn run_tool_call_loop_propagates_provider_errors_and_max_iteration_failure &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect_err("loop should stop after configured iterations"); @@ -736,6 +746,7 @@ async fn run_tool_call_loop_aborts_when_stop_hook_returns_stop() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await }) @@ -788,6 +799,7 @@ async fn run_tool_call_loop_runs_unchanged_when_no_stop_hooks_installed() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should succeed without stop hooks"); @@ -863,6 +875,7 @@ async fn run_tool_call_loop_applies_per_tool_max_result_size_cap() { &[], None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop with capped tool should succeed"); @@ -989,6 +1002,7 @@ async fn run_tool_call_loop_dedups_duplicate_tool_names_before_provider_call() { &extra, None, None, + &crate::openhuman::tools::policy::DefaultToolPolicy, ) .await .expect("loop should succeed with deduplicated tool 
list"); diff --git a/src/openhuman/tools/mod.rs b/src/openhuman/tools/mod.rs index ffa68dea01..618a1a5bd0 100644 --- a/src/openhuman/tools/mod.rs +++ b/src/openhuman/tools/mod.rs @@ -2,6 +2,7 @@ pub mod generated; pub mod local_cli; pub mod ops; pub mod orchestrator_tools; +pub mod policy; pub mod schema; mod schemas; pub mod traits; @@ -12,6 +13,7 @@ pub(crate) mod implementations; pub use implementations::*; pub use ops::*; +pub use policy::{DefaultToolPolicy, PolicyDecision, ToolPolicy}; #[allow(unused_imports)] pub use schema::{CleaningStrategy, SchemaCleanr}; pub use schemas::{ diff --git a/src/openhuman/tools/policy.rs b/src/openhuman/tools/policy.rs new file mode 100644 index 0000000000..26c111c852 --- /dev/null +++ b/src/openhuman/tools/policy.rs @@ -0,0 +1,135 @@ +//! Tool-policy middleware — generic allow/deny gate evaluated before tool execution. +//! +//! The [`ToolPolicy`] trait provides a single extension point for centrally +//! governing which tool invocations proceed. The agent's tool loop calls +//! [`ToolPolicy::evaluate`] before every `tool.execute()`: if the verdict is +//! [`PolicyDecision::Deny`], the tool is never invoked and the denial reason +//! is returned as a `ToolResult::error` to the model. +//! +//! The shipped [`DefaultToolPolicy`] returns `Allow` unconditionally so +//! existing behaviour is preserved. Downstream crates and tests can supply +//! custom policies (rate-limiting, per-tool allow/deny lists, …) by +//! implementing the trait. + +use serde_json::Value; + +/// Outcome of a policy evaluation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PolicyDecision { + /// The tool call may proceed. + Allow, + /// The tool call is blocked. The `String` is the human-readable reason + /// surfaced to the model (and logged). + Deny(String), +} + +/// Trait for tool-execution policies evaluated before every tool invocation. +/// +/// Implementations MUST be cheap and synchronous — the policy is called on the +/// agent's hot path. 
Expensive checks (network, disk) belong in the tool +/// itself or in an async wrapper around this trait. +pub trait ToolPolicy: Send + Sync { + /// Evaluate whether a tool call is allowed. + /// + /// * `tool_name` — the registered name of the tool (`Tool::name()`). + /// * `args` — the JSON arguments the model supplied for this call. + fn evaluate(&self, tool_name: &str, args: &Value) -> PolicyDecision; +} + +/// Default policy that allows every tool invocation unconditionally. +/// +/// This is the backward-compatible default wired into the agent loop when no +/// custom policy is provided. +#[derive(Debug, Clone, Copy, Default)] +pub struct DefaultToolPolicy; + +impl ToolPolicy for DefaultToolPolicy { + fn evaluate(&self, _tool_name: &str, _args: &Value) -> PolicyDecision { + PolicyDecision::Allow + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── DefaultToolPolicy ───────────────────────────────────────── + + #[test] + fn default_policy_allows_all_tools() { + let policy = DefaultToolPolicy; + let decision = policy.evaluate("shell", &serde_json::json!({"command": "ls"})); + assert_eq!(decision, PolicyDecision::Allow); + } + + #[test] + fn default_policy_allows_unknown_tool_names() { + let policy = DefaultToolPolicy; + assert_eq!( + policy.evaluate("nonexistent_tool_xyz", &Value::Null), + PolicyDecision::Allow, + ); + } + + // ── Custom deny policy ──────────────────────────────────────── + + /// A test-only policy that blocks a specific tool by name. 
+ struct DenyByNamePolicy { + blocked: String, + reason: String, + } + + impl ToolPolicy for DenyByNamePolicy { + fn evaluate(&self, tool_name: &str, _args: &Value) -> PolicyDecision { + if tool_name == self.blocked { + PolicyDecision::Deny(self.reason.clone()) + } else { + PolicyDecision::Allow + } + } + } + + #[test] + fn custom_deny_policy_blocks_matching_tool() { + let policy = DenyByNamePolicy { + blocked: "dangerous_tool".into(), + reason: "blocked by test policy".into(), + }; + let decision = policy.evaluate("dangerous_tool", &Value::Null); + assert_eq!( + decision, + PolicyDecision::Deny("blocked by test policy".into()), + ); + } + + #[test] + fn custom_deny_policy_allows_non_matching_tool() { + let policy = DenyByNamePolicy { + blocked: "dangerous_tool".into(), + reason: "blocked by test policy".into(), + }; + let decision = policy.evaluate("safe_tool", &Value::Null); + assert_eq!(decision, PolicyDecision::Allow); + } + + // ── Deny-all policy ─────────────────────────────────────────── + + struct DenyAllPolicy; + + impl ToolPolicy for DenyAllPolicy { + fn evaluate(&self, _tool_name: &str, _args: &Value) -> PolicyDecision { + PolicyDecision::Deny("all tools denied".into()) + } + } + + #[test] + fn deny_all_policy_blocks_every_tool() { + let policy = DenyAllPolicy; + for name in &["shell", "file_read", "memory_store", "web_search"] { + assert_eq!( + policy.evaluate(name, &Value::Null), + PolicyDecision::Deny("all tools denied".into()), + ); + } + } +}