Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ OPENHUMAN_MODEL=
OPENHUMAN_WORKSPACE=
# [optional] Default: 0.7
OPENHUMAN_TEMPERATURE=0.7
# [optional] Language for background LLM artifacts such as memory-tree summaries,
# entity-extraction reasons, and learning reflections. Accepts UI locale tags
# such as zh-CN or a language name. Leave unset for default behavior.
# OPENHUMAN_OUTPUT_LANGUAGE=zh-CN
# [optional] Skill + agent tool execution timeout in seconds (default 120, max 3600)
# OPENHUMAN_TOOL_TIMEOUT_SECS=
# [optional] Headless update restart contract: self_replace | supervisor
Expand Down
4 changes: 4 additions & 0 deletions src/openhuman/agent/harness/archivist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,10 @@ impl ArchivistHook {
let cfg = LlmSummariserConfig {
model: provider.name().to_string(),
structured_facet_extraction: false,
output_language: self
.config
.as_ref()
.and_then(|cfg| cfg.output_language.clone()),
};
let summariser = LlmSummariser::new(cfg, Arc::clone(provider));
tracing::debug!(
Expand Down
34 changes: 17 additions & 17 deletions src/openhuman/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ pub use ops::*;
#[allow(unused_imports)]
pub use schema::{
apply_runtime_proxy_to_builder, build_runtime_proxy_client,
build_runtime_proxy_client_with_timeouts, runtime_proxy_config, set_runtime_proxy_config,
AgentConfig, AuditConfig, AutocompleteConfig, AutonomyConfig, BrowserComputerUseConfig,
BrowserConfig, ChannelsConfig, ComposioConfig, Config, ContextConfig, CostConfig, CronConfig,
CurlConfig, DelegateAgentConfig, DictationActivationMode, DictationConfig, DiscordConfig,
DockerRuntimeConfig, EmbeddingRouteConfig, GitbooksConfig, HeartbeatConfig, HttpRequestConfig,
IMessageConfig, IntegrationToggle, IntegrationsConfig, LarkConfig, LearningConfig, LlmBackend,
LocalAiConfig, MatrixConfig, McpAuthConfig, McpClientConfig, McpClientIdentityConfig,
McpServerConfig, MeetConfig, MemoryConfig, MemoryTreeConfig, ModelRouteConfig,
MultimodalConfig, ObservabilityConfig, OrchestratorModelConfig, PolymarketClobCredentials,
PolymarketConfig, ProxyConfig, ProxyScope, ReflectionSource, ReliabilityConfig,
ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig,
SchedulerGateConfig, SchedulerGateMode, ScreenIntelligenceConfig, SearxngConfig, SecretsConfig,
SecurityConfig, SlackConfig, StorageConfig, StorageProviderConfig, StorageProviderSection,
StreamMode, TeamModelConfig, TelegramConfig, UpdateConfig, UpdateRestartStrategy,
VoiceActivationMode, VoiceServerConfig, WebSearchConfig, WebhookConfig,
DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CHAT_V1, MODEL_CODING_V1,
MODEL_REASONING_QUICK_V1, MODEL_REASONING_V1,
build_runtime_proxy_client_with_timeouts, output_language_directive, runtime_proxy_config,
set_runtime_proxy_config, AgentConfig, AuditConfig, AutocompleteConfig, AutonomyConfig,
BrowserComputerUseConfig, BrowserConfig, ChannelsConfig, ComposioConfig, Config, ContextConfig,
CostConfig, CronConfig, CurlConfig, DelegateAgentConfig, DictationActivationMode,
DictationConfig, DiscordConfig, DockerRuntimeConfig, EmbeddingRouteConfig, GitbooksConfig,
HeartbeatConfig, HttpRequestConfig, IMessageConfig, IntegrationToggle, IntegrationsConfig,
LarkConfig, LearningConfig, LlmBackend, LocalAiConfig, MatrixConfig, McpAuthConfig,
McpClientConfig, McpClientIdentityConfig, McpServerConfig, MeetConfig, MemoryConfig,
MemoryTreeConfig, ModelRouteConfig, MultimodalConfig, ObservabilityConfig,
OrchestratorModelConfig, PolymarketClobCredentials, PolymarketConfig, ProxyConfig, ProxyScope,
ReflectionSource, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend,
SandboxConfig, SchedulerConfig, SchedulerGateConfig, SchedulerGateMode,
ScreenIntelligenceConfig, SearxngConfig, SecretsConfig, SecurityConfig, SlackConfig,
StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode, TeamModelConfig,
TelegramConfig, UpdateConfig, UpdateRestartStrategy, VoiceActivationMode, VoiceServerConfig,
WebSearchConfig, WebhookConfig, DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1,
MODEL_CHAT_V1, MODEL_CODING_V1, MODEL_REASONING_QUICK_V1, MODEL_REASONING_V1,
};
pub use schema::{
clear_active_user, default_root_openhuman_dir, pre_login_user_dir, read_active_user_id,
Expand Down
7 changes: 7 additions & 0 deletions src/openhuman/config/schema/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,13 @@ impl Config {
}
}

if let Some(language) = env.get("OPENHUMAN_OUTPUT_LANGUAGE") {
let language = language.trim();
if !language.is_empty() {
self.output_language = Some(language.to_string());
}
}

if let Some(flag) = env.get_any(&["OPENHUMAN_REASONING_ENABLED", "REASONING_ENABLED"]) {
let normalized = flag.trim().to_ascii_lowercase();
match normalized.as_str() {
Expand Down
21 changes: 21 additions & 0 deletions src/openhuman/config/schema/load_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,27 @@ fn env_overlay_temperature_accepts_valid_and_ignores_out_of_range_or_garbage() {
assert_eq!(cfg.default_temperature, 2.0);
}

#[test]
fn env_overlay_output_language_accepts_non_empty_value() {
    // A default config carries no output language.
    let mut config = Config::default();
    assert!(config.output_language.is_none());

    // A non-empty env value is stored, and the derived directive names the
    // language rather than echoing the locale tag.
    config.apply_env_overlay_with(&HashMapEnv::new().with("OPENHUMAN_OUTPUT_LANGUAGE", "zh-CN"));
    assert_eq!(config.output_language.as_deref(), Some("zh-CN"));
    let directive = config.output_language_directive().unwrap_or_default();
    assert!(directive.contains("Simplified Chinese"));

    // A whitespace-only env value leaves the previously set language in place.
    config.apply_env_overlay_with(&HashMapEnv::new().with("OPENHUMAN_OUTPUT_LANGUAGE", " "));
    let after_blank = config.output_language.as_deref();
    assert_eq!(
        after_blank,
        Some("zh-CN"),
        "blank env value must not clear an explicit config value"
    );
}

#[test]
fn env_overlay_reasoning_enabled_recognises_truthy_falsy_and_ignores_garbage() {
let mut cfg = Config::default();
Expand Down
99 changes: 99 additions & 0 deletions src/openhuman/config/schema/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ pub struct Config {
#[serde(default = "default_temperature_value")]
pub default_temperature: f64,

/// Optional language for background LLM artifacts such as memory-tree
/// summaries, extraction reasons, and learning reflections. Accepts either
/// a known UI locale tag (for example `zh-CN`) or a human-readable language
/// name. `None` preserves the existing default-language behaviour.
#[serde(default)]
pub output_language: Option<String>,

/// Models (by exact ID match OR shell-style glob like `gpt-5*`, `o1-*`) that
/// MUST NOT receive a `temperature` parameter. Used for reasoning models
/// that error out when temperature is set (OpenAI o-series, GPT-5).
Expand Down Expand Up @@ -376,6 +383,64 @@ fn default_temperature_unsupported_models() -> Vec<String> {
]
}

/// Normalize a configured output language into a display name suitable for
/// prompt directives.
///
/// Resolution order:
/// 1. Exact match (ASCII case-insensitive, `_` treated as `-`) against the
///    known locale tags and language names.
/// 2. Region- or script-qualified locale tags whose two-letter primary
///    subtag is known, e.g. `de-DE` -> `German`, `zh-Hant-TW` ->
///    `Traditional Chinese`.
/// 3. Any other non-empty value is treated as a user-provided language name:
///    control characters are stripped and at most 80 characters are kept.
///
/// Returns `None` when the input is blank or nothing remains after cleaning.
pub fn normalize_output_language(language: &str) -> Option<String> {
    let trimmed = language.trim();
    if trimmed.is_empty() {
        return None;
    }

    let tag = trimmed.to_ascii_lowercase().replace('_', "-");
    if let Some(name) = known_language_name(&tag) {
        return Some(name.to_string());
    }
    // Without this step a tag such as `de-DE` would reach the prompt verbatim
    // ("write all natural-language output in de-DE").
    if let Some(name) = language_name_from_subtags(&tag) {
        return Some(name.to_string());
    }

    // Free-form language name: keep it, minus control characters (which
    // includes newlines) and capped in length.
    let cleaned: String = trimmed
        .chars()
        .filter(|c| !c.is_control())
        .take(80)
        .collect();
    let cleaned = cleaned.trim();
    if cleaned.is_empty() {
        None
    } else {
        Some(cleaned.to_string())
    }
}

/// Exact-match table from a lowercased, `-`-separated tag or language name to
/// its display name.
fn known_language_name(tag: &str) -> Option<&'static str> {
    match tag {
        "ar" | "arabic" => Some("Arabic"),
        "bn" | "bengali" | "bangla" => Some("Bengali"),
        "de" | "german" => Some("German"),
        "en" | "en-us" | "en-gb" | "english" => Some("English"),
        "es" | "spanish" => Some("Spanish"),
        "fr" | "french" => Some("French"),
        "hi" | "hindi" => Some("Hindi"),
        "id" | "indonesian" | "bahasa indonesia" => Some("Indonesian"),
        "it" | "italian" => Some("Italian"),
        "ja" | "japanese" => Some("Japanese"),
        "ko" | "korean" => Some("Korean"),
        "pt" | "pt-br" | "pt-pt" | "portuguese" => Some("Portuguese"),
        "ru" | "russian" => Some("Russian"),
        "th" | "thai" => Some("Thai"),
        "tr" | "turkish" => Some("Turkish"),
        "vi" | "vietnamese" => Some("Vietnamese"),
        "zh" | "zh-cn" | "zh-hans" | "chinese" | "simplified chinese" => Some("Simplified Chinese"),
        "zh-tw" | "zh-hant" | "traditional chinese" => Some("Traditional Chinese"),
        _ => None,
    }
}

/// Resolve a region/script-qualified locale tag (already lowercased and
/// `-`-separated) through its primary language subtag.
///
/// Only fires for locale-shaped input: a two-letter primary subtag followed by
/// one or more ASCII-alphanumeric subtags of 2 to 8 characters. Free-form
/// names such as `Hindi-Urdu` therefore fall through untouched.
fn language_name_from_subtags(tag: &str) -> Option<&'static str> {
    let mut parts = tag.split('-');
    let primary = parts.next()?;
    let rest: Vec<&str> = parts.collect();

    let primary_is_code = primary.len() == 2 && primary.bytes().all(|b| b.is_ascii_lowercase());
    let rest_is_locale_shaped = !rest.is_empty()
        && rest
            .iter()
            .all(|s| (2..=8).contains(&s.len()) && s.bytes().all(|b| b.is_ascii_alphanumeric()));
    if !primary_is_code || !rest_is_locale_shaped {
        return None;
    }

    if primary == "zh" {
        // An explicit script subtag wins; otherwise the region decides.
        if rest.iter().any(|s| *s == "hans") {
            return Some("Simplified Chinese");
        }
        let traditional = rest.iter().any(|s| matches!(*s, "hant" | "tw" | "hk" | "mo"));
        return Some(if traditional {
            "Traditional Chinese"
        } else {
            "Simplified Chinese"
        });
    }

    known_language_name(primary)
}

/// Build a shared instruction for non-chat background prompts. JSON keys and
/// enum values stay stable; only user-visible prose changes language.
pub fn output_language_directive(language: Option<&str>) -> Option<String> {
let language = normalize_output_language(language?)?;
Some(format!(
"Output language: write all natural-language output in {language}. \
Keep JSON keys, enum values, proper nouns, code, commands, and quoted source text unchanged."
))
}

impl Config {
/// Resolve the root directory where chunk `.md` files are stored.
///
Expand Down Expand Up @@ -434,6 +499,11 @@ impl Config {
self.workload_local_model(workload).is_some()
}

/// Output-language prompt directive derived from this config's
/// `output_language`, or `None` when no usable language is set.
pub fn output_language_directive(&self) -> Option<String> {
    let configured = self.output_language.as_deref();
    output_language_directive(configured)
}

/// Resolve an exact model pin for an agent, if configured.
///
/// Precedence is intentionally narrow and deterministic:
Expand Down Expand Up @@ -520,6 +590,7 @@ impl Default for Config {
inference_url: None,
default_model: Some(DEFAULT_MODEL.to_string()),
default_temperature: DEFAULT_TEMPERATURE,
output_language: None,
temperature_unsupported_models: default_temperature_unsupported_models(),
observability: ObservabilityConfig::default(),
autonomy: AutonomyConfig::default(),
Expand Down Expand Up @@ -588,6 +659,34 @@ impl Default for Config {
mod model_pin_tests {
use super::*;

#[test]
fn output_language_directive_maps_locales_and_preserves_json_keys() {
    // (configured tag, language name expected inside the directive)
    let cases = [
        ("zh-CN", "Simplified Chinese"),
        ("zh-TW", "Traditional Chinese"),
        ("zh_Hant", "Traditional Chinese"),
        ("ko", "Korean"),
        ("ja", "Japanese"),
        ("de", "German"),
        ("th", "Thai"),
        ("vi", "Vietnamese"),
        ("tr", "Turkish"),
    ];

    for &(tag, expected) in &cases {
        let directive = output_language_directive(Some(tag)).expect("directive");
        let names_language = directive.contains(expected);
        assert!(
            names_language,
            "{tag} should map to {expected}: {directive}"
        );
        // Every directive carries the "leave structure alone" clause.
        assert!(directive.contains("Keep JSON keys"));
    }
}

#[test]
fn output_language_directive_accepts_language_names() {
    // A name outside the locale table is passed through into the directive.
    let rendered = output_language_directive(Some("Kannada"));
    assert!(rendered.expect("directive").contains("Kannada"));
}

#[test]
fn config_parses_orchestrator_and_team_model_pins() {
let config: Config = toml::from_str(
Expand Down
5 changes: 5 additions & 0 deletions src/openhuman/learning/reflection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ impl ReflectionHook {
Keep each entry concise (one sentence). Return ONLY valid JSON, no markdown.\n\n",
);

if let Some(directive) = self.full_config.output_language_directive() {
prompt.push_str(&directive);
prompt.push_str("\n\n");
}

prompt.push_str(&format!(
"## User Message\n{}\n\n",
truncate(&ctx.user_message, 500)
Expand Down
13 changes: 13 additions & 0 deletions src/openhuman/learning/reflection_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,19 @@ fn build_reflection_prompt_includes_tool_calls_and_truncation() {
assert!(prompt.contains(&format!("{}...", "x".repeat(100))));
}

#[test]
fn build_reflection_prompt_includes_output_language_directive() {
    // Config with an explicit output language; everything else is default.
    let mut full_config = Config::default();
    full_config.output_language = Some(String::from("zh-CN"));

    let memory: Arc<dyn Memory> = Arc::new(MockMemory::default());
    let hook = ReflectionHook::new(reflection_config(), Arc::new(full_config), memory, None);
    let prompt = hook.build_reflection_prompt(&reflective_turn());

    // The directive is present and the JSON schema key is still in the prompt.
    for needle in ["Simplified Chinese", "Keep JSON keys", "\"observations\""] {
        assert!(prompt.contains(needle));
    }
}

#[test]
fn session_key_and_counter_management_work() {
let hook = ReflectionHook::new(
Expand Down
14 changes: 11 additions & 3 deletions src/openhuman/memory/tree/score/extract/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ pub struct LlmExtractorConfig {
/// content). Adds prompt tokens and gives the model one more
/// schema field to keep track of, so leave off unless needed.
pub emit_topics: bool,
/// Optional configured output language for natural-language values such
/// as `importance_reason` and topic labels. JSON field names and enum
/// values remain stable.
pub output_language: Option<String>,
}

impl Default for LlmExtractorConfig {
Expand All @@ -85,6 +89,7 @@ impl Default for LlmExtractorConfig {
],
strict_kinds: false,
emit_topics: false,
output_language: None,
}
}
}
Expand Down Expand Up @@ -114,7 +119,7 @@ impl LlmEntityExtractor {
/// Build the chat prompt sent to the provider for `text`.
fn build_prompt(&self, text: &str) -> ChatPrompt {
ChatPrompt {
system: build_system_prompt(self.cfg.emit_topics),
system: build_system_prompt(self.cfg.emit_topics, self.cfg.output_language.as_deref()),
user: format!("Text:\n{text}\n\nReturn JSON only."),
temperature: 0.0,
kind: "memory_tree::extract",
Expand Down Expand Up @@ -230,7 +235,7 @@ impl LlmEntityExtractor {
/// matches the pre-flag behaviour exactly — no mention of topics
/// anywhere — so the small model isn't asked to produce a field the
/// caller doesn't want.
fn build_system_prompt(emit_topics: bool) -> String {
fn build_system_prompt(emit_topics: bool, output_language: Option<&str>) -> String {
let topics_schema_line = if emit_topics {
" \"topics\": [\"<short theme label>\"],\n"
} else {
Expand All @@ -256,9 +261,12 @@ fn build_system_prompt(emit_topics: bool) -> String {
} else {
""
};
let language_directive = crate::openhuman::config::output_language_directive(output_language)
.map(|directive| format!("{directive}\n\n"))
.unwrap_or_default();

format!(
"You are a named-entity extractor and importance rater. Return JSON only — \
"{language_directive}You are a named-entity extractor and importance rater. Return JSON only — \
no prose, no markdown, no commentary. Do not summarize. Extract every named \
entity mention you find, including duplicates, and rate the chunk's overall \
importance as a float in [0.0, 1.0].
Expand Down
12 changes: 10 additions & 2 deletions src/openhuman/memory/tree/score/extract/llm_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::*;

#[test]
fn build_system_prompt_default_omits_topics() {
let p = build_system_prompt(false);
let p = build_system_prompt(false, None);
assert!(!p.contains("\"topics\""));
assert!(!p.contains("Topics are"));
assert!(p.contains("ALL three top-level fields"));
Expand All @@ -11,13 +11,21 @@ fn build_system_prompt_default_omits_topics() {

#[test]
fn build_system_prompt_with_flag_includes_topics() {
let p = build_system_prompt(true);
let p = build_system_prompt(true, None);
assert!(p.contains("\"topics\""));
assert!(p.contains("Topics are short free-form theme labels"));
assert!(p.contains("ALL four top-level fields"));
assert!(p.contains("entities, topics, importance"));
}

#[test]
fn build_system_prompt_includes_output_language_directive() {
    let prompt = build_system_prompt(true, Some("zh-CN"));

    // The directive is present and the schema field name is still in the prompt.
    for needle in ["Simplified Chinese", "Keep JSON keys", "\"importance_reason\""] {
        assert!(prompt.contains(needle));
    }
}

#[test]
fn extraction_output_parses_topics_when_present() {
let json = r#"{"entities":[],"topics":["rate limiting","memory tree"],"importance":0.6,"importance_reason":"r"}"#;
Expand Down
1 change: 1 addition & 0 deletions src/openhuman/memory/tree/score/extract/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub fn build_summary_extractor(config: &Config) -> Arc<dyn EntityExtractor> {
let cfg = LlmExtractorConfig {
model: model.clone(),
emit_topics: true,
output_language: config.output_language.clone(),
..LlmExtractorConfig::default()
};

Expand Down
1 change: 1 addition & 0 deletions src/openhuman/memory/tree/score/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ impl ScoringConfig {

let cfg = extract::LlmExtractorConfig {
model: model.clone(),
output_language: config.output_language.clone(),
..extract::LlmExtractorConfig::default()
};

Expand Down
Loading
Loading