tinyhumansai · sunilkumarvalmiki · May 21, 2026
@@ -72,6 +72,10 @@ OPENHUMAN_MODEL=
 OPENHUMAN_WORKSPACE=
 # [optional] Default: 0.7
 OPENHUMAN_TEMPERATURE=0.7
+# [optional] Language for background LLM artifacts such as memory-tree summaries,
+# entity-extraction reasons, and learning reflections. Accepts a UI locale tag
+# (for example zh-CN) or a language name (for example Kannada). Blank values are
+# ignored; leave unset to keep the existing default-language behavior.
+# OPENHUMAN_OUTPUT_LANGUAGE=zh-CN
 # [optional] Skill + agent tool execution timeout in seconds (default 120, max 3600)
 # OPENHUMAN_TOOL_TIMEOUT_SECS=
 # [optional] Headless update restart contract: self_replace | supervisor

@@ -682,6 +682,10 @@ impl ArchivistHook {
             let cfg = LlmSummariserConfig {
                 model: provider.name().to_string(),
                 structured_facet_extraction: false,
+                output_language: self
+                    .config
+                    .as_ref()
+                    .and_then(|cfg| cfg.output_language.clone()),
             };
             let summariser = LlmSummariser::new(cfg, Arc::clone(provider));
             tracing::debug!(

@@ -23,23 +23,23 @@ pub use ops::*;
 #[allow(unused_imports)]
 pub use schema::{
     apply_runtime_proxy_to_builder, build_runtime_proxy_client,
-    build_runtime_proxy_client_with_timeouts, runtime_proxy_config, set_runtime_proxy_config,
-    AgentConfig, AuditConfig, AutocompleteConfig, AutonomyConfig, BrowserComputerUseConfig,
-    BrowserConfig, ChannelsConfig, ComposioConfig, Config, ContextConfig, CostConfig, CronConfig,
-    CurlConfig, DelegateAgentConfig, DictationActivationMode, DictationConfig, DiscordConfig,
-    DockerRuntimeConfig, EmbeddingRouteConfig, GitbooksConfig, HeartbeatConfig, HttpRequestConfig,
-    IMessageConfig, IntegrationToggle, IntegrationsConfig, LarkConfig, LearningConfig, LlmBackend,
-    LocalAiConfig, MatrixConfig, McpAuthConfig, McpClientConfig, McpClientIdentityConfig,
-    McpServerConfig, MeetConfig, MemoryConfig, MemoryTreeConfig, ModelRouteConfig,
-    MultimodalConfig, ObservabilityConfig, OrchestratorModelConfig, PolymarketClobCredentials,
-    PolymarketConfig, ProxyConfig, ProxyScope, ReflectionSource, ReliabilityConfig,
-    ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig,
-    SchedulerGateConfig, SchedulerGateMode, ScreenIntelligenceConfig, SearxngConfig, SecretsConfig,
-    SecurityConfig, SlackConfig, StorageConfig, StorageProviderConfig, StorageProviderSection,
-    StreamMode, TeamModelConfig, TelegramConfig, UpdateConfig, UpdateRestartStrategy,
-    VoiceActivationMode, VoiceServerConfig, WebSearchConfig, WebhookConfig,
-    DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1, MODEL_CHAT_V1, MODEL_CODING_V1,
-    MODEL_REASONING_QUICK_V1, MODEL_REASONING_V1,
+    build_runtime_proxy_client_with_timeouts, output_language_directive, runtime_proxy_config,
+    set_runtime_proxy_config, AgentConfig, AuditConfig, AutocompleteConfig, AutonomyConfig,
+    BrowserComputerUseConfig, BrowserConfig, ChannelsConfig, ComposioConfig, Config, ContextConfig,
+    CostConfig, CronConfig, CurlConfig, DelegateAgentConfig, DictationActivationMode,
+    DictationConfig, DiscordConfig, DockerRuntimeConfig, EmbeddingRouteConfig, GitbooksConfig,
+    HeartbeatConfig, HttpRequestConfig, IMessageConfig, IntegrationToggle, IntegrationsConfig,
+    LarkConfig, LearningConfig, LlmBackend, LocalAiConfig, MatrixConfig, McpAuthConfig,
+    McpClientConfig, McpClientIdentityConfig, McpServerConfig, MeetConfig, MemoryConfig,
+    MemoryTreeConfig, ModelRouteConfig, MultimodalConfig, ObservabilityConfig,
+    OrchestratorModelConfig, PolymarketClobCredentials, PolymarketConfig, ProxyConfig, ProxyScope,
+    ReflectionSource, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend,
+    SandboxConfig, SchedulerConfig, SchedulerGateConfig, SchedulerGateMode,
+    ScreenIntelligenceConfig, SearxngConfig, SecretsConfig, SecurityConfig, SlackConfig,
+    StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode, TeamModelConfig,
+    TelegramConfig, UpdateConfig, UpdateRestartStrategy, VoiceActivationMode, VoiceServerConfig,
+    WebSearchConfig, WebhookConfig, DEFAULT_CLOUD_LLM_MODEL, DEFAULT_MODEL, MODEL_AGENTIC_V1,
+    MODEL_CHAT_V1, MODEL_CODING_V1, MODEL_REASONING_QUICK_V1, MODEL_REASONING_V1,
 };
 pub use schema::{
     clear_active_user, default_root_openhuman_dir, pre_login_user_dir, read_active_user_id,

@@ -1328,6 +1328,13 @@ impl Config {
             }
         }
 
+        if let Some(language) = env.get("OPENHUMAN_OUTPUT_LANGUAGE") {
+            let language = language.trim();
+            if !language.is_empty() {
+                self.output_language = Some(language.to_string());
+            }
+        }
+
         if let Some(flag) = env.get_any(&["OPENHUMAN_REASONING_ENABLED", "REASONING_ENABLED"]) {
             let normalized = flag.trim().to_ascii_lowercase();
             match normalized.as_str() {

@@ -546,6 +546,27 @@ fn env_overlay_temperature_accepts_valid_and_ignores_out_of_range_or_garbage() {
     assert_eq!(cfg.default_temperature, 2.0);
 }
 
+/// `OPENHUMAN_OUTPUT_LANGUAGE` populates `output_language` when it carries a
+/// non-blank value, and a whitespace-only value leaves the current setting alone.
+#[test]
+fn env_overlay_output_language_accepts_non_empty_value() {
+    let mut config = Config::default();
+    assert!(config.output_language.is_none());
+
+    // A real locale tag is stored verbatim and resolves to its display name.
+    config.apply_env_overlay_with(&HashMapEnv::new().with("OPENHUMAN_OUTPUT_LANGUAGE", "zh-CN"));
+    assert_eq!(config.output_language.as_deref(), Some("zh-CN"));
+    let directive = config.output_language_directive().unwrap_or_default();
+    assert!(directive.contains("Simplified Chinese"));
+
+    // A whitespace-only override must be a no-op.
+    config.apply_env_overlay_with(&HashMapEnv::new().with("OPENHUMAN_OUTPUT_LANGUAGE", "   "));
+    let after_blank = config.output_language.as_deref();
+    assert_eq!(
+        after_blank,
+        Some("zh-CN"),
+        "blank env value must not clear an explicit config value"
+    );
+}
+
 #[test]
 fn env_overlay_reasoning_enabled_recognises_truthy_falsy_and_ignores_garbage() {
     let mut cfg = Config::default();

@@ -61,6 +61,13 @@ pub struct Config {
     #[serde(default = "default_temperature_value")]
     pub default_temperature: f64,
 
+    /// Optional language for background LLM artifacts such as memory-tree
+    /// summaries, extraction reasons, and learning reflections. Accepts either
+    /// a known UI locale tag (for example `zh-CN`) or a human-readable language
+    /// name. `None` preserves the existing default-language behaviour.
+    #[serde(default)]
+    pub output_language: Option<String>,
+
     /// Models (by exact ID match OR shell-style glob like `gpt-5*`, `o1-*`) that
     /// MUST NOT receive a `temperature` parameter. Used for reasoning models
     /// that error out when temperature is set (OpenAI o-series, GPT-5).
@@ -376,6 +383,64 @@ fn default_temperature_unsupported_models() -> Vec<String> {
     ]
 }
 
+/// Normalize a configured output language into a display name suitable for
+/// prompt directives.
+///
+/// Matching is ASCII case-insensitive and treats `_` like `-`, so `zh_Hant`
+/// and `ZH-HANT` are equivalent. A region- or script-qualified locale tag
+/// whose exact form is not listed (for example `de-DE` or `zh-Hant-TW`) falls
+/// back to its longest known prefix. Any other non-empty value is treated as
+/// a user-provided language name: control characters are stripped and the
+/// result is capped at 80 characters.
+///
+/// Returns `None` when the input is blank or nothing printable remains.
+pub fn normalize_output_language(language: &str) -> Option<String> {
+    let trimmed = language.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    let tag = trimmed.to_ascii_lowercase().replace('_', "-");
+    let mapped = known_language_name(&tag).or_else(|| locale_prefix_language_name(&tag));
+    if let Some(name) = mapped {
+        return Some(name.to_string());
+    }
+
+    // Unknown value: pass it through as a free-form language name, but never
+    // let control characters or an unbounded string reach the prompt.
+    let cleaned: String = trimmed
+        .chars()
+        .filter(|c| !c.is_control())
+        .take(80)
+        .collect();
+    let cleaned = cleaned.trim();
+    (!cleaned.is_empty()).then(|| cleaned.to_string())
+}
+
+/// Exact lookup of a lowercased, hyphen-separated locale tag or language name.
+fn known_language_name(tag: &str) -> Option<&'static str> {
+    let name = match tag {
+        "ar" | "arabic" => "Arabic",
+        "bn" | "bengali" | "bangla" => "Bengali",
+        "de" | "german" => "German",
+        "en" | "en-us" | "en-gb" | "english" => "English",
+        "es" | "spanish" => "Spanish",
+        "fr" | "french" => "French",
+        "hi" | "hindi" => "Hindi",
+        "id" | "indonesian" | "bahasa indonesia" => "Indonesian",
+        "it" | "italian" => "Italian",
+        "ja" | "japanese" => "Japanese",
+        "ko" | "korean" => "Korean",
+        "pt" | "pt-br" | "pt-pt" | "portuguese" => "Portuguese",
+        "ru" | "russian" => "Russian",
+        "th" | "thai" => "Thai",
+        "tr" | "turkish" => "Turkish",
+        "vi" | "vietnamese" => "Vietnamese",
+        "zh" | "zh-cn" | "zh-hans" | "chinese" | "simplified chinese" => "Simplified Chinese",
+        // Hong Kong and Macau are listed explicitly: falling back to bare
+        // `zh` would wrongly select Simplified Chinese for them.
+        "zh-tw" | "zh-hk" | "zh-mo" | "zh-hant" | "traditional chinese" => "Traditional Chinese",
+        _ => return None,
+    };
+    Some(name)
+}
+
+/// Resolve a qualified locale tag such as `de-de` or `zh-hant-tw` by trying
+/// progressively shorter prefixes against the known-language table.
+///
+/// Only applies when `tag` is shaped like a locale tag (a two-letter primary
+/// subtag followed by 1-8 character alphanumeric subtags), so hyphenated
+/// free-form names such as `hindi-urdu` are never rewritten.
+fn locale_prefix_language_name(tag: &str) -> Option<&'static str> {
+    let mut subtags = tag.split('-');
+    let primary = subtags.next()?;
+    let primary_is_code = primary.len() == 2 && primary.bytes().all(|b| b.is_ascii_lowercase());
+    let rest_are_subtags = subtags
+        .all(|s| (1..=8).contains(&s.len()) && s.bytes().all(|b| b.is_ascii_alphanumeric()));
+    if !(primary_is_code && rest_are_subtags) {
+        return None;
+    }
+
+    // Drop one trailing subtag at a time so `zh-hant-tw` hits `zh-hant`
+    // before it could ever reach the less specific `zh`.
+    let mut prefix = tag;
+    while let Some((head, _)) = prefix.rsplit_once('-') {
+        if let Some(name) = known_language_name(head) {
+            return Some(name);
+        }
+        prefix = head;
+    }
+    None
+}
+
+/// Produce the shared language instruction appended to non-chat background
+/// prompts, or `None` when no usable language is supplied. The directive asks
+/// for prose in the normalized language while leaving JSON keys, enum values,
+/// and other machine-read text untouched.
+pub fn output_language_directive(language: Option<&str>) -> Option<String> {
+    language
+        .and_then(normalize_output_language)
+        .map(|display_name| {
+            format!(
+                "Output language: write all natural-language output in {display_name}. \
+                 Keep JSON keys, enum values, proper nouns, code, commands, and quoted source text unchanged."
+            )
+        })
+}
+
 impl Config {
     /// Resolve the root directory where chunk `.md` files are stored.
     ///
@@ -434,6 +499,11 @@ impl Config {
         self.workload_local_model(workload).is_some()
     }
 
+    /// Language directive for background LLM prompts, derived from
+    /// `output_language`; `None` when no usable language is configured.
+    pub fn output_language_directive(&self) -> Option<String> {
+        let configured = self.output_language.as_deref();
+        output_language_directive(configured)
+    }
+
     /// Resolve an exact model pin for an agent, if configured.
     ///
     /// Precedence is intentionally narrow and deterministic:
@@ -520,6 +590,7 @@ impl Default for Config {
             inference_url: None,
             default_model: Some(DEFAULT_MODEL.to_string()),
             default_temperature: DEFAULT_TEMPERATURE,
+            output_language: None,
             temperature_unsupported_models: default_temperature_unsupported_models(),
             observability: ObservabilityConfig::default(),
             autonomy: AutonomyConfig::default(),
@@ -588,6 +659,34 @@ impl Default for Config {
 mod model_pin_tests {
     use super::*;
 
+    /// Each known locale tag resolves to its display name, and every directive
+    /// carries the instruction to leave JSON keys untouched.
+    #[test]
+    fn output_language_directive_maps_locales_and_preserves_json_keys() {
+        let cases = [
+            ("zh-CN", "Simplified Chinese"),
+            ("zh-TW", "Traditional Chinese"),
+            ("zh_Hant", "Traditional Chinese"),
+            ("ko", "Korean"),
+            ("ja", "Japanese"),
+            ("de", "German"),
+            ("th", "Thai"),
+            ("vi", "Vietnamese"),
+            ("tr", "Turkish"),
+        ];
+
+        for (tag, expected) in cases {
+            let directive = output_language_directive(Some(tag)).expect("directive");
+            let names_language = directive.contains(expected);
+            assert!(
+                names_language,
+                "{tag} should map to {expected}: {directive}"
+            );
+            assert!(directive.contains("Keep JSON keys"));
+        }
+    }
+
+    /// A language name outside the locale table is passed through to the
+    /// directive as written.
+    #[test]
+    fn output_language_directive_accepts_language_names() {
+        let free_form_name = Some("Kannada");
+        let directive = output_language_directive(free_form_name).expect("directive");
+        assert!(directive.contains("Kannada"));
+    }
+
     #[test]
     fn config_parses_orchestrator_and_team_model_pins() {
         let config: Config = toml::from_str(

@@ -122,6 +122,11 @@ impl ReflectionHook {
              Keep each entry concise (one sentence). Return ONLY valid JSON, no markdown.\n\n",
         );
 
+        if let Some(directive) = self.full_config.output_language_directive() {
+            prompt.push_str(&directive);
+            prompt.push_str("\n\n");
+        }
+
         prompt.push_str(&format!(
             "## User Message\n{}\n\n",
             truncate(&ctx.user_message, 500)

@@ -202,6 +202,19 @@ fn build_reflection_prompt_includes_tool_calls_and_truncation() {
     assert!(prompt.contains(&format!("{}...", "x".repeat(100))));
 }
 
+/// With `output_language` configured, the reflection prompt carries the
+/// language directive and still contains the `"observations"` JSON key.
+#[test]
+fn build_reflection_prompt_includes_output_language_directive() {
+    let mut config = Config::default();
+    config.output_language = Some("zh-CN".into());
+    let memory: Arc<dyn Memory> = Arc::new(MockMemory::default());
+    let hook = ReflectionHook::new(reflection_config(), Arc::new(config), memory, None);
+
+    let prompt = hook.build_reflection_prompt(&reflective_turn());
+
+    for needle in ["Simplified Chinese", "Keep JSON keys", "\"observations\""] {
+        assert!(prompt.contains(needle));
+    }
+}
+
 #[test]
 fn session_key_and_counter_management_work() {
     let hook = ReflectionHook::new(

@@ -66,6 +66,10 @@ pub struct LlmExtractorConfig {
     /// content). Adds prompt tokens and gives the model one more
     /// schema field to keep track of, so leave off unless needed.
     pub emit_topics: bool,
+    /// Optional configured output language for natural-language values such
+    /// as `importance_reason` and topic labels. JSON field names and enum
+    /// values remain stable.
+    pub output_language: Option<String>,
 }
 
 impl Default for LlmExtractorConfig {
@@ -85,6 +89,7 @@ impl Default for LlmExtractorConfig {
             ],
             strict_kinds: false,
             emit_topics: false,
+            output_language: None,
         }
     }
 }
@@ -114,7 +119,7 @@ impl LlmEntityExtractor {
     /// Build the chat prompt sent to the provider for `text`.
     fn build_prompt(&self, text: &str) -> ChatPrompt {
         ChatPrompt {
-            system: build_system_prompt(self.cfg.emit_topics),
+            system: build_system_prompt(self.cfg.emit_topics, self.cfg.output_language.as_deref()),
             user: format!("Text:\n{text}\n\nReturn JSON only."),
             temperature: 0.0,
             kind: "memory_tree::extract",
@@ -230,7 +235,7 @@ impl LlmEntityExtractor {
 /// matches the pre-flag behaviour exactly — no mention of topics
 /// anywhere — so the small model isn't asked to produce a field the
 /// caller doesn't want.
-fn build_system_prompt(emit_topics: bool) -> String {
+fn build_system_prompt(emit_topics: bool, output_language: Option<&str>) -> String {
     let topics_schema_line = if emit_topics {
         "  \"topics\": [\"<short theme label>\"],\n"
     } else {
@@ -256,9 +261,12 @@ fn build_system_prompt(emit_topics: bool) -> String {
     } else {
         ""
     };
+    let language_directive = crate::openhuman::config::output_language_directive(output_language)
+        .map(|directive| format!("{directive}\n\n"))
+        .unwrap_or_default();
 
     format!(
-        "You are a named-entity extractor and importance rater. Return JSON only — \
+        "{language_directive}You are a named-entity extractor and importance rater. Return JSON only — \
 no prose, no markdown, no commentary. Do not summarize. Extract every named \
 entity mention you find, including duplicates, and rate the chunk's overall \
 importance as a float in [0.0, 1.0].

@@ -2,7 +2,7 @@ use super::*;
 
 #[test]
 fn build_system_prompt_default_omits_topics() {
-    let p = build_system_prompt(false);
+    let p = build_system_prompt(false, None);
     assert!(!p.contains("\"topics\""));
     assert!(!p.contains("Topics are"));
     assert!(p.contains("ALL three top-level fields"));
@@ -11,13 +11,21 @@ fn build_system_prompt_default_omits_topics() {
 
 #[test]
 fn build_system_prompt_with_flag_includes_topics() {
-    let p = build_system_prompt(true);
+    let p = build_system_prompt(true, None);
     assert!(p.contains("\"topics\""));
     assert!(p.contains("Topics are short free-form theme labels"));
     assert!(p.contains("ALL four top-level fields"));
     assert!(p.contains("entities, topics, importance"));
 }
 
+/// Supplying an output language adds the directive to the extractor system
+/// prompt, which still contains the `"importance_reason"` key.
+#[test]
+fn build_system_prompt_includes_output_language_directive() {
+    let prompt = build_system_prompt(true, Some("zh-CN"));
+
+    for needle in ["Simplified Chinese", "Keep JSON keys", "\"importance_reason\""] {
+        assert!(prompt.contains(needle));
+    }
+}
+
 #[test]
 fn extraction_output_parses_topics_when_present() {
     let json = r#"{"entities":[],"topics":["rate limiting","memory tree"],"importance":0.6,"importance_reason":"r"}"#;

@@ -47,6 +47,7 @@ pub fn build_summary_extractor(config: &Config) -> Arc<dyn EntityExtractor> {
     let cfg = LlmExtractorConfig {
         model: model.clone(),
         emit_topics: true,
+        output_language: config.output_language.clone(),
         ..LlmExtractorConfig::default()
     };
 

@@ -135,6 +135,7 @@ impl ScoringConfig {
 
         let cfg = extract::LlmExtractorConfig {
             model: model.clone(),
+            output_language: config.output_language.clone(),
             ..extract::LlmExtractorConfig::default()
         };