Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""persona multilingual_query_expansion flag

Revision ID: a3f1d7c4e9b2
Revises: c8a4e2f9d1b3
Create Date: 2026-05-04 12:00:00.000000

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# Plain assignments (no `: None` annotations): annotating these as type
# `None` is misleading — Alembic's own template leaves them unannotated
# (or uses `str | None`), and `revision`/`down_revision` above carry no
# annotation either. Values are unchanged.
revision = "a3f1d7c4e9b2"
down_revision = "c8a4e2f9d1b3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the boolean `persona.multilingual_query_expansion` column.

    NOT NULL with a server-side default of false, so existing persona
    rows are backfilled to "off" during the migration.
    """
    flag_column = sa.Column(
        "multilingual_query_expansion",
        sa.Boolean(),
        nullable=False,
        server_default=sa.text("false"),
    )
    op.add_column("persona", flag_column)


def downgrade() -> None:
    """Revert upgrade(): drop the flag column from `persona`."""
    op.drop_column("persona", "multilingual_query_expansion")
151 changes: 151 additions & 0 deletions backend/danswer/chat/multilingual_translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Helpers for the per-persona multi-language post-processing pass.

When a persona has `multilingual_query_expansion=True` and the user's
query is non-English, the answering LLM still produces English most of
the time (it tends to mirror the English context corpus regardless of
the LANGUAGE_HINT directive). We compensate by post-translating the
English answer back into the user's original language.

Trade-off: in translate mode we buffer the streamed answer instead of
showing it token-by-token. The user sees a brief delay (one extra LLM
round-trip), but reliably gets a reply in their language. English
queries are unaffected — they keep streaming normally.
"""
from __future__ import annotations

import unicodedata

from danswer.llm.interfaces import LLM
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
from danswer.llm.utils import message_to_string
from danswer.utils.logger import setup_logger

logger = setup_logger()


# Display name passed to the translation prompt. Keys are the language
# codes detect_query_language returns. Anything not in this map is
# treated as English (no translation needed).
_LANGUAGE_NAMES: dict[str, str] = {
"ja": "Japanese",
"zh": "Chinese (Simplified)",
"ko": "Korean",
}


def detect_query_language(text: str) -> str:
    """Classify `text` by Unicode script into 'ja', 'ko', 'zh', or 'en'.

    Deliberately cheap: one pass over the code points, no external
    detector. Kana implies Japanese, Hangul syllables imply Korean, and
    CJK ideographs (with neither kana nor hangul present) imply Chinese.
    Any non-English script reaching roughly 5% of the letter characters
    wins; everything else — including empty or letter-free input — is
    reported as 'en', meaning "no translation needed".
    """
    if not text:
        return "en"

    kana = hangul = ideographs = 0
    letter_total = 0
    for char in text:
        code_point = ord(char)
        # U+3040–U+309F (hiragana) and U+30A0–U+30FF (katakana) are
        # contiguous, so a single range test covers both.
        if 0x3040 <= code_point <= 0x30FF:
            kana += 1
            letter_total += 1
        elif 0xAC00 <= code_point <= 0xD7AF:  # precomposed Hangul syllables
            hangul += 1
            letter_total += 1
        elif 0x4E00 <= code_point <= 0x9FFF or 0x3400 <= code_point <= 0x4DBF:
            # Unified CJK ideographs + Extension A.
            ideographs += 1
            letter_total += 1
        elif unicodedata.category(char).startswith("L"):
            # Any other letter (Latin etc.) only widens the denominator.
            letter_total += 1

    if letter_total == 0:
        return "en"

    threshold = max(1, letter_total // 20)  # ~5% of all letters
    if kana >= threshold:
        return "ja"
    if hangul >= threshold:
        return "ko"
    if ideographs >= threshold:
        return "zh"
    return "en"


def language_name(code: str) -> str | None:
    """Display name for a supported language code, or None if unsupported."""
    return _LANGUAGE_NAMES[code] if code in _LANGUAGE_NAMES else None


# The prompt is intentionally directive about preserving citations and
# not adding commentary. Citations are bracketed numerals like [1] /
# [[1]](url); URLs and code blocks should also pass through unchanged.
_TRANSLATE_PROMPT = """\
You are a precise translator.

Translate the text below into {target_language}.

CRITICAL RULES — follow exactly:
- Preserve every citation marker exactly as-is. Citation markers look
like [1], [2], [[1]](https://example.com), etc. Do not translate
them, do not change the brackets, do not change the numbers.
- Preserve every URL exactly.
- Preserve every code block (text between triple backticks) exactly.
- Preserve every inline code span (text between single backticks).
- Do not add any commentary, preface, or trailing notes — output only
the translated text.
- Keep numbers, proper nouns, and product names in their original
form unless the target language has a well-established equivalent.

TEXT TO TRANSLATE:
{text}
"""


def translate_answer_to_language(
    answer_text: str,
    target_language_code: str,
    llm: LLM,
) -> str:
    """Render `answer_text` in the language named by
    `target_language_code` (a key of _LANGUAGE_NAMES) via one extra
    LLM round-trip.

    Degrades gracefully: the untouched English text is returned when
    the code is unsupported, the answer is blank, the LLM call raises,
    or the model comes back empty — an English answer beats no answer.
    """
    target_name = _LANGUAGE_NAMES.get(target_language_code)
    # Guard clauses: unsupported language (caller should have skipped,
    # but stay defensive) or nothing worth translating.
    if target_name is None or not answer_text.strip():
        return answer_text

    request_content = _TRANSLATE_PROMPT.format(
        target_language=target_name, text=answer_text
    )
    request_messages = [{"role": "user", "content": request_content}]

    try:
        langchain_messages = dict_based_prompt_to_langchain_prompt(
            request_messages
        )
        raw_translation = message_to_string(llm.invoke(langchain_messages))
    except Exception:
        logger.exception(
            "Failed to translate answer to %s; falling back to English",
            target_name,
        )
        return answer_text

    cleaned = raw_translation.strip()
    if cleaned:
        return cleaned
    logger.warning(
        "Translation to %s came back empty; falling back to English",
        target_name,
    )
    return answer_text
76 changes: 71 additions & 5 deletions backend/danswer/chat/process_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from danswer.chat.models import LLMRelevanceFilterResponse
from danswer.chat.models import QADocsResponse
from danswer.chat.models import StreamingError
from danswer.chat.multilingual_translation import detect_query_language
from danswer.chat.multilingual_translation import language_name
from danswer.chat.multilingual_translation import translate_answer_to_language
from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
Expand Down Expand Up @@ -92,11 +95,11 @@ def translate_citations(
for db_doc in db_docs:
if db_doc.document_id not in doc_id_to_saved_doc_id_map:
doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id
#print(f'found doc id: {db_doc.id}')
# print(f'found doc id: {db_doc.id}')

citation_to_saved_doc_id_map: dict[int, int] = {}
for citation in citations_list:
#print(f'citation id {citation.document_id} for doc num {citation.citation_num}')
# print(f'citation id {citation.document_id} for doc num {citation.citation_num}')
if citation.citation_num not in citation_to_saved_doc_id_map:
citation_to_saved_doc_id_map[
citation.citation_num
Expand Down Expand Up @@ -404,15 +407,25 @@ def stream_chat_message_objects(
if not final_msg.prompt:
raise RuntimeError("No Prompt found")

# Persona may be None for legacy flows; treat the flag as off in
# that case. When persona exists, thread its flag through so the
# answer-side prompt builders add the LANGUAGE_HINT.
persona_multilingual = (
persona.multilingual_query_expansion if persona is not None else False
)
prompt_config = (
PromptConfig.from_model(
final_msg.prompt,
prompt_override=(
new_msg_req.prompt_override or chat_session.prompt_override
),
multilingual_query_expansion=persona_multilingual,
)
if not persona
else PromptConfig.from_model(persona.prompts[0])
else PromptConfig.from_model(
persona.prompts[0],
multilingual_query_expansion=persona_multilingual,
)
)

# find out what tools to use
Expand Down Expand Up @@ -539,6 +552,22 @@ def stream_chat_message_objects(
ai_message_files = None # any files to associate with the AI message e.g. dall-e generated images
dropped_indices = None
tool_result = None

# Multi-language post-processing pass (option C in the design):
# when the persona has multilingual_query_expansion=True and the
# user's question is in a non-English language, the LLM tends
# to answer in English regardless of the LANGUAGE_HINT
# directive. We compensate by buffering DanswerAnswerPiece
# tokens during the stream and emitting a single translated
# piece at the end. Other packet types (citations, tool
# responses, image generation, etc.) still flow in real time.
translate_target = None
if persona_multilingual:
detected = detect_query_language(message_text)
if language_name(detected) is not None:
translate_target = detected
buffered_answer_pieces: list[str] = []

for packet in answer.processed_streamed_output:
if isinstance(packet, ToolResponse):
if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
Expand Down Expand Up @@ -594,8 +623,35 @@ def stream_chat_message_objects(
else:
if isinstance(packet, ToolCallFinalResult):
tool_result = packet
if (
translate_target is not None
and isinstance(packet, DanswerAnswerPiece)
and packet.answer_piece
):
# Hold answer tokens back; we'll translate the full
# answer at the end of the stream.
buffered_answer_pieces.append(packet.answer_piece)
continue
yield cast(ChatPacket, packet)

# End of stream. If we buffered for translation, do the second
# LLM pass now and emit the translated answer as one piece.
# `answer.llm_answer` reads from the same processed stream, so
# it already contains the full English text — we use that as
# the source of truth (more reliable than reassembling from
# buffered pieces, which may have None entries from end-of-
# stream sentinels).
translated_answer_text: str | None = None
if translate_target is not None:
english_answer = answer.llm_answer
translated_answer_text = translate_answer_to_language(
answer_text=english_answer,
target_language_code=translate_target,
llm=llm,
)
yield DanswerAnswerPiece(answer_piece=translated_answer_text)
yield DanswerAnswerPiece(answer_piece=None)

except Exception as e:
logger.exception("Failed to process chat message")

Expand Down Expand Up @@ -627,14 +683,24 @@ def stream_chat_message_objects(
for tool in tool_list:
tool_name_to_tool_id[tool.name()] = tool_id

# If we translated, persist the user-facing translated text
# rather than the English intermediate. Citations are computed
# from the LLM's English output (where the [1]/[2] markers
# were emitted relative to retrieved docs); the translation
# prompt preserves those markers verbatim.
final_answer_text = (
translated_answer_text
if translated_answer_text is not None
else answer.llm_answer
)
gen_ai_response_message = partial_response(
message=answer.llm_answer,
message=final_answer_text,
rephrased_query=(
qa_docs_response.rephrased_query if qa_docs_response else None
),
reference_docs=reference_db_search_docs,
files=ai_message_files,
token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
token_count=len(llm_tokenizer_encode_func(final_answer_text)),
citations=db_citations,
error=None,
tool_calls=[
Expand Down
9 changes: 9 additions & 0 deletions backend/danswer/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,15 @@ class Persona(Base):
# Enables using LLM to extract time and source type filters
# Can also be admin disabled globally
llm_filter_extraction: Mapped[bool] = mapped_column(Boolean)
# When true, non-English queries on this persona are translated to
# English before retrieval and the LLM is instructed to answer in
# the user's original language. Off by default since most traffic
# is English and turning it on incurs an extra LLM call per query.
# Behaves as an override of the global MULTILINGUAL_QUERY_EXPANSION
# env var: persona flag wins; if false, falls back to env var.
multilingual_query_expansion: Mapped[bool] = mapped_column(
Boolean, nullable=False, default=False, server_default="false"
)
recency_bias: Mapped[RecencyBiasSetting] = mapped_column(
Enum(RecencyBiasSetting, native_enum=False)
)
Expand Down
4 changes: 4 additions & 0 deletions backend/danswer/db/persona.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def create_update_persona(
llm_model_version_override=create_persona_request.llm_model_version_override,
starter_messages=create_persona_request.starter_messages,
is_public=create_persona_request.is_public,
multilingual_query_expansion=create_persona_request.multilingual_query_expansion,
db_session=db_session,
)

Expand Down Expand Up @@ -327,6 +328,7 @@ def upsert_persona(
tool_ids: list[int] | None = None,
persona_id: int | None = None,
default_persona: bool = False,
multilingual_query_expansion: bool = False,
commit: bool = True,
) -> Persona:
if persona_id is not None:
Expand Down Expand Up @@ -379,6 +381,7 @@ def upsert_persona(
persona.starter_messages = starter_messages
persona.deleted = False # Un-delete if previously deleted
persona.is_public = is_public
persona.multilingual_query_expansion = multilingual_query_expansion

# Do not delete any associations manually added unless
# a new updated list is provided
Expand Down Expand Up @@ -411,6 +414,7 @@ def upsert_persona(
llm_model_version_override=llm_model_version_override,
starter_messages=starter_messages,
tools=tools or [],
multilingual_query_expansion=multilingual_query_expansion,
)
db_session.add(persona)

Expand Down
10 changes: 9 additions & 1 deletion backend/danswer/llm/answering/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,17 @@ class PromptConfig(BaseModel):
task_prompt: str
datetime_aware: bool
include_citations: bool
# When true, the answer-side prompts add the LANGUAGE_HINT directive
# so the LLM responds in the user's original language. Sourced from
# the persona's multilingual_query_expansion flag at construction.
multilingual_query_expansion: bool = False

@classmethod
def from_model(
cls, model: "Prompt", prompt_override: PromptOverride | None = None
cls,
model: "Prompt",
prompt_override: PromptOverride | None = None,
multilingual_query_expansion: bool = False,
) -> "PromptConfig":
override_system_prompt = (
prompt_override.system_prompt if prompt_override else None
Expand All @@ -136,6 +143,7 @@ def from_model(
task_prompt=override_task_prompt or model.task_prompt,
datetime_aware=model.datetime_aware,
include_citations=model.include_citations,
multilingual_query_expansion=multilingual_query_expansion,
)

# needed so that this can be passed into lru_cache funcs
Expand Down
Loading
Loading