leettools-dev · pengfeng · Apr 1, 2025 · Apr 1, 2025
diff --git a/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po
diff --git a/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po
diff --git a/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po
diff --git a/src/leettools/core/strategy/schemas/strategy_section.py b/src/leettools/core/strategy/schemas/strategy_section.py
@@ -30,5 +30,5 @@ class StrategySection(BaseModel):
     llm_system_prompt_id: Optional[str] = None
     llm_user_prompt_id: Optional[str] = None
     # prompts by intention
-    llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = None
-    llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = None
+    llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = {}
+    llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = {}
diff --git a/src/leettools/eds/api_caller/api_caller_base.py b/src/leettools/eds/api_caller/api_caller_base.py
@@ -166,7 +166,7 @@ def setup_default_prompts(self) -> None:
 
         if (
             section.strategy_name.lower() == "default"
-            or self.strategy_section.strategy_name.lower() == "true"
+            or section.strategy_name.lower() == "true"
         ):
             if section.llm_system_prompt_id is None:
                 logger().warning(
@@ -189,7 +189,7 @@ def setup_default_prompts(self) -> None:
 
             if section.llm_user_prompt_id is None:
                 logger().warning(
-                    f"No user prompt id for {section.section_name} provided."
+                    f"No user prompt id for {section.section_name} provided. "
                     "Fallback to the default user prompt."
                 )
                 self.user_prompt_template = None
@@ -205,15 +205,16 @@ def setup_default_prompts(self) -> None:
                     self.user_prompt_template = user_prompt.prompt_template
 
         if self.user_prompt_template is None:
+            strategy_name = section.strategy_name
             user_prompt_template_file = (
-                f"{self.script_dir}/prompts/default_user_prompt.txt"
+                f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt"
             )
             with open(user_prompt_template_file, "r", encoding="utf-8") as file:
                 self.user_prompt_template = file.read()
 
         if self.system_prompt_template is None:
             system_prompt_template_file = (
-                f"{self.script_dir}/prompts/default_system_prompt.txt"
+                f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt"
             )
             with open(system_prompt_template_file, "r", encoding="utf-8") as file:
                 self.system_prompt_template = file.read()
@@ -233,7 +234,7 @@ def setup_prompts_for_intention(self, query_metadata: ChatQueryMetadata):
 
         if intention_str not in sp_ids:
             self.display_logger.warning(
-                f"No system prompt id for {intention_str} provided to {section_name}."
+                f"No system prompt id for {intention_str} provided to {section_name}. "
                 f"Fallback to the default intention."
             )
             intention_str = DEFAULT_INTENTION
@@ -285,13 +286,21 @@ def get_user_prompt_template_for_intention(self, intention_str: str) -> str:
         if self.script_dir is None:
             raise UnexpectedCaseException("Script directory is not set.")
 
-        user_prompt_file = f"{self.script_dir}/prompts/{intention_str}_user_prompt.txt"
+        strategy_name = self.strategy_section.strategy_name
+        user_prompt_file = (
+            f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_user_prompt.txt"
+        )
         # if the user prompt for the intention is not provided, use the default
         if not os.path.exists(user_prompt_file):
             self.display_logger.warning(
-                f"User prompt for {intention_str} not found. Using default."
+                f"User prompt for {intention_str} not found: {user_prompt_file}"
             )
-            user_prompt_file = f"{self.script_dir}/prompts/default_user_prompt.txt"
+            user_prompt_file = (
+                f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt"
+            )
+            self.display_logger.info(f"Using default user prompt: {user_prompt_file}")
+        else:
+            self.display_logger.debug(f"Using user prompt file: {user_prompt_file}")
         return read_template_file(user_prompt_file)
 
     def get_system_prompt_template_for_intention(self, intention_str: str) -> str:
@@ -301,14 +310,20 @@ def get_system_prompt_template_for_intention(self, intention_str: str) -> str:
         if self.script_dir is None:
             raise UnexpectedCaseException("Script directory is not set.")
 
-        system_prompt_file = (
-            f"{self.script_dir}/prompts/{intention_str}_system_prompt.txt"
-        )
+        strategy_name = self.strategy_section.strategy_name
+        system_prompt_file = f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_system_prompt.txt"
         if not os.path.exists(system_prompt_file):
             self.display_logger.warning(
-                f"System prompt for {intention_str} not found. Using default."
+                f"System prompt for {intention_str} not found: {system_prompt_file}"
+            )
+            system_prompt_file = (
+                f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt"
             )
-            system_prompt_file = f"{self.script_dir}/prompts/default_system_prompt.txt"
+            self.display_logger.info(
+                f"Using default system prompt: {system_prompt_file}"
+            )
+        else:
+            self.display_logger.debug(f"Using system prompt file: {system_prompt_file}")
         return read_template_file(system_prompt_file)
 
     def run_inference_call(

diff --git a/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py b/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py
@@ -54,7 +54,6 @@ def rewrite(
         query = query_item.query_content
 
         # add query history
-        query_id = query_item.query_id
         ch_manager = get_history_manager(self.context)
         query_history = ch_manager.get_ch_entry(
             username=self.user.username,

diff --git a/src/leettools/flow/flows/answer/flow_answer.py b/src/leettools/flow/flows/answer/flow_answer.py
@@ -19,6 +19,8 @@
 from leettools.core.schemas.knowledgebase import KnowledgeBase
 from leettools.core.schemas.organization import Org
 from leettools.core.schemas.user import User
+from leettools.core.strategy.schemas.strategy_section import StrategySection
+from leettools.core.strategy.schemas.strategy_section_name import StrategySectionName
 from leettools.flow import flow_option_items, steps
 from leettools.flow.exec_info import ExecInfo
 from leettools.flow.flow import AbstractFlow
@@ -118,29 +120,46 @@ def execute_query(
         )
 
         # flow starts there
+
         query_metadata = steps.StepIntention.run_step(exec_info=exec_info)
-        rewrite = steps.StepQueryRewrite.run_step(
-            exec_info=exec_info,
-            query_metadata=query_metadata,
-        )
 
         if is_search_engine(retriever_type):
-            # query the web first, after this function, the search results
-            # are processed and stored in the KB
-            # TODO: make this function async
+            rewrite = steps.StepQueryRewrite.run_step(
+                exec_info=exec_info,
+                query_metadata=query_metadata,
+            )
             if rewrite.search_keywords is None:
                 keywords = rewrite.rewritten_question
             else:
                 keywords = rewrite.search_keywords
+
+            # query the web first, after this function, the search results
+            # are processed and stored in the KB
+            # TODO: make this function async
             docsource = steps.StepSearchToDocsource.run_step(
                 exec_info=exec_info, search_keywords=keywords
             )
             # we will answer using the whole KB
             # right now filter by docsource cannot include re-used docsinks
             # flow_options[DOCSOURCE_UUID_ATTR] = docsource.docsource_uuid
-
-        # TODO Next: add a flow_option to control if include the whole KB in the search
-        # flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid
+            # TODO Next: add a flow_option to control whether to include the whole KB in the search
+            # flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid
+        else:
+            # for local KB, we should use local KB data as the rewrite context
+            rewrite_section = StrategySection(
+                section_name=StrategySectionName.REWRITE,
+                strategy_name="keywords",
+            )
+            rewrite = steps.StepQueryRewrite.run_step(
+                exec_info=exec_info,
+                query_metadata=query_metadata,
+                rewrite_section=rewrite_section,
+            )
+            # the keywords are not actually used in the local search
+            if rewrite.search_keywords is None:
+                keywords = rewrite.rewritten_question
+            else:
+                keywords = rewrite.search_keywords
 
         top_ranked_result_segments = steps.StepVectorSearch.run_step(
             exec_info=exec_info,

diff --git a/src/leettools/flow/flows/news/flow_news.py b/src/leettools/flow/flows/news/flow_news.py
@@ -6,6 +6,7 @@
 from pydantic import BaseModel, ConfigDict, create_model
 
 from leettools.common import exceptions
+from leettools.common.i18n.translator import _
 from leettools.common.logging.event_logger import EventLogger
 from leettools.common.utils import config_utils, json_utils, lang_utils, time_utils
 from leettools.common.utils.template_eval import render_template
@@ -108,6 +109,8 @@ def full_description(cls) -> str:
 - The categories of the news
 - The keywords of the news
 - The date of the news item
+
+{{ language_instruction }}
 """
 
     @classmethod
@@ -118,10 +121,9 @@ def depends_on(cls) -> List[Type["FlowComponent"]]:
     def direct_flow_option_items(cls) -> List[FlowOptionItem]:
         foi_news_source_min = FlowOptionItem(
             name=flow_option.FLOW_OPTION_NEWS_SOURCE_MIN,
-            display_name="News item source count threshold",
-            description=(
-                "Number of sources a news item has to have to be included in the result."
-                "Default is 1. Depends on the nature of the knowledge base."
+            display_name=_("News item source count threshold"),
+            description=_(
+                "Number of sources a news item has to have to be included in the result. Default is 1. Depends on the nature of the knowledge base."
             ),
             default_value="1",
             value_type="int",
@@ -131,10 +133,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:
 
         foi_news_include_old = FlowOptionItem(
             name=flow_option.FLOW_OPTION_NEWS_INCLUDE_OLD,
-            display_name="Include previously reported news items",
-            description=(
-                "Include all news items in the result, even if it has been reported before."
-                "Default is False."
+            display_name=_("Include previously reported news items"),
+            description=_(
+                "Include all news items in the result, even if it has been reported before. Default is False."
             ),
             default_value="False",
             value_type="bool",
@@ -144,8 +145,8 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:
 
         foi_news_output_format = FlowOptionItem(
             name=flow_option.FLOW_OPTION_EXTRACT_OUTPUT_FORMAT,
-            display_name="Output format",
-            description=(
+            display_name=_("Output format"),
+            description=_(
                 "The format of the output: 'md' (default), 'table and 'json'."
             ),
             default_value="md",
@@ -156,10 +157,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:
 
         foi_news_run_search = FlowOptionItem(
             name=flow_option.FLOW_OPTION_NEWS_RUN_SEARCH,
-            display_name="Run search before extracting news data",
-            description=(
-                "Run the search step before extracting the news data."
-                "Default is True."
+            display_name=_("Run search before extracting news data"),
+            description=_(
+                "Run the search step before extracting the news data. Default is True."
             ),
             default_value="True",
             value_type="bool",
@@ -487,6 +487,7 @@ def execute_query(
                 "query": query,
                 "word_count": news_params.word_count,
                 "article_style": news_params.article_style,
+                "language_instruction": news_params.language_instruction,
             },
         )
 

diff --git a/src/leettools/flow/steps/step_query_rewrite.py b/src/leettools/flow/steps/step_query_rewrite.py
@@ -1,4 +1,4 @@
-from typing import ClassVar, List, Type
+from typing import ClassVar, List, Optional, Type
 
 from leettools.core.schemas.chat_query_metadata import ChatQueryMetadata
 from leettools.core.strategy.schemas.strategy_section import StrategySection
@@ -27,17 +27,30 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:
     def run_step(
         exec_info: ExecInfo,
         query_metadata: ChatQueryMetadata,
+        rewrite_section: Optional[StrategySection] = None,
     ) -> Rewrite:
         """
         Rewrite the query based on the strategy section and the query metadata.
+
+        If rewrite_section is provided, use it directly.
+        If rewrite_section is not provided, get it from the strategy.
+
+        Args:
+        - exec_info: the execution info
+        - query_metadata: the query metadata
+        - rewrite_section: the rewrite section
+
+        Returns:
+        - rewrite: the rewrite result
         """
         display_logger = exec_info.display_logger
         display_logger.info(
             f"[Status] Rewrite query: {exec_info.target_chat_query_item.query_content}"
         )
-        rewrite_section = exec_info.strategy.strategy_sections.get(
-            StrategySectionName.REWRITE, None
-        )
+        if rewrite_section is None:
+            rewrite_section = exec_info.strategy.strategy_sections.get(
+                StrategySectionName.REWRITE, None
+            )
         return _step_run_rewriter(
             exec_info=exec_info,
             rewrite_section=rewrite_section,

diff --git a/src/leettools/svc/api/v1/routers/file_router.py b/src/leettools/svc/api/v1/routers/file_router.py
@@ -33,6 +33,7 @@ async def read_raw_document(uri: str) -> FileResponse:
             logger().debug(f"Reading raw document from {uri}")
 
             safe_base_path = Path(self.settings.DATA_ROOT)
+            # TODO: make these configurable and in sync with Docker config
             incoming_file_path = Path("/incoming")
             uploads_file_path = Path("/app/uploads")