leettools-dev · leettools-dev · Mar 22, 2025 · Mar 22, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -40,8 +40,8 @@ dependencies = [
     "sentence_transformers==2.5.1",
     "tiktoken==0.8.0",
     "duckdb==1.1.3",
-    "docling==2.25.2",
-    "docling_core==2.21.1",
+    "docling==2.26.0",
+    "docling_core==2.23.0",
     "chonkie==0.5.1",
     "langchain-community==0.3.15",
     "firecrawl-py==1.12.0",

diff --git a/requirements.txt b/requirements.txt
@@ -23,8 +23,8 @@ scipy==1.14.1
 sentence_transformers==2.5.1
 tiktoken==0.8.0
 duckdb==1.1.3
-docling==2.25.2
-docling_core==2.21.1
+docling==2.26.0
+docling_core==2.23.0
 chonkie==0.5.1
 langchain-community==0.3.15
 firecrawl-py==1.12.0

diff --git a/src/leettools/chat/schemas/chat_history.py b/src/leettools/chat/schemas/chat_history.py
@@ -126,6 +126,29 @@ class ChatHistory(CHInDB):
         None, description="For adhoc chat, we need to return the kb_name created."
     )
 
+    def get_history_str(self, ignore_last: bool = False) -> str:
+        """
+        Get the history string of the chat history in the format of:
+        [user_query_1]
+        [assistant_answer_1]
+        [user_query_2]
+        [assistant_answer_2]
+        ...
+        """
+        history_str = ""
+        if len(self.queries) == 0:
+            return history_str
+        if ignore_last:
+            total_count = len(self.queries) - 1
+        else:
+            total_count = len(self.queries)
+
+        for i in range(total_count):
+            history_str += f"[query] {self.queries[i].query_content}\n"
+            if i < len(self.answers):
+                history_str += f"[answer] {self.answers[i].answer_content}\n"
+        return history_str
+
     @classmethod
     def from_ch_in_db(ChatHistory, ch_in_db: CHInDB) -> "ChatHistory":
         # we need to assign attributes with non-None values

diff --git a/src/leettools/core/consts/flow_option.py b/src/leettools/core/consts/flow_option.py
@@ -35,5 +35,6 @@
 FLOW_OPTION_STRICT_CONTEXT = "strict_context"
 FLOW_OPTION_SUMMARIZING_MODEL = "summarizing_model"
 FLOW_OPTION_TARGET_SITE = "target_site"
+FLOW_OPTION_TIMEZONE = "timezone"
 FLOW_OPTION_WORD_COUNT = "word_count"
 FLOW_OPTION_WRITING_MODEL = "writing_model"
diff --git a/src/leettools/core/schemas/chat_query_item.py b/src/leettools/core/schemas/chat_query_item.py
@@ -104,6 +104,9 @@ def get_strategy(
             return strategy
 
         if strategy_base is None:
+            display_logger.debug(
+                f"Using strategy id (strategy_base not provided): {strategy_id}"
+            )
             strategy = strategy_store.get_strategy_by_id(strategy_id)
             if strategy is None:
                 raise exceptions.EntityNotFoundException(
@@ -128,4 +131,8 @@ def get_strategy(
                 strategy_status=StrategyStatus.ACTIVE.value,
                 is_system=True,
             )
+        else:
+            display_logger.debug(
+                f"Using strategy id (strategy_base provided): {strategy_id}"
+            )
         return strategy
diff --git a/src/leettools/core/strategy/predefined/default/chat_strategy.json b/src/leettools/core/strategy/predefined/default/chat_strategy.json
@@ -1,4 +1,6 @@
 {
     "strategy_name": "default",
-    "strategy_description": "Default strategy using system default settings"
+    "strategy_description": "Default strategy using system default settings",
+    "rewrite": "default",
+    "rewrite_options": {"model_name": "gpt-4o-mini"}
 }
diff --git a/src/leettools/core/strategy/predefined/default/rewrite_up_default.txt b/src/leettools/core/strategy/predefined/default/rewrite_up_default.txt
@@ -1,41 +1,42 @@
-When users ask questions, they often do not provide enough information or 
-clear purposes. However, given a context about the question, we want to rewrite 
-the question so that the LLM can have a more clear goal and path to generate the 
-answer. Here are a few examples:
+[default rewrite user prompt]
 
-Example 1: 
-Context: The user is asking the question on a web site called google.com.
-Question: How can I create an account on the website?
-Rewrite: 
+{{ date_instruction }}
 
-{
-    "rewritten_question": “I want to create an account on the website like google.com, please 
-    show me a sequence of operations on the website and information I need in each step
-    to create an account.”
-}
+When users ask questions, they often provide limited context or unclear objectives, making
+it challenging for an LLM to generate precise answers. Considering the previous query
+history if provided, rewrite the user's current question into a detailed and structured
+query, clearly stating the intent and outlining specific steps or information needed.
+Additionally, replace any pronouns or vague references (like "it" or "they") with specific
+terms to clarify exactly what is being referred to.
 
-Example 2:
-Context: We are working on some C++ code.
-Question: My program is reporting an OOM error, what should I do?
-Rewrite: 
+Example:
 
+Original Question:
+How do I reset my password?
+
+Rewritten Question:
 {
-    "rewritten_question": "I am getting an OOM (out of memory) error in my C++ program, 
-    please provide me with a sequence of steps to diagnose and fix the error."
+    "rewritten_question": "I need to reset my password on the website. Provide a clear
+sequence of steps, including where on the website I should navigate, what information I'll
+need, and any verification steps involved.",
+    "search_keywords": "reset password steps website verification"
 }
 
-As illustrated by the above example, rewrite the given question, using the same language
-as the original question, as a list of instructions that lead to a clear path to generate
-the answer. Just output the rewritten query itself without any extra information:
+Rewrite the following question into a clear, structured, and actionable set of 
+instructions to facilitate generating an accurate and useful answer. Be sure to explicitly
+clarify all pronouns or vague references. Additionally, generate a concise string containing
+relevant keywords suitable for performing an internet search to gather further information
+about the query. Use the user query history for context, if provided.
 
-Context: {{ context }}
+{{ query_history_instruction }}
 
-Question: {{ question }}
+Question:
+{{ question }}
 
-Please output your answer in the following format, ensuring the output is formatted as 
-JSON data, the rewritten question is in the same language as the input question, and 
-not in a JSON block:
+Output your response strictly in the following JSON format, ensuring the output is 
+formatted as JSON data, and not in a JSON block:
 
 {
-    "rewritten_question": "rewritten_question"
-}
+    "rewritten_question": "rewritten_question",
+    "search_keywords": "search_keywords"
+}
diff --git a/src/leettools/core/strategy/schemas/strategy_conf.py b/src/leettools/core/strategy/schemas/strategy_conf.py
@@ -39,7 +39,7 @@ class StrategyConfBase(BaseModel):
         "intention_list will be passed in the prompt.",
     )
 
-    rewrite: Optional[str] = Field(None, description="The query rewrite strategy")
+    rewrite: Optional[str] = Field("default", description="The query rewrite strategy")
     rewrite_options: Optional[Dict[str, Any]] = Field(
         None,
         description="The options for rewrite, right now support "

diff --git a/src/leettools/eds/api_caller/api_utils.py b/src/leettools/eds/api_caller/api_utils.py
@@ -205,6 +205,9 @@ def run_inference_call_direct(
         display_logger.info(
             f"({completion.usage.total_tokens}) tokens used for ({call_target})."
         )
+    except Exception as e:
+        display_logger.error(f"Error in running inference call: {e}")
+        raise e
     finally:
         end_timestamp_in_ms = time_utils.cur_timestamp_in_ms()
         if completion is not None:

diff --git a/src/leettools/eds/pipeline/convert/_impl/converter_local.py b/src/leettools/eds/pipeline/convert/_impl/converter_local.py
@@ -1,7 +1,6 @@
 import os
 import traceback
 from pathlib import Path
-from typing import List, Optional
 
 from leettools.common.logging import logger
 from leettools.core.consts.return_code import ReturnCode

diff --git a/src/leettools/eds/pipeline/convert/_impl/parser_docling.py b/src/leettools/eds/pipeline/convert/_impl/parser_docling.py
@@ -112,8 +112,5 @@ def pptx2md(self, pptx_filepath: str, target_path: Optional[Path] = None) -> str
         return self._convert(pptx_filepath, target_path)
 
     def xlsx2md(self, xlsx_filepath: str, target_path: Optional[Path] = None) -> str:
-        # not supported yet
-        logger().error(
-            f"XLSX to markdown conversion is not supported yet: {xlsx_filepath}"
-        )
-        return ""
+        logger().debug(f"Converting XLSX to markdown: {xlsx_filepath}")
+        return self._convert(xlsx_filepath, target_path)
diff --git a/src/leettools/eds/rag/rewrite/_impl/prompts/default_keywords_user_prompt.txt b/src/leettools/eds/rag/rewrite/_impl/prompts/default_keywords_user_prompt.txt
@@ -1,40 +1,41 @@
-When users ask questions, they often do not provide enough information or 
-clear purposes. However, given a context about the question, we want to rewrite 
-the question so that the LLM can have a more clear goal and path to generate the 
-answer. Here are a few examples:
+{{ date_instruction }}
+When users ask questions, they often provide limited context or unclear objectives, making
+it challenging for an LLM to generate precise answers. Considering the previous query
+history if provided, rewrite the user's current question into a detailed and structured
+query, clearly stating the intent and outlining specific steps or information needed.
+Additionally, replace any pronouns or vague references (like "it" or "they") with specific
+terms to clarify exactly what is being referred to.
 
-Example 1: 
-Question: How can I create an account on the website?
-Context: The user is asking the question on a web site called google.com.
-Rewrite: 
+{{ query_history_instruction }}
 
-{
-    "rewritten_question": “I want to create an account on the website like google.com, please 
-    show me a sequence of operations on the website and information I need in each step
-    to create an account.”
-}
+Example:
 
-Example 2:
-Question: My program is reporting an OOM error, what should I do?
-Context: We are working on some C++ code.
-Rewrite: 
+Original Question:
+How do I reset my password?
 
+Rewritten Question:
 {
-    "rewritten_question": "I am getting an OOM (out of memory) error in my C++ program, 
-    please provide me with a sequence of steps to diagnose and fix the error."
+    "rewritten_question": "I need to reset my password on the website. Provide a clear
+sequence of steps, including where on the website I should navigate, what information I'll
+need, and any verification steps involved.",
+    "search_keywords": "reset password steps website verification"
 }
 
-As illustrated by the above example, given the question and the context, rewrite the 
-given question as a list of  instructions that lead to a clear path to generate the answer. 
-Just output the rewritten query itself without any extra information:
+Using the provided query history for context, rewrite the following question into a clear,
+structured, and actionable set of instructions to facilitate generating an accurate and
+useful answer. Additionally, generate a concise string containing relevant keywords
+suitable for performing an internet search to gather further information about the query.
+Be sure to explicitly clarify all pronouns or vague references.
 
-Question: {{ question }}
+Question:
+{{ question }}
 
 Context: {{ context }}
 
-Please output your answer in the following format, ensuring the output is formatted as 
-JSON data, and not in a JSON block:
+Output your response strictly in the following JSON format, ensuring the output is 
+formatted as JSON data, and not in a JSON block:
 
 {
-    "rewritten_question": "rewritten_question"
-}
+    "rewritten_question": "rewritten_question",
+    "search_keywords": "search_keywords"
+}
diff --git a/src/leettools/eds/rag/rewrite/_impl/prompts/default_user_prompt.txt b/src/leettools/eds/rag/rewrite/_impl/prompts/default_user_prompt.txt
@@ -1,35 +1,41 @@
-When users ask questions, they often do not provide enough information or 
-clear purposes. We want to rewrite the question so that the LLM can have
-a more clear goal and path to generate the answer. Here are a few examples:
+[rewrite user prompt under the rewrite _impl]
 
-Example 1: 
-Question: How can I create an account on the website?
-Rewrite: 
+{{ date_instruction }}
+When users ask questions, they often provide limited context or unclear objectives, making
+it challenging for an LLM to generate precise answers. Considering the previous query
+history if provided, rewrite the user's current question into a detailed and structured
+query, clearly stating the intent and outlining specific steps or information needed.
+Additionally, replace any pronouns or vague references (like "it" or "they") with specific
+terms to clarify exactly what is being referred to.
 
-{
-    "rewritten_question": “I want to create an account on the website, please show me 
-    a sequence of operations on the website and information I need in each step
-    to create an account.”
-}
+{{ query_history_instruction }}
 
-Example 2:
-Question: My program is reporting an OOM error, what should I do?
-Rewrite: 
+Example:
 
+Original Question:
+How do I reset my password?
+
+Rewritten Question:
 {
-    "rewritten_question": "I am getting an OOM (out of memory) error in my program, 
-    please provide me with a sequence of steps to diagnose and fix the error."
+    "rewritten_question": "I need to reset my password on the website. Provide a clear
+sequence of steps, including where on the website I should navigate, what information I'll
+need, and any verification steps involved.",
+    "search_keywords": "reset password steps website verification"
 }
 
-As illustrated by the above example, rewrite the given question as a list of 
-instructions that lead to a clear path to generate the answer. Just output the
-rewritten query itself without any extra information:
+Using the provided query history for context, rewrite the following question into a clear,
+structured, and actionable set of instructions to facilitate generating an accurate and
+useful answer. Be sure to explicitly clarify all pronouns or vague references. Additionally,
+generate a concise string containing relevant keywords suitable for performing an
+internet search to gather further information about the query.
 
+Question:
 {{ question }}
 
-Please output your answer in the following format, ensuring the output is formatted as 
-JSON data, and not in a JSON block:
+Output your response strictly in the following JSON format, ensuring the output is 
+formatted as JSON data, and not in a JSON block:
 
 {
-    "rewritten_question": "rewritten_question"
-}
+    "rewritten_question": "rewritten_question",
+    "search_keywords": "search_keywords"
+}
diff --git a/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py b/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py
@@ -4,10 +4,12 @@
 
 import click
 
+from leettools.chat.history_manager import get_history_manager
 from leettools.common.logging import logger
 from leettools.common.logging.event_logger import EventLogger
 from leettools.common.utils.template_eval import render_template
 from leettools.context_manager import Context, ContextManager
+from leettools.core.schemas.chat_query_item import ChatQueryItem
 from leettools.core.schemas.chat_query_metadata import ChatQueryMetadata
 from leettools.core.schemas.knowledgebase import KnowledgeBase
 from leettools.core.schemas.organization import Org
@@ -22,6 +24,8 @@
     get_query_rewriter_by_strategy,
 )
 from leettools.eds.rag.schemas.rewrite import Rewrite
+from leettools.flow.exec_info import ExecInfo
+from leettools.flow.utils import prompt_utils
 
 _script_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -39,12 +43,39 @@ def __init__(
         )
 
     def rewrite(
-        self, org: Org, kb: KnowledgeBase, query: str, query_metadata: ChatQueryMetadata
+        self,
+        org: Org,
+        kb: KnowledgeBase,
+        query_item: ChatQueryItem,
+        query_metadata: ChatQueryMetadata,
     ) -> Rewrite:
 
         self.setup_prompts_for_intention(query_metadata)
-
-        user_prompt = render_template(self.user_prompt_template, {"question": query})
+        query = query_item.query_content
+
+        # add query history
+        query_id = query_item.query_id
+        ch_manager = get_history_manager(self.context)
+        query_history = ch_manager.get_ch_entry(
+            username=self.user.username,
+            chat_id=query_item.chat_id,
+        )
+        if query_history is not None and query_history != "":
+            query_history_str = query_history.get_history_str(ignore_last=True)
+            query_history_instruction = (
+                "Here is the chat history:\n" + query_history_str
+            )
+        else:
+            query_history_instruction = ""
+
+        user_prompt = render_template(
+            self.user_prompt_template,
+            {
+                "question": query,
+                "query_history_instruction": query_history_instruction,
+                "date_instruction": prompt_utils.date_instruction(),
+            },
+        )
         logger().debug(f"Final user prompt for rewrite: {user_prompt}")
 
         system_prompt = render_template(