Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 157 additions & 113 deletions src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po

Large diffs are not rendered by default.

255 changes: 140 additions & 115 deletions src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po

Large diffs are not rendered by default.

256 changes: 141 additions & 115 deletions src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/leettools/core/strategy/schemas/strategy_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ class StrategySection(BaseModel):
llm_system_prompt_id: Optional[str] = None
llm_user_prompt_id: Optional[str] = None
# prompts by intention
llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = None
llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = None
llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = {}
llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = {}
41 changes: 28 additions & 13 deletions src/leettools/eds/api_caller/api_caller_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def setup_default_prompts(self) -> None:

if (
section.strategy_name.lower() == "default"
or self.strategy_section.strategy_name.lower() == "true"
or section.strategy_name.lower() == "true"
):
if section.llm_system_prompt_id is None:
logger().warning(
Expand All @@ -189,7 +189,7 @@ def setup_default_prompts(self) -> None:

if section.llm_user_prompt_id is None:
logger().warning(
f"No user prompt id for {section.section_name} provided."
f"No user prompt id for {section.section_name} provided. "
"Fallback to the default user prompt."
)
self.user_prompt_template = None
Expand All @@ -205,15 +205,16 @@ def setup_default_prompts(self) -> None:
self.user_prompt_template = user_prompt.prompt_template

if self.user_prompt_template is None:
strategy_name = section.strategy_name
user_prompt_template_file = (
f"{self.script_dir}/prompts/default_user_prompt.txt"
f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt"
)
with open(user_prompt_template_file, "r", encoding="utf-8") as file:
self.user_prompt_template = file.read()

if self.system_prompt_template is None:
system_prompt_template_file = (
f"{self.script_dir}/prompts/default_system_prompt.txt"
f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt"
)
with open(system_prompt_template_file, "r", encoding="utf-8") as file:
self.system_prompt_template = file.read()
Expand All @@ -233,7 +234,7 @@ def setup_prompts_for_intention(self, query_metadata: ChatQueryMetadata):

if intention_str not in sp_ids:
self.display_logger.warning(
f"No system prompt id for {intention_str} provided to {section_name}."
f"No system prompt id for {intention_str} provided to {section_name}. "
f"Fallback to the default intention."
)
intention_str = DEFAULT_INTENTION
Expand Down Expand Up @@ -285,13 +286,21 @@ def get_user_prompt_template_for_intention(self, intention_str: str) -> str:
if self.script_dir is None:
raise UnexpectedCaseException("Script directory is not set.")

user_prompt_file = f"{self.script_dir}/prompts/{intention_str}_user_prompt.txt"
strategy_name = self.strategy_section.strategy_name
user_prompt_file = (
f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_user_prompt.txt"
)
# if the user prompt for the intention is not provided, use the default
if not os.path.exists(user_prompt_file):
self.display_logger.warning(
f"User prompt for {intention_str} not found. Using default."
f"User prompt for {intention_str} not found: {user_prompt_file}"
)
user_prompt_file = f"{self.script_dir}/prompts/default_user_prompt.txt"
user_prompt_file = (
f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt"
)
self.display_logger.info(f"Using default user prompt: {user_prompt_file}")
else:
self.display_logger.debug(f"Using user prompt file: {user_prompt_file}")
return read_template_file(user_prompt_file)

def get_system_prompt_template_for_intention(self, intention_str: str) -> str:
Expand All @@ -301,14 +310,20 @@ def get_system_prompt_template_for_intention(self, intention_str: str) -> str:
if self.script_dir is None:
raise UnexpectedCaseException("Script directory is not set.")

system_prompt_file = (
f"{self.script_dir}/prompts/{intention_str}_system_prompt.txt"
)
strategy_name = self.strategy_section.strategy_name
system_prompt_file = f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_system_prompt.txt"
if not os.path.exists(system_prompt_file):
self.display_logger.warning(
f"System prompt for {intention_str} not found. Using default."
f"System prompt for {intention_str} not found: {system_prompt_file}"
)
system_prompt_file = (
f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt"
)
system_prompt_file = f"{self.script_dir}/prompts/default_system_prompt.txt"
self.display_logger.info(
f"Using default system prompt: {system_prompt_file}"
)
else:
self.display_logger.debug(f"Using system prompt file: {system_prompt_file}")
return read_template_file(system_prompt_file)

def run_inference_call(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def rewrite(
query = query_item.query_content

# add query history
query_id = query_item.query_id
ch_manager = get_history_manager(self.context)
query_history = ch_manager.get_ch_entry(
username=self.user.username,
Expand Down
39 changes: 29 additions & 10 deletions src/leettools/flow/flows/answer/flow_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from leettools.core.schemas.knowledgebase import KnowledgeBase
from leettools.core.schemas.organization import Org
from leettools.core.schemas.user import User
from leettools.core.strategy.schemas.strategy_section import StrategySection
from leettools.core.strategy.schemas.strategy_section_name import StrategySectionName
from leettools.flow import flow_option_items, steps
from leettools.flow.exec_info import ExecInfo
from leettools.flow.flow import AbstractFlow
Expand Down Expand Up @@ -118,29 +120,46 @@ def execute_query(
)

# flow starts here

query_metadata = steps.StepIntention.run_step(exec_info=exec_info)
rewrite = steps.StepQueryRewrite.run_step(
exec_info=exec_info,
query_metadata=query_metadata,
)

if is_search_engine(retriever_type):
# query the web first, after this function, the search results
# are processed and stored in the KB
# TODO: make this function async
rewrite = steps.StepQueryRewrite.run_step(
exec_info=exec_info,
query_metadata=query_metadata,
)
if rewrite.search_keywords is None:
keywords = rewrite.rewritten_question
else:
keywords = rewrite.search_keywords

# query the web first, after this function, the search results
# are processed and stored in the KB
# TODO: make this function async
docsource = steps.StepSearchToDocsource.run_step(
exec_info=exec_info, search_keywords=keywords
)
# we will answer using the whole KB
# right now filter by docsource cannot include re-used docsinks
# flow_options[DOCSOURCE_UUID_ATTR] = docsource.docsource_uuid

# TODO Next: add a flow_option to control whether to include the whole KB in the search
# flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid
# TODO Next: add a flow_option to control whether to include the whole KB in the search
# flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid
else:
# for local KB, we should use local KB data as the rewrite context
rewrite_section = StrategySection(
section_name=StrategySectionName.REWRITE,
strategy_name="keywords",
)
rewrite = steps.StepQueryRewrite.run_step(
exec_info=exec_info,
query_metadata=query_metadata,
rewrite_section=rewrite_section,
)
# the keywords are not actually used in the local search
if rewrite.search_keywords is None:
keywords = rewrite.rewritten_question
else:
keywords = rewrite.search_keywords

top_ranked_result_segments = steps.StepVectorSearch.run_step(
exec_info=exec_info,
Expand Down
29 changes: 15 additions & 14 deletions src/leettools/flow/flows/news/flow_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pydantic import BaseModel, ConfigDict, create_model

from leettools.common import exceptions
from leettools.common.i18n.translator import _
from leettools.common.logging.event_logger import EventLogger
from leettools.common.utils import config_utils, json_utils, lang_utils, time_utils
from leettools.common.utils.template_eval import render_template
Expand Down Expand Up @@ -108,6 +109,8 @@ def full_description(cls) -> str:
- The categories of the news
- The keywords of the news
- The date of the news item

{{ language_instruction }}
"""

@classmethod
Expand All @@ -118,10 +121,9 @@ def depends_on(cls) -> List[Type["FlowComponent"]]:
def direct_flow_option_items(cls) -> List[FlowOptionItem]:
foi_news_source_min = FlowOptionItem(
name=flow_option.FLOW_OPTION_NEWS_SOURCE_MIN,
display_name="News item source count threshold",
description=(
"Number of sources a news item has to have to be included in the result."
"Default is 1. Depends on the nature of the knowledge base."
display_name=_("News item source count threshold"),
description=_(
"Number of sources a news item has to have to be included in the result. Default is 1. Depends on the nature of the knowledge base."
),
default_value="1",
value_type="int",
Expand All @@ -131,10 +133,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:

foi_news_include_old = FlowOptionItem(
name=flow_option.FLOW_OPTION_NEWS_INCLUDE_OLD,
display_name="Include previously reported news items",
description=(
"Include all news items in the result, even if it has been reported before."
"Default is False."
display_name=_("Include previously reported news items"),
description=_(
"Include all news items in the result, even if it has been reported before. Default is False."
),
default_value="False",
value_type="bool",
Expand All @@ -144,8 +145,8 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:

foi_news_output_format = FlowOptionItem(
name=flow_option.FLOW_OPTION_EXTRACT_OUTPUT_FORMAT,
display_name="Output format",
description=(
display_name=_("Output format"),
description=_(
"The format of the output: 'md' (default), 'table and 'json'."
),
default_value="md",
Expand All @@ -156,10 +157,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:

foi_news_run_search = FlowOptionItem(
name=flow_option.FLOW_OPTION_NEWS_RUN_SEARCH,
display_name="Run search before extracting news data",
description=(
"Run the search step before extracting the news data."
"Default is True."
display_name=_("Run search before extracting news data"),
description=_(
"Run the search step before extracting the news data. Default is True."
),
default_value="True",
value_type="bool",
Expand Down Expand Up @@ -487,6 +487,7 @@ def execute_query(
"query": query,
"word_count": news_params.word_count,
"article_style": news_params.article_style,
"language_instruction": news_params.language_instruction,
},
)

Expand Down
21 changes: 17 additions & 4 deletions src/leettools/flow/steps/step_query_rewrite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import ClassVar, List, Type
from typing import ClassVar, List, Optional, Type

from leettools.core.schemas.chat_query_metadata import ChatQueryMetadata
from leettools.core.strategy.schemas.strategy_section import StrategySection
Expand Down Expand Up @@ -27,17 +27,30 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]:
def run_step(
exec_info: ExecInfo,
query_metadata: ChatQueryMetadata,
rewrite_section: Optional[StrategySection] = None,
) -> Rewrite:
"""
Rewrite the query based on the strategy section and the query metadata.

If rewrite_section is provided, use it directly.
If rewrite_section is not provided, get it from the strategy.

Args:
- exec_info: the execution info
- query_metadata: the query metadata
- rewrite_section: the rewrite section

Returns:
- rewrite: the rewrite result
"""
display_logger = exec_info.display_logger
display_logger.info(
f"[Status] Rewrite query: {exec_info.target_chat_query_item.query_content}"
)
rewrite_section = exec_info.strategy.strategy_sections.get(
StrategySectionName.REWRITE, None
)
if rewrite_section is None:
rewrite_section = exec_info.strategy.strategy_sections.get(
StrategySectionName.REWRITE, None
)
return _step_run_rewriter(
exec_info=exec_info,
rewrite_section=rewrite_section,
Expand Down
1 change: 1 addition & 0 deletions src/leettools/svc/api/v1/routers/file_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ async def read_raw_document(uri: str) -> FileResponse:
logger().debug(f"Reading raw document from {uri}")

safe_base_path = Path(self.settings.DATA_ROOT)
# TODO: make these configurable and in sync with Docker config
incoming_file_path = Path("/incoming")
uploads_file_path = Path("/app/uploads")

Expand Down
Loading