From 6011b8e99c0dcb5c2465f966fee5d975ca6afd40 Mon Sep 17 00:00:00 2001 From: Feng Peng Date: Tue, 1 Apr 2025 14:45:52 -0700 Subject: [PATCH] Make answer_flow check local KB when querying local. (#1646) * Make answer_flow check local KB when querying local. (#1631) Co-authored-by: Feng Peng * Fix flow_news options. --------- Co-authored-by: Feng Peng --- .../i18n/locales/en/LC_MESSAGES/messages.po | 270 ++++++++++-------- .../i18n/locales/ja/LC_MESSAGES/messages.po | 255 +++++++++-------- .../i18n/locales/zh/LC_MESSAGES/messages.po | 256 +++++++++-------- .../core/strategy/schemas/strategy_section.py | 4 +- .../eds/api_caller/api_caller_base.py | 41 ++- .../rewrite/_impl/rewrite_direct_dynamic.py | 1 - .../flow/flows/answer/flow_answer.py | 39 ++- src/leettools/flow/flows/news/flow_news.py | 29 +- .../flow/steps/step_query_rewrite.py | 21 +- .../svc/api/v1/routers/file_router.py | 1 + 10 files changed, 530 insertions(+), 387 deletions(-) diff --git a/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po index cd9cbc4..269a23a 100644 --- a/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po +++ b/src/leettools/common/i18n/locales/en/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-03-16 23:00-0700\n" +"POT-Creation-Date: 2025-03-24 21:03-0700\n" "PO-Revision-Date: 2025-03-08 15:55-0800\n" "Last-Translator: FULL NAME \n" "Language: en\n" @@ -18,73 +18,59 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.16.0\n" -#: ../../settings.py:731 -#: ../../settings.py:731 +#: ../../settings.py:736 msgid "LLM API Key used in the inference process." msgstr "" -#: ../../settings.py:738 -#: ../../settings.py:738 +#: ../../settings.py:743 msgid "LLM Base URL used in the inference process." 
msgstr "" -#: ../../settings.py:745 -#: ../../settings.py:745 +#: ../../settings.py:750 msgid "Default inference model used in the inference process." msgstr "" -#: ../../settings.py:752 -#: ../../settings.py:752 +#: ../../settings.py:757 msgid "Default dense embedding model used in the embedding process." msgstr "" -#: ../../settings.py:761 -#: ../../settings.py:761 +#: ../../settings.py:766 msgid "Dense embedding model dimension." msgstr "" -#: ../../settings.py:768 -#: ../../settings.py:768 +#: ../../settings.py:773 msgid "API Key used for the embedder." msgstr "" -#: ../../settings.py:775 -#: ../../settings.py:775 +#: ../../settings.py:780 msgid "Base URL for the the embedder service." msgstr "" -#: ../../settings.py:782 -#: ../../settings.py:782 +#: ../../settings.py:787 msgid "API Key used in the reranking process." msgstr "" -#: ../../settings.py:789 -#: ../../settings.py:789 +#: ../../settings.py:794 msgid "Base url used in the reranking process." msgstr "" -#: ../../settings.py:796 -#: ../../settings.py:796 +#: ../../settings.py:801 msgid "Default rerank model used in the rerank process." msgstr "" -#: ../../settings.py:803 -#: ../../settings.py:803 +#: ../../settings.py:808 msgid "Google API Key used in the search process." msgstr "" -#: ../../settings.py:810 -#: ../../settings.py:810 +#: ../../settings.py:815 msgid "Google custom search Key used in the search process." msgstr "" -#: ../../settings.py:817 -#: ../../settings.py:817 +#: ../../settings.py:822 msgid "Google custom search Key for patent used in the search process." msgstr "" -#: ../../settings.py:826 -#: ../../settings.py:826 +#: ../../settings.py:831 msgid "Tavily API Key used in the search process." msgstr "" @@ -92,15 +78,15 @@ msgstr "" msgid "The section of the settings" msgstr "" -#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:50 +#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:51 msgid "The name of the variable." 
msgstr "" -#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:60 +#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:61 msgid "The description of the variable." msgstr "" -#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:63 +#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:64 msgid "The default value of the variable." msgstr "" @@ -136,222 +122,233 @@ msgstr "" msgid "The time the settings was updated." msgstr "" -#: ../../flow/flow_option_items.py:53 +#: ../../flow/flow_option_items.py:54 msgid "The flow components that use this variable." msgstr "" -#: ../../flow/flow_option_items.py:57 +#: ../../flow/flow_option_items.py:58 msgid "The display name of the variable." msgstr "" -#: ../../flow/flow_option_items.py:67 +#: ../../flow/flow_option_items.py:68 msgid "The type of the value,currently support str, int, float, bool." msgstr "" -#: ../../flow/flow_option_items.py:73 +#: ../../flow/flow_option_items.py:74 msgid "Whether the variable is required or not." msgstr "" -#: ../../flow/flow_option_items.py:77 +#: ../../flow/flow_option_items.py:78 msgid "Whether the variable should be explicitly set by the user or not." msgstr "" -#: ../../flow/flow_option_items.py:83 +#: ../../flow/flow_option_items.py:84 msgid "Whether the variable should be displayed in multiple lines or not." msgstr "" -#: ../../flow/flow_option_items.py:89 +#: ../../flow/flow_option_items.py:90 msgid "The example value of the variable, if no default is provided." msgstr "" -#: ../../flow/flow_option_items.py:93 +#: ../../flow/flow_option_items.py:94 msgid "" "If the value should be shown and edited as the specified programming " "code, such as Python, Markdown. Default is None." msgstr "" -#: ../../flow/flow_option_items.py:99 +#: ../../flow/flow_option_items.py:100 msgid "The variables the code should provide to the backend." 
msgstr "" -#: ../../flow/flow_option_items.py:115 +#: ../../flow/flow_option_items.py:116 msgid "Planning Model" msgstr "" -#: ../../flow/flow_option_items.py:116 +#: ../../flow/flow_option_items.py:117 msgid "The model used to do the article planning." msgstr "" -#: ../../flow/flow_option_items.py:135 +#: ../../flow/flow_option_items.py:136 msgid "Summarizing Model" msgstr "" -#: ../../flow/flow_option_items.py:136 +#: ../../flow/flow_option_items.py:137 msgid "The model used to summarize each article." msgstr "" -#: ../../flow/flow_option_items.py:155 +#: ../../flow/flow_option_items.py:156 msgid "Writing Model" msgstr "" -#: ../../flow/flow_option_items.py:156 +#: ../../flow/flow_option_items.py:157 msgid "The model used to generating each section." msgstr "" -#: ../../flow/flow_option_items.py:169 +#: ../../flow/flow_option_items.py:170 msgid "Retriever" msgstr "" -#: ../../flow/flow_option_items.py:170 +#: ../../flow/flow_option_items.py:171 msgid "The type of retriever to use for the web search." msgstr "" -#: ../../flow/flow_option_items.py:184 +#: ../../flow/flow_option_items.py:185 msgid "Content Instruction" msgstr "" -#: ../../flow/flow_option_items.py:185 +#: ../../flow/flow_option_items.py:186 msgid "" "The relevance of the result documents from keyword search is assessed by " "the content instruction if provided. " msgstr "" -#: ../../flow/flow_option_items.py:203 +#: ../../flow/flow_option_items.py:204 msgid "Days Limit" msgstr "" -#: ../../flow/flow_option_items.py:204 +#: ../../flow/flow_option_items.py:205 msgid "" "Number of days to limit the search results. 0 or empty means no limit. In" " local KB, filters by the import time." msgstr "" -#: ../../flow/flow_option_items.py:222 +#: ../../flow/flow_option_items.py:223 msgid "Max search Results" msgstr "" -#: ../../flow/flow_option_items.py:223 -msgid "The maximum number of search results for retrievers to return. Each retriever may have different paging mechanisms. 
Use the parameter and the search iteration to control the number of results.If the retieval is local, -1 here means process all documents." +#: ../../flow/flow_option_items.py:224 +msgid "" +"The maximum number of search results for retrievers to return. Each " +"retriever may have different paging mechanisms. Use the parameter and the" +" search iteration to control the number of results.If the retieval is " +"local, -1 here means process all documents." msgstr "" -#: ../../flow/flow_option_items.py:238 +#: ../../flow/flow_option_items.py:239 msgid "Rewrite Search Keywords" msgstr "" -#: ../../flow/flow_option_items.py:239 +#: ../../flow/flow_option_items.py:240 msgid "Ask the LLM to generate search keywords from the search query." msgstr "" -#: ../../flow/flow_option_items.py:252 +#: ../../flow/flow_option_items.py:253 msgid "Search Language" msgstr "" -#: ../../flow/flow_option_items.py:253 +#: ../../flow/flow_option_items.py:254 msgid "The language used for keyword search if the search API supports." msgstr "" -#: ../../flow/flow_option_items.py:268 +#: ../../flow/flow_option_items.py:269 msgid "Output Language" msgstr "" -#: ../../flow/flow_option_items.py:269 +#: ../../flow/flow_option_items.py:270 msgid "Output the result in the language." msgstr "" -#: ../../flow/flow_option_items.py:282 +#: ../../flow/flow_option_items.py:283 msgid "Output Example" msgstr "" -#: ../../flow/flow_option_items.py:283 +#: ../../flow/flow_option_items.py:284 msgid "" "The example of the expected output content. If left empty, no example " "will be provided to LLM." msgstr "" -#: ../../flow/flow_option_items.py:300 +#: ../../flow/flow_option_items.py:301 msgid "Number of Sections" msgstr "" -#: ../../flow/flow_option_items.py:301 +#: ../../flow/flow_option_items.py:302 msgid "" "The number of sections in the output article. If left empty, the planning" " agent will decide automatically." 
msgstr "" -#: ../../flow/flow_option_items.py:317 +#: ../../flow/flow_option_items.py:318 +msgid "Timezone" +msgstr "" + +#: ../../flow/flow_option_items.py:319 +msgid "" +"The timezone when determining the date. See " +"https://docs.python.org/3/library/zoneinfo.html" +msgstr "" + +#: ../../flow/flow_option_items.py:335 msgid "Article Style" msgstr "" -#: ../../flow/flow_option_items.py:318 +#: ../../flow/flow_option_items.py:336 msgid "" "The style of the output article such as analytical research reports, " "humorous news articles, or technical blog posts." msgstr "" -#: ../../flow/flow_option_items.py:335 +#: ../../flow/flow_option_items.py:353 msgid "Word Count" msgstr "" -#: ../../flow/flow_option_items.py:336 +#: ../../flow/flow_option_items.py:354 msgid "The number of words in the output section. Empty means automatics." msgstr "" -#: ../../flow/flow_option_items.py:351 +#: ../../flow/flow_option_items.py:369 msgid "Extract Instruction" msgstr "" -#: ../../flow/flow_option_items.py:352 +#: ../../flow/flow_option_items.py:370 msgid "Describe what information to extract from the content." msgstr "" -#: ../../flow/flow_option_items.py:377 +#: ../../flow/flow_option_items.py:395 msgid "Extract Pydantic Model" msgstr "" -#: ../../flow/flow_option_items.py:378 +#: ../../flow/flow_option_items.py:396 msgid "" "The schema of the target data as a pydantic model. Can be a single line " "string as the file path to the pydantic model, or a multi-line string as " "the pydantic model definition." -"The schema of the target data as a pydantic model. Can be a single line " -"string as the file path to the pydantic model, or a multi-line string as " -"the pydantic model definition." msgstr "" -#: ../../flow/flow_option_items.py:402 +#: ../../flow/flow_option_items.py:420 msgid "Extract Schema as JSON" msgstr "" -#: ../../flow/flow_option_items.py:403 +#: ../../flow/flow_option_items.py:421 msgid "The schema of the extracted information. Should be a JSON string." 
msgstr "" -#: ../../flow/flow_option_items.py:445 +#: ../../flow/flow_option_items.py:463 msgid "Extraction Instructions in Python" msgstr "" -#: ../../flow/flow_option_items.py:446 +#: ../../flow/flow_option_items.py:464 msgid "" "The instructions of the extractions in Python code. Right now the " "required variables are 'target_model_name' and 'instructions'. Also we " "need to specify the key fields and verify fields if needed." msgstr "" -#: ../../flow/flow_option_items.py:483 +#: ../../flow/flow_option_items.py:501 msgid "Target Pydantic Model Name used in the final list" msgstr "" -#: ../../flow/flow_option_items.py:484 +#: ../../flow/flow_option_items.py:502 msgid "" "There might be multiple Pydantic models in the schema definition. Specify" " which model to use for the final list." msgstr "" -#: ../../flow/flow_option_items.py:502 +#: ../../flow/flow_option_items.py:520 msgid "Key Fields" msgstr "" -#: ../../flow/flow_option_items.py:503 +#: ../../flow/flow_option_items.py:521 msgid "" "Comma separated field names that identifies an object in the extraction. " "Extracted data with the same key fields will be considered of the same " @@ -361,11 +358,11 @@ msgid "" "unique." msgstr "" -#: ../../flow/flow_option_items.py:522 +#: ../../flow/flow_option_items.py:540 msgid "Save extracted data to backend" msgstr "" -#: ../../flow/flow_option_items.py:523 +#: ../../flow/flow_option_items.py:541 msgid "" "Save the extracted data to the backend. Default True. If False, the " "extracted data will not be saved. The saved data will have the same " @@ -373,11 +370,11 @@ msgid "" "time and the original document URI." msgstr "" -#: ../../flow/flow_option_items.py:541 +#: ../../flow/flow_option_items.py:559 msgid "Verification Fields" msgstr "" -#: ../../flow/flow_option_items.py:542 +#: ../../flow/flow_option_items.py:560 msgid "" "Comma separated field names that need to be verified for the extracted " "objects. 
For example, although the address of a company is not in the key" @@ -386,110 +383,157 @@ msgid "" "no verification will be performed." msgstr "" -#: ../../flow/flow_option_items.py:560 +#: ../../flow/flow_option_items.py:578 ../../flow/flows/news/flow_news.py:148 msgid "Output format" msgstr "" -#: ../../flow/flow_option_items.py:561 +#: ../../flow/flow_option_items.py:579 msgid "" "The output of the extracted data. Default is json. Currently also support" " csv, md." msgstr "" -#: ../../flow/flow_option_items.py:577 +#: ../../flow/flow_option_items.py:595 msgid "Reference Style" msgstr "" -#: ../../flow/flow_option_items.py:578 +#: ../../flow/flow_option_items.py:596 msgid "" "The style of the references in the output article. Right now support " "news, default, and full." msgstr "" -#: ../../flow/flow_option_items.py:596 +#: ../../flow/flow_option_items.py:614 msgid "Strict Context" msgstr "" -#: ../../flow/flow_option_items.py:597 +#: ../../flow/flow_option_items.py:615 msgid "When generating a section, whether to use strict context or not." msgstr "" -#: ../../flow/flow_option_items.py:614 +#: ../../flow/flow_option_items.py:632 msgid "Target Site" msgstr "" -#: ../../flow/flow_option_items.py:615 +#: ../../flow/flow_option_items.py:633 msgid "" "When searching the web, limit the search to this site. Empty means search" " all sites." msgstr "" -#: ../../flow/flow_option_items.py:633 +#: ../../flow/flow_option_items.py:651 msgid "Max iteration when using the web search retriever" msgstr "" -#: ../../flow/flow_option_items.py:634 +#: ../../flow/flow_option_items.py:652 msgid "If the max result is not reached, how many times we go to the next page." msgstr "" -#: ../../flow/flow_option_items.py:651 +#: ../../flow/flow_option_items.py:669 msgid "Recursive scrape" msgstr "" -#: ../../flow/flow_option_items.py:652 +#: ../../flow/flow_option_items.py:670 msgid "If true, scrape the top urls found in the search results documents." 
msgstr "" -#: ../../flow/flow_option_items.py:669 +#: ../../flow/flow_option_items.py:687 msgid "Recursive scrape iteration" msgstr "" -#: ../../flow/flow_option_items.py:670 +#: ../../flow/flow_option_items.py:688 msgid "" "When we do recursive scraping, we will not stop until we reach the max " "number of results or the number of iterations specified here." msgstr "" -#: ../../flow/flow_option_items.py:688 +#: ../../flow/flow_option_items.py:706 msgid "Recursive scrape max item count" msgstr "" -#: ../../flow/flow_option_items.py:689 +#: ../../flow/flow_option_items.py:707 msgid "" "When we do recursive scraping, we will not stop until we reach the number" " of max iterations or the max number of results specified here." msgstr "" -#: ../../flow/flow_option_items.py:707 +#: ../../flow/flow_option_items.py:725 msgid "Image Search" msgstr "" -#: ../../flow/flow_option_items.py:708 +#: ../../flow/flow_option_items.py:726 msgid "When searching on the web, limit the search to image search. " msgstr "" -#: ../../flow/flow_option_items.py:723 +#: ../../flow/flow_option_items.py:741 msgid "Excluded Site" msgstr "" -#: ../../flow/flow_option_items.py:724 +#: ../../flow/flow_option_items.py:742 msgid "" "List of sites separated by comma to ignore when search for the " "information. Empty means no filter." msgstr "" -#: ../../flow/flow_option_items.py:742 +#: ../../flow/flow_option_items.py:760 msgid "Docsource UUID" msgstr "" -#: ../../flow/flow_option_items.py:743 +#: ../../flow/flow_option_items.py:761 msgid "The docsource uuid to run the query on when querying local KB." msgstr "" -#: ../../flow/flow_option_items.py:758 +#: ../../flow/flow_option_items.py:776 msgid "Context Limit" msgstr "" -#: ../../flow/flow_option_items.py:759 +#: ../../flow/flow_option_items.py:777 msgid "Override the context limit from the model info." 
msgstr "" + +#: ../../flow/flows/news/flow_news.py:124 +msgid "News item source count threshold" +msgstr "" + +#: ../../flow/flows/news/flow_news.py:125 +msgid "" +"Number of sources a news item has to have to be included in the result. " +"Default is 1. Depends on the nature of the knowledge base." +msgstr "" + +#: ../../flow/flows/news/flow_news.py:136 +msgid "Include previously reported news items" +msgstr "" + +#: ../../flow/flows/news/flow_news.py:137 +msgid "" +"Include all news items in the result, even if it has been reported " +"before. Default is False." +msgstr "" + +#: ../../flow/flows/news/flow_news.py:149 +msgid "The format of the output: 'md' (default), 'table and 'json'." +msgstr "" + +#: ../../flow/flows/news/flow_news.py:160 +msgid "Run search before extracting news data" +msgstr "" + +#: ../../flow/flows/news/flow_news.py:162 +msgid "Run the search step before extracting the news data. Default is True." +msgstr "" + +#~ msgid "" +#~ "The schema of the target data as" +#~ " a pydantic model. Can be a " +#~ "single line string as the file " +#~ "path to the pydantic model, or a" +#~ " multi-line string as the pydantic" +#~ " model definition.The schema of the " +#~ "target data as a pydantic model. " +#~ "Can be a single line string as " +#~ "the file path to the pydantic " +#~ "model, or a multi-line string as" +#~ " the pydantic model definition." 
+#~ msgstr "" + diff --git a/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po index ab233e8..2093281 100644 --- a/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po +++ b/src/leettools/common/i18n/locales/ja/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-03-16 23:00-0700\n" +"POT-Creation-Date: 2025-03-24 21:03-0700\n" "PO-Revision-Date: 2025-03-08 15:55-0800\n" "Last-Translator: FULL NAME \n" "Language: ja\n" @@ -18,73 +18,59 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.16.0\n" -#: ../../settings.py:731 -#: ../../settings.py:731 +#: ../../settings.py:736 msgid "LLM API Key used in the inference process." msgstr "推論プロセスで使用されるLLM APIキー。" -#: ../../settings.py:738 -#: ../../settings.py:738 +#: ../../settings.py:743 msgid "LLM Base URL used in the inference process." msgstr "推論プロセスで使用されるLLMベースURL。" -#: ../../settings.py:745 -#: ../../settings.py:745 +#: ../../settings.py:750 msgid "Default inference model used in the inference process." msgstr "推論プロセスで使用されるデフォルトの推論モデル。" -#: ../../settings.py:752 -#: ../../settings.py:752 +#: ../../settings.py:757 msgid "Default dense embedding model used in the embedding process." msgstr "埋め込みプロセスで使用されるデフォルトの密な埋め込みモデル。" -#: ../../settings.py:761 -#: ../../settings.py:761 +#: ../../settings.py:766 msgid "Dense embedding model dimension." msgstr "密な埋め込みモデルの次元。" -#: ../../settings.py:768 -#: ../../settings.py:768 +#: ../../settings.py:773 msgid "API Key used for the embedder." msgstr "埋め込み器で使用されるAPIキー。" -#: ../../settings.py:775 -#: ../../settings.py:775 +#: ../../settings.py:780 msgid "Base URL for the the embedder service." msgstr "埋め込みサービスのベースURL。" -#: ../../settings.py:782 -#: ../../settings.py:782 +#: ../../settings.py:787 msgid "API Key used in the reranking process." 
msgstr "再ランク付けプロセスで使用されるAPIキー。" -#: ../../settings.py:789 -#: ../../settings.py:789 +#: ../../settings.py:794 msgid "Base url used in the reranking process." msgstr "再ランク付けプロセスで使用されるベースURL。" -#: ../../settings.py:796 -#: ../../settings.py:796 +#: ../../settings.py:801 msgid "Default rerank model used in the rerank process." msgstr "再ランク付けプロセスで使用されるデフォルトの再ランクモデル。" -#: ../../settings.py:803 -#: ../../settings.py:803 +#: ../../settings.py:808 msgid "Google API Key used in the search process." msgstr "検索プロセスで使用されるGoogle APIキー。" -#: ../../settings.py:810 -#: ../../settings.py:810 +#: ../../settings.py:815 msgid "Google custom search Key used in the search process." msgstr "検索プロセスで使用されるGoogleカスタム検索キー。" -#: ../../settings.py:817 -#: ../../settings.py:817 +#: ../../settings.py:822 msgid "Google custom search Key for patent used in the search process." msgstr "検索プロセスで使用される特許用Googleカスタム検索キー。" -#: ../../settings.py:826 -#: ../../settings.py:826 +#: ../../settings.py:831 msgid "Tavily API Key used in the search process." msgstr "検索プロセスで使用されるTavily APIキー。" @@ -92,15 +78,15 @@ msgstr "検索プロセスで使用されるTavily APIキー。" msgid "The section of the settings" msgstr "設定のセクション" -#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:50 +#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:51 msgid "The name of the variable." msgstr "変数の名前。" -#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:60 +#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:61 msgid "The description of the variable." msgstr "変数の説明。" -#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:63 +#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:64 msgid "The default value of the variable." msgstr "変数のデフォルト値。" @@ -137,219 +123,233 @@ msgstr "設定が作成された時間。" msgid "The time the settings was updated." 
msgstr "設定が更新された時間。" -#: ../../flow/flow_option_items.py:53 +#: ../../flow/flow_option_items.py:54 msgid "The flow components that use this variable." msgstr "この変数を使用するフローコンポーネント。" -#: ../../flow/flow_option_items.py:57 +#: ../../flow/flow_option_items.py:58 msgid "The display name of the variable." msgstr "変数の表示名。" -#: ../../flow/flow_option_items.py:67 +#: ../../flow/flow_option_items.py:68 msgid "The type of the value,currently support str, int, float, bool." msgstr "値の型。現在str、int、float、boolをサポートしています。" -#: ../../flow/flow_option_items.py:73 +#: ../../flow/flow_option_items.py:74 msgid "Whether the variable is required or not." msgstr "変数が必須かどうか。" -#: ../../flow/flow_option_items.py:77 +#: ../../flow/flow_option_items.py:78 msgid "Whether the variable should be explicitly set by the user or not." msgstr "変数をユーザーが明示的に設定する必要があるかどうか。" -#: ../../flow/flow_option_items.py:83 +#: ../../flow/flow_option_items.py:84 msgid "Whether the variable should be displayed in multiple lines or not." msgstr "変数を複数行で表示するかどうか。" -#: ../../flow/flow_option_items.py:89 +#: ../../flow/flow_option_items.py:90 msgid "The example value of the variable, if no default is provided." msgstr "デフォルト値が提供されていない場合の変数の例値。" -#: ../../flow/flow_option_items.py:93 +#: ../../flow/flow_option_items.py:94 msgid "" "If the value should be shown and edited as the specified programming " "code, such as Python, Markdown. Default is None." msgstr "値をPythonやMarkdownなどの指定されたプログラミングコードとして表示・編集するかどうか。デフォルトはNone。" -#: ../../flow/flow_option_items.py:99 +#: ../../flow/flow_option_items.py:100 msgid "The variables the code should provide to the backend." msgstr "コードがバックエンドに提供すべき変数。" -#: ../../flow/flow_option_items.py:115 +#: ../../flow/flow_option_items.py:116 msgid "Planning Model" msgstr "プランニングモデル" -#: ../../flow/flow_option_items.py:116 +#: ../../flow/flow_option_items.py:117 msgid "The model used to do the article planning." 
msgstr "記事の計画に使用されるモデル。" -#: ../../flow/flow_option_items.py:135 +#: ../../flow/flow_option_items.py:136 msgid "Summarizing Model" msgstr "要約モデル" -#: ../../flow/flow_option_items.py:136 +#: ../../flow/flow_option_items.py:137 msgid "The model used to summarize each article." msgstr "各記事を要約するために使用されるモデル。" -#: ../../flow/flow_option_items.py:155 +#: ../../flow/flow_option_items.py:156 msgid "Writing Model" msgstr "執筆モデル" -#: ../../flow/flow_option_items.py:156 +#: ../../flow/flow_option_items.py:157 msgid "The model used to generating each section." msgstr "各セクションを生成するために使用されるモデル。" -#: ../../flow/flow_option_items.py:169 +#: ../../flow/flow_option_items.py:170 msgid "Retriever" msgstr "リトリーバー" -#: ../../flow/flow_option_items.py:170 +#: ../../flow/flow_option_items.py:171 msgid "The type of retriever to use for the web search." msgstr "ウェブ検索に使用するリトリーバーの種類。" -#: ../../flow/flow_option_items.py:184 +#: ../../flow/flow_option_items.py:185 msgid "Content Instruction" msgstr "コンテンツ指示" -#: ../../flow/flow_option_items.py:185 +#: ../../flow/flow_option_items.py:186 msgid "" "The relevance of the result documents from keyword search is assessed by " "the content instruction if provided. " msgstr "キーワード検索結果の文書の関連性は、提供された場合、コンテンツ指示によって評価されます。" -#: ../../flow/flow_option_items.py:203 +#: ../../flow/flow_option_items.py:204 msgid "Days Limit" msgstr "日数制限" -#: ../../flow/flow_option_items.py:204 +#: ../../flow/flow_option_items.py:205 msgid "" "Number of days to limit the search results. 0 or empty means no limit. In" " local KB, filters by the import time." msgstr "検索結果を制限する日数。0または空は制限なしを意味します。ローカルKBでは、インポート時間でフィルタリングします。" -#: ../../flow/flow_option_items.py:222 +#: ../../flow/flow_option_items.py:223 msgid "Max search Results" msgstr "最大検索結果数" -#: ../../flow/flow_option_items.py:223 -msgid "The maximum number of search results for retrievers to return. Each retriever may have different paging mechanisms. 
Use the parameter and the search iteration to control the number of results.If the retieval is local, -1 here means process all documents." +#: ../../flow/flow_option_items.py:224 +msgid "" +"The maximum number of search results for retrievers to return. Each " +"retriever may have different paging mechanisms. Use the parameter and the" +" search iteration to control the number of results.If the retieval is " +"local, -1 here means process all documents." msgstr "リトリーバーが返す検索結果の最大数。各リトリーバーは異なるページング機構を持つ場合があります。パラメータと検索反復を使用して結果の数を制御します。" -#: ../../flow/flow_option_items.py:238 +#: ../../flow/flow_option_items.py:239 msgid "Rewrite Search Keywords" msgstr "検索キーワードの書き換え" -#: ../../flow/flow_option_items.py:239 +#: ../../flow/flow_option_items.py:240 msgid "Ask the LLM to generate search keywords from the search query." msgstr "検索クエリから検索キーワードを生成するようLLMに依頼します。" -#: ../../flow/flow_option_items.py:252 +#: ../../flow/flow_option_items.py:253 msgid "Search Language" msgstr "検索言語" -#: ../../flow/flow_option_items.py:253 +#: ../../flow/flow_option_items.py:254 msgid "The language used for keyword search if the search API supports." msgstr "検索APIがサポートしている場合のキーワード検索に使用される言語。" -#: ../../flow/flow_option_items.py:268 +#: ../../flow/flow_option_items.py:269 msgid "Output Language" msgstr "出力言語" -#: ../../flow/flow_option_items.py:269 +#: ../../flow/flow_option_items.py:270 msgid "Output the result in the language." msgstr "結果をその言語で出力します。" -#: ../../flow/flow_option_items.py:282 +#: ../../flow/flow_option_items.py:283 msgid "Output Example" msgstr "出力例" -#: ../../flow/flow_option_items.py:283 +#: ../../flow/flow_option_items.py:284 msgid "" "The example of the expected output content. If left empty, no example " "will be provided to LLM." 
msgstr "期待される出力内容の例。空の場合、LLMに例は提供されません。" -#: ../../flow/flow_option_items.py:300 +#: ../../flow/flow_option_items.py:301 msgid "Number of Sections" msgstr "セクション数" -#: ../../flow/flow_option_items.py:301 +#: ../../flow/flow_option_items.py:302 msgid "" "The number of sections in the output article. If left empty, the planning" " agent will decide automatically." msgstr "出力記事のセクション数。空の場合、プランニングエージェントが自動的に決定します。" -#: ../../flow/flow_option_items.py:317 +#: ../../flow/flow_option_items.py:318 +msgid "Timezone" +msgstr "タイムゾーン" + +#: ../../flow/flow_option_items.py:319 +msgid "" +"The timezone when determining the date. See " +"https://docs.python.org/3/library/zoneinfo.html" +msgstr "日付を決定する際に使用されるタイムゾーン。https://docs.python.org/3/library/zoneinfo.htmlを参照してください。" + +#: ../../flow/flow_option_items.py:335 msgid "Article Style" msgstr "記事スタイル" -#: ../../flow/flow_option_items.py:318 +#: ../../flow/flow_option_items.py:336 msgid "" "The style of the output article such as analytical research reports, " "humorous news articles, or technical blog posts." msgstr "分析的な研究レポート、ユーモアのあるニュース記事、技術ブログ投稿などの出力記事のスタイル。" -#: ../../flow/flow_option_items.py:335 +#: ../../flow/flow_option_items.py:353 msgid "Word Count" msgstr "単語数" -#: ../../flow/flow_option_items.py:336 +#: ../../flow/flow_option_items.py:354 msgid "The number of words in the output section. Empty means automatics." msgstr "出力セクションの単語数。空は自動を意味します。" -#: ../../flow/flow_option_items.py:351 +#: ../../flow/flow_option_items.py:369 msgid "Extract Instruction" msgstr "抽出指示" -#: ../../flow/flow_option_items.py:352 +#: ../../flow/flow_option_items.py:370 msgid "Describe what information to extract from the content." msgstr "コンテンツから抽出する情報を説明します。" -#: ../../flow/flow_option_items.py:377 +#: ../../flow/flow_option_items.py:395 msgid "Extract Pydantic Model" msgstr "Pydanticモデルの抽出" -#: ../../flow/flow_option_items.py:378 +#: ../../flow/flow_option_items.py:396 msgid "" "The schema of the target data as a pydantic model. 
Can be a single line " "string as the file path to the pydantic model, or a multi-line string as " "the pydantic model definition." msgstr "ターゲットデータのスキーマをpydanticモデルとして定義します。単一行の文字列としてpydanticモデルのファイルパス、または複数行の文字列としてpydanticモデルの定義を指定できます。" -#: ../../flow/flow_option_items.py:402 +#: ../../flow/flow_option_items.py:420 msgid "Extract Schema as JSON" msgstr "JSONとしてスキーマを抽出" -#: ../../flow/flow_option_items.py:403 +#: ../../flow/flow_option_items.py:421 msgid "The schema of the extracted information. Should be a JSON string." msgstr "抽出された情報のスキーマ。JSON文字列である必要があります。" -#: ../../flow/flow_option_items.py:445 +#: ../../flow/flow_option_items.py:463 msgid "Extraction Instructions in Python" msgstr "Pythonでの抽出指示" -#: ../../flow/flow_option_items.py:446 +#: ../../flow/flow_option_items.py:464 msgid "" "The instructions of the extractions in Python code. Right now the " "required variables are 'target_model_name' and 'instructions'. Also we " "need to specify the key fields and verify fields if needed." msgstr "Pythonコードでの抽出指示。現在、必要な変数は'target_model_name'と'instructions'です。また、必要に応じてキーフィールドと検証フィールドを指定する必要があります。" -#: ../../flow/flow_option_items.py:483 +#: ../../flow/flow_option_items.py:501 msgid "Target Pydantic Model Name used in the final list" msgstr "最終リストで使用されるターゲットPydanticモデル名" -#: ../../flow/flow_option_items.py:484 +#: ../../flow/flow_option_items.py:502 msgid "" "There might be multiple Pydantic models in the schema definition. Specify" " which model to use for the final list." msgstr "スキーマ定義に複数のPydanticモデルが存在する可能性があります。最終リストに使用するモデルを指定してください。" -#: ../../flow/flow_option_items.py:502 +#: ../../flow/flow_option_items.py:520 msgid "Key Fields" msgstr "キーフィールド" -#: ../../flow/flow_option_items.py:503 +#: ../../flow/flow_option_items.py:521 msgid "" "Comma separated field names that identifies an object in the extraction. " "Extracted data with the same key fields will be considered of the same " @@ -359,11 +359,11 @@ msgid "" "unique." 
msgstr "抽出時にオブジェクトを識別するカンマ区切りのフィールド名。同じキーフィールドを持つ抽出データは同じオブジェクトとみなされます。同じオブジェクトの抽出されたすべてのバージョンはそれらに基づいて重複排除されます。キーフィールドは抽出された情報のスキーマに存在する必要があります。空の場合、抽出された各オブジェクトは一意とみなされます。" -#: ../../flow/flow_option_items.py:522 +#: ../../flow/flow_option_items.py:540 msgid "Save extracted data to backend" msgstr "抽出データをバックエンドに保存" -#: ../../flow/flow_option_items.py:523 +#: ../../flow/flow_option_items.py:541 msgid "" "Save the extracted data to the backend. Default True. If False, the " "extracted data will not be saved. The saved data will have the same " @@ -371,11 +371,11 @@ msgid "" "time and the original document URI." msgstr "抽出データをバックエンドに保存します。デフォルトはTrue。Falseの場合、抽出データは保存されません。保存されたデータはpydanticモデルで指定されたのと同じスキーマを持ち、インポート時間や元のドキュメントURIなどのメタデータも含まれます。" -#: ../../flow/flow_option_items.py:541 +#: ../../flow/flow_option_items.py:559 msgid "Verification Fields" msgstr "検証フィールド" -#: ../../flow/flow_option_items.py:542 +#: ../../flow/flow_option_items.py:560 msgid "" "Comma separated field names that need to be verified for the extracted " "objects. For example, although the address of a company is not in the key" @@ -384,120 +384,145 @@ msgid "" "no verification will be performed." msgstr "抽出されたオブジェクトに対して検証が必要なカンマ区切りのフィールド名。例えば、会社の住所はキーフィールドにはなく、会社は異なるオフィスに複数の住所を持つ可能性がありますが、抽出されたすべての住所を検証し重複排除したい場合があります。空の場合、検証は実行されません。" -#: ../../flow/flow_option_items.py:560 +#: ../../flow/flow_option_items.py:578 ../../flow/flows/news/flow_news.py:148 msgid "Output format" msgstr "出力形式" -#: ../../flow/flow_option_items.py:561 +#: ../../flow/flow_option_items.py:579 msgid "" "The output of the extracted data. Default is json. Currently also support" " csv, md." msgstr "抽出データの出力形式。デフォルトはjsonです。現在、csv、mdもサポートしています。" -#: ../../flow/flow_option_items.py:577 +#: ../../flow/flow_option_items.py:595 msgid "Reference Style" msgstr "参照スタイル" -#: ../../flow/flow_option_items.py:578 +#: ../../flow/flow_option_items.py:596 msgid "" "The style of the references in the output article. 
Right now support " "news, default, and full." msgstr "出力記事の参照のスタイル。現在、news、default、fullをサポートしています。" -#: ../../flow/flow_option_items.py:596 +#: ../../flow/flow_option_items.py:614 msgid "Strict Context" msgstr "厳密なコンテキスト" -#: ../../flow/flow_option_items.py:597 +#: ../../flow/flow_option_items.py:615 msgid "When generating a section, whether to use strict context or not." msgstr "セクションを生成する際に、厳密なコンテキストを使用するかどうか。" -#: ../../flow/flow_option_items.py:614 +#: ../../flow/flow_option_items.py:632 msgid "Target Site" msgstr "ターゲットサイト" -#: ../../flow/flow_option_items.py:615 +#: ../../flow/flow_option_items.py:633 msgid "" "When searching the web, limit the search to this site. Empty means search" " all sites." msgstr "ウェブ検索時に、このサイトに検索を制限します。空はすべてのサイトを検索することを意味します。" -#: ../../flow/flow_option_items.py:633 +#: ../../flow/flow_option_items.py:651 msgid "Max iteration when using the web search retriever" msgstr "ウェブ検索リトリーバー使用時の最大反復回数" -#: ../../flow/flow_option_items.py:634 +#: ../../flow/flow_option_items.py:652 msgid "If the max result is not reached, how many times we go to the next page." msgstr "最大結果に達していない場合、次のページに何回移動するか。" -#: ../../flow/flow_option_items.py:651 +#: ../../flow/flow_option_items.py:669 msgid "Recursive scrape" msgstr "再帰的スクレイピング" -#: ../../flow/flow_option_items.py:652 +#: ../../flow/flow_option_items.py:670 msgid "If true, scrape the top urls found in the search results documents." msgstr "trueの場合、検索結果文書で見つかった上位のURLをスクレイピングします。" -#: ../../flow/flow_option_items.py:669 +#: ../../flow/flow_option_items.py:687 msgid "Recursive scrape iteration" msgstr "再帰的スクレイピングの反復" -#: ../../flow/flow_option_items.py:670 +#: ../../flow/flow_option_items.py:688 msgid "" "When we do recursive scraping, we will not stop until we reach the max " "number of results or the number of iterations specified here." 
msgstr "再帰的スクレイピングを行う場合、最大結果数またはここで指定された反復回数に達するまで停止しません。" -#: ../../flow/flow_option_items.py:688 +#: ../../flow/flow_option_items.py:706 msgid "Recursive scrape max item count" msgstr "再帰的スクレイピングの最大項目数" -#: ../../flow/flow_option_items.py:689 +#: ../../flow/flow_option_items.py:707 msgid "" "When we do recursive scraping, we will not stop until we reach the number" " of max iterations or the max number of results specified here." msgstr "再帰的スクレイピングを行う場合、最大反復回数またはここで指定された最大結果数に達するまで停止しません。" -#: ../../flow/flow_option_items.py:707 +#: ../../flow/flow_option_items.py:725 msgid "Image Search" msgstr "画像検索" -#: ../../flow/flow_option_items.py:708 +#: ../../flow/flow_option_items.py:726 msgid "When searching on the web, limit the search to image search. " msgstr "ウェブ検索時に、検索を画像検索に制限します。" -#: ../../flow/flow_option_items.py:723 +#: ../../flow/flow_option_items.py:741 msgid "Excluded Site" msgstr "除外サイト" -#: ../../flow/flow_option_items.py:724 +#: ../../flow/flow_option_items.py:742 msgid "" "List of sites separated by comma to ignore when search for the " "information. Empty means no filter." msgstr "情報を検索する際に無視するサイトのカンマ区切りリスト。空はフィルターなしを意味します。" -#: ../../flow/flow_option_items.py:742 +#: ../../flow/flow_option_items.py:760 msgid "Docsource UUID" msgstr "ドキュメントソースUUID" -#: ../../flow/flow_option_items.py:743 +#: ../../flow/flow_option_items.py:761 msgid "The docsource uuid to run the query on when querying local KB." msgstr "ローカルKBを照会する際にクエリを実行するドキュメントソースUUID。" -#: ../../flow/flow_option_items.py:758 +#: ../../flow/flow_option_items.py:776 msgid "Context Limit" msgstr "コンテキスト制限" -#: ../../flow/flow_option_items.py:759 +#: ../../flow/flow_option_items.py:777 msgid "Override the context limit from the model info." 
msgstr "モデル情報からのコンテキスト制限を上書きします。" -#~ msgid "" -#~ "The schema of the target data as" -#~ " a pydantic model, see " -#~ "https://docs.pydantic.dev" -#~ msgstr "ターゲットデータのスキーマをpydanticモデルとして定義します。https://docs.pydantic.dev を参照してください。" +#: ../../flow/flows/news/flow_news.py:124 +msgid "News item source count threshold" +msgstr "ニュース項目のソース数のしきい値" + +#: ../../flow/flows/news/flow_news.py:125 +msgid "" +"Number of sources a news item has to have to be included in the result. " +"Default is 1. Depends on the nature of the knowledge base." +msgstr "ニュース項目が結果に含まれるために必要なソースの数。デフォルトは1。ナレッジベースの性質によって異なります。" + +#: ../../flow/flows/news/flow_news.py:136 +msgid "Include previously reported news items" +msgstr "以前に報告されたニュース項目を含める" + +#: ../../flow/flows/news/flow_news.py:137 +msgid "" +"Include all news items in the result, even if it has been reported " +"before. Default is False." +msgstr "結果には、以前に報告されたニュース項目も含める。デフォルトはFalse。" + +#: ../../flow/flows/news/flow_news.py:149 +msgid "The format of the output: 'md' (default), 'table and 'json'." +msgstr "出力形式:'md'(デフォルト)、'table'と'json'。" + +#: ../../flow/flows/news/flow_news.py:160 +msgid "Run search before extracting news data" +msgstr "ニュースデータを抽出する前に検索を実行する" +#: ../../flow/flows/news/flow_news.py:162 +msgid "Run the search step before extracting the news data. Default is True." 
+msgstr "ニュースデータを抽出する前に検索ステップを実行する。デフォルトはTrue。" #~ msgid "" #~ "The schema of the target data as" diff --git a/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po b/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po index 2946f97..b866afd 100644 --- a/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po +++ b/src/leettools/common/i18n/locales/zh/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-03-16 23:00-0700\n" +"POT-Creation-Date: 2025-03-24 21:03-0700\n" "PO-Revision-Date: 2025-03-08 15:55-0800\n" "Last-Translator: FULL NAME \n" "Language: zh\n" @@ -18,73 +18,59 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.16.0\n" -#: ../../settings.py:731 -#: ../../settings.py:731 +#: ../../settings.py:736 msgid "LLM API Key used in the inference process." msgstr "推理过程中使用的LLM API密钥。" -#: ../../settings.py:738 -#: ../../settings.py:738 +#: ../../settings.py:743 msgid "LLM Base URL used in the inference process." msgstr "推理过程中使用的LLM基础URL。" -#: ../../settings.py:745 -#: ../../settings.py:745 +#: ../../settings.py:750 msgid "Default inference model used in the inference process." msgstr "推理过程中使用的默认推理模型。" -#: ../../settings.py:752 -#: ../../settings.py:752 +#: ../../settings.py:757 msgid "Default dense embedding model used in the embedding process." msgstr "嵌入过程中使用的默认密集嵌入模型。" -#: ../../settings.py:761 -#: ../../settings.py:761 +#: ../../settings.py:766 msgid "Dense embedding model dimension." msgstr "密集嵌入模型维度。" -#: ../../settings.py:768 -#: ../../settings.py:768 +#: ../../settings.py:773 msgid "API Key used for the embedder." msgstr "嵌入器使用的API密钥。" -#: ../../settings.py:775 -#: ../../settings.py:775 +#: ../../settings.py:780 msgid "Base URL for the the embedder service." msgstr "嵌入器服务的基础URL。" -#: ../../settings.py:782 -#: ../../settings.py:782 +#: ../../settings.py:787 msgid "API Key used in the reranking process." 
msgstr "重排序过程中使用的API密钥。" -#: ../../settings.py:789 -#: ../../settings.py:789 +#: ../../settings.py:794 msgid "Base url used in the reranking process." msgstr "重排序过程中使用的基础URL。" -#: ../../settings.py:796 -#: ../../settings.py:796 +#: ../../settings.py:801 msgid "Default rerank model used in the rerank process." msgstr "重排序过程中使用的默认重排序模型。" -#: ../../settings.py:803 -#: ../../settings.py:803 +#: ../../settings.py:808 msgid "Google API Key used in the search process." msgstr "搜索过程中使用的Google API密钥。" -#: ../../settings.py:810 -#: ../../settings.py:810 +#: ../../settings.py:815 msgid "Google custom search Key used in the search process." msgstr "搜索过程中使用的Google自定义搜索密钥。" -#: ../../settings.py:817 -#: ../../settings.py:817 +#: ../../settings.py:822 msgid "Google custom search Key for patent used in the search process." msgstr "搜索过程中使用的Google专利自定义搜索密钥。" -#: ../../settings.py:826 -#: ../../settings.py:826 +#: ../../settings.py:831 msgid "Tavily API Key used in the search process." msgstr "搜索过程中使用的Tavily API密钥。" @@ -92,15 +78,15 @@ msgstr "搜索过程中使用的Tavily API密钥。" msgid "The section of the settings" msgstr "设置的部分" -#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:50 +#: ../../core/schemas/user_settings.py:17 ../../flow/flow_option_items.py:51 msgid "The name of the variable." msgstr "变量的名称。" -#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:60 +#: ../../core/schemas/user_settings.py:19 ../../flow/flow_option_items.py:61 msgid "The description of the variable." msgstr "变量的描述。" -#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:63 +#: ../../core/schemas/user_settings.py:22 ../../flow/flow_option_items.py:64 msgid "The default value of the variable." msgstr "变量的默认值。" @@ -137,219 +123,233 @@ msgstr "设置创建的时间。" msgid "The time the settings was updated." msgstr "设置更新的时间。" -#: ../../flow/flow_option_items.py:53 +#: ../../flow/flow_option_items.py:54 msgid "The flow components that use this variable." 
msgstr "使用此变量的流程组件。" -#: ../../flow/flow_option_items.py:57 +#: ../../flow/flow_option_items.py:58 msgid "The display name of the variable." msgstr "变量的显示名称。" -#: ../../flow/flow_option_items.py:67 +#: ../../flow/flow_option_items.py:68 msgid "The type of the value,currently support str, int, float, bool." msgstr "值的类型,目前支持字符串、整数、浮点数、布尔值。" -#: ../../flow/flow_option_items.py:73 +#: ../../flow/flow_option_items.py:74 msgid "Whether the variable is required or not." msgstr "变量是否必需。" -#: ../../flow/flow_option_items.py:77 +#: ../../flow/flow_option_items.py:78 msgid "Whether the variable should be explicitly set by the user or not." msgstr "变量是否需要由用户明确设置。" -#: ../../flow/flow_option_items.py:83 +#: ../../flow/flow_option_items.py:84 msgid "Whether the variable should be displayed in multiple lines or not." msgstr "变量是否应该以多行显示。" -#: ../../flow/flow_option_items.py:89 +#: ../../flow/flow_option_items.py:90 msgid "The example value of the variable, if no default is provided." msgstr "如果没有提供默认值,变量的示例值。" -#: ../../flow/flow_option_items.py:93 +#: ../../flow/flow_option_items.py:94 msgid "" "If the value should be shown and edited as the specified programming " "code, such as Python, Markdown. Default is None." msgstr "如果值应该作为指定的编程代码(如Python、Markdown)显示和编辑。默认为None。" -#: ../../flow/flow_option_items.py:99 +#: ../../flow/flow_option_items.py:100 msgid "The variables the code should provide to the backend." msgstr "代码应该提供给后端的变量。" -#: ../../flow/flow_option_items.py:115 +#: ../../flow/flow_option_items.py:116 msgid "Planning Model" msgstr "规划模型" -#: ../../flow/flow_option_items.py:116 +#: ../../flow/flow_option_items.py:117 msgid "The model used to do the article planning." msgstr "用于文章规划的模型。" -#: ../../flow/flow_option_items.py:135 +#: ../../flow/flow_option_items.py:136 msgid "Summarizing Model" msgstr "摘要模型" -#: ../../flow/flow_option_items.py:136 +#: ../../flow/flow_option_items.py:137 msgid "The model used to summarize each article." 
msgstr "用于总结每篇文章的模型。" -#: ../../flow/flow_option_items.py:155 +#: ../../flow/flow_option_items.py:156 msgid "Writing Model" msgstr "写作模型" -#: ../../flow/flow_option_items.py:156 +#: ../../flow/flow_option_items.py:157 msgid "The model used to generating each section." msgstr "用于生成每个部分的模型。" -#: ../../flow/flow_option_items.py:169 +#: ../../flow/flow_option_items.py:170 msgid "Retriever" msgstr "检索器" -#: ../../flow/flow_option_items.py:170 +#: ../../flow/flow_option_items.py:171 msgid "The type of retriever to use for the web search." msgstr "用于网络搜索的检索器类型。" -#: ../../flow/flow_option_items.py:184 +#: ../../flow/flow_option_items.py:185 msgid "Content Instruction" msgstr "内容指令" -#: ../../flow/flow_option_items.py:185 +#: ../../flow/flow_option_items.py:186 msgid "" "The relevance of the result documents from keyword search is assessed by " "the content instruction if provided. " msgstr "如果提供了内容指令,关键词搜索结果文档的相关性将由内容指令评估。" -#: ../../flow/flow_option_items.py:203 +#: ../../flow/flow_option_items.py:204 msgid "Days Limit" msgstr "天数限制" -#: ../../flow/flow_option_items.py:204 +#: ../../flow/flow_option_items.py:205 msgid "" "Number of days to limit the search results. 0 or empty means no limit. In" " local KB, filters by the import time." msgstr "限制搜索结果的天数。0或空表示无限制。在本地知识库中,按导入时间过滤。" -#: ../../flow/flow_option_items.py:222 +#: ../../flow/flow_option_items.py:223 msgid "Max search Results" msgstr "最大搜索结果" -#: ../../flow/flow_option_items.py:223 -msgid "The maximum number of search results for retrievers to return. Each retriever may have different paging mechanisms. Use the parameter and the search iteration to control the number of results.If the retieval is local, -1 here means process all documents." +#: ../../flow/flow_option_items.py:224 +msgid "" +"The maximum number of search results for retrievers to return. Each " +"retriever may have different paging mechanisms. 
Use the parameter and the" +" search iteration to control the number of results.If the retieval is " +"local, -1 here means process all documents." msgstr "检索器返回的最大搜索结果数。每个检索器可能有不同的分页机制。使用参数和搜索迭代来控制结果数量。如果检索是本地的,-1表示处理所有文档。" -#: ../../flow/flow_option_items.py:238 +#: ../../flow/flow_option_items.py:239 msgid "Rewrite Search Keywords" msgstr "重写搜索关键词" -#: ../../flow/flow_option_items.py:239 +#: ../../flow/flow_option_items.py:240 msgid "Ask the LLM to generate search keywords from the search query." msgstr "要求LLM从搜索查询生成搜索关键词。" -#: ../../flow/flow_option_items.py:252 +#: ../../flow/flow_option_items.py:253 msgid "Search Language" msgstr "搜索语言" -#: ../../flow/flow_option_items.py:253 +#: ../../flow/flow_option_items.py:254 msgid "The language used for keyword search if the search API supports." msgstr "如果搜索API支持,用于关键词搜索的语言。" -#: ../../flow/flow_option_items.py:268 +#: ../../flow/flow_option_items.py:269 msgid "Output Language" msgstr "输出语言" -#: ../../flow/flow_option_items.py:269 +#: ../../flow/flow_option_items.py:270 msgid "Output the result in the language." msgstr "用指定语言输出结果。" -#: ../../flow/flow_option_items.py:282 +#: ../../flow/flow_option_items.py:283 msgid "Output Example" msgstr "输出示例" -#: ../../flow/flow_option_items.py:283 +#: ../../flow/flow_option_items.py:284 msgid "" "The example of the expected output content. If left empty, no example " "will be provided to LLM." msgstr "预期输出内容的示例。如果留空,将不会向LLM提供示例。" -#: ../../flow/flow_option_items.py:300 +#: ../../flow/flow_option_items.py:301 msgid "Number of Sections" msgstr "章节数量" -#: ../../flow/flow_option_items.py:301 +#: ../../flow/flow_option_items.py:302 msgid "" "The number of sections in the output article. If left empty, the planning" " agent will decide automatically." 
msgstr "输出文章中的章节数量。如果留空,规划代理将自动决定。" -#: ../../flow/flow_option_items.py:317 +#: ../../flow/flow_option_items.py:318 +msgid "Timezone" +msgstr "时区" + +#: ../../flow/flow_option_items.py:319 +msgid "" +"The timezone when determining the date. See " +"https://docs.python.org/3/library/zoneinfo.html" +msgstr "确定日期时使用的时区。请参阅https://docs.python.org/3/library/zoneinfo.html" + +#: ../../flow/flow_option_items.py:335 msgid "Article Style" msgstr "文章风格" -#: ../../flow/flow_option_items.py:318 +#: ../../flow/flow_option_items.py:336 msgid "" "The style of the output article such as analytical research reports, " "humorous news articles, or technical blog posts." msgstr "输出文章的风格,如分析研究报告、幽默新闻文章或技术博客文章。" -#: ../../flow/flow_option_items.py:335 +#: ../../flow/flow_option_items.py:353 msgid "Word Count" msgstr "字数" -#: ../../flow/flow_option_items.py:336 +#: ../../flow/flow_option_items.py:354 msgid "The number of words in the output section. Empty means automatics." msgstr "输出部分的字数。空表示自动。" -#: ../../flow/flow_option_items.py:351 +#: ../../flow/flow_option_items.py:369 msgid "Extract Instruction" msgstr "提取指令" -#: ../../flow/flow_option_items.py:352 +#: ../../flow/flow_option_items.py:370 msgid "Describe what information to extract from the content." msgstr "描述从内容中提取什么信息。" -#: ../../flow/flow_option_items.py:377 +#: ../../flow/flow_option_items.py:395 msgid "Extract Pydantic Model" msgstr "提取Pydantic模型" -#: ../../flow/flow_option_items.py:378 +#: ../../flow/flow_option_items.py:396 msgid "" "The schema of the target data as a pydantic model. Can be a single line " "string as the file path to the pydantic model, or a multi-line string as " "the pydantic model definition." 
msgstr "目标数据的模式作为pydantic模型。可以作为pydantic模型文件路径的单行字符串,或者作为pydantic模型定义的多行字符串。" -#: ../../flow/flow_option_items.py:402 +#: ../../flow/flow_option_items.py:420 msgid "Extract Schema as JSON" msgstr "将模式提取为JSON" -#: ../../flow/flow_option_items.py:403 +#: ../../flow/flow_option_items.py:421 msgid "The schema of the extracted information. Should be a JSON string." msgstr "提取信息的模式。应该是JSON字符串。" -#: ../../flow/flow_option_items.py:445 +#: ../../flow/flow_option_items.py:463 msgid "Extraction Instructions in Python" msgstr "Python中的提取指令" -#: ../../flow/flow_option_items.py:446 +#: ../../flow/flow_option_items.py:464 msgid "" "The instructions of the extractions in Python code. Right now the " "required variables are 'target_model_name' and 'instructions'. Also we " "need to specify the key fields and verify fields if needed." msgstr "Python代码中的提取指令。目前所需的变量是'target_model_name'和'instructions'。如果需要,我们还需要指定关键字段和验证字段。" -#: ../../flow/flow_option_items.py:483 +#: ../../flow/flow_option_items.py:501 msgid "Target Pydantic Model Name used in the final list" msgstr "最终列表中使用的目标Pydantic模型名称" -#: ../../flow/flow_option_items.py:484 +#: ../../flow/flow_option_items.py:502 msgid "" "There might be multiple Pydantic models in the schema definition. Specify" " which model to use for the final list." msgstr "模式定义中可能有多个Pydantic模型。指定最终列表使用哪个模型。" -#: ../../flow/flow_option_items.py:502 +#: ../../flow/flow_option_items.py:520 msgid "Key Fields" msgstr "关键字段" -#: ../../flow/flow_option_items.py:503 +#: ../../flow/flow_option_items.py:521 msgid "" "Comma separated field names that identifies an object in the extraction. " "Extracted data with the same key fields will be considered of the same " @@ -359,11 +359,11 @@ msgid "" "unique." 
msgstr "用逗号分隔的字段名,用于标识提取中的对象。具有相同关键字段的提取数据将被视为同一对象。同一对象的所有提取版本将基于这些字段进行去重。关键字段应存在于提取信息的模式中。如果留空,每个提取的对象都将被视为唯一。" -#: ../../flow/flow_option_items.py:522 +#: ../../flow/flow_option_items.py:540 msgid "Save extracted data to backend" msgstr "将提取的数据保存到后端" -#: ../../flow/flow_option_items.py:523 +#: ../../flow/flow_option_items.py:541 msgid "" "Save the extracted data to the backend. Default True. If False, the " "extracted data will not be saved. The saved data will have the same " @@ -371,11 +371,11 @@ msgid "" "time and the original document URI." msgstr "将提取的数据保存到后端。默认为True。如果为False,提取的数据将不会被保存。保存的数据将具有pydantic模型中指定的相同模式,以及导入时间和原始文档URI等元数据。" -#: ../../flow/flow_option_items.py:541 +#: ../../flow/flow_option_items.py:559 msgid "Verification Fields" msgstr "验证字段" -#: ../../flow/flow_option_items.py:542 +#: ../../flow/flow_option_items.py:560 msgid "" "Comma separated field names that need to be verified for the extracted " "objects. For example, although the address of a company is not in the key" @@ -384,119 +384,145 @@ msgid "" "no verification will be performed." msgstr "需要为提取对象验证的字段名,用逗号分隔。例如,虽然公司地址不在关键字段中,且一个公司可能有多个办公室地址,我们想要验证并去重所有提取的地址。如果留空,将不执行验证。" -#: ../../flow/flow_option_items.py:560 +#: ../../flow/flow_option_items.py:578 ../../flow/flows/news/flow_news.py:148 msgid "Output format" msgstr "输出格式" -#: ../../flow/flow_option_items.py:561 +#: ../../flow/flow_option_items.py:579 msgid "" "The output of the extracted data. Default is json. Currently also support" " csv, md." msgstr "提取数据的输出格式。默认为json。目前还支持csv、md。" -#: ../../flow/flow_option_items.py:577 +#: ../../flow/flow_option_items.py:595 msgid "Reference Style" msgstr "引用样式" -#: ../../flow/flow_option_items.py:578 +#: ../../flow/flow_option_items.py:596 msgid "" "The style of the references in the output article. Right now support " "news, default, and full." 
msgstr "输出文章中引用的样式。目前支持新闻、默认和完整样式。" -#: ../../flow/flow_option_items.py:596 +#: ../../flow/flow_option_items.py:614 msgid "Strict Context" msgstr "严格上下文" -#: ../../flow/flow_option_items.py:597 +#: ../../flow/flow_option_items.py:615 msgid "When generating a section, whether to use strict context or not." msgstr "生成章节时是否使用严格上下文。" -#: ../../flow/flow_option_items.py:614 +#: ../../flow/flow_option_items.py:632 msgid "Target Site" msgstr "目标网站" -#: ../../flow/flow_option_items.py:615 +#: ../../flow/flow_option_items.py:633 msgid "" "When searching the web, limit the search to this site. Empty means search" " all sites." msgstr "在网络搜索时,将搜索限制在此网站。空表示搜索所有网站。" -#: ../../flow/flow_option_items.py:633 +#: ../../flow/flow_option_items.py:651 msgid "Max iteration when using the web search retriever" msgstr "使用网络搜索检索器时的最大迭代次数" -#: ../../flow/flow_option_items.py:634 +#: ../../flow/flow_option_items.py:652 msgid "If the max result is not reached, how many times we go to the next page." msgstr "如果未达到最大结果,我们翻到下一页的次数。" -#: ../../flow/flow_option_items.py:651 +#: ../../flow/flow_option_items.py:669 msgid "Recursive scrape" msgstr "递归抓取" -#: ../../flow/flow_option_items.py:652 +#: ../../flow/flow_option_items.py:670 msgid "If true, scrape the top urls found in the search results documents." msgstr "如果为真,抓取搜索结果文档中找到的顶部URL。" -#: ../../flow/flow_option_items.py:669 +#: ../../flow/flow_option_items.py:687 msgid "Recursive scrape iteration" msgstr "递归抓取迭代" -#: ../../flow/flow_option_items.py:670 +#: ../../flow/flow_option_items.py:688 msgid "" "When we do recursive scraping, we will not stop until we reach the max " "number of results or the number of iterations specified here." 
msgstr "当我们进行递归抓取时,我们不会停止,直到达到最大结果数或这里指定的迭代次数。" -#: ../../flow/flow_option_items.py:688 +#: ../../flow/flow_option_items.py:706 msgid "Recursive scrape max item count" msgstr "递归抓取最大项目数" -#: ../../flow/flow_option_items.py:689 +#: ../../flow/flow_option_items.py:707 msgid "" "When we do recursive scraping, we will not stop until we reach the number" " of max iterations or the max number of results specified here." msgstr "当我们进行递归抓取时,我们不会停止,直到达到最大迭代次数或这里指定的最大结果数。" -#: ../../flow/flow_option_items.py:707 +#: ../../flow/flow_option_items.py:725 msgid "Image Search" msgstr "图片搜索" -#: ../../flow/flow_option_items.py:708 +#: ../../flow/flow_option_items.py:726 msgid "When searching on the web, limit the search to image search. " msgstr "在网络搜索时,将搜索限制为图片搜索。" -#: ../../flow/flow_option_items.py:723 +#: ../../flow/flow_option_items.py:741 msgid "Excluded Site" msgstr "排除网站" -#: ../../flow/flow_option_items.py:724 +#: ../../flow/flow_option_items.py:742 msgid "" "List of sites separated by comma to ignore when search for the " "information. Empty means no filter." msgstr "搜索信息时要忽略的网站列表,用逗号分隔。空表示无过滤。" -#: ../../flow/flow_option_items.py:742 +#: ../../flow/flow_option_items.py:760 msgid "Docsource UUID" msgstr "文档源UUID" -#: ../../flow/flow_option_items.py:743 +#: ../../flow/flow_option_items.py:761 msgid "The docsource uuid to run the query on when querying local KB." msgstr "在查询本地知识库时运行查询的文档源UUID。" -#: ../../flow/flow_option_items.py:758 +#: ../../flow/flow_option_items.py:776 msgid "Context Limit" msgstr "上下文限制" -#: ../../flow/flow_option_items.py:759 +#: ../../flow/flow_option_items.py:777 msgid "Override the context limit from the model info." 
msgstr "覆盖模型信息中的上下文限制。" -#~ msgid "" -#~ "The schema of the target data as" -#~ " a pydantic model, see " -#~ "https://docs.pydantic.dev" -#~ msgstr "目标数据的模式作为pydantic模型,参见https://docs.pydantic.dev" +#: ../../flow/flows/news/flow_news.py:124 +msgid "News item source count threshold" +msgstr "新闻项目源数量阈值" + +#: ../../flow/flows/news/flow_news.py:125 +msgid "" +"Number of sources a news item has to have to be included in the result. " +"Default is 1. Depends on the nature of the knowledge base." +msgstr "新闻项目必须具有的源数量才能包含在结果中。默认是1。取决于知识库的性质。" + +#: ../../flow/flows/news/flow_news.py:136 +msgid "Include previously reported news items" +msgstr "包含之前报道的新闻项目" + +#: ../../flow/flows/news/flow_news.py:137 +msgid "" +"Include all news items in the result, even if it has been reported " +"before. Default is False." +msgstr "在结果中包含所有新闻项目,即使它们之前已经报道过。默认是False。" + +#: ../../flow/flows/news/flow_news.py:149 +msgid "The format of the output: 'md' (default), 'table and 'json'." +msgstr "输出格式:'md'(默认)、'table'和'json'。" + +#: ../../flow/flows/news/flow_news.py:160 +msgid "Run search before extracting news data" +msgstr "在提取新闻数据之前运行搜索" + +#: ../../flow/flows/news/flow_news.py:162 +msgid "Run the search step before extracting the news data. Default is True." 
+msgstr "在提取新闻数据之前运行搜索步骤。默认是True。" #~ msgid "" #~ "The schema of the target data as" diff --git a/src/leettools/core/strategy/schemas/strategy_section.py b/src/leettools/core/strategy/schemas/strategy_section.py index b25c791..afcabb0 100644 --- a/src/leettools/core/strategy/schemas/strategy_section.py +++ b/src/leettools/core/strategy/schemas/strategy_section.py @@ -30,5 +30,5 @@ class StrategySection(BaseModel): llm_system_prompt_id: Optional[str] = None llm_user_prompt_id: Optional[str] = None # prompts by intention - llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = None - llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = None + llm_system_prompt_ids_by_intention: Optional[Dict[str, str]] = {} + llm_user_prompt_ids_by_intention: Optional[Dict[str, str]] = {} diff --git a/src/leettools/eds/api_caller/api_caller_base.py b/src/leettools/eds/api_caller/api_caller_base.py index ce3c079..7a648c0 100644 --- a/src/leettools/eds/api_caller/api_caller_base.py +++ b/src/leettools/eds/api_caller/api_caller_base.py @@ -166,7 +166,7 @@ def setup_default_prompts(self) -> None: if ( section.strategy_name.lower() == "default" - or self.strategy_section.strategy_name.lower() == "true" + or section.strategy_name.lower() == "true" ): if section.llm_system_prompt_id is None: logger().warning( @@ -189,7 +189,7 @@ def setup_default_prompts(self) -> None: if section.llm_user_prompt_id is None: logger().warning( - f"No user prompt id for {section.section_name} provided." + f"No user prompt id for {section.section_name} provided. " "Fallback to the default user prompt." 
) self.user_prompt_template = None @@ -205,15 +205,16 @@ def setup_default_prompts(self) -> None: self.user_prompt_template = user_prompt.prompt_template if self.user_prompt_template is None: + strategy_name = section.strategy_name user_prompt_template_file = ( - f"{self.script_dir}/prompts/default_user_prompt.txt" + f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt" ) with open(user_prompt_template_file, "r", encoding="utf-8") as file: self.user_prompt_template = file.read() if self.system_prompt_template is None: system_prompt_template_file = ( - f"{self.script_dir}/prompts/default_system_prompt.txt" + f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt" ) with open(system_prompt_template_file, "r", encoding="utf-8") as file: self.system_prompt_template = file.read() @@ -233,7 +234,7 @@ def setup_prompts_for_intention(self, query_metadata: ChatQueryMetadata): if intention_str not in sp_ids: self.display_logger.warning( - f"No system prompt id for {intention_str} provided to {section_name}." + f"No system prompt id for {intention_str} provided to {section_name}. " f"Fallback to the default intention." ) intention_str = DEFAULT_INTENTION @@ -285,13 +286,21 @@ def get_user_prompt_template_for_intention(self, intention_str: str) -> str: if self.script_dir is None: raise UnexpectedCaseException("Script directory is not set.") - user_prompt_file = f"{self.script_dir}/prompts/{intention_str}_user_prompt.txt" + strategy_name = self.strategy_section.strategy_name + user_prompt_file = ( + f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_user_prompt.txt" + ) # if the user prompt for the intention is not provided, use the default if not os.path.exists(user_prompt_file): self.display_logger.warning( - f"User prompt for {intention_str} not found. Using default." 
+ f"User prompt for {intention_str} not found: {user_prompt_file}" ) - user_prompt_file = f"{self.script_dir}/prompts/default_user_prompt.txt" + user_prompt_file = ( + f"{self.script_dir}/prompts/default_{strategy_name}_user_prompt.txt" + ) + self.display_logger.info(f"Using default user prompt: {user_prompt_file}") + else: + self.display_logger.debug(f"Using user prompt file: {user_prompt_file}") return read_template_file(user_prompt_file) def get_system_prompt_template_for_intention(self, intention_str: str) -> str: @@ -301,14 +310,20 @@ def get_system_prompt_template_for_intention(self, intention_str: str) -> str: if self.script_dir is None: raise UnexpectedCaseException("Script directory is not set.") - system_prompt_file = ( - f"{self.script_dir}/prompts/{intention_str}_system_prompt.txt" - ) + strategy_name = self.strategy_section.strategy_name + system_prompt_file = f"{self.script_dir}/prompts/{intention_str}_{strategy_name}_system_prompt.txt" if not os.path.exists(system_prompt_file): self.display_logger.warning( - f"System prompt for {intention_str} not found. Using default." 
+ f"System prompt for {intention_str} not found: {system_prompt_file}" + ) + system_prompt_file = ( + f"{self.script_dir}/prompts/default_{strategy_name}_system_prompt.txt" ) - system_prompt_file = f"{self.script_dir}/prompts/default_system_prompt.txt" + self.display_logger.info( + f"Using default system prompt: {system_prompt_file}" + ) + else: + self.display_logger.debug(f"Using system prompt file: {system_prompt_file}") return read_template_file(system_prompt_file) def run_inference_call( diff --git a/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py b/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py index 5f0c935..1c181f6 100644 --- a/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py +++ b/src/leettools/eds/rag/rewrite/_impl/rewrite_direct_dynamic.py @@ -54,7 +54,6 @@ def rewrite( query = query_item.query_content # add query history - query_id = query_item.query_id ch_manager = get_history_manager(self.context) query_history = ch_manager.get_ch_entry( username=self.user.username, diff --git a/src/leettools/flow/flows/answer/flow_answer.py b/src/leettools/flow/flows/answer/flow_answer.py index 9666591..85c6f1a 100644 --- a/src/leettools/flow/flows/answer/flow_answer.py +++ b/src/leettools/flow/flows/answer/flow_answer.py @@ -19,6 +19,8 @@ from leettools.core.schemas.knowledgebase import KnowledgeBase from leettools.core.schemas.organization import Org from leettools.core.schemas.user import User +from leettools.core.strategy.schemas.strategy_section import StrategySection +from leettools.core.strategy.schemas.strategy_section_name import StrategySectionName from leettools.flow import flow_option_items, steps from leettools.flow.exec_info import ExecInfo from leettools.flow.flow import AbstractFlow @@ -118,29 +120,46 @@ def execute_query( ) # flow starts there + query_metadata = steps.StepIntention.run_step(exec_info=exec_info) - rewrite = steps.StepQueryRewrite.run_step( - exec_info=exec_info, - query_metadata=query_metadata, 
- ) if is_search_engine(retriever_type): - # query the web first, after this function, the search results - # are processed and stored in the KB - # TODO: make this function async + rewrite = steps.StepQueryRewrite.run_step( + exec_info=exec_info, + query_metadata=query_metadata, + ) if rewrite.search_keywords is None: keywords = rewrite.rewritten_question else: keywords = rewrite.search_keywords + + # query the web first, after this function, the search results + # are processed and stored in the KB + # TODO: make this function async docsource = steps.StepSearchToDocsource.run_step( exec_info=exec_info, search_keywords=keywords ) # we will answer using the whole KB # right now filter by docsource cannot include re-used docsinks # flow_options[DOCSOURCE_UUID_ATTR] = docsource.docsource_uuid - - # TODO Next: add a flow_option to control if include the whole KB in the search - # flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid + # TODO Next: add a flow_option to control if include the whole KB in the search + # flow_options[DocSource.FIELD_DOCSOURCE_UUID] = docsource.docsource_uuid + else: + # for local KB, we should use local KB data as the rewrite context + rewrite_section = StrategySection( + section_name=StrategySectionName.REWRITE, + strategy_name="keywords", + ) + rewrite = steps.StepQueryRewrite.run_step( + exec_info=exec_info, + query_metadata=query_metadata, + rewrite_section=rewrite_section, + ) + # the keywords actually not used in the local search + if rewrite.search_keywords is None: + keywords = rewrite.rewritten_question + else: + keywords = rewrite.search_keywords top_ranked_result_segments = steps.StepVectorSearch.run_step( exec_info=exec_info, diff --git a/src/leettools/flow/flows/news/flow_news.py b/src/leettools/flow/flows/news/flow_news.py index a329e18..36064e4 100644 --- a/src/leettools/flow/flows/news/flow_news.py +++ b/src/leettools/flow/flows/news/flow_news.py @@ -6,6 +6,7 @@ from pydantic import BaseModel, ConfigDict, 
create_model from leettools.common import exceptions +from leettools.common.i18n.translator import _ from leettools.common.logging.event_logger import EventLogger from leettools.common.utils import config_utils, json_utils, lang_utils, time_utils from leettools.common.utils.template_eval import render_template @@ -108,6 +109,8 @@ def full_description(cls) -> str: - The categories of the news - The keywords of the news - The date of the news item + +{{ language_instruction }} """ @classmethod @@ -118,10 +121,9 @@ def depends_on(cls) -> List[Type["FlowComponent"]]: def direct_flow_option_items(cls) -> List[FlowOptionItem]: foi_news_source_min = FlowOptionItem( name=flow_option.FLOW_OPTION_NEWS_SOURCE_MIN, - display_name="News item source count threshold", - description=( - "Number of sources a news item has to have to be included in the result." - "Default is 1. Depends on the nature of the knowledge base." + display_name=_("News item source count threshold"), + description=_( + "Number of sources a news item has to have to be included in the result. Default is 1. Depends on the nature of the knowledge base." ), default_value="1", value_type="int", @@ -131,10 +133,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]: foi_news_include_old = FlowOptionItem( name=flow_option.FLOW_OPTION_NEWS_INCLUDE_OLD, - display_name="Include previously reported news items", - description=( - "Include all news items in the result, even if it has been reported before." - "Default is False." + display_name=_("Include previously reported news items"), + description=_( + "Include all news items in the result, even if it has been reported before. Default is False." 
), default_value="False", value_type="bool", @@ -144,8 +145,8 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]: foi_news_output_format = FlowOptionItem( name=flow_option.FLOW_OPTION_EXTRACT_OUTPUT_FORMAT, - display_name="Output format", - description=( + display_name=_("Output format"), + description=_( "The format of the output: 'md' (default), 'table and 'json'." ), default_value="md", @@ -156,10 +157,9 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]: foi_news_run_search = FlowOptionItem( name=flow_option.FLOW_OPTION_NEWS_RUN_SEARCH, - display_name="Run search before extracting news data", - description=( - "Run the search step before extracting the news data." - "Default is True." + display_name=_("Run search before extracting news data"), + description=_( + "Run the search step before extracting the news data. Default is True." ), default_value="True", value_type="bool", @@ -487,6 +487,7 @@ def execute_query( "query": query, "word_count": news_params.word_count, "article_style": news_params.article_style, + "language_instruction": news_params.language_instruction, }, ) diff --git a/src/leettools/flow/steps/step_query_rewrite.py b/src/leettools/flow/steps/step_query_rewrite.py index c4c0d6e..0ec1448 100644 --- a/src/leettools/flow/steps/step_query_rewrite.py +++ b/src/leettools/flow/steps/step_query_rewrite.py @@ -1,4 +1,4 @@ -from typing import ClassVar, List, Type +from typing import ClassVar, List, Optional, Type from leettools.core.schemas.chat_query_metadata import ChatQueryMetadata from leettools.core.strategy.schemas.strategy_section import StrategySection @@ -27,17 +27,30 @@ def direct_flow_option_items(cls) -> List[FlowOptionItem]: def run_step( exec_info: ExecInfo, query_metadata: ChatQueryMetadata, + rewrite_section: Optional[StrategySection] = None, ) -> Rewrite: """ Rewrite the query based on the strategy section and the query metadata. + + If rewrite_section is provided, use it directly. 
+ If rewrite_section is not provided, get it from the strategy. + + Args: + - exec_info: the execution info + - query_metadata: the query metadata + - rewrite_section: the rewrite section + + Returns: + - rewrite: the rewrite result """ display_logger = exec_info.display_logger display_logger.info( f"[Status] Rewrite query: {exec_info.target_chat_query_item.query_content}" ) - rewrite_section = exec_info.strategy.strategy_sections.get( - StrategySectionName.REWRITE, None - ) + if rewrite_section is None: + rewrite_section = exec_info.strategy.strategy_sections.get( + StrategySectionName.REWRITE, None + ) return _step_run_rewriter( exec_info=exec_info, rewrite_section=rewrite_section, diff --git a/src/leettools/svc/api/v1/routers/file_router.py b/src/leettools/svc/api/v1/routers/file_router.py index eaebb27..d289680 100644 --- a/src/leettools/svc/api/v1/routers/file_router.py +++ b/src/leettools/svc/api/v1/routers/file_router.py @@ -33,6 +33,7 @@ async def read_raw_document(uri: str) -> FileResponse: logger().debug(f"Reading raw document from {uri}") safe_base_path = Path(self.settings.DATA_ROOT) + # TODO: make these configurable and in sync with Docker config incoming_file_path = Path("/incoming") uploads_file_path = Path("/app/uploads")