From 9ca940a9f07abd4d3a52d2d585fcb36ef1481491 Mon Sep 17 00:00:00 2001 From: ych <1821947036@qq.com> Date: Mon, 25 May 2026 10:46:40 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BF=AE=E7=90=86=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E8=AE=BF=E9=97=AE=E6=9C=8D=E5=8A=A1=E4=B8=AD=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/storage/engine/service/adapter/StorageUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/src/main/java/com/storage/engine/service/adapter/StorageUtils.java b/backend/src/main/java/com/storage/engine/service/adapter/StorageUtils.java index 6de7d80..4eddcb8 100644 --- a/backend/src/main/java/com/storage/engine/service/adapter/StorageUtils.java +++ b/backend/src/main/java/com/storage/engine/service/adapter/StorageUtils.java @@ -125,7 +125,6 @@ public static String toFileLeafPath(String iginxPath, String originalFileName) { if (slash >= 0 && slash < base.length() - 1) { base = base.substring(slash + 1); } - base = base.replaceAll("[^a-zA-Z0-9._-]", "_"); if (base.isEmpty()) { base = "content.bin"; } From c68c87e6050c7aa4e58fed31064c3789f3ecb769 Mon Sep 17 00:00:00 2001 From: ych <1821947036@qq.com> Date: Mon, 25 May 2026 11:26:50 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=9F=A5=E8=AF=86=E6=8F=90=E5=8F=96prompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../resources/udf/metadata/extractors/document_extractor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/main/resources/udf/metadata/extractors/document_extractor.py b/backend/src/main/resources/udf/metadata/extractors/document_extractor.py index d8556e7..a0909e8 100644 --- a/backend/src/main/resources/udf/metadata/extractors/document_extractor.py +++ b/backend/src/main/resources/udf/metadata/extractors/document_extractor.py @@ -6,6 +6,7 @@ class DocumentMetadataExtractor(BaseMetadataExtractor): USER_PROMPT_TEMPLATE = ( "请从输入的文档文本中抽取语义三元组。 " + "尽可能提取代表元数据的语义,而非具体的数值或事实。 " "请严格只返回 JSON,不要输出 Markdown 或解释。 " "返回格式要求:" "{{\"entities\":[\"entity\"],\"triples\":[{{\"subject\":\"entityA\",\"predicate\":\"relation\",\"object\":\"entityB\"}}]}}. " @@ -15,7 +16,7 @@ class DocumentMetadataExtractor(BaseMetadataExtractor): ) USER_RETRY_PROMPT_TEMPLATE = ( - "你上一轮可能没有返回可用三元组。请再次检查内容并尽量抽取核心语义关系;若确实无关系,triples 返回空数组。 " + "你上一轮可能没有返回可用三元组。请再次检查内容并尽量抽取核心语义关系;尽可能提取代表元数据的语义,而非具体的数值或事实;若确实无关系,triples 返回空数组。 " "请严格只返回 JSON,不要输出 Markdown 或解释。 " "返回格式要求:" "{{\"entities\":[\"entity\"],\"triples\":[{{\"subject\":\"entityA\",\"predicate\":\"relation\",\"object\":\"entityB\"}}]}}. " From 8a34984d4a959e07bd6e0c635e88445d92469f8d Mon Sep 17 00:00:00 2001 From: ych <1821947036@qq.com> Date: Tue, 26 May 2026 21:13:51 +0800 Subject: [PATCH 3/3] =?UTF-8?q?1.=E6=94=AF=E6=8C=81=E6=98=BE=E7=A4=BAPB?= =?UTF-8?q?=E7=BA=A7=E6=95=B0=E6=8D=AE=202.=E5=A2=9E=E5=8A=A0=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE=E6=BA=90=E7=9A=84?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=E5=8F=91=E9=80=81=E5=BB=B6=E8=BF=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/app.js | 4 ++-- test/batch_add_datasource.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/frontend/app.js b/frontend/app.js index efdf641..13e76a4 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -113,7 +113,7 @@ function renderPagination(containerId, stateKey, totalPages, total, onPageChange function formatBytes(bytes) { const value = Number(bytes || 0); if (!Number.isFinite(value) || value <= 0) return '0 B'; - const units = ['B', 'KB', 'MB', 'GB', 'TB']; + const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']; let size = value; let idx = 0; while (size >= 1024 && idx < units.length - 1) { @@ -2982,7 +2982,7 @@ function escapeHtml(str) { function formatFileSize(bytes) { if (!bytes || bytes === 0) return '0 B'; - const units = ['B', 'KB', 'MB', 'GB', 'TB']; + const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']; const i = Math.floor(Math.log(bytes) / Math.log(1024)); return (bytes / Math.pow(1024, i)).toFixed(1) + ' ' + units[i]; } diff --git a/test/batch_add_datasource.py b/test/batch_add_datasource.py index 8961a8e..b70c08b 100644 --- a/test/batch_add_datasource.py +++ b/test/batch_add_datasource.py @@ -25,6 +25,8 @@ SSH_PORT = 22 REQUEST_DELAY = 0.1 # 请求间隔(秒) +BATCH_SIZE = 10 # 每批请求数量 +BATCH_REQUEST_DELAY = 0 # 每批请求间隔(秒) TIMEOUT = 60 # 请求超时(秒) # ==================== 颜色输出 ==================== @@ -124,8 +126,8 @@ def main(): Colors.RED ) - # 每10个打印进度 - if i % 10 == 0: + # 每 BATCH_SIZE 个打印进度 + if i % BATCH_SIZE == 0: progress = (i * 100.0) / TOTAL_COUNT print() print_colored( @@ -134,6 +136,7 @@ def main(): Colors.YELLOW ) print() + time.sleep(BATCH_REQUEST_DELAY) # 延迟 time.sleep(REQUEST_DELAY)