Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ public static String toFileLeafPath(String iginxPath, String originalFileName) {
if (slash >= 0 && slash < base.length() - 1) {
base = base.substring(slash + 1);
}
base = base.replaceAll("[^a-zA-Z0-9._-]", "_");
if (base.isEmpty()) {
base = "content.bin";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class DocumentMetadataExtractor(BaseMetadataExtractor):

USER_PROMPT_TEMPLATE = (
"请从输入的文档文本中抽取语义三元组。 "
"尽可能提取代表元数据的语义,而非具体的数值或事实。 "
"请严格只返回 JSON,不要输出 Markdown 或解释。 "
"返回格式要求:"
"{{\"entities\":[\"entity\"],\"triples\":[{{\"subject\":\"entityA\",\"predicate\":\"relation\",\"object\":\"entityB\"}}]}}. "
Expand All @@ -15,7 +16,7 @@ class DocumentMetadataExtractor(BaseMetadataExtractor):
)

USER_RETRY_PROMPT_TEMPLATE = (
"你上一轮可能没有返回可用三元组。请再次检查内容并尽量抽取核心语义关系;若确实无关系,triples 返回空数组。 "
"你上一轮可能没有返回可用三元组。请再次检查内容并尽量抽取核心语义关系;尽可能提取代表元数据的语义,而非具体的数值或事实;若确实无关系,triples 返回空数组。 "
"请严格只返回 JSON,不要输出 Markdown 或解释。 "
"返回格式要求:"
"{{\"entities\":[\"entity\"],\"triples\":[{{\"subject\":\"entityA\",\"predicate\":\"relation\",\"object\":\"entityB\"}}]}}. "
Expand Down
4 changes: 2 additions & 2 deletions frontend/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function renderPagination(containerId, stateKey, totalPages, total, onPageChange
function formatBytes(bytes) {
const value = Number(bytes || 0);
if (!Number.isFinite(value) || value <= 0) return '0 B';
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
let size = value;
let idx = 0;
while (size >= 1024 && idx < units.length - 1) {
Expand Down Expand Up @@ -2982,7 +2982,7 @@ function escapeHtml(str) {

/**
 * Format a byte count as a human-readable size with one decimal place.
 *
 * @param {number|string} bytes - Size in bytes; numeric strings are coerced.
 * @returns {string} e.g. '1.5 KB'. Returns '0 B' for zero, negative,
 *   non-finite or non-numeric input.
 */
function formatFileSize(bytes) {
  const value = Number(bytes || 0);
  // Same guard as formatBytes: anything that is not a positive finite
  // number is reported as '0 B' rather than 'NaN undefined'.
  if (!Number.isFinite(value) || value <= 0) return '0 B';

  const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  let size = value;
  let idx = 0;
  // Stop at the last unit so values beyond the table stay in 'PB' instead
  // of indexing past the array; values below 1 stay in 'B'.
  while (size >= 1024 && idx < units.length - 1) {
    size /= 1024;
    idx += 1;
  }
  return size.toFixed(1) + ' ' + units[idx];
}
Expand Down
7 changes: 5 additions & 2 deletions test/batch_add_datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
SSH_PORT = 22

REQUEST_DELAY = 0.1 # delay between requests (seconds)
BATCH_SIZE = 10 # number of requests per batch
BATCH_REQUEST_DELAY = 0 # delay between batches (seconds)
TIMEOUT = 60 # request timeout (seconds)

# ==================== 颜色输出 ====================
Expand Down Expand Up @@ -124,8 +126,8 @@ def main():
Colors.RED
)

# 每10个打印进度
if i % 10 == 0:
# 每 BATCH_SIZE 个打印进度
if i % BATCH_SIZE == 0:
progress = (i * 100.0) / TOTAL_COUNT
print()
print_colored(
Expand All @@ -134,6 +136,7 @@ def main():
Colors.YELLOW
)
print()
time.sleep(BATCH_REQUEST_DELAY)

# 延迟
time.sleep(REQUEST_DELAY)
Expand Down
Loading