-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstreamlit_app.py
More file actions
1890 lines (1726 loc) · 87 KB
/
streamlit_app.py
File metadata and controls
1890 lines (1726 loc) · 87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import annotations
import datetime as dt
import json
import os
from pathlib import Path
import subprocess
import sys
import tempfile
import time
import uuid
import re
import streamlit as st
from app.services.diagnostics_service import write_review_diagnostics
from app.services.preset_service import apply_preset_defaults, list_review_presets
from app.settings import AppSettings, load_settings
from app.workflows.pipeline import BASE_CONSTRAINTS, BASE_EXPERT_VIEW, SUMMARY_TEMPLATE, run_revision
from app.workflows.report import DEFAULT_FRAMEWORK, generate_report, complete_report_docx
from app.workflows.report_integrate import integrate_report_chapters
from app.tools.path_utils import resolve_path
# --- Streamlit session-state keys -------------------------------------------
# Centralised so widget keys stay unique and stable across reruns; every key
# is namespaced with a "reviewer_" prefix to avoid collisions.

# Chat session management.
_CHAT_STATE_INIT_KEY = "reviewer_chat_initialized"
_CHAT_SESSIONS_KEY = "reviewer_chat_sessions"
_CHAT_ACTIVE_SESSION_KEY = "reviewer_chat_active_session_id"
_CHAT_HISTORY_PATH_KEY = "reviewer_chat_history_path"
_CHAT_SESSION_SELECT_KEY = "reviewer_chat_session_select"
_CHAT_SESSION_TITLE_INPUT_PREFIX = "reviewer_chat_session_title_input"
_CHAT_PENDING_WIDGET_STATE_KEY = "reviewer_chat_pending_widget_state"
# Review intent / prompt selection.
_INTENT_INPUT_KEY = "reviewer_intent_input"
_INTENT_PROMPT_SELECT_KEY = "reviewer_intent_prompt_select"
# Review configuration (presets, diagnostics, focus filtering, formatting).
_REVIEW_PRESET_KEY = "reviewer_review_preset_key"
_REVIEW_DIAGNOSTICS_KEY = "reviewer_review_diagnostics"
_REVIEW_DIAGNOSTICS_ONLY_KEY = "reviewer_review_diagnostics_only"
_FOCUS_FILTER_KEY = "reviewer_focus_filter_only_targets"
_FORMAT_PROFILE_SELECT_KEY = "reviewer_format_profile_select"
# Context handling for review runs.
_MEMORY_SCOPE_KEY = "reviewer_review_memory_scope"
_INLINE_CONTEXT_KEY = "reviewer_review_inline_context"
_CHUNK_CONTEXT_KEY = "reviewer_review_chunk_context"
_CTX_MAX_CHARS_KEY = "reviewer_review_context_max_chars"
# Table / image extraction options.
_TABLE_EXTRACT_KEY = "reviewer_extract_table_elements"
_TABLE_IMAGE_UNDERSTANDING_KEY = "reviewer_table_image_understanding"
_TABLE_IMAGE_PROMPT_KEY = "reviewer_table_image_prompt"
_DOCX_IMAGE_EXTRACT_KEY = "reviewer_extract_docx_images"
# Report generation / completion / integration state.
_REPORT_TOPIC_KEY = "reviewer_report_topic"
_REPORT_FRAMEWORK_KEY = "reviewer_report_framework"
_REPORT_LAST_RESULT_KEY = "reviewer_report_last_result"
_REPORT_COMPLETE_TOPIC_KEY = "reviewer_report_complete_topic"
_REPORT_COMPLETE_LAST_RESULT_KEY = "reviewer_report_complete_last_result"
_REPORT_COMPLETE_UPLOAD_KEY = "reviewer_report_complete_upload"
_REPORT_INTEGRATE_LAST_RESULT_KEY = "reviewer_report_integrate_last_result"
_REPORT_INTEGRATE_UPLOAD_KEY = "reviewer_report_integrate_uploads"
# Top-level app mode selector.
_MODE_KEY = "reviewer_app_mode"
def _prepare_workspace(root_dir: str) -> Path:
    """Return the app workspace directory under *root_dir*, creating it if needed."""
    workspace_dir = Path(root_dir).joinpath("workspace")
    workspace_dir.mkdir(parents=True, exist_ok=True)
    return workspace_dir
def _chat_history_path(workspace: Path) -> Path:
    """Location of the persisted chat-session JSON inside *workspace*."""
    return workspace.joinpath("reviewer_chat_sessions.json")
def _now_iso() -> str:
    """Current local time as ``YYYY-MM-DD HH:MM:SS`` (no microseconds)."""
    return dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
def _rerun() -> None:
    """Trigger a Streamlit rerun via ``st.rerun`` or the legacy ``experimental_rerun``."""
    candidate = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
    # Older/newer Streamlit versions expose different names; skip when neither exists.
    if callable(candidate):
        candidate()
def _queue_widget_state(widget_key: str, value) -> None:
    """Queue *value* to be written into ``st.session_state[widget_key]`` on the next rerun.

    Values are staged under a pending-state dict because Streamlit forbids
    writing to an instantiated widget's key mid-run.
    """
    queued = st.session_state.get(_CHAT_PENDING_WIDGET_STATE_KEY)
    if not isinstance(queued, dict):
        queued = {}
        st.session_state[_CHAT_PENDING_WIDGET_STATE_KEY] = queued
    queued[widget_key] = value
def _apply_queued_widget_state() -> None:
    """Flush values staged by ``_queue_widget_state`` into session state, then clear the queue."""
    queued = st.session_state.get(_CHAT_PENDING_WIDGET_STATE_KEY)
    if not (isinstance(queued, dict) and queued):
        return
    for key, value in list(queued.items()):
        # Only honour well-formed (non-empty string) widget keys.
        if key and isinstance(key, str):
            st.session_state[key] = value
    st.session_state[_CHAT_PENDING_WIDGET_STATE_KEY] = {}
def _safe_read_bytes(path: Path) -> bytes | None:
    """Best-effort read of *path* as bytes; returns ``None`` on any failure."""
    try:
        data = path.read_bytes()
    except Exception:
        return None
    return data
def _safe_read_text(path: Path, *, encoding: str = "utf-8") -> str | None:
    """Best-effort read of *path* as text; returns ``None`` on any failure."""
    try:
        text = path.read_text(encoding=encoding)
    except Exception:
        return None
    return text
def _try_load_json(text: str):
    """Parse *text* as JSON, returning ``None`` when it is not valid JSON."""
    try:
        parsed = json.loads(text)
    except Exception:
        return None
    return parsed
def _env_int(name: str, default: int, *, min_value: int = 1) -> int:
    """Read an integer from environment variable *name*.

    Falls back to *default* when the variable is unset, blank, or not a valid
    integer; the result is always clamped to at least *min_value*.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return max(min_value, int(default))
    try:
        value = int(raw)
    except ValueError:
        # int() on a str only raises ValueError; keep the fallback narrow.
        value = int(default)
    return max(min_value, value)
def _streamlit_log_tail_lines() -> int:
    # How many trailing log lines to show in the UI log box
    # (env override: STREAMLIT_LOG_TAIL_LINES, floor 20).
    return _env_int("STREAMLIT_LOG_TAIL_LINES", 300, min_value=20)


def _streamlit_log_buffer_lines() -> int:
    # Maximum log lines kept in memory before old ones are dropped
    # (env override: STREAMLIT_LOG_BUFFER_LINES, floor 200).
    return _env_int("STREAMLIT_LOG_BUFFER_LINES", 3000, min_value=200)


def _streamlit_log_update_interval_seconds() -> float:
    # Minimum interval between UI log refreshes, in seconds; configured in
    # milliseconds via STREAMLIT_LOG_UPDATE_INTERVAL_MS (0 disables throttling).
    interval_ms = _env_int("STREAMLIT_LOG_UPDATE_INTERVAL_MS", 400, min_value=0)
    return max(0.0, float(interval_ms) / 1000.0)
def _tail_lines_text(text: str, max_lines: int) -> str:
    """Return *text* truncated to its last *max_lines* lines.

    When truncation happens, a notice line describing how many lines were
    omitted is prepended; ``max_lines <= 0`` disables truncation entirely.
    """
    if max_lines <= 0:
        return text
    all_lines = (text or "").splitlines()
    omitted_count = len(all_lines) - max_lines
    if omitted_count <= 0:
        return text
    kept = "\n".join(all_lines[-max_lines:])
    return f"...(仅显示最后{max_lines}行,已省略{omitted_count}行)\n{kept}"
def _build_throttled_log_writer(log_box, log_lines: list[str]):
    """Build a throttled logger for streaming progress into a Streamlit box.

    Returns ``(_logger, _flush)``: ``_logger(message)`` appends to *log_lines*
    (bounded by the configured buffer size) and re-renders the tail into
    *log_box* at most once per configured interval; ``_flush()`` forces an
    immediate final render. Rendering disables itself permanently after the
    first UI write failure.
    """
    interval_s = _streamlit_log_update_interval_seconds()
    tail_lines = _streamlit_log_tail_lines()
    buffer_lines = _streamlit_log_buffer_lines()
    # Closure state: once disabled, never render again; last_render_ts drives
    # the interval throttle.
    disabled = False
    last_render_ts = 0.0

    def _render(*, force: bool) -> None:
        nonlocal disabled, last_render_ts
        if disabled:
            return
        now = time.monotonic()
        # Skip throttled renders unless forced (used by _flush).
        if not force and interval_s > 0 and (now - last_render_ts) < interval_s:
            return
        last_render_ts = now
        try:
            content = "\n".join(log_lines[-tail_lines:]) if tail_lines > 0 else "\n".join(log_lines)
            log_box.code(content)
        except Exception:
            # Browser websocket may already be closed; stop UI writes silently.
            disabled = True

    def _logger(message: str) -> None:
        log_lines.append(message)
        # Trim the in-memory buffer in place so callers holding the list see it.
        if len(log_lines) > buffer_lines:
            del log_lines[:-buffer_lines]
        _render(force=False)

    def _flush() -> None:
        _render(force=True)

    return _logger, _flush
def _resolve_prompt_dir(root_dir: str) -> Path | None:
    """Find the prompt directory under *root_dir*.

    Checks ``prompt`` first, then the legacy misspelling ``promt``; returns
    ``None`` when neither exists as a directory.
    """
    root = Path(root_dir)
    for dirname in ("prompt", "promt"):
        candidate = root / dirname
        if candidate.is_dir():
            return candidate
    return None
def _list_prompt_files(prompt_dir: Path) -> list[Path]:
    """All ``*.txt`` files directly inside *prompt_dir*, sorted by name; ``[]`` on error."""
    try:
        candidates = [entry for entry in prompt_dir.glob("*.txt") if entry.is_file()]
        candidates.sort(key=lambda entry: entry.name)
    except Exception:
        return []
    return candidates
def _open_in_file_manager(path: Path) -> tuple[bool, str]:
    """Open *path* with the platform's file manager / default handler.

    Returns ``(True, "")`` on success and ``(False, error_message)`` on failure.
    """
    try:
        target = str(path)
        if os.name == "nt":
            os.startfile(target)  # type: ignore[attr-defined]
        else:
            # macOS ships `open`; assume freedesktop `xdg-open` elsewhere.
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.Popen([opener, target], close_fds=True)
    except Exception as exc:  # noqa: BLE001
        return False, str(exc)
    return True, ""
def _load_chat_history(path: Path) -> dict:
    """Load the chat-session history JSON at *path*.

    Always returns a dict shaped ``{"version", "active_session_id", "sessions"}``;
    any read/parse problem or unexpected structure degrades to the empty default.
    """
    default = {"version": 1, "active_session_id": None, "sessions": []}
    if not path.exists():
        return default
    try:
        # utf-8-sig tolerates a BOM written by other tools.
        payload = json.loads(path.read_text(encoding="utf-8-sig"))
    except Exception:
        return default
    if not isinstance(payload, dict):
        return default
    sessions = payload.get("sessions", [])
    if not isinstance(sessions, list):
        sessions = []
    active_id = payload.get("active_session_id")
    if active_id is not None and not isinstance(active_id, str):
        active_id = None
    return {"version": 1, "active_session_id": active_id, "sessions": sessions}
def _save_chat_history(path: Path, sessions: list[dict], active_session_id: str | None) -> None:
    """Persist chat sessions as JSON at *path* (best-effort; errors are swallowed).

    Writes to a sibling ``.tmp`` file first and then replaces the target so a
    crash mid-write never leaves a truncated history file.
    """
    payload = {"version": 1, "active_session_id": active_session_id, "sessions": sessions}
    try:
        staging = path.with_suffix(".tmp")
        staging.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
        staging.replace(path)
    except Exception:
        pass
def _new_chat_session(title: str | None = None) -> dict:
    """Create a fresh, empty chat-session record; default title embeds the timestamp."""
    created = _now_iso()
    return {
        "id": uuid.uuid4().hex,
        "title": title if title else f"会话 {created}",
        "created_at": created,
        "updated_at": created,
        "runs": [],
    }
def _init_chat_state(workspace: Path, persist_sessions: bool) -> None:
    """One-time (per Streamlit session) initialisation of chat-session state.

    Loads persisted sessions when *persist_sessions* is enabled, normalises
    their records, guarantees at least one session exists, resolves the active
    session id, and stores everything into ``st.session_state``.
    """
    # Guard: only run once per Streamlit session.
    if st.session_state.get(_CHAT_STATE_INIT_KEY):
        return
    history_path = _chat_history_path(workspace)
    sessions: list[dict] = []
    active_session_id: str | None = None
    if persist_sessions:
        payload = _load_chat_history(history_path)
        sessions = payload.get("sessions", []) if isinstance(payload.get("sessions"), list) else []
        active_session_id = payload.get("active_session_id")
    now = _now_iso()
    # Normalise loaded sessions: drop malformed entries, backfill missing
    # timestamps/titles, and ensure "runs" is always a list.
    normalized_sessions: list[dict] = []
    for session in sessions:
        if not isinstance(session, dict):
            continue
        session_id = session.get("id")
        if not isinstance(session_id, str) or not session_id:
            continue
        created_at = session.get("created_at") if isinstance(session.get("created_at"), str) else ""
        updated_at = session.get("updated_at") if isinstance(session.get("updated_at"), str) else ""
        title = session.get("title") if isinstance(session.get("title"), str) else ""
        if not created_at:
            created_at = now
        if not updated_at:
            updated_at = created_at
        if not title:
            title = f"会话 {created_at}"
        session["created_at"] = created_at
        session["updated_at"] = updated_at
        session["title"] = title
        if not isinstance(session.get("runs"), list):
            session["runs"] = []
        normalized_sessions.append(session)
    sessions = normalized_sessions
    # Always have at least one session to work with.
    if not sessions:
        session = _new_chat_session()
        sessions = [session]
        active_session_id = session["id"]
        if persist_sessions:
            _save_chat_history(history_path, sessions, active_session_id)
    # Fall back to the first session when the stored active id is stale.
    valid_ids = [s.get("id") for s in sessions if isinstance(s, dict) and isinstance(s.get("id"), str)]
    if active_session_id not in valid_ids:
        active_session_id = valid_ids[0] if valid_ids else None
    st.session_state[_CHAT_HISTORY_PATH_KEY] = str(history_path)
    st.session_state[_CHAT_SESSIONS_KEY] = sessions
    st.session_state[_CHAT_ACTIVE_SESSION_KEY] = active_session_id
    st.session_state[_CHAT_SESSION_SELECT_KEY] = active_session_id
    st.session_state[_CHAT_STATE_INIT_KEY] = True
def _persist_chat_state(persist_sessions: bool) -> None:
    """Write the in-memory chat sessions to disk when persistence is enabled."""
    if not persist_sessions:
        return
    history_path = st.session_state.get(_CHAT_HISTORY_PATH_KEY)
    if not (isinstance(history_path, str) and history_path):
        return
    raw_sessions = st.session_state.get(_CHAT_SESSIONS_KEY)
    if not isinstance(raw_sessions, list):
        return
    # Only well-formed session records are persisted.
    clean_sessions = [item for item in raw_sessions if isinstance(item, dict)]
    active_id = st.session_state.get(_CHAT_ACTIVE_SESSION_KEY)
    if not isinstance(active_id, str):
        active_id = None
    _save_chat_history(Path(history_path), clean_sessions, active_id)
def _get_active_session() -> dict | None:
    """Return the session dict matching the active session id, or ``None``."""
    sessions = st.session_state.get(_CHAT_SESSIONS_KEY)
    if not isinstance(sessions, list):
        return None
    active_id = st.session_state.get(_CHAT_ACTIVE_SESSION_KEY)
    if not (isinstance(active_id, str) and active_id):
        return None
    matches = (
        item
        for item in sessions
        if isinstance(item, dict) and item.get("id") == active_id
    )
    return next(matches, None)
def _render_chat_message(role: str, render_fn) -> None:
    """Render *render_fn* inside ``st.chat_message`` when available.

    Falls back to a labeled container for Streamlit versions that predate the
    chat-message API.
    """
    bubble = getattr(st, "chat_message", None)
    if callable(bubble):
        with bubble(role):
            render_fn()
        return
    heading = "用户" if role == "user" else "助手"
    with st.container():
        st.markdown(f"**{heading}**")
        render_fn()
def _save_upload(uploaded_file, workspace: Path) -> Path:
    """Persist a Streamlit upload into a fresh ``run_*`` temp dir under *workspace*.

    The file is stored as ``input<ext>`` (defaulting to ``.docx`` when the
    upload has no extension); returns the saved path.
    """
    original_name = Path(uploaded_file.name).name
    extension = Path(original_name).suffix or ".docx"
    run_dir = Path(tempfile.mkdtemp(prefix="run_", dir=str(workspace)))
    saved_path = run_dir / f"input{extension}"
    saved_path.write_bytes(uploaded_file.getvalue())
    return saved_path
def _build_output_path(uploaded_name: str, workspace: Path) -> Path:
    """Derive a timestamped ``<stem>_修订版_<ts><ext>`` output path inside *workspace*."""
    base = Path(Path(uploaded_name).name)
    stem = base.stem or "document"
    extension = base.suffix or ".docx"
    stamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
    return workspace / f"{stem}_修订版_{stamp}{extension}"
def _parse_extra_constraints(raw: str) -> list[str]:
    """Split *raw* into stripped, non-empty lines (one constraint per line)."""
    constraints: list[str] = []
    for line in raw.splitlines():
        stripped = line.strip()
        if stripped:
            constraints.append(stripped)
    return constraints
def _has_win32() -> bool:
    """True when running on Windows with pywin32 (pythoncom + win32com) importable."""
    if os.name != "nt":
        return False
    try:
        import pythoncom  # type: ignore # noqa: F401
        import win32com  # type: ignore # noqa: F401
        return True
    except Exception:
        return False
def _has_python_docx() -> bool:
    """True when the python-docx package is importable."""
    try:
        import docx  # type: ignore # noqa: F401
        return True
    except Exception:
        return False
def _has_langgraph_sqlite() -> bool:
    """True when langgraph's SQLite checkpoint backend is importable."""
    try:
        from langgraph.checkpoint.sqlite import SqliteSaver  # type: ignore # noqa: F401
        return True
    except Exception:
        return False
def _has_tavily_key() -> bool:
    """True when a non-blank TAVILY_API_KEY is present in the environment."""
    key = os.getenv("TAVILY_API_KEY", "")
    return key.strip() != ""
def _has_openai_key() -> bool:
    """True when OPENAI_API_KEY or the generic API_KEY is set to a non-blank value."""
    for var in ("OPENAI_API_KEY", "API_KEY"):
        if os.getenv(var, "").strip():
            return True
    return False
def _has_apiyi_key() -> bool:
    """True when a non-blank APIYI_API_KEY is present in the environment."""
    value = os.getenv("APIYI_API_KEY", "")
    return bool(value.strip())
def _sanitize_filename_component(raw: str, fallback: str) -> str:
    """Reduce *raw* to word chars / CJK / hyphens joined by underscores.

    Shared by the report filename builders below (previously triplicated).
    Returns *fallback* when nothing usable remains.
    """
    cleaned = re.sub(r"[^\w\u4e00-\u9fff\-]+", "_", raw or "").strip("_")
    return cleaned or fallback


def _report_filename_timestamp() -> str:
    """Compact local timestamp used in all generated report filenames."""
    return dt.datetime.now().strftime("%Y%m%d_%H%M%S")


def _safe_report_filename(topic: str) -> str:
    """Filename for a freshly generated topic report (``<topic>_课题报告_<ts>.docx``)."""
    return f"{_sanitize_filename_component(topic, 'topic')}_课题报告_{_report_filename_timestamp()}.docx"


def _safe_report_completion_filename(name: str) -> str:
    """Filename for a completed report derived from the input file's stem."""
    stem = Path(name or "").stem or "report"
    return f"{_sanitize_filename_component(stem, 'report')}_完善_{_report_filename_timestamp()}.docx"


def _safe_report_integration_filename(topic: str, *, chapter_count: int) -> str:
    """Filename for an integrated multi-chapter report, embedding the chapter count."""
    return f"{_sanitize_filename_component(topic, 'report')}_整合_{chapter_count}章_{_report_filename_timestamp()}.docx"
def _render_report_ui(settings: AppSettings, workspace: Path) -> None:
    """Render the topic-report page: generation, completion and integration.

    The top section collects generation parameters and runs ``generate_report``;
    two expanders below (a) complete an existing report by walking its Word TOC
    and (b) merge multiple chapter .docx files into one integrated report.
    NOTE(review): indentation was reconstructed from an unindented paste —
    verify nesting against the original file.
    """
    st.header("课题报告生成")
    st.caption("输入选题与框架,系统将联网检索并生成万字左右课题报告(优先使用 Win32 Word 引擎)。")
    # Optional per-page model override, applied directly onto the settings object.
    model_override = st.sidebar.text_input("模型覆盖(可选)", value="", key="report_model_override")
    if model_override:
        settings.model = model_override
    if (settings.model or "").strip().lower().startswith("openai:") and not _has_openai_key():
        st.sidebar.error("未检测到 OPENAI_API_KEY/API_KEY,模型调用将失败;请先配置环境变量或 .env。")
    # --- generation inputs -------------------------------------------------
    topic = st.text_input("选题/题目", key=_REPORT_TOPIC_KEY)
    framework_text = st.text_area(
        "报告框架(可编辑)",
        value=st.session_state.get(_REPORT_FRAMEWORK_KEY, DEFAULT_FRAMEWORK),
        height=220,
        key=_REPORT_FRAMEWORK_KEY,
    )
    total_chars = st.slider("目标字数(约)", min_value=6000, max_value=15000, value=10000, step=500)
    report_docx_engine = st.selectbox(
        "Word生成引擎",
        options=["auto", "win32com", "python-docx"],
        index=0,
        format_func=lambda x: {"auto": "自动(推荐)", "win32com": "Win32 Word", "python-docx": "python-docx"}.get(x, x),
        help="auto 会在可用时优先使用 Win32 Word;python-docx 会生成较朴素的 docx(不含 Word 原生目录/多级编号)。",
        key="report_docx_engine",
    )
    # Formatting profiles come from the app package when importable; otherwise
    # fall back to a hard-coded list. "zhengda_cup" is excluded on this page.
    try:
        from app.formatting.profiles import PROFILES  # noqa: WPS433
        report_format_all = [p.key for p in PROFILES]
    except Exception:
        report_format_all = ["none", "thesis_standard", "a4_strict", "zhengda_cup"]
    report_format_options = [key for key in report_format_all if key != "zhengda_cup"]
    if "none" not in report_format_options:
        report_format_options.insert(0, "none")
    default_report_format = "thesis_standard" if "thesis_standard" in report_format_options else "none"
    report_format = st.selectbox(
        "报告排版风格",
        options=report_format_options,
        index=report_format_options.index(default_report_format),
        format_func=lambda x: {"none": "无", "thesis_standard": "论文标准格式", "a4_strict": "A4规范格式"}.get(x, x),
        help="生成后使用 Win32 Word 统一字体/行距/多级编号/目录。",
        key="report_format_profile",
    )
    toc_position = st.selectbox(
        "目录位置",
        options=["before_outline", "after_title", "none"],
        index=0,
        format_func=lambda x: {
            "before_outline": "报告大纲表前",
            "after_title": "标题后",
            "none": "不生成目录",
        }.get(x, x),
        help="控制 Word 目录插入位置(仅论文标准格式生效)。",
        key="report_toc_position",
    )
    max_results = st.slider("每条检索返回数", min_value=3, max_value=8, value=5)
    section_timeout = st.slider(
        "单章节超时(秒)",
        min_value=60,
        max_value=900,
        value=300,
        step=30,
        help="当前用于日志提示(不会强制中断模型调用);建议结合“章节并行数/重试次数”控制总耗时。",
    )
    max_retries = st.slider("章节重试次数", min_value=0, max_value=3, value=2, step=1)
    section_workers = st.slider("章节并行数", min_value=1, max_value=6, value=3, step=1)
    allow_web_search = st.checkbox("启用联网检索(Tavily)", value=True)
    # Capability warnings (missing keys / pywin32) shown before the run button.
    if allow_web_search and not _has_tavily_key():
        st.warning("未检测到 TAVILY_API_KEY,无法联网检索。请先配置环境变量。")
    if not _has_win32():
        st.warning("未检测到 Win32 Word(pywin32),将回退使用 python-docx 生成报告。")
    if report_docx_engine == "win32com" and not _has_win32():
        st.warning("你选择了 Win32 Word 引擎,但当前未检测到 pywin32,将无法按该引擎生成。")
    if report_format != "none" and not _has_win32():
        st.warning("当前未检测到 Win32 Word(pywin32),无法应用报告排版风格。")
    run_button = st.button("生成课题报告", type="primary")
    status = st.empty()
    log_box = st.empty()
    # --- report generation run ---------------------------------------------
    if run_button:
        if not topic:
            status.error("请输入选题/题目。")
            return
        if allow_web_search and not _has_tavily_key():
            status.error("未配置 TAVILY_API_KEY,无法联网检索。")
            return
        if report_docx_engine == "win32com" and not _has_win32():
            status.error("已选择 Win32 Word 引擎,但未检测到 pywin32。请安装 pywin32 或改为 auto/python-docx。")
            return
        output_name = _safe_report_filename(topic)
        output_path = workspace / "reports" / output_name
        output_path.parent.mkdir(parents=True, exist_ok=True)
        status.info("正在生成课题报告,请稍候...")
        log_lines: list[str] = []
        logger_writer, flush_logger = _build_throttled_log_writer(log_box, log_lines)
        try:
            # Passed to the report workflow via environment variables.
            os.environ["REPORT_TOC_POSITION"] = toc_position
            os.environ["REPORT_DOCX_ENGINE"] = report_docx_engine
            result = generate_report(
                settings=settings,
                topic=topic,
                output_path=str(output_path),
                framework_text=framework_text,
                total_chars=total_chars,
                allow_web_search=allow_web_search,
                max_results_per_query=max_results,
                section_timeout=section_timeout,
                max_section_retries=max_retries,
                section_workers=section_workers,
                format_profile=report_format,
                logger=logger_writer,
            )
        except Exception as exc:  # noqa: BLE001
            status.error(f"生成失败:{exc}")
            return
        finally:
            # Always force a final log render, even on failure.
            flush_logger()
        status.success("课题报告生成完成。")
        st.session_state[_REPORT_LAST_RESULT_KEY] = result
    # --- last generation result (survives reruns via session state) --------
    latest = st.session_state.get(_REPORT_LAST_RESULT_KEY)
    if isinstance(latest, dict):
        output_path = resolve_path(latest.get("output_path", "")) if latest.get("output_path") else None
        text_path = resolve_path(latest.get("text_path", "")) if latest.get("text_path") else None
        sources_path = resolve_path(latest.get("sources_path", "")) if latest.get("sources_path") else None
        outline_path = resolve_path(latest.get("outline_path", "")) if latest.get("outline_path") else None
        stats_path = resolve_path(latest.get("stats_path", "")) if latest.get("stats_path") else None
        if output_path and output_path.exists():
            try:
                doc_bytes = output_path.read_bytes()
                # mtime in the widget key forces Streamlit to refresh the payload.
                doc_key = f"report_dl_doc_{output_path.name}_{output_path.stat().st_mtime_ns}"
            except Exception:
                doc_bytes = None
                doc_key = "report_dl_doc_missing"
            st.download_button(
                "下载课题报告(.docx)",
                data=doc_bytes or b"",
                file_name=output_path.name,
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                use_container_width=True,
                key=doc_key,
            )
        if text_path and text_path.exists():
            try:
                text_data = text_path.read_text(encoding="utf-8")
                text_key = f"report_dl_txt_{text_path.name}_{text_path.stat().st_mtime_ns}"
            except Exception:
                text_data = ""
                text_key = "report_dl_txt_missing"
            st.download_button(
                "下载课题报告(.txt)",
                data=text_data,
                file_name=text_path.name,
                mime="text/plain",
                use_container_width=True,
                key=text_key,
            )
        if sources_path and sources_path.exists():
            with st.expander("检索来源(JSON)", expanded=False):
                st.json(json.loads(sources_path.read_text(encoding="utf-8")))
        if stats_path and stats_path.exists():
            try:
                stats_data = stats_path.read_text(encoding="utf-8")
                stats_key = f"report_dl_stats_{stats_path.name}_{stats_path.stat().st_mtime_ns}"
            except Exception:
                stats_data = ""
                stats_key = "report_dl_stats_missing"
            st.download_button(
                "下载质量统计(.json)",
                data=stats_data,
                file_name=stats_path.name,
                mime="application/json",
                use_container_width=True,
                key=stats_key,
            )
            with st.expander("生成质量统计(JSON)", expanded=False):
                st.json(json.loads(stats_path.read_text(encoding="utf-8")))
        # Quality overview from the embedded stats dict (independent of stats file).
        stats = latest.get("stats") if isinstance(latest.get("stats"), dict) else None
        if isinstance(stats, dict):
            with st.expander("质量概览", expanded=False):
                st.write(f"- 覆盖率:{stats.get('leaf_coverage')}")
                st.write(f"- 缺失叶子节点:{stats.get('leaf_missing')}")
                st.write(f"- 偏短叶子节点:{stats.get('leaf_short')}")
        if outline_path and outline_path.exists():
            with st.expander("报告大纲(JSON)", expanded=False):
                st.json(json.loads(outline_path.read_text(encoding="utf-8")))
        logs = latest.get("logs")
        if isinstance(logs, list) and logs:
            with st.expander("生成日志", expanded=False):
                st.code(_tail_lines_text("\n".join(logs), _streamlit_log_tail_lines()))
    st.divider()
    # --- expander: complete an existing report via its Word TOC -------------
    with st.expander("完善已有报告(按 Word 目录补全)", expanded=False):
        if not _has_win32():
            st.warning("未检测到 Win32 Word(pywin32),无法使用 Word COM 补全报告。")
        uploaded_report = st.file_uploader(
            "上传待完善的课题报告(.docx)",
            type=["docx"],
            key=_REPORT_COMPLETE_UPLOAD_KEY,
        )
        complete_topic = st.text_input("选题/题目(可选,留空将自动识别)", key=_REPORT_COMPLETE_TOPIC_KEY)
        fill_empty_headings = st.checkbox("补全空标题(无正文)", value=True, key="report_complete_fill_empty")
        complete_button = st.button("补全报告", type="primary", key="report_complete_button")
        status_complete = st.empty()
        log_box_complete = st.empty()
        if complete_button:
            if not uploaded_report:
                status_complete.error("请上传待完善的报告文件。")
                return
            if not _has_win32():
                status_complete.error("未检测到 Win32 Word(pywin32),无法执行补全。")
                return
            # Web search is best-effort here: degrade with a warning, not an error.
            allow_web_search_complete = allow_web_search and _has_tavily_key()
            if allow_web_search and not _has_tavily_key():
                status_complete.warning("未检测到 TAVILY_API_KEY,补全将不使用联网检索。")
            input_path = _save_upload(uploaded_report, workspace)
            output_name = _safe_report_completion_filename(uploaded_report.name)
            output_path = workspace / "reports" / output_name
            output_path.parent.mkdir(parents=True, exist_ok=True)
            status_complete.info("正在补全报告,请稍候...")
            log_lines: list[str] = []
            logger_writer, flush_logger = _build_throttled_log_writer(log_box_complete, log_lines)
            try:
                os.environ["REPORT_TOC_POSITION"] = toc_position
                result = complete_report_docx(
                    settings=settings,
                    input_path=str(input_path),
                    output_path=str(output_path),
                    topic=complete_topic.strip() if isinstance(complete_topic, str) else "",
                    allow_web_search=allow_web_search_complete,
                    max_results_per_query=max_results,
                    section_timeout=section_timeout,
                    fill_empty_headings=bool(fill_empty_headings),
                    format_profile=report_format,
                    logger=logger_writer,
                )
            except Exception as exc:  # noqa: BLE001
                status_complete.error(f"补全失败:{exc}")
                return
            finally:
                flush_logger()
            status_complete.success("报告补全完成。")
            st.session_state[_REPORT_COMPLETE_LAST_RESULT_KEY] = result
        # Last completion result (survives reruns).
        latest_complete = st.session_state.get(_REPORT_COMPLETE_LAST_RESULT_KEY)
        if isinstance(latest_complete, dict):
            output_path = resolve_path(latest_complete.get("output_path", "")) if latest_complete.get("output_path") else None
            if output_path and output_path.exists():
                try:
                    doc_bytes = output_path.read_bytes()
                    doc_key = f"report_complete_dl_{output_path.name}_{output_path.stat().st_mtime_ns}"
                except Exception:
                    doc_bytes = None
                    doc_key = "report_complete_dl_missing"
                st.download_button(
                    "下载补全后的报告(.docx)",
                    data=doc_bytes or b"",
                    file_name=output_path.name,
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    use_container_width=True,
                    key=doc_key,
                )
            logs = latest_complete.get("logs")
            if isinstance(logs, list) and logs:
                with st.expander("补全日志", expanded=False):
                    st.code(_tail_lines_text("\n".join(logs), _streamlit_log_tail_lines()))
    # --- expander: merge multiple chapter documents into one report ---------
    with st.expander("报告整合(多章节 Word 合并)", expanded=False):
        st.caption("上传各章节 Word(已生成的分章文件),系统将读取摘要、生成引言与过渡段,并整合为一份完整报告。")
        uploaded_chapters = st.file_uploader(
            "上传章节Word(.docx,可多选)",
            type=["docx"],
            accept_multiple_files=True,
            key=_REPORT_INTEGRATE_UPLOAD_KEY,
        )
        integrate_topic = st.text_input("整合报告题目(可选)", value="", key="report_integrate_topic")
        order_mode = st.selectbox(
            "章节排序",
            options=["filename", "upload", "manual"],
            index=0,
            format_func=lambda x: {"filename": "按文件名排序(推荐)", "upload": "按上传顺序", "manual": "手动指定顺序"}.get(x, x),
            key="report_integrate_order_mode",
        )
        manual_order_text = ""
        if order_mode == "manual":
            manual_order_text = st.text_area(
                "手动章节顺序(每行一个:可写“章节标题”或“文件名/文件名(不含扩展名)”)",
                value="",
                height=120,
                key="report_integrate_manual_order",
                help="示例:\n引言\n现状分析\n影响因素分析\n对策建议\n结论",
            )
        allow_llm_integrate = st.checkbox("使用模型生成引言/过渡/摘要", value=True, key="report_integrate_allow_llm")
        auto_captions = st.checkbox("自动图表题注编号(图/表)", value=True, key="report_integrate_auto_captions")
        integrate_button = st.button("开始整合生成报告", type="primary", key="report_integrate_button")
        integrate_status = st.empty()
        integrate_log_box = st.empty()
        if integrate_button:
            if not uploaded_chapters:
                integrate_status.error("请先上传至少一个章节 Word 文件。")
                return
            if not _has_win32():
                integrate_status.error("未检测到 Win32 Word(pywin32),无法执行章节整合。")
                return
            # Persist every upload into a scratch dir; unsaveable files are skipped.
            tmp_dir = Path(tempfile.mkdtemp(prefix="integrate_", dir=str(workspace)))
            saved_paths: list[Path] = []
            for f in uploaded_chapters:
                name = Path(f.name).name
                target = tmp_dir / name
                try:
                    target.write_bytes(f.getvalue())
                except Exception:
                    continue
                saved_paths.append(target)
            if not saved_paths:
                integrate_status.error("保存上传文件失败,请重试。")
                return
            if order_mode == "filename":
                saved_paths = sorted(saved_paths, key=lambda p: p.name)
            # Default the report title to the first chapter's stem.
            final_topic = integrate_topic.strip() if isinstance(integrate_topic, str) else ""
            if not final_topic:
                final_topic = saved_paths[0].stem
            output_name = _safe_report_integration_filename(final_topic, chapter_count=len(saved_paths))
            output_path = workspace / "reports" / output_name
            output_path.parent.mkdir(parents=True, exist_ok=True)
            integrate_status.info("正在整合报告,请稍候...")
            log_lines: list[str] = []
            logger_writer, flush_logger = _build_throttled_log_writer(integrate_log_box, log_lines)
            fixed_order = None
            if order_mode == "manual" and isinstance(manual_order_text, str) and manual_order_text.strip():
                fixed_order = [line.strip() for line in manual_order_text.splitlines() if line.strip()]
            try:
                os.environ["REPORT_TOC_POSITION"] = toc_position
                result = integrate_report_chapters(
                    settings=settings,
                    chapter_paths=[str(p) for p in saved_paths],
                    output_path=str(output_path),
                    topic=final_topic,
                    toc_position=toc_position,
                    format_profile=report_format,
                    allow_llm=bool(allow_llm_integrate),
                    auto_captions=bool(auto_captions),
                    fixed_order=fixed_order,
                    logger=logger_writer,
                )
            except Exception as exc:  # noqa: BLE001
                integrate_status.error(f"整合失败:{exc}")
                # Known pywin32 failure mode: corrupted gen_py COM cache.
                msg = str(exc)
                if "win32com.gen_py" in msg or "CLSIDToPackageMap" in msg or "CLSIDToClassMap" in msg:
                    st.warning(
                        "检测到 pywin32 的 gen_py 缓存损坏。建议:关闭所有 Word 窗口后重试;"
                        "必要时手动清理 win32com 缓存(gen_py)并重启 Streamlit。"
                    )
                return
            finally:
                flush_logger()
            integrate_status.success("报告整合完成。")
            st.session_state[_REPORT_INTEGRATE_LAST_RESULT_KEY] = result
        # Last integration result (survives reruns).
        latest_integrate = st.session_state.get(_REPORT_INTEGRATE_LAST_RESULT_KEY)
        if isinstance(latest_integrate, dict):
            output_path = resolve_path(latest_integrate.get("output_path", "")) if latest_integrate.get("output_path") else None
            analysis_path = (
                resolve_path(latest_integrate.get("analysis_path", ""))
                if latest_integrate.get("analysis_path")
                else None
            )
            if output_path and output_path.exists():
                try:
                    doc_bytes = output_path.read_bytes()
                    doc_key = f"report_integrate_dl_{output_path.name}_{output_path.stat().st_mtime_ns}"
                except Exception:
                    doc_bytes = None
                    doc_key = "report_integrate_dl_missing"
                st.download_button(
                    "下载整合后的报告(.docx)",
                    data=doc_bytes or b"",
                    file_name=output_path.name,
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    use_container_width=True,
                    key=doc_key,
                )
            if analysis_path and analysis_path.exists():
                try:
                    analysis_bytes = analysis_path.read_bytes()
                    analysis_key = f"report_integrate_json_{analysis_path.name}_{analysis_path.stat().st_mtime_ns}"
                except Exception:
                    analysis_bytes = None
                    analysis_key = "report_integrate_json_missing"
                st.download_button(
                    "下载整合分析(JSON)",
                    data=analysis_bytes or b"",
                    file_name=analysis_path.name,
                    mime="application/json",
                    use_container_width=True,
                    key=analysis_key,
                )
            logs = latest_integrate.get("logs")
            if isinstance(logs, list) and logs:
                with st.expander("整合日志", expanded=False):
                    st.code(_tail_lines_text("\n".join(logs), _streamlit_log_tail_lines()))
def _render_diagnostics_result(diagnostics_path: Path, *, run_id: str, key_prefix: str) -> None:
    """Render the academic-diagnostics JSON file in the Streamlit UI.

    Offers the raw file as a download, then — when the payload parses as a
    JSON object — shows an overview caption, per-card summaries laid out in
    up to three columns, and a collapsible full-JSON view.  A payload that
    is not a dict falls back to a plain-text preview.

    Args:
        diagnostics_path: Path to the diagnostics JSON on disk.
        run_id: Unique run identifier, mixed into widget keys.
        key_prefix: Caller-supplied prefix keeping widget keys unique
            across render sites.
    """
    raw_text = _safe_read_text(diagnostics_path, encoding="utf-8")
    if raw_text is None:
        st.error(f"读取学术诊断失败:{diagnostics_path}")
        return
    st.download_button(
        "下载学术诊断(JSON)",
        data=raw_text,
        file_name=diagnostics_path.name,
        mime="application/json",
        key=f"{key_prefix}dl_diagnostics_{run_id}",
        use_container_width=True,
    )
    parsed = _try_load_json(raw_text)
    if not isinstance(parsed, dict):
        # Not valid JSON (or not an object): show the raw text and stop.
        with st.expander("学术诊断预览", expanded=False):
            st.text(raw_text)
        return
    overview_value = parsed.get("overview")
    overview = overview_value if isinstance(overview_value, dict) else {}
    cards_value = overview.get("cards")
    cards = cards_value if isinstance(cards_value, list) else []
    avg_score = overview.get("average_score", "-")
    n_critical = overview.get("critical_count", 0)
    n_warning = overview.get("warning_count", 0)
    summary_value = overview.get("summary")
    st.caption(
        f"学术诊断总览:平均分 {avg_score},critical {n_critical},warning {n_warning}"
    )
    if isinstance(summary_value, str) and summary_value:
        st.write(summary_value)
    if cards:
        # At most three columns; cards wrap round-robin across them.
        columns = st.columns(min(3, len(cards)))
        column_total = len(columns)
        for position, entry in enumerate(cards):
            if not isinstance(entry, dict):
                continue
            with columns[position % column_total]:
                label_value = entry.get("label")
                label = label_value if isinstance(label_value, str) else entry.get("key", "诊断项")
                severity_value = entry.get("severity")
                severity = severity_value if isinstance(severity_value, str) else "info"
                score = entry.get("score", 0)
                headline_value = entry.get("headline")
                headline = headline_value if isinstance(headline_value, str) else ""
                st.markdown(f"**{label}**")
                st.caption(f"severity: {severity} · score: {score}")
                if headline:
                    st.write(headline)
    with st.expander("学术诊断详情", expanded=False):
        st.json(parsed)
def _render_run_result(run: dict, *, show_success_header: bool, key_prefix: str) -> None:
run_id = run.get("id") if isinstance(run.get("id"), str) else uuid.uuid4().hex
output_path_raw = run.get("output_path") if isinstance(run.get("output_path"), str) else ""
summary_path_raw = run.get("summary_path") if isinstance(run.get("summary_path"), str) else ""
log_path_raw = run.get("log_path") if isinstance(run.get("log_path"), str) else ""
diagnostics_path_raw = run.get("diagnostics_path") if isinstance(run.get("diagnostics_path"), str) else ""
tables_path_raw = run.get("tables_path") if isinstance(run.get("tables_path"), str) else ""
images_path_raw = run.get("images_path") if isinstance(run.get("images_path"), str) else ""
model_output = run.get("model_output") if isinstance(run.get("model_output"), str) else ""
run_status = run.get("status") if isinstance(run.get("status"), str) else ""
run_error = run.get("error") if isinstance(run.get("error"), str) else ""
output_path = Path(output_path_raw) if output_path_raw else None
summary_path = Path(summary_path_raw) if summary_path_raw else None
log_path = Path(log_path_raw) if log_path_raw else None
diagnostics_path = Path(diagnostics_path_raw) if diagnostics_path_raw else None
tables_path = Path(tables_path_raw) if tables_path_raw else None
images_path = Path(images_path_raw) if images_path_raw else None
if run_status and run_status != "success":
st.warning(f"状态:{run_status}")
if run_error:
st.error(run_error)
if output_path and output_path.exists():
output_bytes = _safe_read_bytes(output_path)
if output_bytes is None:
st.error(f"读取输出文件失败:{output_path}")
else:
if show_success_header:
st.success(f"审阅完成:{output_path.name}")
col_dl, col_open = st.columns(2)
with col_dl:
st.download_button(
"下载修订文档",
data=output_bytes,
file_name=output_path.name,
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
key=f"{key_prefix}dl_doc_{run_id}",
use_container_width=True,
)
with col_open:
if st.button(
"打开结果目录",
key=f"{key_prefix}open_dir_{run_id}",
use_container_width=True,
):
ok, error = _open_in_file_manager(output_path.parent)
if not ok:
st.error(f"打开目录失败:{error}")
elif output_path_raw:
st.warning(f"输出文件不存在:{output_path_raw}")
if diagnostics_path and diagnostics_path.exists():
if show_success_header and not (output_path and output_path.exists()):
st.success(f"学术诊断完成:{diagnostics_path.name}")
_render_diagnostics_result(diagnostics_path, run_id=run_id, key_prefix=key_prefix)
elif diagnostics_path_raw:
st.warning(f"学术诊断文件不存在:{diagnostics_path_raw}")
if summary_path and summary_path.exists():
summary_text = _safe_read_text(summary_path, encoding="utf-8")
if summary_text is None:
st.error(f"读取修订摘要失败:{summary_path}")
else:
st.download_button(
"下载修订摘要(JSON)",
data=summary_text,
file_name=summary_path.name,
mime="application/json",
key=f"{key_prefix}dl_summary_{run_id}",
use_container_width=True,
)
with st.expander("修订摘要预览", expanded=False):
try:
st.json(json.loads(summary_text))
except Exception:
st.text(summary_text)
if log_path and log_path.exists():
log_text = _safe_read_text(log_path, encoding="utf-8")
if log_text is None:
st.error(f"读取运行日志失败:{log_path}")
else:
with st.expander("运行日志(模型修改细节)", expanded=False):
st.code(_tail_lines_text(log_text, _streamlit_log_tail_lines()))
st.download_button(
"下载运行日志",
data=log_text,
file_name=log_path.name,
mime="text/plain",
key=f"{key_prefix}dl_log_{run_id}",
use_container_width=True,
)