1717)
1818from .models import log_error , RULES
1919from .status_store import (
20- read_local_timestamp_date , read_or_backfill_product_last_status ,
20+ normalize_data_date , read_local_timestamp_date ,
21+ read_or_backfill_product_last_status ,
2122 report_dir_path , status_db_path , PRODUCT_LAST_STATUS_FILE ,
2223)
2324
25+ # --- CSV 日期推断 ---
26+
27+
def infer_local_date_from_csv(
    data_root: Path, product: str, rule, *, sample_size: int = 20
) -> Optional[str]:
    """Infer the newest local data date from the product's CSV contents.

    Intended as a fallback for when timestamp.txt is missing.

    Strategy: order the product's CSV files by file name (descending), read
    the first ``sample_size`` of them, and return the largest valid value
    found in the date column. This is a best-effort estimate rather than an
    exact answer: if the estimate is too low, the only consequence is that a
    few extra days get backfilled; there is no risk of data corruption.

    Args:
        data_root: Root directory that contains one sub-directory per product.
        product: Product name; its CSV files are looked up under
            ``data_root / product``.
        rule: Product rule object. Only ``date_filter_col`` and ``sort_cols``
            are read here; the rule is also passed through to the CSV reader.
        sample_size: Maximum number of files to inspect. Values below zero
            are treated as zero (no file is read, so the result is ``None``).

    Returns:
        The largest normalized date found, or ``None`` when there is no rule,
        no usable date column, no CSV file, or no valid date value (i.e. a
        genuine first-time sync).
    """
    if not rule:
        return None

    # Date column: prefer date_filter_col, otherwise the first sort column.
    date_col = rule.date_filter_col or (rule.sort_cols[0] if rule.sort_cols else None)
    if not date_col:
        return None

    csv_files = _list_csv_files(data_root / product)
    if not csv_files:
        return None

    # Sorting by file name (descending) and taking the head covers the latest
    # dates more reliably than a purely random sample would.
    newest_first = sorted(csv_files, key=lambda f: f.name, reverse=True)
    # Clamp so a negative sample_size cannot turn the slice into
    # "everything except the last N files".
    samples = newest_first[:max(sample_size, 0)]

    max_date = None
    for csv_path in samples:
        try:
            header, rows = _read_csv_full(csv_path, rule)
            if not header or not rows or date_col not in header:
                continue
            idx = header.index(date_col)
            for row in rows:
                if idx >= len(row):
                    continue  # short row: the date column is absent
                cell = row[idx].strip()
                if not cell:
                    continue
                # normalize_data_date validates the format and filters out
                # values that merely look like dates; only the first 10
                # characters are considered.
                # NOTE(review): the ">" comparison assumes normalized dates
                # order chronologically (e.g. ISO strings) - confirm.
                d = normalize_data_date(cell[:10])
                if d and (max_date is None or d > max_date):
                    max_date = d
        except Exception:
            # Deliberate best-effort: an unreadable or malformed file must
            # not abort the inference; move on to the next sample.
            continue
    return max_date
68+
69+
2470# --- 产品状态总览 ---
2571
2672# 状态颜色阈值(自然日)
@@ -647,8 +693,14 @@ def _check_temporal_integrity(data_root, product, rule, trading_calendar):
647693 return issues
648694
649695 ts_date = read_local_timestamp_date (data_root , product )
696+ inferred_mode = False
650697 if not ts_date :
651- return issues
698+ # 无 timestamp 时从 CSV 推断,启用推断模式
699+ inferred = infer_local_date_from_csv (data_root , product , rule )
700+ if not inferred :
701+ return issues # 真正无数据,跳过
702+ ts_date = inferred
703+ inferred_mode = True
652704
653705 # 确定日期列:优先 date_filter_col,其次 sort_cols 第一列
654706 date_col = rule .date_filter_col or (rule .sort_cols [0 ] if rule .sort_cols else None )
@@ -660,9 +712,9 @@ def _check_temporal_integrity(data_root, product, rule, trading_calendar):
660712 if not csv_files :
661713 return issues
662714
663- # #7 timestamp-数据日期一致性:CSV 最大日期不应超过 timestamp,也不应远落后
715+ # #7 timestamp-数据日期一致性:推断模式跳过(endpoint 和 max_date 来自同一数据源,比较无意义)
664716 max_date = _sample_max_date (csv_files , rule , date_col , sample_size = 20 )
665- if max_date :
717+ if max_date and not inferred_mode :
666718 if max_date > ts_date :
667719 issues .append (_issue ("date_exceeds_timestamp" , "error" , "temporal_integrity" ,
668720 product , f"CSV 最大日期 { max_date } > timestamp { ts_date } " ,
@@ -677,9 +729,12 @@ def _check_temporal_integrity(data_root, product, rule, trading_calendar):
677729 f"timestamp { ts_date } 远超数据最大日期 { max_date } (差 { gap_days } 天)" ,
678730 "" , False , "needs_resync" ))
679731
680- # #8 日期连续性:用 min_date 作为起点,ts_date 作为终点,检查期间是否有缺失日期
732+ # #8 日期连续性:用 min_date 作为起点检查期间是否有缺失日期
733+ # 推断模式下用 max_date(_sample_max_date 随机抽样)作为终点,
734+ # 避免用 inferred(文件名倒序抽样)当终点时 end > max_date 产生虚假缺口
681735 if not max_date :
682736 return issues
737+ end_date = max_date if inferred_mode else ts_date
683738
684739 # 抽样最小日期,作为连续性检查的起始点
685740 min_date = _sample_min_date (csv_files , rule , date_col , sample_size = 20 )
@@ -690,13 +745,13 @@ def _check_temporal_integrity(data_root, product, rule, trading_calendar):
690745 expected = None
691746 if is_crypto :
692747 # 加密货币全天候交易,期望每日都有数据
693- expected = _generate_calendar_days (min_date , ts_date )
748+ expected = _generate_calendar_days (min_date , end_date )
694749 elif product in BUSINESS_DAY_ONLY_PRODUCTS and trading_calendar :
695750 # A 股交易日产品,用精确交易日历
696- expected = {d for d in trading_calendar if min_date <= d <= ts_date }
751+ expected = {d for d in trading_calendar if min_date <= d <= end_date }
697752 elif product in BUSINESS_DAY_ONLY_PRODUCTS :
698753 # 无交易日历时降级为工作日近似(节假日可能误报)
699- expected = _generate_weekdays (min_date , ts_date )
754+ expected = _generate_weekdays (min_date , end_date )
700755
701756 if expected is None :
702757 return issues
0 commit comments