From c6d55654b87e4f2880d671821336d6e4637d4fb8 Mon Sep 17 00:00:00 2001 From: Wuchen-4810 Date: Tue, 12 May 2026 00:01:49 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20add=20merged=20forward=20message=20(?= =?UTF-8?q?=E5=90=88=E5=B9=B6=E8=BD=AC=E5=8F=91)=20parsing=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add _format_merged_forward_message to parse app_type 17/19 XML - Extract individual messages from <dataitem> elements - Show sender, timestamp, and content for each forwarded message - Handle nested forwarded messages and file references - Fix XML parsing failure when an <?xml ...?> declaration is embedded inside the message content - Increase _XML_PARSE_MAX_LEN from 20000 to 200000 for long chat records Co-Authored-By: Claude Opus 4.7 --- wechat_cli/core/messages.py | 129 +++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/wechat_cli/core/messages.py b/wechat_cli/core/messages.py index d62ef33..6dadbfc 100644 --- a/wechat_cli/core/messages.py +++ b/wechat_cli/core/messages.py @@ -14,7 +14,7 @@ _zstd_dctx = zstd.ZstdDecompressor() _XML_UNSAFE_RE = re.compile(r''): + inner_xml = inner_xml[:-3] + inner_xml[-3:].replace(']]>', '') + # 清理 + inner_xml = inner_xml.replace('', '').strip() + if not inner_xml: + continue + + record_root = _parse_xml_root(inner_xml) + if record_root is None: + continue + + # 尝试解析 中的 元素(最精确) + datalist = record_root.find('.//datalist') + parsed_items = False + if datalist is not None: + dataitems = datalist.findall('dataitem') + if dataitems: + parsed_items = True + for item in dataitems: + sender = _collapse_text(item.findtext('sourcename') or '') + msg_time = _collapse_text(item.findtext('sourcetime') or '') + datatype = _parse_int(item.findtext('datatype') or '0', 0) + datadesc = _collapse_text(item.findtext('datadesc') or '') + datatitle = _collapse_text(item.findtext('datatitle') or '') + datafmt = _collapse_text(item.findtext('datafmt') or '') + + # 根据数据类型格式化 + type_labels = {1: 
'', 2: '[图片]', 8: '[文件]', 17: '[合并转发]'} + type_label = type_labels.get(datatype, f'[type={datatype}]') + + if datatype == 0 and not datadesc and not datatitle: + continue # 空分隔符/占位项,跳过 + if datatype == 8: # 文件 + fname = datatitle or datadesc or '未知文件' + ext = f' .{datafmt}' if datafmt else '' + body = f'[文件] {fname}{ext}' + elif datatype == 2: # 图片 + body = '[图片]' + elif datatype == 17: # 嵌套合并转发 + body = f'[合并转发] {datatitle}' + else: + body = datadesc or datatitle or type_label + + # 截断过长消息 + if len(body) > 200: + body = body[:200] + '...' + + prefix = f' ├ {sender}' if sender else ' ├' + if msg_time: + prefix += f' [{msg_time}]' + parts.append(f'{prefix}: {body}') + continue # 已处理此 recorditem + + # 回退:解析 字段(摘要模式) + desc_text = _collapse_text(record_root.findtext('desc') or '') + if desc_text: + # 的格式: "发送者: 内容\n发送者: 内容\n..." + # HTML 实体解码 + import html + desc_text = html.unescape(desc_text) + for line in desc_text.split('\n'): + line = line.strip() + if not line: + continue + if ':' in line: + colon_pos = line.index(':') + sender_part = line[:colon_pos].strip() + msg_part = line[colon_pos + 1:].strip() + if len(msg_part) > 200: + msg_part = msg_part[:200] + '...' + parts.append(f' ├ {sender_part}: {msg_part}') + else: + # 没有冒号 = 可能是纯内容 + if len(line) > 200: + line = line[:200] + '...' + parts.append(f' ├ {line}') + + # 如果只解析出标题(空转发),回退 summary_text + if len(parts) == 1 and summary_text: + for line in summary_text.split('\n'): + line = line.strip() + if not line: + continue + if len(line) > 200: + line = line[:200] + '...' 
+ parts.append(f' ├ {line}') + + return '\n'.join(parts) + + def _format_app_message_text(content, local_type, is_group, chat_username, chat_display_name, names, _display_name_fn, resolve_media=False, db_dir=None, create_time_ts=0): if not content or ' 内部嵌入了 声明,导致 XML 解析失败 + # 将其移除后再解析 + content = re.sub(r'<\?xml\b[^?]*\?>', '', content) _, sub_type = _split_msg_type(local_type) root = _parse_xml_root(content) if root is None: @@ -198,6 +319,12 @@ def _format_app_message_text(content, local_type, is_group, chat_username, chat_ return f"[链接] {title}" if title else "[链接]" if app_type in (33, 36, 44): return f"[小程序] {title}" if title else "[小程序]" + if app_type in (17, 19): + merged = _format_merged_forward_message(content, appmsg) + if merged is not None: + return merged + # 回退:至少显示标题 + return f"[合并转发] {title}" if title else "[合并转发]" if title: return f"[链接/文件] {title}" return "[链接/文件]"