-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_parse.py
More file actions
95 lines (75 loc) · 3.26 KB
/
test_parse.py
File metadata and controls
95 lines (75 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Test email parsing with attachments."""
import email
import imaplib
import os
import sys
from email.header import decode_header
sys.path.insert(0, 'src')
from parsers import parse_html, parse_pdf_bytes, parse_docx_bytes
from extractors import extract_phone, extract_order_number
# IMAP connection settings. The password is taken from the environment so
# that no secret is stored in the source file; HOST and USER can be
# overridden the same way and otherwise keep their previous values.
HOST = os.environ.get("IMAP_HOST", "imap.gmail.com")
USER = os.environ.get("IMAP_USER", "lovelykimura832@gmail.com")
PASSWORD = os.environ.get("IMAP_PASSWORD", "")
# Only messages sent from these addresses are searched for and parsed.
FROM_FILTERS = ["s1963@yandex.ru", "nsv11061992@gmail.com"]
def decode_header_str(header):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        header: Header value to decode (for example the result of
            ``msg.get("Subject")``); may be ``None`` or empty.

    Returns:
        The decoded text, or ``""`` when ``header`` is falsy. Bytes that
        cannot be decoded are replaced rather than raising.
    """
    if not header:
        return ""
    decoded = []
    for part, enc in decode_header(header):
        if isinstance(part, bytes):
            try:
                decoded.append(part.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # Unknown or unsupported charset label: fall back to UTF-8
                # instead of failing the whole header.
                decoded.append(part.decode("utf-8", errors="replace"))
        else:
            decoded.append(part)
    # decode_header() keeps the whitespace of unencoded fragments and drops
    # the insignificant whitespace between adjacent encoded words, so the
    # fragments are concatenated as-is; joining with " " doubled spaces.
    return "".join(decoded)
def _part_text(part):
    """Return the text extracted from one MIME part, or "" if there is none.

    PDF and DOCX attachments are handed to ``parse_pdf_bytes`` /
    ``parse_docx_bytes``; non-attachment ``text/plain`` is decoded with the
    part's charset and ``text/html`` is passed through ``parse_html``.
    Progress is printed as a side effect. Exceptions from decoding or from
    the parsers propagate to the caller.
    """
    ctype = part.get_content_type()
    disp = str(part.get("Content-Disposition", ""))
    payload = part.get_payload(decode=True)
    if not payload:
        # Nothing to decode for this part (e.g. a multipart container).
        return ""
    charset = part.get_content_charset() or "utf-8"

    if "attachment" in disp:
        fname = part.get_filename() or ""
        print(f"\n📎 Вложение: {fname}")
        lowered = fname.lower()
        if lowered.endswith(".pdf"):
            text = parse_pdf_bytes(payload) or ""
        elif lowered.endswith(".docx"):
            text = parse_docx_bytes(payload) or ""
        else:
            # Other attachment types are listed but not parsed.
            return ""
        print(f" 📄 Извлечено {len(text)} символов")
        return text
    if ctype == "text/plain":
        text = payload.decode(charset, errors="replace")
        print(f"\n📝 Текст письма:\n{text[:500]}...")
        return text
    if ctype == "text/html":
        return parse_html(payload.decode(charset, errors="replace")) or ""
    return ""


def _message_text(msg, subject):
    """Return the subject plus the text of every readable part of ``msg``."""
    chunks = [subject]
    for part in msg.walk():
        try:
            text = _part_text(part)
        except Exception as e:
            # Best effort: one broken part must not abort the whole message.
            print(f" ⚠️ Ошибка: {e}")
            continue
        if text:
            chunks.append(text)
    # Keep the pieces separated so that text at the end of one part cannot
    # fuse with the start of the next one before extraction.
    return "\n".join(chunks)


print("Connecting...")
# The context manager issues LOGOUT even when login or processing fails,
# so the connection is not leaked on errors.
with imaplib.IMAP4_SSL(HOST) as mail:
    mail.login(USER, PASSWORD)
    mail.select("INBOX")
    for sender in FROM_FILTERS:
        status, found = mail.search(None, f'(FROM "{sender}")')
        if status != "OK" or not found or not found[0]:
            continue
        for num in found[0].split():
            status, msg_data = mail.fetch(num, "(RFC822)")
            # The raw message is read from msg_data[0][1]; skip responses
            # that do not have that (envelope, body) tuple shape.
            fetched = (
                status == "OK"
                and msg_data
                and isinstance(msg_data[0], tuple)
            )
            if not fetched:
                print(f" ⚠️ Ошибка: не удалось получить письмо {num.decode()}")
                continue
            msg = email.message_from_bytes(msg_data[0][1])
            subj = decode_header_str(msg.get("Subject"))
            print(f"\n{'='*50}")
            print(f"📧 От: {sender}")
            print(f"📝 Тема: {subj}")
            print(f"{'='*50}")
            all_text = _message_text(msg, subj)
            # Extract data
            phone = extract_phone(all_text)
            order = extract_order_number(all_text)
            print(f"\n{'='*50}")
            print("🔍 РЕЗУЛЬТАТ ПАРСИНГА:")
            print(f" 📱 Телефон: {phone or '❌ не найден'}")
            print(f" 🔢 Заказ: {order or '❌ не найден'}")
            print(f"{'='*50}")