# run.py
# Forked from willow-god/Friend-Circle-Lite.
import logging
import sys
import os
from friend_circle_lite.all_friends import fetch_and_process_data, marge_data_from_json_url, marge_errors_from_json_url, deal_with_large_data
from friend_circle_lite.utils.json import write_json
from friend_circle_lite.utils.config import load_config
from friend_circle_lite.utils.mail import send_emails
from friend_circle_lite.single_friend import get_latest_articles_from_link
from friend_circle_lite.utils.github import extract_emails_from_issues
# ========== Logging setup ==========
# Configure the root logger once for the whole script: INFO level, with every
# record rendered as "<emoji> <LEVEL>: <message>".
logging.basicConfig(format='😋 %(levelname)s: %(message)s', level=logging.INFO)

# ========== Environment variables ==========
# Disabled local-development hook: uncomment to load variables from a .env
# file when GITHUB_TOKEN is not already present in the environment.
# if os.getenv("GITHUB_TOKEN") is None:
#     from dotenv import load_dotenv
#     load_dotenv()

# ========== Configuration ==========
# Every section below reads its settings from this object.
config = load_config("./conf.yaml")
# ========== Spider module ==========
# Crawl the friends' feeds, optionally merge in externally hosted results, and
# write the outcome to ./all.json and ./errors.json.
spider_conf = config["spider_settings"]
if spider_conf["enable"]:
    logging.info("✅ 爬虫已启用")

    json_url = spider_conf['json_url']
    article_count = spider_conf['article_count']
    specific_rss = config['specific_RSS']
    logging.info(f"📥 正在从 {json_url} 获取数据,每个博客获取 {article_count} 篇文章")

    result, lost_friends = fetch_and_process_data(
        json_url=json_url,               # URL of the JSON file describing the friends.
        specific_RSS=specific_rss,       # List of {name, url} dicts for specific RSS feeds (from YAML).
        count=article_count,             # Maximum number of articles fetched per blog.
        cache_file="./temp/cache.json",  # Path of the cache file.
    )

    # The merge settings are read only after the crawl, in the same order as
    # the rest of the script consumes them.
    merge_conf = spider_conf["merge_result"]
    if merge_conf["enable"]:
        merge_url = merge_conf['merge_json_url']
        logging.info(f"🔀 合并功能开启,从 {merge_url} 获取外部数据")
        result = marge_data_from_json_url(result, "{}/all.json".format(merge_url))
        lost_friends = marge_errors_from_json_url(
            lost_friends, "{}/errors.json".format(merge_url)
        )

    # From here on article_count holds the total number of collected articles,
    # no longer the per-blog limit.
    article_count = len(result.get("article_data", []))
    logging.info(f"📦 数据获取完毕,共有 {article_count} 篇文章,正在处理数据")

    result = deal_with_large_data(result)
    write_json("./all.json", result)
    write_json("./errors.json", lost_friends)
# ========== SMTP / mail push preparation ==========
# Module-level defaults, so the names below always exist even when every mail
# feature is disabled. SMTP_isReady only becomes True once all required
# settings have been found.
SMTP_isReady = False
sender_email = ""
server = ""
port = 0
use_tls = False
password = ""

if config["email_push"]["enable"] or config["rss_subscribe"]["enable"]:
    logging.info("📨 推送功能已启用,正在准备中...")

    smtp_conf = config["smtp"]
    sender_email = smtp_conf["email"]
    server = smtp_conf["server"]
    port = smtp_conf["port"]
    use_tls = smtp_conf["use_tls"]
    # The password is read from the environment only, never from conf.yaml.
    password = os.getenv("SMTP_PWD")
    logging.info(f"📡 SMTP 服务器:{server}:{port}")

    # Collect every missing setting so the error names the real culprit
    # instead of always blaming SMTP_PWD.
    missing = [
        label
        for label, value in (
            ("环境变量 SMTP_PWD", password),
            ("smtp.email", sender_email),
            ("smtp.server", server),
            ("smtp.port", port),
        )
        if not value
    ]
    if missing:
        logging.error(f"❌ SMTP 配置不完整(缺少:{', '.join(missing)}),无法发送邮件")
    else:
        # Deliberately log no part of the secret: even a short prefix of the
        # password would end up in (possibly public) CI logs.
        logging.info("🔐 SMTP 密码已加载")
        SMTP_isReady = True
# ========== Email push (not implemented yet) ==========
# Placeholder section: when email push is enabled and SMTP is ready, it only
# logs that the feature does not exist yet.
email_push_enabled = config["email_push"]["enable"]
if email_push_enabled and SMTP_isReady:
    logging.info("📧 邮件推送已启用")
    logging.info("⚠️ 抱歉,目前尚未实现邮件推送功能")
# ========== RSS subscription push ==========
# When the owner's blog has new articles, mail each of them to every subscriber
# address collected from the repository's GitHub issues.
if config["rss_subscribe"]["enable"] and SMTP_isReady:
    logging.info("📰 RSS 订阅推送已启用")
    rss_conf = config["rss_subscribe"]

    # Resolve the GitHub repository: the FCL_REPO environment variable
    # ("owner/repo") takes precedence over the values in the config file.
    fcl_repo = os.getenv('FCL_REPO')
    if fcl_repo:
        github_username, github_repo = fcl_repo.split('/')
    else:
        github_username = str(rss_conf["github_username"]).strip()
        github_repo = str(rss_conf["github_repo"]).strip()
    logging.info(f"👤 GitHub 用户名:{github_username}")
    logging.info(f"📁 GitHub 仓库:{github_repo}")

    your_blog_url = rss_conf["your_blog_url"]
    email_template = rss_conf["email_template"]
    website_title = rss_conf["website_info"]["title"]

    latest_articles = get_latest_articles_from_link(
        url=your_blog_url,
        count=5,
        last_articles_path="./temp/newest_posts.json",  # Stores the articles seen on the previous run.
    )

    if not latest_articles:
        logging.info("📭 无新文章,无需推送")
    else:
        logging.info(f"🆕 获取到的最新文章:{latest_articles}")

        # NOTE(review): GitHub's REST API documents the label filter as
        # `labels` (plural) and caps `per_page` at 100 — confirm that this
        # query filters and paginates as intended before changing it.
        github_api_url = (
            f"https://api.github.com/repos/{github_username}/{github_repo}/issues"
            f"?state=closed&label=subscribed&per_page=200"
        )
        logging.info(f"🔎 正在从 GitHub 获取订阅邮箱:{github_api_url}")
        email_list = extract_emails_from_issues(github_api_url)

        # The addresses live under the "emails" key. Check that list itself,
        # so a result object with an empty list is also treated as
        # "no subscribers" instead of mailing nobody once per article.
        subscribers = email_list.get("emails") if email_list else None
        if not subscribers:
            logging.info("⚠️ 无订阅邮箱,请检查格式或是否有订阅者")
            sys.exit(0)
        logging.info(f"📬 获取到邮箱列表:{email_list}")

        # Identical for every article, so build it once outside the loop.
        github_issue_url = (
            f"https://github.com/{github_username}/{github_repo}"
            "/issues?q=is%3Aissue+is%3Aclosed"
        )

        for article in latest_articles:
            # Values handed to the mail template for rendering.
            template_data = {
                "title": article["title"],
                "summary": article["summary"],
                "published": article["published"],
                "link": article["link"],
                "website_title": website_title,
                "github_issue_url": github_issue_url,
            }

            send_emails(
                emails=subscribers,
                sender_email=sender_email,
                smtp_server=server,
                port=port,
                password=password,
                subject=f"{website_title} の最新文章:{article['title']}",
                body=(
                    f"📄 文章标题:{article['title']}\n"
                    f"🔗 链接:{article['link']}\n"
                    f"📝 简介:{article['summary']}\n"
                    f"🕒 发布时间:{article['published']}"
                ),
                template_path=email_template,
                template_data=template_data,
                use_tls=use_tls,
            )