-
Notifications
You must be signed in to change notification settings - Fork 102
Expand file tree
/
Copy pathanalysis_04_entities.py
More file actions
218 lines (187 loc) · 8.32 KB
/
analysis_04_entities.py
File metadata and controls
218 lines (187 loc) · 8.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python3
"""
川普密碼 分析 #4 — 人物與國家點名分析
他提到誰、什麼國家、頻率變化 = 風向球
"""
import json
import re
from collections import Counter, defaultdict
from pathlib import Path
# Directory containing this script; the input JSON is read from here.
BASE = Path(__file__).parent
# Output directory for the analysis results.
DATA = BASE / "data"


def _keyword_pattern(keywords):
    """Compile *keywords* into one case-insensitive whole-word regex.

    Plain substring tests produce false positives: 'xi' is inside
    "Mexico", 'elon' inside "felony", 'irs' inside "first", 'import'
    inside "important".  Each keyword therefore has to start and end on a
    word boundary.  Lookarounds are used instead of ``\\b`` so keywords
    ending in punctuation ("Lyin'", "J.D.") still match.  Padding spaces
    in a keyword ('Xi ', ' ai ') are stripped because the boundary check
    makes them unnecessary, and a single optional trailing "s" keeps
    simple plurals ("migrants", "elections") matching.
    """
    terms = [re.escape(kw.strip()) for kw in keywords if kw.strip()]
    if not terms:
        # An empty alternation would match everywhere; match nothing instead.
        return re.compile(r"(?!)")
    alternation = "|".join(terms)
    return re.compile(r"(?<!\w)(?:" + alternation + r")s?(?!\w)", re.IGNORECASE)


def _count_mentions(posts, groups):
    """Count posts mentioning each label in *groups* (label -> keyword list).

    Returns ``(counts, monthly)``: ``counts[label]`` is the number of posts
    matching at least one keyword, and ``monthly[label][month]`` is the same
    count bucketed by the first 7 characters of ``created_at``.
    """
    counts = {}
    monthly = defaultdict(lambda: defaultdict(int))
    for label, keywords in groups.items():
        pattern = _keyword_pattern(keywords)
        hits = 0
        for post in posts:
            if pattern.search(post['content']):
                hits += 1
                monthly[label][post['created_at'][:7]] += 1
        counts[label] = hits
    return counts, monthly


def _by_count_desc(counts):
    """Return ``counts.items()`` sorted by count, highest first (stable)."""
    return sorted(counts.items(), key=lambda item: -item[1])


def _print_monthly_table(months, ranked, monthly, label_chars, col_width):
    """Print a month-by-label table for the ``(label, count)`` pairs in *ranked*."""
    header = f" {'月份':10s}" + "".join(
        f" {label[:label_chars]:>{col_width}s}" for label, _ in ranked
    )
    print(header)
    for month in months:
        cells = "".join(
            f" {monthly[label].get(month, 0):{col_width}d}" for label, _ in ranked
        )
        print(f" {month:10s}" + cells)


def main():
    """Run analysis #4: who and which countries the posts name, and how often.

    Reads ``clean_president.json`` from ``BASE``, keeps posts where
    ``has_text`` is truthy and ``is_retweet`` is falsy, and counts posts
    mentioning each country, person, nickname and policy topic.  Every
    post counts at most once per entity.  A report is printed to stdout
    and the counts are written to ``DATA / 'results_04_entities.json'``;
    the ``DATA`` directory is created if it does not exist.

    The month key is ``created_at[:7]``, which assumes ``created_at``
    starts with ``YYYY-MM`` -- TODO confirm against the data source.
    """
    with open(BASE / "clean_president.json", 'r', encoding='utf-8') as f:
        posts = json.load(f)
    originals = [p for p in posts if p['has_text'] and not p['is_retweet']]

    print("=" * 70)
    print("🌍 分析 #4: 人物與國家點名分析")
    print(f" 分析對象: 就任後原創貼文 {len(originals)} 篇")
    print("=" * 70)

    # --- 1. Country/region mention frequency ---
    # P3-7: 'EU' has no trailing space, 'Korean' became 'South Korean' /
    # 'South Korea', and 'Border' was removed from Mexico (it is a generic
    # border-policy term, not specific to Mexico).  Keywords are matched as
    # whole words, so 'EU' and 'Xi' no longer hit "Reuters" or "Mexico".
    countries = {
        'China': ['China', 'Chinese', 'Beijing', 'Xi', 'Jinping', 'CCP'],
        'Japan': ['Japan', 'Japanese', 'Tokyo', 'Kishida', 'Ishiba'],
        'Russia': ['Russia', 'Russian', 'Putin', 'Moscow', 'Kremlin'],
        'Ukraine': ['Ukraine', 'Ukrainian', 'Zelensky', 'Zelenskyy', 'Kiev', 'Kyiv'],
        'Iran': ['Iran', 'Iranian', 'Tehran', 'Khamenei'],
        'North Korea': ['North Korea', 'DPRK', 'Kim Jong', 'Pyongyang'],
        'Israel': ['Israel', 'Israeli', 'Netanyahu', 'Bibi', 'Gaza', 'Hamas', 'Hezbollah'],
        'Mexico': ['Mexico', 'Mexican', 'Cartels'],
        'Canada': ['Canada', 'Canadian', 'Trudeau', 'Ottawa'],
        'Europe/EU': ['Europe', 'European', 'EU', 'NATO', 'Brussels'],
        'UK': ['Britain', 'British', 'England', 'London', 'Starmer'],
        'India': ['India', 'Indian', 'Modi', 'Delhi'],
        'Taiwan': ['Taiwan', 'Taiwanese', 'Taipei'],
        'Saudi Arabia': ['Saudi', 'Arabia', 'Riyadh', 'MBS'],
        'South Korea': ['South Korea', 'South Korean', 'Seoul'],
    }
    country_counts, country_monthly = _count_mentions(originals, countries)

    print("\n🌐 國家/地區提及次數:")
    print("-" * 60)
    for country, count in _by_count_desc(country_counts):
        bar = '█' * (count // 3)
        print(f" {country:15s} | {count:4d}篇 {bar}")

    # Monthly trend for the six most-mentioned countries.
    top_countries = _by_count_desc(country_counts)[:6]
    print("\n📈 Top 6 國家月度趨勢:")
    print("-" * 60)
    all_months = sorted({p['created_at'][:7] for p in originals})
    _print_monthly_table(all_months, top_countries, country_monthly, 6, 7)

    # --- 2. People named ---
    people = {
        'Biden': ['Biden', 'Joe Biden', 'Sleepy Joe'],
        'Obama': ['Obama', 'Barack'],
        'Pelosi': ['Pelosi', 'Nancy'],
        'Schumer': ['Schumer', 'Chuck Schumer'],
        'DeSantis': ['DeSantis', 'Ron DeSantis', 'DeSanctimonious'],
        'Elon Musk': ['Elon', 'Musk', 'Tesla', 'DOGE'],
        'Vivek': ['Vivek', 'Ramaswamy'],
        'Kamala': ['Kamala', 'Harris'],
        'Pence': ['Pence', 'Mike Pence'],
        'McConnell': ['McConnell', 'Mitch'],
        'RFK Jr': ['Kennedy', 'RFK'],
        'Vance': ['Vance', 'J.D.', 'JD Vance'],
        'Jack Smith': ['Jack Smith', 'Special Counsel'],
        'Putin': ['Putin', 'Vladimir'],
        'Xi Jinping': ['Xi Jinping', 'Xi '],
        'Zelensky': ['Zelensky', 'Zelenskyy'],
        'Kim Jong Un': ['Kim Jong'],
        'Netanyahu': ['Netanyahu', 'Bibi'],
    }
    people_counts, people_monthly = _count_mentions(originals, people)

    print("\n👤 人物提及次數:")
    print("-" * 60)
    for person, count in _by_count_desc(people_counts):
        if count > 0:
            bar = '█' * min(count // 2, 40)
            print(f" {person:15s} | {count:4d}篇 {bar}")

    # --- 3. Nickname / epithet tracking ---
    print("\n🏷️ 川普專用外號追蹤:")
    print("-" * 60)
    nicknames = [
        'Sleepy Joe', 'Crooked', 'Crazy', 'Radical Left', 'Fake News',
        'RINO', 'Deep State', 'Witch Hunt', 'Enemy of the People',
        'Do Nothing', 'Low Energy', 'Lyin\'', 'Shifty', 'Nervous',
        'Deranged', 'Failing', 'Phony', 'Corrupt', 'Lunatic',
        'Incompetent', 'Stupid', 'DeSanctimonious', 'Comrade',
        'Laughing', 'Loco', 'Wacko', 'Liddle', 'Mini', 'Sloppy',
    ]
    nickname_counts = {}
    for nick in nicknames:
        pattern = _keyword_pattern([nick])
        count = sum(1 for p in originals if pattern.search(p['content']))
        # Only nicknames that actually occur are reported and saved.
        if count > 0:
            nickname_counts[nick] = count
    for nick, count in _by_count_desc(nickname_counts):
        bar = '█' * min(count, 30)
        print(f" {nick:25s} | {count:4d} {bar}")

    # --- 4. Policy topic keywords ---
    print("\n📋 政策關鍵字頻率:")
    print("-" * 60)
    topics = {
        'Tariff/關稅': ['tariff', 'tariffs', 'duty', 'duties'],
        'Border/邊境': ['border', 'wall', 'immigration', 'migrant', 'deportation', 'deport'],
        'Economy/經濟': ['economy', 'economic', 'inflation', 'gdp', 'recession', 'growth'],
        'Trade/貿易': ['trade', 'trade deal', 'trade deficit', 'export', 'import'],
        'Military/軍事': ['military', 'army', 'navy', 'troops', 'defense', 'defence'],
        'Energy/能源': ['energy', 'oil', 'gas', 'drill', 'pipeline', 'opec'],
        'Tech/科技': ['technology', 'tech', 'artificial intelligence', ' ai ', 'chips', 'semiconductor'],
        'Crime/犯罪': ['crime', 'criminal', 'gang', 'ms-13', 'fentanyl', 'drugs'],
        'Election/選舉': ['election', 'vote', 'voter', 'ballot', 'poll'],
        'Tax/稅': ['tax', 'taxes', 'irs', 'tax cut'],
        'Jobs/就業': ['jobs', 'employment', 'unemployment', 'workers', 'hiring'],
        'Stock Market/股市': ['stock market', 'dow', 'nasdaq', 'wall street', 's&p'],
    }
    topic_counts, topic_monthly = _count_mentions(originals, topics)
    for topic, count in _by_count_desc(topic_counts):
        bar = '█' * (count // 3)
        print(f" {topic:20s} | {count:4d}篇 {bar}")

    # Monthly trend for the six most frequent topics.
    print("\n📈 主題月度趨勢 (Top 6):")
    print("-" * 60)
    top_topics = _by_count_desc(topic_counts)[:6]
    _print_monthly_table(all_months, top_topics, topic_monthly, 8, 9)

    # Persist the results.
    results = {
        'country_counts': country_counts,
        'country_monthly': {k: dict(v) for k, v in country_monthly.items()},
        'people_counts': people_counts,
        'people_monthly': {k: dict(v) for k, v in people_monthly.items()},
        'nickname_counts': nickname_counts,
        'topic_counts': topic_counts,
        'topic_monthly': {k: dict(v) for k, v in topic_monthly.items()},
    }
    # Create the output directory so a fresh checkout does not fail at the
    # very last step after all the counting is done.
    DATA.mkdir(parents=True, exist_ok=True)
    with open(DATA / 'results_04_entities.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print("\n💾 詳細結果存入 results_04_entities.json")
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not when imported.
    main()