Skip to content

Commit 5c3b386

Browse files
feat: Integrate fake news detection tool
This commit integrates a new fake news detection tool into the `social_media_analyzer`. The new tool can be accessed through the main menu and allows users to analyze a URL for signs of fake news. The detection is based on a heuristic approach, which includes: - Checking the URL against a list of known fake news domains. - Analyzing the content of the URL for sensationalist keywords and clickbait patterns. A new `fake_news_detector.py` module has been created to house the detection logic. The `heuristics.py` file has been updated with new lists for fake news detection. The `main.py` file has been updated to include the new menu option. Unit tests have been added for the new functionality, and existing tests have been updated to use mocked HTTP responses for improved reliability.
1 parent 9849fb2 commit 5c3b386

4 files changed

Lines changed: 149 additions & 4 deletions

File tree

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
import re
import urllib.error
import urllib.request
from urllib.parse import urlparse

from .heuristics import (
    FAKE_NEWS_DOMAINS,
    SENSATIONALIST_KEYWORDS,
    CLICKBAIT_PATTERNS,
    HEURISTIC_WEIGHTS
)
11+
def analyze_url_for_fake_news(url):
    """
    Analyzes a URL for indicators of fake news.

    Heuristics applied, in order:
      1. Domain blacklist check. A hit is treated as conclusive, so the
         (potentially slow) content fetch is skipped.
      2. Sensationalist-keyword scan of the page text.
      3. Clickbait-pattern scan of the page text.

    Args:
        url: URL of the article to analyze. ``http://`` is prepended
            when no scheme is present.

    Returns:
        dict with keys ``url``, ``score`` (float, higher is more
        suspicious) and ``indicators_found`` (list of str), or
        ``{"error": ...}`` when the page cannot be fetched.
    """
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    domain = urlparse(url).netloc.lower()

    score = 0.0
    indicators_found = []

    # 1. Check against known fake news domains (exact netloc match).
    if domain in FAKE_NEWS_DOMAINS:
        score += HEURISTIC_WEIGHTS.get("KNOWN_FAKE_NEWS_DOMAIN", 5.0)
        indicators_found.append(f"Domain '{domain}' is a known source of fake news.")
        return {
            "url": url,
            "score": round(score, 2),
            "indicators_found": indicators_found
        }

    # 2. Fetch the page. Only the network call sits inside the try so
    # that programming errors in the analysis below still propagate
    # (the original bare `except Exception` hid them).
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        request = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(request, timeout=10) as response:
            if response.status != 200:
                return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
            html_content = response.read().decode('utf-8', errors='ignore')
    except (urllib.error.URLError, TimeoutError, ValueError, OSError) as e:
        return {"error": f"An error occurred: {e}"}

    # Crude tag stripping is sufficient for keyword scanning.
    text_content = re.sub(r'<[^>]+>', '', html_content).lower()

    # 3. Analyze text for sensationalist keywords. Word-boundary
    # matching avoids false positives such as "cure" inside "secure".
    for keyword in SENSATIONALIST_KEYWORDS:
        if re.search(r'\b' + re.escape(keyword) + r'\b', text_content):
            score += HEURISTIC_WEIGHTS.get("SENSATIONALIST_KEYWORD", 1.0)
            indicators_found.append(f"Found sensationalist keyword: '{keyword}'")

    # 4. Analyze text for clickbait patterns (entries are regexes).
    for pattern in CLICKBAIT_PATTERNS:
        if re.search(pattern, text_content, re.IGNORECASE):
            score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
            indicators_found.append(f"Found clickbait pattern: '{pattern}'")

    return {
        "url": url,
        "score": round(score, 2),
        "indicators_found": indicators_found
    }

social_media_analyzer/heuristics.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,27 @@
123123
]
124124

125125

126+
# --- Fake News Heuristics ---

# Domains with a documented history of fabricated or misleading stories.
# The detector compares the URL's netloc against this list exactly, so
# subdomains (e.g. "www.infowars.com") will not match as written.
FAKE_NEWS_DOMAINS = [
    "abcnews.com.co", "cnn.com.de", "daily-mail.com.de",
    "infowars.com", "naturalnews.com", "breitbart.com",
    "worldnewsdailyreport.com", "theonion.com", # The Onion is satire, but often mistaken for real news
    "yournewswire.com", "davidwolfe.com"
]

# Emotionally charged words the detector scans page text for; each hit
# adds the SENSATIONALIST_KEYWORD weight to the suspicion score.
SENSATIONALIST_KEYWORDS = [
    "shocking", "bombshell", "secret", "cover-up",
    "miracle", "cure", "unbelievable", "outrageous",
    "conspiracy", "hidden truth", "what they don't want you to know", "fake news"
]

# Regular expressions for common clickbait phrasings; each match adds
# the CLICKBAIT_PATTERN weight to the suspicion score.
CLICKBAIT_PATTERNS = [
    r"you won't believe", r"will shock you", r"number \d will amaze you",
    r"this one weird trick", r"doctors hate him", r"the truth about",
    r"scientists baffled", r"what happened next", r"secret to"
]
146+
126147
# --- Regular Expression Patterns ---
127148

128149
# Basic URL detection
@@ -201,6 +222,9 @@ def generate_suspicious_url_patterns(legitimate_domains):
201222

202223
# --- Scoring Weights ---
203224
HEURISTIC_WEIGHTS = {
225+
"KNOWN_FAKE_NEWS_DOMAIN": 5.0,
226+
"SENSATIONALIST_KEYWORD": 1.0,
227+
"CLICKBAIT_PATTERN": 1.5,
204228
"URGENCY": 1.5,
205229
"SENSITIVE_INFO": 2.5,
206230
"TOO_GOOD_TO_BE_TRUE": 2.0,

social_media_analyzer/main.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
11
from . import fake_profile_detector
22
from . import scam_detector
3+
from . import fake_news_detector
4+
5+
def analyze_news_url():
    """Prompt for a news article URL and print its fake-news analysis."""
    article_url = input("Please enter the full URL of the news article you want to analyze: ").strip()
    if not article_url:
        print("No URL entered.")
        return

    print("\n--- Analyzing News URL for Fake News ---")
    analysis = fake_news_detector.analyze_url_for_fake_news(article_url)

    # Guard clauses: error first, then the clean (no-indicator) case.
    if "error" in analysis:
        print(f"Could not analyze news URL: {analysis['error']}")
        return

    indicators = analysis.get("indicators_found")
    if not indicators:
        print("No specific fake news indicators were found.")
        return

    print(f"Score: {analysis['score']} (Higher is more suspicious)")
    print("Indicators Found:")
    for item in indicators:
        print(f"- {item}")
324

425
def analyze_website_url():
526
"""Analyzes a website URL for potential scams."""
@@ -107,21 +128,24 @@ def analyze_social_media():
107128
def main():
108129
"""Main function to run the security analyzer."""
109130
print("--- Universal Security Analyzer ---")
110-
print("This tool helps you analyze social media, messages, and websites for potential scams.")
131+
print("This tool helps you analyze social media, messages, and websites for potential scams and fake news.")
111132

112133
while True:
113134
print("\n--- Main Menu ---")
114135
print("1. Analyze a Social Media Platform")
115-
print("2. Analyze a Website URL")
116-
print("3. Exit")
136+
print("2. Analyze a Website URL for Scams")
137+
print("3. Analyze a News URL for Fake News")
138+
print("4. Exit")
117139

118140
try:
119-
choice = int(input("Enter your choice (1-3): "))
141+
choice = int(input("Enter your choice (1-4): "))
120142
if choice == 1:
121143
analyze_social_media()
122144
elif choice == 2:
123145
analyze_website_url()
124146
elif choice == 3:
147+
analyze_news_url()
148+
elif choice == 4:
125149
print("Exiting. Stay safe!")
126150
break
127151
else:
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import unittest
2+
from unittest.mock import patch, Mock
3+
from .fake_news_detector import analyze_url_for_fake_news
4+
5+
class TestFakeNewsDetector(unittest.TestCase):
    """Tests for analyze_url_for_fake_news; all HTTP access is mocked."""

    @staticmethod
    def _install_response(mock_urlopen, body):
        # Make the patched urlopen's context manager yield an HTTP 200
        # response whose read() returns *body*.
        response = Mock()
        response.status = 200
        response.read.return_value = body
        mock_urlopen.return_value.__enter__.return_value = response

    @patch('urllib.request.urlopen')
    def test_fake_news_url(self, mock_urlopen):
        # A blacklisted domain must raise the score and record the
        # domain indicator, whatever the page content is.
        self._install_response(
            mock_urlopen,
            b'<html><head><title>Fake News</title></head><body>This is a shocking story!</body></html>',
        )
        result = analyze_url_for_fake_news("http://abcnews.com.co/news/breaking-news-report.html")
        self.assertGreater(result["score"], 0)
        self.assertIn("Domain 'abcnews.com.co' is a known source of fake news.", result["indicators_found"])

    @patch('urllib.request.urlopen')
    def test_legitimate_news_url(self, mock_urlopen):
        # Neutral content on an unlisted domain yields a zero score and
        # no indicators.
        self._install_response(
            mock_urlopen,
            b'<html><head><title>Real News</title></head><body>This is a real news story.</body></html>',
        )
        result = analyze_url_for_fake_news("https://www.bbc.com/news")
        self.assertEqual(result["score"], 0)
        self.assertEqual(len(result["indicators_found"]), 0)


if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)