-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
62 lines (51 loc) · 1.75 KB
/
main.py
File metadata and controls
62 lines (51 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import praw
import pandas as pd
from dotenv import load_dotenv
import os
# Run python-dotenv before any of the settings below are read
load_dotenv()

# Module-level Reddit client; every setting is looked up in the environment,
# and only the user agent has a fallback value.
reddit = praw.Reddit(
    client_id=os.environ.get('REDDIT_CLIENT_ID'),
    client_secret=os.environ.get('REDDIT_CLIENT_SECRET'),
    user_agent=os.environ.get(
        'REDDIT_USER_AGENT',
        'claude_scraper_by_u/No-West-9936',
    ),
)
def scrape_reddit_data(subreddit_name='ClaudeAI', post_limit=20):
    """
    Collect posts from a subreddit's hot listing together with their comments.

    Args:
        subreddit_name (str): Subreddit to read from
        post_limit (int): Value passed as ``limit`` to the hot listing

    Returns:
        list: One dictionary per post with the keys 'title', 'body',
            'comments', 'score', 'created_utc' and 'num_comments'
    """
    def build_record(submission):
        # Resolve the comment forest before flattening it; see PRAW's
        # replace_more documentation for what limit=0 does.
        submission.comments.replace_more(limit=0)
        comment_bodies = [
            reply.body for reply in submission.comments.list()
        ]
        return {
            'title': submission.title,
            'body': submission.selftext,
            'comments': comment_bodies,
            'score': submission.score,
            'created_utc': submission.created_utc,
            'num_comments': submission.num_comments,
        }

    # `reddit` is the module-level client created at import time.
    hot_listing = reddit.subreddit(subreddit_name).hot(limit=post_limit)
    return [build_record(submission) for submission in hot_listing]
def save_to_csv(data, output_path):
    """
    Save scraped data to a CSV file, creating the target directory if needed.

    Args:
        data (list): List of dictionaries containing post data
        output_path (str): Path where CSV file should be saved

    Raises:
        OSError: If the directory cannot be created or the file cannot
            be written
    """
    # Create the parent directory up front: a missing folder would otherwise
    # only surface once scraping has finished, and the collected data would
    # be lost. Non-path targets (e.g. file objects) are passed through as-is.
    if isinstance(output_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row index out of the file.
    df.to_csv(output_path, index=False)
    print(f"Data saved to {output_path}")
if __name__ == "__main__":
    # Destination comes from OUTPUT_PATH, defaulting to posts_data.csv
    output_path = os.getenv('OUTPUT_PATH', 'posts_data.csv')
    # Scrape with the function's default arguments, then write the result
    posts_data = scrape_reddit_data()
    save_to_csv(posts_data, output_path)