-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
136 lines (113 loc) · 3.52 KB
/
main.py
File metadata and controls
136 lines (113 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
"""Main entry point for Blocket scraper."""
import argparse
import os
import sys
from config import (
CATEGORIES,
DEFAULT_CATEGORIES,
OUTPUT_DIR,
DEFAULT_CSV_FILE,
DEFAULT_JSON_FILE,
)
from scraper import BlocketScraper
def _build_parser():
    """Build the command-line argument parser for the scraper.

    Returns:
        An ``argparse.ArgumentParser`` configured with the category, paging,
        delay, output, JSON, category-listing and API-key options.
    """
    parser = argparse.ArgumentParser(
        description="Scrape listings from Blocket.se marketplace",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python main.py # Scrape default categories (phones, electronics)
python main.py -c phones -p 3 # Scrape phones category, 3 pages
python main.py -c phones computers # Scrape multiple categories
python main.py --list-categories # List all available categories
python main.py -o my_results.csv # Custom output filename
""",
    )
    parser.add_argument(
        "-c",
        "--categories",
        nargs="+",
        default=DEFAULT_CATEGORIES,
        help=f"Categories to scrape (default: {DEFAULT_CATEGORIES})",
    )
    parser.add_argument(
        "-p",
        "--pages",
        type=int,
        default=2,
        help="Maximum pages per category (default: 2)",
    )
    parser.add_argument(
        "-d",
        "--delay",
        type=float,
        default=1.0,
        help="Delay between requests in seconds (default: 1.0)",
    )
    parser.add_argument(
        "-o",
        "--output",
        default=DEFAULT_CSV_FILE,
        help=f"Output CSV filename (default: {DEFAULT_CSV_FILE})",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Also export to JSON format",
    )
    parser.add_argument(
        "--list-categories",
        action="store_true",
        help="List all available categories and exit",
    )
    parser.add_argument(
        "--api-key",
        help="ScrapingAnt API key (or set SCRAPINGANT_API_KEY env var)",
    )
    return parser


def _json_filename_for(csv_filename):
    """Return the JSON filename that pairs with ``csv_filename``.

    Only a trailing ``.csv`` extension (matched case-insensitively) is swapped
    for ``.json``; a ``.csv`` appearing elsewhere in the name is left alone.
    When the name does not end in ``.csv`` the configured DEFAULT_JSON_FILE is
    used, so the JSON export can never reuse the CSV export's filename.
    """
    stem, extension = os.path.splitext(csv_filename)
    if extension.lower() == ".csv":
        return stem + ".json"
    return DEFAULT_JSON_FILE


def main(argv=None):
    """Run the scraper from the command line.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv``.

    Returns:
        Process exit code: 0 after listing categories or after a scrape run
        (even one that found no listings), 1 when an unknown category was
        requested or when constructing ``BlocketScraper`` raised ``ValueError``.
    """
    args = _build_parser().parse_args(argv)

    # List categories if requested
    if args.list_categories:
        print("Available categories:")
        print("-" * 40)
        for key, value in sorted(CATEGORIES.items()):
            print(f" {key:<20} ({value})")
        return 0

    # Validate categories
    invalid_categories = [c for c in args.categories if c not in CATEGORIES]
    if invalid_categories:
        # Diagnostics go to stderr so stdout only carries normal output.
        print(f"Error: Unknown categories: {invalid_categories}", file=sys.stderr)
        print("Use --list-categories to see available categories", file=sys.stderr)
        return 1

    # Initialize scraper
    try:
        scraper = BlocketScraper(api_key=args.api_key)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    # Run scraper
    print(f"Scraping categories: {args.categories}")
    print(f"Max pages per category: {args.pages}")
    print("-" * 40)
    total = scraper.scrape_categories(
        categories=args.categories,
        max_pages=args.pages,
        delay=args.delay,
    )
    print("-" * 40)
    print(f"Total listings scraped: {total}")

    # Nothing is exported when the run produced no listings.
    if total > 0:
        # Export to CSV
        csv_path = os.path.join(OUTPUT_DIR, args.output)
        scraper.export_csv(csv_path)

        # Export to JSON if requested
        if args.json:
            json_path = os.path.join(OUTPUT_DIR, _json_filename_for(args.output))
            scraper.export_json(json_path)

    return 0
# Script entry point: when this file is executed directly, run main() and
# use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())