-
Notifications
You must be signed in to change notification settings - Fork 32
Expand file tree
/
Copy pathproxy-pool.ts
More file actions
104 lines (91 loc) · 2.87 KB
/
proxy-pool.ts
File metadata and controls
104 lines (91 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env node
/**
* Proxy Pool Example
*
* Demonstrates configuring multiple proxies with rotation for scraping.
* Useful for avoiding rate limits and IP blocks when scraping at scale.
*
* Usage:
* Set your proxy credentials and run:
* npx tsx basic/proxy-pool.ts
*/
import { ReaderClient } from "@vakra-dev/reader";
/**
 * Runs the proxy pool example end to end.
 *
 * Builds a `ReaderClient` configured with three placeholder proxies and
 * round-robin rotation, scrapes three URLs one at a time, then prints the
 * per-page results and the batch summary.
 *
 * On a scrape failure the error message is logged and the process exit code
 * is set to 1. The reader is closed in every case before the function
 * settles.
 */
async function main(): Promise<void> {
  console.log("Starting proxy pool example\n");

  // Configure proxy pool with rotation
  // Replace with your actual proxy credentials
  const reader = new ReaderClient({
    verbose: true,
    // List of proxies to rotate through
    proxies: [
      {
        host: "proxy1.example.com",
        port: 8080,
        username: "user1",
        password: "pass1",
        type: "datacenter",
      },
      {
        host: "proxy2.example.com",
        port: 8080,
        username: "user2",
        password: "pass2",
        type: "datacenter",
      },
      {
        host: "residential.example.com",
        port: 9000,
        username: "user3",
        password: "pass3",
        type: "residential",
        country: "us", // Geo-target to US
      },
    ],
    // Rotation strategy: "round-robin" (default) or "random"
    proxyRotation: "round-robin",
  });

  // URLs to scrape - each will use a different proxy from the pool
  const urls = [
    "https://example.com",
    "https://example.org",
    "https://example.net",
  ];

  console.log(`Scraping ${urls.length} URLs with proxy rotation\n`);
  console.log("Proxy rotation: round-robin");
  console.log("Proxy pool size: 3\n");

  try {
    const result = await reader.scrape({
      urls,
      formats: ["markdown"],
      batchConcurrency: 1, // Sequential to demonstrate rotation
      onProgress: (progress) => {
        console.log(`Progress: ${progress.completed}/${progress.total} - ${progress.currentUrl}`);
      },
    });

    console.log("\nScrape completed!\n");
    console.log("Results:");

    for (const page of result.data) {
      console.log(`\n ${page.metadata.baseUrl}`);
      console.log(` Title: ${page.metadata.website.title}`);
      console.log(` Duration: ${page.metadata.duration}ms`);

      // Show which proxy was used (if available)
      if (page.metadata.proxy) {
        console.log(` Proxy: ${page.metadata.proxy.host}:${page.metadata.proxy.port}`);
        if (page.metadata.proxy.country) {
          console.log(` Country: ${page.metadata.proxy.country}`);
        }
      }
    }

    console.log("\nBatch Metadata:");
    console.log(` Total URLs: ${result.batchMetadata.totalUrls}`);
    console.log(` Successful: ${result.batchMetadata.successfulUrls}`);
    console.log(` Failed: ${result.batchMetadata.failedUrls}`);
    console.log(` Total Duration: ${result.batchMetadata.totalDuration}ms`);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    console.error("Error:", error instanceof Error ? error.message : error);
    // Record the failure without calling process.exit(): exiting here would
    // terminate the process before the `finally` block can close the reader.
    process.exitCode = 1;
  } finally {
    await reader.close();
  }
}
// Entry point. Handle any rejection that escapes main() (for example a
// failure while constructing or closing the reader) instead of leaving the
// returned promise floating.
main().catch((error: unknown) => {
  console.error("Error:", error instanceof Error ? error.message : error);
  process.exitCode = 1;
});