Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
"build": "tsc",
"start": "node dist/main.js",
"dev": "npx tsx src/main.ts",
"test": "echo \"Error: no test specified\" && exit 1"
"test": "echo \"Error: no test specified\" && exit 1",
"snapshot": "npx tsx scripts/save-snapshots.ts",
"test-selectors": "npx tsx scripts/test-selectors.ts"
},
"repository": {
"type": "git",
Expand All @@ -32,4 +34,4 @@
"ts-node": "^10.9.2",
"typescript": "^5.9.3"
}
}
}
178 changes: 178 additions & 0 deletions scripts/save-snapshots-v2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/**
* save-snapshots-v2.ts
*
* Attempt to bypass Cloudflare Turnstile by waiting longer and
* interacting with the challenge page.
*/
import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { join } from 'path';

// Snapshots are written to a `snapshots` directory that sits next to the
// directory containing this script; when import.meta.dirname is unavailable
// the path is resolved relative to '.' instead.
const scriptDir = import.meta.dirname ?? '.';
const SNAPSHOT_DIR = join(scriptDir, '..', 'snapshots');

// Create the output directory (and any missing parents) up front.
mkdirSync(SNAPSHOT_DIR, { recursive: true });

/**
 * Returns a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 *
 * @param ms - Timer duration in milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Polls a page until its HTML contains one of the avto.net content markers,
 * or until `maxWaitSec` seconds have elapsed.
 *
 * On every poll that does not find a marker, the function looks for a frame
 * served from challenges.cloudflare.com and, if it contains a checkbox
 * element, clicks it and waits 5 s before continuing. Polls are 2 s apart.
 *
 * Failures while reading the page HTML or while interacting with the
 * challenge frame are logged and treated as "not resolved yet": the page may
 * be mid-navigation while the challenge redirects, so the loop keeps polling
 * until the deadline instead of rejecting.
 *
 * @param page - Playwright page to inspect (typed loosely as in the original).
 * @param maxWaitSec - Maximum time to keep polling, in seconds.
 * @returns `true` once a content marker is seen, `false` on timeout.
 */
async function waitForCloudflare(page: any, maxWaitSec = 45): Promise<boolean> {
  // Substrings that only appear on real result/detail pages.
  const contentMarkers = ['details.asp', 'OglasNaslov', 'ResultsAd', 'GO-Results'];

  console.log(' Waiting for Cloudflare challenge to resolve...');
  const deadline = Date.now() + maxWaitSec * 1000;

  while (Date.now() < deadline) {
    let content = '';
    try {
      content = await page.content();
    } catch (e) {
      // page.content() rejects while a navigation is in flight; retry on the
      // next poll rather than aborting the whole wait.
      console.log(' Page content unavailable (page may be navigating), retrying:', e);
    }

    // If we see actual avto.net content (results or detail page), we're through
    if (contentMarkers.some((marker) => content.includes(marker))) {
      console.log(' ✅ Cloudflare passed!');
      return true;
    }

    // Try clicking the Turnstile checkbox if visible
    try {
      const frame = page.frames().find((f: any) => f.url().includes('challenges.cloudflare.com'));
      if (frame) {
        const checkbox = await frame.$('input[type="checkbox"], .cb-i');
        if (checkbox) {
          console.log(' Found Turnstile checkbox, clicking...');
          await checkbox.click();
          await delay(5000);
        }
      }
    } catch (e) {
      // Best effort: the frame can detach at any moment. Log and keep polling.
      console.log(' Turnstile interaction failed, will retry:', e);
    }

    await delay(2000);
  }

  console.log(' ❌ Cloudflare challenge not resolved after ' + maxWaitSec + 's');
  return false;
}

/**
 * Script entry point. Launches a headed Chromium instance, visits the
 * avto.net homepage, then the search page(s) and up to five detail pages,
 * writing each page's HTML into SNAPSHOT_DIR.
 *
 * NOTE(review): browser.close() is only called at the very end of this
 * function, so an exception thrown after launch leaves the browser running.
 */
async function main() {
console.log('Launching browser v2 (longer waits, Turnstile interaction)...');

// Headed launch (headless: false) with a fixed 1920x1080 window.
// NOTE(review): the remaining flags are presumably there to reduce automation
// fingerprinting and to run inside containers — confirm.
const browser = await chromium.launch({
headless: false,
args: [
'--disable-blink-features=AutomationControlled',
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--window-size=1920,1080',
],
});
Comment on lines +46 to +58
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Browser is not closed if an error occurs mid-execution.

If any unhandled error is thrown between chromium.launch() and browser.close(), the browser process leaks. Wrap the body of main() in a try/finally to ensure cleanup.

🛡️ Proposed fix
 async function main() {
     console.log('Launching browser v2 (longer waits, Turnstile interaction)...');
 
     const browser = await chromium.launch({
         headless: false,
         args: [ /* ... */ ],
     });
 
+    try {
     const context = await browser.newContext({ /* ... */ });
     // ... rest of main body ...
-    await browser.close();
-    console.log('\nDone!');
+    } finally {
+        await browser.close();
+        console.log('\nDone!');
+    }
 }

Also applies to: 171-172

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/save-snapshots-v2.ts` around lines 46 - 58, The browser launched by
chromium.launch() in main() can leak if an exception occurs before
browser.close(); declare let browser; before calling chromium.launch(), then
wrap the launch and the rest of main()'s body in a try/finally block and call
await browser?.close() in the finally to guarantee cleanup; apply the same
try/finally pattern around the other launch/close pair referenced at lines
171-172 so every browser instance is closed on error.


// Browser context with a desktop Chrome user agent, a 1920x1080 viewport,
// Slovenian locale and the Europe/Ljubljana timezone.
const context = await browser.newContext({
userAgent:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
viewport: { width: 1920, height: 1080 },
locale: 'sl-SI',
timezoneId: 'Europe/Ljubljana',
});

// Init script registered on the context: replaces navigator.webdriver,
// navigator.plugins and navigator.languages with fixed values and installs a
// stub window.chrome object.
await context.addInitScript(() => {
Object.defineProperty(navigator, 'webdriver', { get: () => false });
Object.defineProperty(navigator, 'plugins', {
get: () => [1, 2, 3, 4, 5],
});
Object.defineProperty(navigator, 'languages', {
get: () => ['sl', 'en-US', 'en'],
});
// Fake chrome object
// @ts-ignore
window.chrome = { runtime: {}, loadTimes: () => {}, csi: () => {} };
});

// A single page is reused for every navigation below.
const page = await context.newPage();

// First, visit the homepage to get cookies
console.log('Visiting homepage first to establish session...');
await page.goto('https://www.avto.net/', { waitUntil: 'domcontentloaded', timeout: 60_000 });
// Just wait for Cloudflare to process — homepage won't have our content markers
console.log(' Waiting 20s for Cloudflare on homepage...');
await delay(20_000);
const homeHtml = await page.content();
// The homepage counts as "passed" when its HTML no longer contains the
// 'challenge-platform' substring; the result is only logged, not acted upon.
const homePassed = !homeHtml.includes('challenge-platform');
console.log(` Homepage ${homePassed ? '✅ passed' : '❌ still challenged'} (${(homeHtml.length/1024).toFixed(0)} KB)`);
await delay(5000);

// Search result pages to snapshot (currently a single URL).
const SEARCH_URLS = [
'https://www.avto.net/Ads/results.asp?zession=&Type=&Maker=&MakerN=&Model=&ModelN=&Category=1&SO=&GO=&NOC=&NOS=&NOV=&VOL=&KW=&CY=&FT=&TT=&BT=&SY=&ST=&EY=&ET=&Red=0&Q=&A=',
];

// Detail-page links harvested from search pages that got past the challenge.
const detailUrls: string[] = [];

for (let i = 0; i < SEARCH_URLS.length; i++) {
const url = SEARCH_URLS[i];
console.log(`\n--- Search page ${i + 1}`);
// Fixed 8 s pause before each search navigation.
await delay(8000);

try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60_000 });
const passed = await waitForCloudflare(page, 60);

// The HTML is saved either way; the file name records whether the
// challenge was passed.
const html = await page.content();
const filename = passed ? `search-${i + 1}.html` : `search-${i + 1}-cf-blocked.html`;
writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);

if (passed) {
// Collect the href of every anchor pointing at details.asp.
const links = await page.$$eval(
'a[href*="/Ads/details.asp"], a[href*="details.asp"]',
(els: HTMLAnchorElement[]) => els.map((a) => a.href),
);
const unique = [...new Set(links)].filter((l) => l.includes('details.asp'));
console.log(`Found ${unique.length} detail links`);
detailUrls.push(...unique);
}
} catch (e) {
// A failure on one search page is logged and the loop continues.
console.error(`Failed:`, e);
}
}

// If we got through, try a second search page via pagination.
// The snapshot name records whether the Cloudflare challenge was passed, using
// the same `-cf-blocked` suffix as the first search page, so a challenge page
// is never saved under the name of a valid result page.
if (detailUrls.length > 0) {
  console.log('\nTrying to navigate to page 2 via pagination...');
  await delay(10000);
  try {
    const nextLink = await page.$('a:has-text("Naslednja"), a:has-text("»"), a:has-text("2")');
    if (nextLink) {
      await nextLink.click();
      await page.waitForLoadState('domcontentloaded', { timeout: 30000 });
      const passed = await waitForCloudflare(page, 60);
      const html = await page.content();
      const filename = passed ? 'search-2.html' : 'search-2-cf-blocked.html';
      writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
      console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);
    } else {
      console.log('No pagination link found; skipping page 2');
    }
  } catch (e) {
    // Page 2 is optional: log the failure and carry on with the detail pages.
    console.log('Could not get page 2:', e);
  }
}
Comment on lines +128 to +145
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Pagination snapshot doesn't apply the cf-blocked naming convention.

Search page 1 (line 110) conditionally names the file with -cf-blocked when CF isn't passed, but the paginated search-2 (line 139) always saves as search-2.html even though waitForCloudflare is called on line 137. This could produce a snapshot that looks valid but actually contains the CF challenge page.

Proposed fix
-                await waitForCloudflare(page, 60);
+                const passed = await waitForCloudflare(page, 60);
                 const html = await page.content();
-                writeFileSync(join(SNAPSHOT_DIR, 'search-2.html'), html, 'utf-8');
-                console.log(`Saved search-2.html (${(html.length / 1024).toFixed(0)} KB)`);
+                const filename = passed ? 'search-2.html' : 'search-2-cf-blocked.html';
+                writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
+                console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// If we got through, try a second search page via pagination
if (detailUrls.length > 0) {
console.log('\nTrying to navigate to page 2 via pagination...');
await delay(10000);
try {
const nextLink = await page.$('a:has-text("Naslednja"), a:has-text("»"), a:has-text("2")');
if (nextLink) {
await nextLink.click();
await page.waitForLoadState('domcontentloaded', { timeout: 30000 });
await waitForCloudflare(page, 60);
const html = await page.content();
writeFileSync(join(SNAPSHOT_DIR, 'search-2.html'), html, 'utf-8');
console.log(`Saved search-2.html (${(html.length / 1024).toFixed(0)} KB)`);
}
} catch (e) {
console.log('Could not get page 2:', e);
}
}
// If we got through, try a second search page via pagination
if (detailUrls.length > 0) {
console.log('\nTrying to navigate to page 2 via pagination...');
await delay(10000);
try {
const nextLink = await page.$('a:has-text("Naslednja"), a:has-text("»"), a:has-text("2")');
if (nextLink) {
await nextLink.click();
await page.waitForLoadState('domcontentloaded', { timeout: 30000 });
const passed = await waitForCloudflare(page, 60);
const html = await page.content();
const filename = passed ? 'search-2.html' : 'search-2-cf-blocked.html';
writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);
}
} catch (e) {
console.log('Could not get page 2:', e);
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/save-snapshots-v2.ts` around lines 128 - 145, The paginated snapshot
code always writes "search-2.html" even when the page is Cloudflare-challenged;
update the block that calls waitForCloudflare(page, 60) to mirror the earlier
search-1 logic: capture the page HTML into html, determine whether the page is
CF-blocked (use the same check/flag used for the first search capture), and set
the filename to include "-cf-blocked" when blocked before calling
writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8') and logging. Ensure
you reference the same detection mechanism used with waitForCloudflare and reuse
symbols html, SNAPSHOT_DIR, writeFileSync and waitForCloudflare so naming is
consistent with the initial search snapshot.


// Fetch detail pages
// De-duplicate the collected links and keep at most the first five.
const detailsToFetch = [...new Set(detailUrls)].slice(0, 5);
console.log(`\nWill fetch ${detailsToFetch.length} detail pages`);

for (let i = 0; i < detailsToFetch.length; i++) {
const url = detailsToFetch[i];
console.log(`\n--- Detail ${i + 1}: ${url.substring(0, 100)}...`);

// Randomised pause of 15-20 s before each detail request.
const wait = 15_000 + Math.random() * 5_000;
console.log(`Waiting ${(wait / 1000).toFixed(1)}s...`);
await delay(wait);

try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60_000 });
const passed = await waitForCloudflare(page, 60);
// The HTML is saved either way; the file name records whether the
// challenge was passed.
const html = await page.content();
const filename = passed ? `detail-${i + 1}.html` : `detail-${i + 1}-cf-blocked.html`;
writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);
} catch (e) {
// A failure on one detail page is logged and the loop continues.
console.error(`Failed:`, e);
}
}

// NOTE(review): only reached when nothing above threw; an earlier exception
// skips this close() call.
await browser.close();
console.log('\nDone!');
}

// Run the script; any rejection from main() is reported and turned into a
// non-zero exit code.
void main().catch((fatal) => {
  console.error('Fatal:', fatal);
  process.exit(1);
});
144 changes: 144 additions & 0 deletions scripts/save-snapshots.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/**
* save-snapshots.ts
*
* Fetches avto.net search results and detail pages, saving full HTML
* for offline selector development. Designed to run with xvfb-run
* in headed mode to bypass Cloudflare Turnstile.
*
* Usage: xvfb-run npx tsx scripts/save-snapshots.ts
*/
import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { join } from 'path';

// Snapshots are written to a `snapshots` directory that sits next to the
// directory containing this script; when import.meta.dirname is unavailable
// the path is resolved relative to '.' instead.
const scriptDir = import.meta.dirname ?? '.';
const SNAPSHOT_DIR = join(scriptDir, '..', 'snapshots');

// Create the output directory (and any missing parents) up front.
mkdirSync(SNAPSHOT_DIR, { recursive: true });

// Search result pages to snapshot, fetched in order by main().
// NOTE(review): besides the trailing `stession=2`, the second URL spells two
// parameters differently from the first (KSession/JSession vs.
// Kession/Jession) — confirm whether that is intentional.
const SEARCH_URLS = [
'https://www.avto.net/Ads/results.asp?zession=&Pession=&Type=&Maker=&MakerN=&Model=&ModelN=&Category=1&SO=&GO=&NOC=&NOS=&NOV=&VOL=&KW=&CY=&FT=&TT=&BT=&SY=&ST=&EY=&ET=&AAession=&Kession=&Ession=&FY=&FT2=&TO=&TDO=&TOO=&Ession2=&ModelT=&AI=&AO=&Red=0&Q=&A=&Jession=&ESSION_TAB=&UESSION_TAB=',
'https://www.avto.net/Ads/results.asp?zession=&Pession=&Type=&Maker=&MakerN=&Model=&ModelN=&Category=1&SO=&GO=&NOC=&NOS=&NOV=&VOL=&KW=&CY=&FT=&TT=&BT=&SY=&ST=&EY=&ET=&AAession=&KSession=&Ession=&FY=&FT2=&TO=&TDO=&TOO=&Ession2=&ModelT=&AI=&AO=&Red=0&Q=&A=&JSession=&ESSION_TAB=&UESSION_TAB=&stession=2',
];

/**
 * Returns a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 *
 * @param ms - Timer duration in milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Script entry point. Launches a headed Chromium instance, fetches every URL
 * in SEARCH_URLS and up to five of the detail pages linked from them, and
 * writes each page's HTML into SNAPSHOT_DIR.
 *
 * A failure on an individual page is logged and does not stop the run; the
 * HTML available at that point is saved with a `-partial` suffix when it can
 * be read. The browser is closed in a `finally` block so that an unexpected
 * error after launch does not leave the browser process running.
 *
 * @returns A promise that resolves once all snapshots have been attempted and
 *   the browser has been closed.
 */
async function main(): Promise<void> {
  console.log('Launching browser (headed mode for Cloudflare bypass)...');

  const browser = await chromium.launch({
    headless: false,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu',
      '--window-size=1920,1080',
    ],
  });

  try {
    const context = await browser.newContext({
      userAgent:
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      viewport: { width: 1920, height: 1080 },
      locale: 'sl-SI',
      timezoneId: 'Europe/Ljubljana',
    });

    // Remove webdriver flag
    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      // @ts-ignore
      delete navigator.__proto__.webdriver;
      // Fake plugins
      Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5],
      });
      Object.defineProperty(navigator, 'languages', {
        get: () => ['sl', 'en-US', 'en'],
      });
    });

    // A single page is reused for every navigation below.
    const page = await context.newPage();

    // Collect detail URLs from search pages
    const detailUrls: string[] = [];

    for (const [i, url] of SEARCH_URLS.entries()) {
      console.log(`\n--- Fetching search page ${i + 1}: ${url.substring(0, 80)}...`);

      try {
        await page.goto(url, { waitUntil: 'networkidle', timeout: 60_000 });
        // Wait extra for Cloudflare challenge
        await delay(15_000);

        const html = await page.content();
        const filename = `search-${i + 1}.html`;
        writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
        console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);

        // Extract detail links
        const links = await page.$$eval(
          'a[href*="/Ads/details.asp"], a[href*="details.asp"]',
          (els) => els.map((a) => (a as HTMLAnchorElement).href),
        );
        const unique = [...new Set(links)].filter((l) => l.includes('details.asp'));
        console.log(`Found ${unique.length} detail links`);
        detailUrls.push(...unique);
      } catch (e) {
        console.error(`Failed to fetch search page ${i + 1}:`, e);
        // Save whatever we have (best effort, but report when even that fails)
        try {
          const html = await page.content();
          writeFileSync(join(SNAPSHOT_DIR, `search-${i + 1}-partial.html`), html, 'utf-8');
          console.log(`Saved partial HTML for search-${i + 1}`);
        } catch (saveErr) {
          console.warn(`Could not save partial HTML for search-${i + 1}:`, saveErr);
        }
      }

      // Randomised 10-15 s pause between search pages (not after the last one).
      if (i < SEARCH_URLS.length - 1) {
        const wait = 10_000 + Math.random() * 5_000;
        console.log(`Waiting ${(wait / 1000).toFixed(1)}s...`);
        await delay(wait);
      }
    }

    // Fetch detail pages (up to 5)
    const detailsToFetch = [...new Set(detailUrls)].slice(0, 5);
    console.log(`\nWill fetch ${detailsToFetch.length} detail pages`);

    for (const [i, url] of detailsToFetch.entries()) {
      console.log(`\n--- Fetching detail ${i + 1}: ${url.substring(0, 100)}...`);

      // Randomised 10-20 s pause before each detail request.
      const wait = 10_000 + Math.random() * 10_000;
      console.log(`Waiting ${(wait / 1000).toFixed(1)}s before request...`);
      await delay(wait);

      try {
        await page.goto(url, { waitUntil: 'networkidle', timeout: 60_000 });
        await delay(10_000);

        const html = await page.content();
        const filename = `detail-${i + 1}.html`;
        writeFileSync(join(SNAPSHOT_DIR, filename), html, 'utf-8');
        console.log(`Saved ${filename} (${(html.length / 1024).toFixed(0)} KB)`);
      } catch (e) {
        console.error(`Failed to fetch detail ${i + 1}:`, e);
        // Save whatever we have (best effort, but report when even that fails)
        try {
          const html = await page.content();
          writeFileSync(join(SNAPSHOT_DIR, `detail-${i + 1}-partial.html`), html, 'utf-8');
        } catch (saveErr) {
          console.warn(`Could not save partial HTML for detail-${i + 1}:`, saveErr);
        }
      }
    }
  } finally {
    // Always release the browser, including when an unexpected error
    // propagates out of the block above.
    await browser.close();
  }

  console.log('\nDone! Snapshots saved to snapshots/');
}

// Run the script; any rejection from main() is reported and turned into a
// non-zero exit code.
void main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
Loading