Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions docs/examples/using_browser_profile.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
id: using-browser-profile
title: Using browser profile
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ApiLink from '@site/src/components/ApiLink';

import PlaywrightChromeSource from '!!raw-loader!./using_browser_profiles_playwright_chrome.ts';
import PuppeteerChromeSource from '!!raw-loader!./using_browser_profiles_puppeteer_chrome.ts';
import PlaywrightFirefoxSource from '!!raw-loader!./using_browser_profiles_playwright_firefox.ts';

This example demonstrates how to run <ApiLink to="playwright-crawler/class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> and <ApiLink to="puppeteer-crawler/class/PuppeteerCrawler">`PuppeteerCrawler`</ApiLink> using your local browser profile from [Chrome](https://www.google.com/intl/us/chrome/) or [Firefox](https://www.firefox.com/).

Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.

## Chrome browser

To run a crawler with your Chrome profile, you need to know the path to your profile files. You can find it by opening `chrome://version/` in Chrome and checking the **Profile Path** entry. The last segment of that path is the profile name. If you have multiple profiles, make sure you pick the right one; if you only have one profile, it is usually `Default`.

:::warning Profile access limitation
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
:::

Make sure you don't have any running Chrome browser processes before running this code:

<Tabs groupId="crawler-type">
<TabItem value="playwright_crawler" label="Playwright Crawler" default>
<CodeBlock className="language-js">
{PlaywrightChromeSource}
</CodeBlock>
</TabItem>
<TabItem value="puppeteer_crawler" label="Puppeteer Crawler">
<CodeBlock className="language-js">
{PuppeteerChromeSource}
</CodeBlock>
</TabItem>
</Tabs>

## Firefox browser

To find the path to your Firefox profile, open `about:profiles` in Firefox. Unlike with Chrome, you can point the crawler at your regular profile directory directly, without copying it first.

Make sure you don't have any running Firefox browser processes before running this code:

<CodeBlock className="language-js">
{PlaywrightFirefoxSource}
</CodeBlock>
42 changes: 42 additions & 0 deletions docs/examples/using_browser_profiles_playwright_chrome.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';

import { PlaywrightCrawler } from 'crawlee';

// Profile name to use (usually 'Default' for single profile setups)
const PROFILE_NAME = 'Default';

// Path to Chrome user data directory (example for Windows)
// Use `chrome://version/` to find your profile path
const PROFILE_PATH = path.join(os.homedir(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data');

// Work on a temporary copy of the profile rather than the original,
// which avoids Chrome's lock on the real user data directory
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crawlee-chrome-profile-'));

try {
    // Copy only the selected profile folder into the temp user data directory
    fs.cpSync(path.join(PROFILE_PATH, PROFILE_NAME), path.join(tempDir, PROFILE_NAME), { recursive: true });

    const crawler = new PlaywrightCrawler({
        launchContext: {
            // Use the installed Chrome browser
            useChrome: true,
            // Set user data directory to the temp copy
            userDataDir: tempDir,
            launchOptions: {
                headless: false,
                // Slow down actions to mimic human behavior
                slowMo: 200,
                args: [
                    // Use the specified profile
                    `--profile-directory=${PROFILE_NAME}`,
                ],
            },
        },
        async requestHandler({ request, log }) {
            log.info(`Visiting ${request.url}`);
        },
    });

    await crawler.run(['https://crawlee.dev']);
} finally {
    // Always clean up the temp profile, even if the copy or the crawl fails,
    // so a copy of your browser data is never left behind in the temp folder
    fs.rmSync(tempDir, { recursive: true, force: true });
}
34 changes: 34 additions & 0 deletions docs/examples/using_browser_profiles_playwright_firefox.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os from 'node:os';
import path from 'node:path';

import { PlaywrightCrawler } from 'crawlee';
import { firefox } from 'playwright';

// Name of the Firefox profile folder to load - swap the placeholder for your own.
// The `about:profiles` page in Firefox lists the available profiles.
const PROFILE_NAME = 'your-profile-name-here';

// Folder that holds the Firefox profiles (Windows layout shown here);
// `about:profiles` also shows the exact path to use on your machine.
const profilesRoot = path.join(os.homedir(), 'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles');
const PROFILE_PATH = path.join(profilesRoot, PROFILE_NAME);

// Browser launch settings: run with a visible window and pass
// the flag required to avoid version conflicts.
const firefoxLaunchOptions = {
    headless: false,
    args: ['--allow-downgrade'],
};

const crawler = new PlaywrightCrawler({
    launchContext: {
        // Launch Firefox and point it at the profile directory
        launcher: firefox,
        userDataDir: PROFILE_PATH,
        launchOptions: firefoxLaunchOptions,
    },
    // Log every URL the crawler visits
    requestHandler: async ({ request, log }) => {
        log.info(`Visiting ${request.url}`);
    },
});

await crawler.run(['https://crawlee.dev']);
42 changes: 42 additions & 0 deletions docs/examples/using_browser_profiles_puppeteer_chrome.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';

import { PuppeteerCrawler } from 'crawlee';

// Profile name to use (usually 'Default' for single profile setups)
const PROFILE_NAME = 'Default';

// Path to Chrome user data directory (example for Windows)
// Use `chrome://version/` to find your profile path
const PROFILE_PATH = path.join(os.homedir(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data');

// Work on a temporary copy of the profile rather than the original,
// which avoids Chrome's lock on the real user data directory
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crawlee-chrome-profile-'));

try {
    // Copy only the selected profile folder into the temp user data directory
    fs.cpSync(path.join(PROFILE_PATH, PROFILE_NAME), path.join(tempDir, PROFILE_NAME), { recursive: true });

    const crawler = new PuppeteerCrawler({
        launchContext: {
            // Use the installed Chrome browser
            useChrome: true,
            launchOptions: {
                headless: false,
                // Set user data directory via Puppeteer launch options
                userDataDir: tempDir,
                // Slow down actions to mimic human behavior
                slowMo: 200,
                args: [
                    // Use the specified profile
                    `--profile-directory=${PROFILE_NAME}`,
                ],
            },
        },
        async requestHandler({ request, log }) {
            log.info(`Visiting ${request.url}`);
        },
    });

    await crawler.run(['https://crawlee.dev']);
} finally {
    // Always clean up the temp profile, even if the copy or the crawl fails,
    // so a copy of your browser data is never left behind in the temp folder
    fs.rmSync(tempDir, { recursive: true, force: true });
}
Loading