Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
4ad3540
decouples logger from @apify/log
l2ysho Feb 5, 2026
4c92be2
update .gitignore
l2ysho Feb 5, 2026
90fbb0c
feat: add connectOptions and connectOverCDPOptions to PlaywrightCrawler
l2ysho Feb 17, 2026
21a7f13
Merge branch 'v4' into 3068-decouple-log-configuration-from-apifylog
l2ysho Feb 18, 2026
7711a16
refactor to make build work
l2ysho Feb 18, 2026
3e75bdc
lint fix
l2ysho Feb 18, 2026
f2c18c9
add abstract class to simplify implementation
l2ysho Feb 19, 2026
26d101e
Fix docs
l2ysho Feb 19, 2026
5750725
fix log spy in test
l2ysho Feb 19, 2026
e10bff2
Revert "feat: add connectOptions and connectOverCDPOptions to Playwri…
l2ysho Feb 19, 2026
f1f1fac
Fix Request logger
l2ysho Feb 19, 2026
1d78eda
cleanup + refactor
l2ysho Feb 19, 2026
d8cb997
refactor
l2ysho Feb 19, 2026
ab44350
cleanup
l2ysho Feb 19, 2026
3ae62b2
fix request log
l2ysho Feb 19, 2026
d259010
fix minimal level
l2ysho Feb 19, 2026
62a2934
fix request tandem
l2ysho Feb 19, 2026
8175e7b
add test for BaseCrawleeLogger
l2ysho Feb 19, 2026
d785449
fix test
l2ysho Feb 19, 2026
b3875f6
lint fix
l2ysho Feb 20, 2026
fbaf3a5
fix warningOnce
l2ysho Feb 20, 2026
d03dcf3
fix tests
l2ysho Feb 20, 2026
bbf293f
revert gitignore
l2ysho Feb 21, 2026
78380e8
Merge branch 'v4' into 3068-decouple-log-configuration-from-apifylog
l2ysho Feb 23, 2026
84b256f
after merge refactor
l2ysho Feb 23, 2026
439e67b
fix leftover imports
l2ysho Feb 24, 2026
37788ad
lint:fix
l2ysho Feb 24, 2026
67616a3
update snapshotter
l2ysho Feb 24, 2026
3c4fb54
lint:fix
l2ysho Feb 24, 2026
96eca15
lint:fix
l2ysho Feb 24, 2026
2b8974e
update recoverable_state
l2ysho Feb 24, 2026
cbabbae
refactor test
l2ysho Feb 24, 2026
fb555ee
remove dead import
l2ysho Feb 24, 2026
fd22e54
small fix in tests
l2ysho Feb 24, 2026
167e14a
fix tests
l2ysho Feb 24, 2026
45f03df
Remove useless enum
l2ysho Feb 24, 2026
c026c58
remove unused import
l2ysho Feb 24, 2026
7c20454
lint:fix
l2ysho Feb 24, 2026
9ccf389
decouple some more packages
l2ysho Feb 24, 2026
9f838ff
lint:fix
l2ysho Feb 24, 2026
765bfea
lint:fix
l2ysho Feb 24, 2026
d15376a
lint:fix
l2ysho Feb 24, 2026
6bc344d
fix warningOnce spawning multiple children
l2ysho Feb 25, 2026
1957a9c
lint:fix
l2ysho Feb 25, 2026
60e3c30
revert configuration changes
l2ysho Feb 25, 2026
8cc7216
update tests
l2ysho Feb 25, 2026
b56653a
cleanup test
l2ysho Feb 26, 2026
5b81475
refactor(log): rename internal logger methods
l2ysho Feb 27, 2026
7b3c158
refactor _log to log + remove loggerProvider from Conf + cleanup
l2ysho Feb 27, 2026
ee32484
lint:fix
l2ysho Feb 27, 2026
f2944aa
lint:fix
l2ysho Feb 27, 2026
755261a
enable logger conflict
l2ysho Feb 27, 2026
a942bd2
lint fix
l2ysho Feb 27, 2026
9deb0c8
fix getLog
l2ysho Feb 27, 2026
82b688d
internal -> logWithLevel
l2ysho Feb 27, 2026
f4361bc
lint:fix
l2ysho Feb 27, 2026
4b67118
add child logger
l2ysho Feb 27, 2026
87bab27
fix tests
l2ysho Feb 27, 2026
3c8fdbb
fix tests
l2ysho Feb 27, 2026
21499bf
fix test
l2ysho Feb 27, 2026
00c5fbb
lint fix
l2ysho Feb 27, 2026
ba7872a
remove internal log property
l2ysho Mar 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/examples/file_download_stream.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { pipeline, Transform } from 'stream';

import { FileDownload, type Log } from 'crawlee';
import { FileDownload, type CrawleeLogger } from 'crawlee';

// A sample Transform stream logging the download progress.
function createProgressTracker({ url, log, totalBytes }: { url: URL; log: Log; totalBytes: number }) {
function createProgressTracker({ url, log, totalBytes }: { url: URL; log: CrawleeLogger; totalBytes: number }) {
let downloadedBytes = 0;

return new Transform({
Expand Down
66 changes: 39 additions & 27 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type {
AddRequestsBatchedResult,
AutoscaledPoolOptions,
Configuration,
CrawleeLogger,
CrawlingContext,
DatasetExportOptions,
EnqueueLinksOptions,
Expand Down Expand Up @@ -40,6 +41,7 @@ import {
EnqueueStrategy,
EventType,
KeyValueStore,
LogLevel,
mergeCookies,
NonRetryableError,
purgeDefaultStorages,
Expand Down Expand Up @@ -77,8 +79,6 @@ import { getDomain } from 'tldts';
import type { ReadonlyDeep, SetRequired } from 'type-fest';

import { LruCache } from '@apify/datastructures';
import type { Log } from '@apify/log';
import defaultLog, { LogLevel } from '@apify/log';
import { addTimeoutToPromise, TimeoutError, tryCancel } from '@apify/timeout';
import { cryptoRandomObjectId } from '@apify/utilities';

Expand Down Expand Up @@ -370,9 +370,6 @@ export interface BasicCrawlerOptions<
*/
onSkippedRequest?: SkippedRequestCallback;

/** @internal */
log?: Log;

/**
* Enables experimental features of Crawlee, which can alter the behavior of the crawler.
* WARNING: these options are not guaranteed to be stable and may change or be removed at any time.
Expand Down Expand Up @@ -415,6 +412,12 @@ export interface BasicCrawlerOptions<
*/
eventManager?: EventManager;

/**
* Custom logger to use for this crawler.
* If provided, the crawler will use its own ServiceLocator instance instead of the global one.
*/
logger?: CrawleeLogger;

/**
* A unique identifier for the crawler instance. This ID is used to isolate the state returned by
* {@apilink BasicCrawler.useState|`crawler.useState()`} from other crawler instances.
Expand Down Expand Up @@ -586,7 +589,12 @@ export class BasicCrawler<
running = false;
hasFinishedBefore = false;

readonly log: Log;
#log!: CrawleeLogger;

get log(): CrawleeLogger {
return this.#log;
}

protected requestHandler!: RequestHandler<ExtendedContext>;
protected errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
protected failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
Expand Down Expand Up @@ -651,6 +659,7 @@ export class BasicCrawler<
configuration: ow.optional.object,
storageClient: ow.optional.object,
eventManager: ow.optional.object,
logger: ow.optional.object,

// AutoscaledPool shorthands
minConcurrency: ow.optional.number,
Expand All @@ -659,7 +668,6 @@ export class BasicCrawler<
keepAlive: ow.optional.boolean,

// internal
log: ow.optional.object,
experiments: ow.optional.object,

statisticsOptions: ow.optional.object,
Expand Down Expand Up @@ -695,6 +703,7 @@ export class BasicCrawler<
configuration,
storageClient,
eventManager,
logger,

// AutoscaledPool shorthands
minConcurrency,
Expand All @@ -714,7 +723,6 @@ export class BasicCrawler<
httpClient,

// internal
log = defaultLog.child({ prefix: this.constructor.name }),
experiments = {},

id,
Expand All @@ -730,15 +738,18 @@ export class BasicCrawler<
if (
storageClient ||
eventManager ||
logger ||
(configuration !== undefined && configuration !== serviceLocator.getConfiguration())
) {
const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient);
const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient, logger);
serviceLocatorScope = bindMethodsToServiceLocator(scopedServiceLocator, this);
}

try {
serviceLocatorScope.enterScope();

this.#log = serviceLocator.getLogger().child({ prefix: this.constructor.name });

// Store whether the user explicitly provided an ID
this.hasExplicitId = id !== undefined;
// Store the user-provided ID, or generate a unique one for tracking purposes (not for state key)
Expand Down Expand Up @@ -793,7 +804,6 @@ export class BasicCrawler<

this.httpClient = httpClient ?? new GotScrapingHttpClient();
this.proxyConfiguration = proxyConfiguration;
this.log = log;
this.statusMessageLoggingInterval = statusMessageLoggingInterval;
this.statusMessageCallback = statusMessageCallback as StatusMessageCallback;
this.domainAccessedTime = new Map();
Expand Down Expand Up @@ -833,19 +843,19 @@ export class BasicCrawler<
this.sameDomainDelayMillis = sameDomainDelaySecs * 1000;
this.maxSessionRotations = maxSessionRotations;
this.stats = new Statistics({
logMessage: `${log.getOptions().prefix} request statistics:`,
log,
logMessage: `${this.constructor.name} request statistics:`,
log: this.log,
...(this.hasExplicitId ? { id: this.crawlerId } : {}),
...statisticsOptions,
});
this.sessionPoolOptions = {
...sessionPoolOptions,
log,
log: this.log,
};
if (this.retryOnBlocked) {
this.sessionPoolOptions.blockedStatusCodes = sessionPoolOptions.blockedStatusCodes ?? [];
if (this.sessionPoolOptions.blockedStatusCodes.length !== 0) {
log.warning(
this.log.warning(
`Both 'blockedStatusCodes' and 'retryOnBlocked' are set. Please note that the 'retryOnBlocked' feature might not work as expected.`,
);
}
Expand All @@ -854,7 +864,7 @@ export class BasicCrawler<

const maxSignedInteger = 2 ** 31 - 1;
if (this.requestHandlerTimeoutMillis > maxSignedInteger) {
log.warning(
this.log.warning(
`requestHandlerTimeoutMillis ${this.requestHandlerTimeoutMillis}` +
` does not fit a signed 32-bit integer. Limiting the value to ${maxSignedInteger}`,
);
Expand Down Expand Up @@ -885,7 +895,7 @@ export class BasicCrawler<
isTaskReadyFunction: async () => {
if (isMaxPagesExceeded()) {
if (this.shouldLogMaxProcessedRequestsExceeded) {
log.info(
this.log.info(
'Crawler reached the maxRequestsPerCrawl limit of ' +
`${this.maxRequestsPerCrawl} requests and will shut down soon. Requests that are in progress will be allowed to finish.`,
);
Expand All @@ -898,7 +908,7 @@ export class BasicCrawler<
},
isFinishedFunction: async () => {
if (isMaxPagesExceeded()) {
log.info(
this.log.info(
`Earlier, the crawler reached the maxRequestsPerCrawl limit of ${this.maxRequestsPerCrawl} requests ` +
'and all requests that were in progress at that time have now finished. ' +
`In total, the crawler processed ${this.handledRequestsCount} requests and will shut down.`,
Expand All @@ -914,12 +924,12 @@ export class BasicCrawler<
const reason = isFinishedFunction
? "Crawler's custom isFinishedFunction() returned true, the crawler will shut down."
: 'All requests from the queue have been processed, the crawler will shut down.';
log.info(reason);
this.log.info(reason);
}

return isFinished;
},
log,
log: this.log,
};

this.autoscaledPoolOptions = { ...autoscaledPoolOptions, ...basicCrawlerAutoscaledPoolConfiguration };
Expand All @@ -944,7 +954,7 @@ export class BasicCrawler<
async setStatusMessage(message: string, options: SetStatusMessageOptions = {}) {
const data =
options.isStatusMessageTerminal != null ? { terminal: options.isStatusMessageTerminal } : undefined;
this.log.internal(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data);
this.log.logWithLevel(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data);

const client = serviceLocator.getStorageClient();

Expand Down Expand Up @@ -1099,7 +1109,7 @@ export class BasicCrawler<
retryHistogram: this.stats.requestRetryHistogram,
...finalStats,
};
this.log.info('Final request statistics:', stats);
this.log.info('Final request statistics:', stats as unknown as Record<string, unknown>);

if (this.stats.errorTracker.total !== 0) {
const prettify = ([count, info]: [number, string[]]) =>
Expand Down Expand Up @@ -1193,12 +1203,14 @@ export class BasicCrawler<
BasicCrawler.useStateCrawlerIds.add(this.crawlerId);

if (BasicCrawler.useStateCrawlerIds.size > 1) {
defaultLog.warningOnce(
'Multiple crawler instances are calling useState() without an explicit `id` option. \n' +
'This means they will share the same state object, which is likely unintended. \n' +
'To fix this, provide a unique `id` option to each crawler instance. \n' +
'Example: new BasicCrawler({ id: "my-crawler-1", ... })',
);
serviceLocator
.getLogger()
.warningOnce(
'Multiple crawler instances are calling useState() without an explicit `id` option. \n' +
'This means they will share the same state object, which is likely unintended. \n' +
'To fix this, provide a unique `id` option to each crawler instance. \n' +
'Example: new BasicCrawler({ id: "my-crawler-1", ... })',
);
}

return kvs.getAutoSavedValue<State>(BasicCrawler.CRAWLEE_STATE_KEY, defaultValue);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { type CrawleeLogger, serviceLocator } from '@crawlee/core';
import type { Cookie, Dictionary } from '@crawlee/types';
import { nanoid } from 'nanoid';
import { TypedEmitter } from 'tiny-typed-emitter';
Expand All @@ -6,7 +7,6 @@ import { tryCancel } from '@apify/timeout';

import { BROWSER_CONTROLLER_EVENTS } from '../events.js';
import type { LaunchContext } from '../launch-context.js';
import { log } from '../logger.js';
import type { UnwrapPromise } from '../utils.js';
import type { BrowserPlugin, CommonBrowser, CommonLibrary } from './browser-plugin.js';

Expand Down Expand Up @@ -40,6 +40,7 @@ export abstract class BrowserController<
NewPageResult = UnwrapPromise<ReturnType<LaunchResult['newPage']>>,
> extends TypedEmitter<BrowserControllerEvents<Library, LibraryOptions, LaunchResult, NewPageOptions, NewPageResult>> {
id = nanoid();
protected log!: CrawleeLogger;

/**
* The `BrowserPlugin` instance used to launch the browser.
Expand Down Expand Up @@ -90,6 +91,7 @@ export abstract class BrowserController<

constructor(browserPlugin: BrowserPlugin<Library, LibraryOptions, LaunchResult, NewPageOptions, NewPageResult>) {
super();
this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });
this.browserPlugin = browserPlugin;
}

Expand Down Expand Up @@ -136,14 +138,14 @@ export abstract class BrowserController<
// TODO: shouldn't this go in a finally instead?
this.isActive = false;
} catch (error) {
log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
}

this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);

setTimeout(() => {
this._kill().catch((err) => {
log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id });
this.log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id });
});
}, PROCESS_KILL_TIMEOUT_MILLIS);
}
Expand Down
4 changes: 3 additions & 1 deletion packages/browser-pool/src/abstract-classes/browser-plugin.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { CriticalError } from '@crawlee/core';
import { type CrawleeLogger, CriticalError, serviceLocator } from '@crawlee/core';
import type { Dictionary } from '@crawlee/types';
import merge from 'lodash.merge';

Expand Down Expand Up @@ -105,6 +105,7 @@ export abstract class BrowserPlugin<
NewPageResult = UnwrapPromise<ReturnType<LaunchResult['newPage']>>,
> {
name = this.constructor.name;
protected log!: CrawleeLogger;
library: Library;
launchOptions: LibraryOptions;
proxyUrl?: string;
Expand All @@ -121,6 +122,7 @@ export abstract class BrowserPlugin<
browserPerProxy = false,
} = options;

this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });
this.library = library;
this.launchOptions = launchOptions;
this.proxyUrl = proxyUrl && new URL(proxyUrl).href.slice(0, -1);
Expand Down
17 changes: 9 additions & 8 deletions packages/browser-pool/src/browser-pool.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { TieredProxy } from '@crawlee/core';
import { type CrawleeLogger, serviceLocator, type TieredProxy } from '@crawlee/core';
import type { BrowserFingerprintWithHeaders } from 'fingerprint-generator';
import { FingerprintGenerator } from 'fingerprint-generator';
import { FingerprintInjector } from 'fingerprint-injector';
Expand All @@ -20,7 +20,6 @@ import {
} from './fingerprinting/hooks.js';
import type { FingerprintGeneratorOptions } from './fingerprinting/types.js';
import type { LaunchContext } from './launch-context.js';
import { log } from './logger.js';
import type { InferBrowserPluginArray, UnwrapPromise } from './utils.js';

const PAGE_CLOSE_KILL_TIMEOUT_MILLIS = 1000;
Expand Down Expand Up @@ -334,9 +333,11 @@ export class BrowserPool<
private browserRetireInterval?: NodeJS.Timeout;

private limiter = pLimit(1);
private log!: CrawleeLogger;

constructor(options: Options & BrowserPoolHooks<BrowserControllerReturn, LaunchContextReturn, PageReturn>) {
super();
this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });

this.browserKillerInterval!.unref();

Expand Down Expand Up @@ -708,7 +709,7 @@ export class BrowserPool<
throw err;
}

log.debug('Launched new browser.', { id: browserController.id });
this.log.debug('Launched new browser.', { id: browserController.id });
browserController.proxyTier = proxyTier;
browserController.proxyUrl = proxyUrl;

Expand All @@ -719,7 +720,7 @@ export class BrowserPool<
} catch (err) {
this.startingBrowserControllers.delete(browserController);
browserController.close().catch((closeErr) => {
log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, {
this.log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, {
id: browserController.id,
});
});
Expand Down Expand Up @@ -774,15 +775,15 @@ export class BrowserPool<

if (isBrowserIdle || isBrowserEmpty) {
const { id } = controller;
log.debug('Closing retired browser.', { id });
this.log.debug('Closing retired browser.', { id });
await controller.close();
this.retiredBrowserControllers.delete(controller);
closedBrowserIds.push(id);
}
}

if (closedBrowserIds.length) {
log.debug('Closed retired browsers.', {
this.log.debug('Closed retired browsers.', {
count: closedBrowserIds.length,
closedBrowserIds,
});
Expand All @@ -798,7 +799,7 @@ export class BrowserPool<
await this._executeHooks(this.prePageCloseHooks, page, browserController);

await originalPageClose.apply(page, args).catch((err: Error) => {
log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id });
this.log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id });
});

await this._executeHooks(this.postPageCloseHooks, pageId, browserController);
Expand All @@ -821,7 +822,7 @@ export class BrowserPool<
// Run this with a delay, otherwise page.close()
// might fail with "Protocol error (Target.closeTarget): Target closed."
setTimeout(() => {
log.debug('Closing retired browser because it has no active pages', { id: browserController.id });
this.log.debug('Closing retired browser because it has no active pages', { id: browserController.id });
void browserController.close().finally(() => {
this.retiredBrowserControllers.delete(browserController);
});
Expand Down
5 changes: 0 additions & 5 deletions packages/browser-pool/src/logger.ts

This file was deleted.

Loading