Skip to content
Merged

Dev #20

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 71 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ await client.close();
- **Web Scraping** — Scrape any website using anti-bot detection bypass and proxy support
- **Search Engine Results** — Google, Bing, and Yandex search with batch support
- **Platform Scrapers** — Structured data collection from LinkedIn, Amazon, Instagram, TikTok, YouTube, Reddit, and more
- **Datasets** — Access 19 pre-built datasets (Amazon, LinkedIn, Instagram, TikTok, X/Twitter) with query/download support
- **Discover API** — AI-powered web search with intent-based relevance ranking
- **Scraper Studio** — Trigger and fetch results from custom scrapers built in Bright Data's Scraper Studio
- **Datasets** — Access 126 pre-built datasets across dozens of platforms with query/download support
- **Parallel Processing** — Concurrent processing for multiple URLs or queries
- **Robust Error Handling** — Typed error classes with retry logic
- **Zone Management** — Automatic zone creation and management
Expand Down Expand Up @@ -168,11 +170,77 @@ console.log(result.status); // 'ready'
console.log(result.rowCount);
```

**Available platforms:** `linkedin`, `amazon`, `instagram`, `tiktok`, `youtube`, `reddit`, `facebook`, `chatGPT`, `digikey`, `perplexity`
**Available platforms:** `linkedin`, `amazon`, `instagram`, `tiktok`, `youtube`, `reddit`, `facebook`, `pinterest`, `chatGPT`, `digikey`, `perplexity`

### Discover API

AI-powered web search with relevance ranking based on intent.

```javascript
// Each example binds its own variable so the whole snippet can be pasted into
// a single module without redeclaring a `const`.

// Basic search
const basic = await client.discover('artificial intelligence trends 2026');
console.log(basic.data); // [{ link, title, description, relevance_score }, ...]

// With intent for semantic ranking
const ranked = await client.discover('Tesla battery technology', {
  intent: 'recent breakthroughs in EV battery chemistry',
});

// With filtering and localization
const filtered = await client.discover('sustainable fashion brands', {
  intent: 'eco-friendly clothing companies',
  filterKeywords: ['sustainability', 'eco-friendly', 'organic'],
  country: 'us',
  numResults: 10,
});

// Include full page content
const withContent = await client.discover('python asyncio tutorial', {
  includeContent: true,
  numResults: 3,
});

// Manual trigger/poll/fetch
const job = await client.discoverTrigger('market research SaaS', {
  intent: 'competitor pricing strategies',
});
await job.wait({ timeout: 60_000 });
const data = await job.fetch();
```

### Scraper Studio

Trigger and fetch results from your custom scrapers built in [Scraper Studio](https://brightdata.com/cp/data_collector).

```javascript
// Each example binds its own variable so the whole snippet can be pasted into
// a single module without redeclaring a `const`.

// Orchestrated — trigger + poll + return results
const results = await client.scraperStudio.run('c_your_collector_id', {
  input: { url: 'https://example.com/product/1' },
});
// results: RunResult[] — one per input with { input, data, error, responseId, elapsedMs }

// Multiple inputs (processed sequentially)
const batchResults = await client.scraperStudio.run('c_your_collector_id', {
  input: [
    { url: 'https://example.com/product/1' },
    { url: 'https://example.com/product/2' },
  ],
});

// Manual control — trigger, then poll yourself
const job = await client.scraperStudio.trigger('c_your_collector_id', {
  url: 'https://example.com/product/1',
});
const data = await job.waitAndFetch();

// Check job status (by job ID from the dashboard)
const status = await client.scraperStudio.status('j_abc123');
console.log(status.status); // 'queued' | 'running' | 'done' | 'failed'
```

### Datasets

Access pre-built datasets for querying and downloading structured data snapshots.
Access 126 pre-built datasets for querying and downloading structured data snapshots.

```javascript
const ds = client.datasets;
Expand Down
81 changes: 32 additions & 49 deletions src/api/datasets/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { Transport, assertResponse } from '../../core/transport';
import { API_ENDPOINT } from '../../utils/constants';
import { parseJSON } from '../../utils/misc';
import { getLogger } from '../../utils/logger';
import { wrapAPIError } from '../../utils/error-utils';
import { pollUntilReady } from '../../utils/polling';
import type {
DatasetMetadata,
Expand Down Expand Up @@ -34,41 +33,33 @@ export abstract class BaseDataset {
'{dataset_id}',
this.datasetId,
);
try {
const response = await this.transport.request(url);
const text = await assertResponse(response);
return parseJSON<DatasetMetadata>(text);
} catch (e: unknown) {
wrapAPIError(e, `datasets.${this.name}.getMetadata`);
}
const response = await this.transport.request(url);
const text = await assertResponse(response);
return parseJSON<DatasetMetadata>(text);
}

/**
 * Sends a filter query for this dataset and returns the `snapshot_id` the API
 * reports for it.
 *
 * The request body is `{ dataset_id: this.datasetId, filter }`, JSON-encoded
 * and POSTed to `API_ENDPOINT.DATASET_FILTER`.
 *
 * NOTE(review): this span comes from a rendered diff whose +/- markers were
 * lost. The `try { … } catch` body and the flat body after it look like the
 * removed and added sides of one change — confirm against the repository
 * before treating both as live code.
 *
 * @param filter - Filter object placed under `filter` in the request body.
 * @param opts - Optional query options. Only `records_limit` is read here, and
 *   it is forwarded only when truthy, so `0` behaves like "not set".
 * @returns The `snapshot_id` field parsed from the response text.
 */
async query(
filter: Record<string, unknown>,
opts?: DatasetQueryOptions,
): Promise<string> {
this.logger.debug('query', { filter });
// NOTE(review): presumably the pre-change side of the diff starts here.
try {
const body: Record<string, unknown> = {
dataset_id: this.datasetId,
filter,
};
if (opts?.records_limit) {
body.records_limit = opts.records_limit;
}
const response = await this.transport.request(
API_ENDPOINT.DATASET_FILTER,
{
method: 'POST',
body: JSON.stringify(body),
},
);
const text = await assertResponse(response);
const result = parseJSON<{ snapshot_id: string }>(text);
return result.snapshot_id;
} catch (e: unknown) {
// Hands the caught error and a `datasets.<name>.query` label to wrapAPIError;
// what wrapAPIError does with them is not visible here.
wrapAPIError(e, `datasets.${this.name}.query`);
// NOTE(review): presumably the post-change side of the diff starts here —
// the same request with no local try/catch, so failures would reach the caller
// unwrapped. TODO confirm.
const body: Record<string, unknown> = {
dataset_id: this.datasetId,
filter,
};
// Truthiness check: an omitted or zero limit is left out of the body.
if (opts?.records_limit) {
body.records_limit = opts.records_limit;
}
const response = await this.transport.request(
API_ENDPOINT.DATASET_FILTER,
{
method: 'POST',
body: JSON.stringify(body),
},
);
// assertResponse yields the response text; presumably it rejects on a failed
// response — TODO confirm.
const text = await assertResponse(response);
const result = parseJSON<{ snapshot_id: string }>(text);
return result.snapshot_id;
}

async sample(opts?: DatasetQueryOptions): Promise<string> {
Expand All @@ -82,33 +73,25 @@ export abstract class BaseDataset {
'{snapshot_id}',
snapshotId,
);
try {
const response = await this.transport.request(url);
const text = await assertResponse(response);
return parseJSON<DatasetSnapshotStatus>(text);
} catch (e: unknown) {
wrapAPIError(e, `datasets.${this.name}.getStatus`);
}
const response = await this.transport.request(url);
const text = await assertResponse(response);
return parseJSON<DatasetSnapshotStatus>(text);
}

/**
 * Waits for a snapshot via `pollUntilReady`, then requests its download
 * endpoint and returns the parsed response.
 *
 * NOTE(review): this span comes from a rendered diff whose +/- markers were
 * lost. The `try { … } catch { … }` body and the flat body after it look like
 * the removed and added sides of one change — confirm against the repository
 * before treating both as live code.
 *
 * @param snapshotId - Snapshot identifier; substituted for `{snapshot_id}` in
 *   `API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD` and passed to `pollUntilReady`.
 * @param opts - Optional download options. Only `format` is read here, and it
 *   is passed to the transport under `query` only when truthy.
 * @returns The response text parsed as JSON and typed as `unknown[]`; the
 *   shape is not validated in this method.
 */
async download(
snapshotId: string,
opts?: DatasetDownloadOptions,
): Promise<unknown[]> {
this.logger.debug('download', { snapshotId });
// NOTE(review): presumably the pre-change side of the diff starts here.
try {
await pollUntilReady(snapshotId, (id) => this.getStatus(id));
const url = API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD.replace(
'{snapshot_id}',
snapshotId,
);
const query: Record<string, unknown> = {};
if (opts?.format) query.format = opts.format;
const response = await this.transport.request(url, { query });
const text = await assertResponse(response);
return parseJSON<unknown[]>(text);
} catch (e: unknown) {
// Hands the caught error and a `datasets.<name>.download` label to wrapAPIError;
// what wrapAPIError does with them is not visible here.
wrapAPIError(e, `datasets.${this.name}.download`);
}
// NOTE(review): presumably the post-change side of the diff starts here —
// the same steps with no local try/catch. TODO confirm.
// pollUntilReady receives this.getStatus as its status callback; its polling
// interval and timeout are not visible here.
await pollUntilReady(snapshotId, (id) => this.getStatus(id));
const url = API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD.replace(
'{snapshot_id}',
snapshotId,
);
const query: Record<string, unknown> = {};
// Only a truthy format is forwarded; otherwise `query` stays empty.
if (opts?.format) query.format = opts.format;
const response = await this.transport.request(url, { query });
const text = await assertResponse(response);
return parseJSON<unknown[]>(text);
}
}
Loading