From 52876f9b57cf90ab4c78f6baab4fd80d031d6630 Mon Sep 17 00:00:00 2001 From: "user.mail" Date: Thu, 9 Apr 2026 09:31:14 +0300 Subject: [PATCH 1/4] add datasets, scraper studio, discoverapi and tests --- README.md | 74 +- src/api/datasets/base.ts | 83 +- src/api/datasets/client.ts | 2258 ++++++++++++++++- src/api/datasets/platforms/agoda.ts | 6 + src/api/datasets/platforms/airbnb.ts | 6 + src/api/datasets/platforms/american_eagle.ts | 6 + src/api/datasets/platforms/apple.ts | 11 + .../datasets/platforms/ashley_furniture.ts | 6 + src/api/datasets/platforms/asos.ts | 6 + .../platforms/australia_real_estate.ts | 6 + src/api/datasets/platforms/autozone.ts | 6 + src/api/datasets/platforms/balenciaga.ts | 6 + src/api/datasets/platforms/bbc.ts | 6 + src/api/datasets/platforms/berluti.ts | 6 + src/api/datasets/platforms/bestbuy.ts | 6 + src/api/datasets/platforms/bh.ts | 6 + src/api/datasets/platforms/bluesky.ts | 11 + src/api/datasets/platforms/booking.ts | 11 + src/api/datasets/platforms/bottega_veneta.ts | 6 + src/api/datasets/platforms/carsales.ts | 6 + src/api/datasets/platforms/carters.ts | 6 + src/api/datasets/platforms/celine.ts | 6 + src/api/datasets/platforms/chanel.ts | 6 + src/api/datasets/platforms/chileautos.ts | 6 + src/api/datasets/platforms/cnn.ts | 6 + .../datasets/platforms/companies_enriched.ts | 6 + src/api/datasets/platforms/costco.ts | 6 + .../datasets/platforms/crate_and_barrel.ts | 6 + .../datasets/platforms/creative_commons.ts | 11 + src/api/datasets/platforms/crunchbase.ts | 6 + src/api/datasets/platforms/delvaux.ts | 6 + src/api/datasets/platforms/digikey.ts | 6 + src/api/datasets/platforms/dior.ts | 6 + src/api/datasets/platforms/ebay.ts | 6 + .../datasets/platforms/employees_enriched.ts | 6 + src/api/datasets/platforms/etsy.ts | 6 + src/api/datasets/platforms/facebook.ts | 51 + src/api/datasets/platforms/fanatics.ts | 6 + src/api/datasets/platforms/fendi.ts | 6 + src/api/datasets/platforms/g2.ts | 11 + src/api/datasets/platforms/github.ts | 6 + 
src/api/datasets/platforms/glassdoor.ts | 16 + src/api/datasets/platforms/goodreads.ts | 6 + src/api/datasets/platforms/google_maps.ts | 11 + src/api/datasets/platforms/google_news.ts | 6 + src/api/datasets/platforms/google_play.ts | 11 + src/api/datasets/platforms/google_shopping.ts | 11 + src/api/datasets/platforms/hermes.ts | 6 + src/api/datasets/platforms/hm.ts | 6 + src/api/datasets/platforms/home_depot.ts | 11 + src/api/datasets/platforms/ikea.ts | 6 + src/api/datasets/platforms/imdb.ts | 6 + src/api/datasets/platforms/indeed.ts | 11 + src/api/datasets/platforms/infocasas.ts | 6 + src/api/datasets/platforms/inmuebles24.ts | 6 + src/api/datasets/platforms/kroger.ts | 6 + src/api/datasets/platforms/lazada.ts | 16 + src/api/datasets/platforms/lazboy.ts | 6 + src/api/datasets/platforms/lego.ts | 6 + src/api/datasets/platforms/linkedin.ts | 15 + src/api/datasets/platforms/llbean.ts | 6 + src/api/datasets/platforms/loewe.ts | 6 + src/api/datasets/platforms/lowes.ts | 6 + src/api/datasets/platforms/macys.ts | 6 + src/api/datasets/platforms/mango.ts | 6 + src/api/datasets/platforms/manta.ts | 6 + src/api/datasets/platforms/massimo_dutti.ts | 6 + src/api/datasets/platforms/mattress_firm.ts | 6 + src/api/datasets/platforms/mediamarkt.ts | 6 + src/api/datasets/platforms/mercadolivre.ts | 6 + src/api/datasets/platforms/metrocuadrado.ts | 6 + src/api/datasets/platforms/microcenter.ts | 6 + src/api/datasets/platforms/montblanc.ts | 6 + src/api/datasets/platforms/mouser.ts | 6 + src/api/datasets/platforms/moynat.ts | 6 + src/api/datasets/platforms/mybobs.ts | 6 + src/api/datasets/platforms/myntra.ts | 6 + src/api/datasets/platforms/naver.ts | 6 + src/api/datasets/platforms/nba.ts | 6 + src/api/datasets/platforms/olx.ts | 6 + src/api/datasets/platforms/otodom.ts | 6 + src/api/datasets/platforms/owler.ts | 6 + src/api/datasets/platforms/ozon.ts | 6 + src/api/datasets/platforms/pinterest.ts | 11 + src/api/datasets/platforms/pitchbook.ts | 6 + 
src/api/datasets/platforms/prada.ts | 6 + src/api/datasets/platforms/properati.ts | 6 + src/api/datasets/platforms/quora.ts | 6 + .../datasets/platforms/raymour_flanigan.ts | 6 + src/api/datasets/platforms/realtor.ts | 6 + src/api/datasets/platforms/reddit.ts | 11 + src/api/datasets/platforms/rona.ts | 6 + src/api/datasets/platforms/sephora.ts | 6 + src/api/datasets/platforms/shein.ts | 6 + src/api/datasets/platforms/shopee.ts | 6 + src/api/datasets/platforms/sleep_number.ts | 6 + src/api/datasets/platforms/slintel.ts | 6 + src/api/datasets/platforms/snapchat.ts | 6 + src/api/datasets/platforms/target.ts | 6 + src/api/datasets/platforms/toctoc.ts | 6 + src/api/datasets/platforms/tokopedia.ts | 6 + src/api/datasets/platforms/toysrus.ts | 6 + src/api/datasets/platforms/trustpilot.ts | 6 + src/api/datasets/platforms/trustradius.ts | 6 + src/api/datasets/platforms/us_lawyers.ts | 6 + src/api/datasets/platforms/ventureradar.ts | 6 + src/api/datasets/platforms/vimeo.ts | 6 + src/api/datasets/platforms/walmart.ts | 11 + src/api/datasets/platforms/wayfair.ts | 6 + src/api/datasets/platforms/webmotors.ts | 6 + src/api/datasets/platforms/wikipedia.ts | 6 + src/api/datasets/platforms/wildberries.ts | 6 + src/api/datasets/platforms/world_data.ts | 11 + src/api/datasets/platforms/xing.ts | 6 + src/api/datasets/platforms/yahoo_finance.ts | 6 + src/api/datasets/platforms/yapo.ts | 6 + src/api/datasets/platforms/yelp.ts | 11 + src/api/datasets/platforms/youtube.ts | 16 + src/api/datasets/platforms/ysl.ts | 6 + src/api/datasets/platforms/zalando.ts | 6 + src/api/datasets/platforms/zara.ts | 11 + src/api/datasets/platforms/zillow.ts | 11 + src/api/datasets/platforms/zonaprop.ts | 6 + src/api/datasets/platforms/zoominfo.ts | 6 + src/api/datasets/platforms/zoopla.ts | 6 + src/api/discover/index.ts | 5 + src/api/discover/job.ts | 142 ++ src/api/discover/result.ts | 53 + src/api/discover/service.ts | 89 + src/api/scrape/base.ts | 55 +- src/api/scrape/digikey.ts | 23 + 
src/api/scrape/job.ts | 4 +- src/api/scrape/perplexity.ts | 3 +- src/api/scrape/pinterest.ts | 66 + src/api/scrape/router.ts | 3 + src/api/scrape/snapshot.ts | 123 +- src/api/scrape/tiktok.ts | 86 + src/api/scraperstudio/index.ts | 3 + src/api/scraperstudio/job.ts | 107 + src/api/scraperstudio/service.ts | 146 ++ src/api/unlocker/request.ts | 72 +- src/api/zones.ts | 37 +- src/client.ts | 55 + src/core/transport.ts | 75 +- src/index.ts | 18 + src/models/datasets.ts | 64 + src/schemas/client.ts | 12 +- src/schemas/discover.ts | 34 + src/schemas/responses.ts | 31 + src/schemas/scraperstudio.ts | 95 + src/types/client.ts | 5 +- src/types/datasets.ts | 12 +- src/types/discover.ts | 12 + src/utils/constants.ts | 29 + src/utils/error-utils.ts | 11 - src/utils/misc.ts | 23 + src/utils/polling.ts | 25 +- tests/discover.test.ts | 290 +++ tests/error-classification.test.ts | 92 +- tests/integration/serp.test.ts | 53 + tests/integration/web_unlocker.test.ts | 49 + tests/polling.test.ts | 18 +- tests/response-schemas.test.ts | 150 ++ tests/scraper-studio.test.ts | 378 +++ tests/snapshot.test.ts | 4 +- tests/transport.test.ts | 166 +- 166 files changed, 5660 insertions(+), 374 deletions(-) create mode 100644 src/api/datasets/platforms/agoda.ts create mode 100644 src/api/datasets/platforms/airbnb.ts create mode 100644 src/api/datasets/platforms/american_eagle.ts create mode 100644 src/api/datasets/platforms/apple.ts create mode 100644 src/api/datasets/platforms/ashley_furniture.ts create mode 100644 src/api/datasets/platforms/asos.ts create mode 100644 src/api/datasets/platforms/australia_real_estate.ts create mode 100644 src/api/datasets/platforms/autozone.ts create mode 100644 src/api/datasets/platforms/balenciaga.ts create mode 100644 src/api/datasets/platforms/bbc.ts create mode 100644 src/api/datasets/platforms/berluti.ts create mode 100644 src/api/datasets/platforms/bestbuy.ts create mode 100644 src/api/datasets/platforms/bh.ts create mode 100644 
src/api/datasets/platforms/bluesky.ts create mode 100644 src/api/datasets/platforms/booking.ts create mode 100644 src/api/datasets/platforms/bottega_veneta.ts create mode 100644 src/api/datasets/platforms/carsales.ts create mode 100644 src/api/datasets/platforms/carters.ts create mode 100644 src/api/datasets/platforms/celine.ts create mode 100644 src/api/datasets/platforms/chanel.ts create mode 100644 src/api/datasets/platforms/chileautos.ts create mode 100644 src/api/datasets/platforms/cnn.ts create mode 100644 src/api/datasets/platforms/companies_enriched.ts create mode 100644 src/api/datasets/platforms/costco.ts create mode 100644 src/api/datasets/platforms/crate_and_barrel.ts create mode 100644 src/api/datasets/platforms/creative_commons.ts create mode 100644 src/api/datasets/platforms/crunchbase.ts create mode 100644 src/api/datasets/platforms/delvaux.ts create mode 100644 src/api/datasets/platforms/digikey.ts create mode 100644 src/api/datasets/platforms/dior.ts create mode 100644 src/api/datasets/platforms/ebay.ts create mode 100644 src/api/datasets/platforms/employees_enriched.ts create mode 100644 src/api/datasets/platforms/etsy.ts create mode 100644 src/api/datasets/platforms/facebook.ts create mode 100644 src/api/datasets/platforms/fanatics.ts create mode 100644 src/api/datasets/platforms/fendi.ts create mode 100644 src/api/datasets/platforms/g2.ts create mode 100644 src/api/datasets/platforms/github.ts create mode 100644 src/api/datasets/platforms/glassdoor.ts create mode 100644 src/api/datasets/platforms/goodreads.ts create mode 100644 src/api/datasets/platforms/google_maps.ts create mode 100644 src/api/datasets/platforms/google_news.ts create mode 100644 src/api/datasets/platforms/google_play.ts create mode 100644 src/api/datasets/platforms/google_shopping.ts create mode 100644 src/api/datasets/platforms/hermes.ts create mode 100644 src/api/datasets/platforms/hm.ts create mode 100644 src/api/datasets/platforms/home_depot.ts create mode 100644 
src/api/datasets/platforms/ikea.ts create mode 100644 src/api/datasets/platforms/imdb.ts create mode 100644 src/api/datasets/platforms/indeed.ts create mode 100644 src/api/datasets/platforms/infocasas.ts create mode 100644 src/api/datasets/platforms/inmuebles24.ts create mode 100644 src/api/datasets/platforms/kroger.ts create mode 100644 src/api/datasets/platforms/lazada.ts create mode 100644 src/api/datasets/platforms/lazboy.ts create mode 100644 src/api/datasets/platforms/lego.ts create mode 100644 src/api/datasets/platforms/llbean.ts create mode 100644 src/api/datasets/platforms/loewe.ts create mode 100644 src/api/datasets/platforms/lowes.ts create mode 100644 src/api/datasets/platforms/macys.ts create mode 100644 src/api/datasets/platforms/mango.ts create mode 100644 src/api/datasets/platforms/manta.ts create mode 100644 src/api/datasets/platforms/massimo_dutti.ts create mode 100644 src/api/datasets/platforms/mattress_firm.ts create mode 100644 src/api/datasets/platforms/mediamarkt.ts create mode 100644 src/api/datasets/platforms/mercadolivre.ts create mode 100644 src/api/datasets/platforms/metrocuadrado.ts create mode 100644 src/api/datasets/platforms/microcenter.ts create mode 100644 src/api/datasets/platforms/montblanc.ts create mode 100644 src/api/datasets/platforms/mouser.ts create mode 100644 src/api/datasets/platforms/moynat.ts create mode 100644 src/api/datasets/platforms/mybobs.ts create mode 100644 src/api/datasets/platforms/myntra.ts create mode 100644 src/api/datasets/platforms/naver.ts create mode 100644 src/api/datasets/platforms/nba.ts create mode 100644 src/api/datasets/platforms/olx.ts create mode 100644 src/api/datasets/platforms/otodom.ts create mode 100644 src/api/datasets/platforms/owler.ts create mode 100644 src/api/datasets/platforms/ozon.ts create mode 100644 src/api/datasets/platforms/pinterest.ts create mode 100644 src/api/datasets/platforms/pitchbook.ts create mode 100644 src/api/datasets/platforms/prada.ts create mode 100644 
src/api/datasets/platforms/properati.ts create mode 100644 src/api/datasets/platforms/quora.ts create mode 100644 src/api/datasets/platforms/raymour_flanigan.ts create mode 100644 src/api/datasets/platforms/realtor.ts create mode 100644 src/api/datasets/platforms/reddit.ts create mode 100644 src/api/datasets/platforms/rona.ts create mode 100644 src/api/datasets/platforms/sephora.ts create mode 100644 src/api/datasets/platforms/shein.ts create mode 100644 src/api/datasets/platforms/shopee.ts create mode 100644 src/api/datasets/platforms/sleep_number.ts create mode 100644 src/api/datasets/platforms/slintel.ts create mode 100644 src/api/datasets/platforms/snapchat.ts create mode 100644 src/api/datasets/platforms/target.ts create mode 100644 src/api/datasets/platforms/toctoc.ts create mode 100644 src/api/datasets/platforms/tokopedia.ts create mode 100644 src/api/datasets/platforms/toysrus.ts create mode 100644 src/api/datasets/platforms/trustpilot.ts create mode 100644 src/api/datasets/platforms/trustradius.ts create mode 100644 src/api/datasets/platforms/us_lawyers.ts create mode 100644 src/api/datasets/platforms/ventureradar.ts create mode 100644 src/api/datasets/platforms/vimeo.ts create mode 100644 src/api/datasets/platforms/walmart.ts create mode 100644 src/api/datasets/platforms/wayfair.ts create mode 100644 src/api/datasets/platforms/webmotors.ts create mode 100644 src/api/datasets/platforms/wikipedia.ts create mode 100644 src/api/datasets/platforms/wildberries.ts create mode 100644 src/api/datasets/platforms/world_data.ts create mode 100644 src/api/datasets/platforms/xing.ts create mode 100644 src/api/datasets/platforms/yahoo_finance.ts create mode 100644 src/api/datasets/platforms/yapo.ts create mode 100644 src/api/datasets/platforms/yelp.ts create mode 100644 src/api/datasets/platforms/youtube.ts create mode 100644 src/api/datasets/platforms/ysl.ts create mode 100644 src/api/datasets/platforms/zalando.ts create mode 100644 src/api/datasets/platforms/zara.ts 
create mode 100644 src/api/datasets/platforms/zillow.ts create mode 100644 src/api/datasets/platforms/zonaprop.ts create mode 100644 src/api/datasets/platforms/zoominfo.ts create mode 100644 src/api/datasets/platforms/zoopla.ts create mode 100644 src/api/discover/index.ts create mode 100644 src/api/discover/job.ts create mode 100644 src/api/discover/result.ts create mode 100644 src/api/discover/service.ts create mode 100644 src/api/scrape/pinterest.ts create mode 100644 src/api/scraperstudio/index.ts create mode 100644 src/api/scraperstudio/job.ts create mode 100644 src/api/scraperstudio/service.ts create mode 100644 src/models/datasets.ts create mode 100644 src/schemas/discover.ts create mode 100644 src/schemas/responses.ts create mode 100644 src/schemas/scraperstudio.ts create mode 100644 src/types/discover.ts delete mode 100644 src/utils/error-utils.ts create mode 100644 tests/discover.test.ts create mode 100644 tests/integration/serp.test.ts create mode 100644 tests/integration/web_unlocker.test.ts create mode 100644 tests/response-schemas.test.ts create mode 100644 tests/scraper-studio.test.ts diff --git a/README.md b/README.md index bc6c623..5b493ad 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,9 @@ await client.close(); - **Web Scraping** — Scrape any website using anti-bot detection bypass and proxy support - **Search Engine Results** — Google, Bing, and Yandex search with batch support - **Platform Scrapers** — Structured data collection from LinkedIn, Amazon, Instagram, TikTok, YouTube, Reddit, and more -- **Datasets** — Access 19 pre-built datasets (Amazon, LinkedIn, Instagram, TikTok, X/Twitter) with query/download support +- **Discover API** — AI-powered web search with intent-based relevance ranking +- **Scraper Studio** — Trigger and fetch results from custom scrapers built in Bright Data's Scraper Studio +- **Datasets** — Access 126 pre-built datasets across dozens of platforms with query/download support - **Parallel Processing** — Concurrent 
processing for multiple URLs or queries - **Robust Error Handling** — Typed error classes with retry logic - **Zone Management** — Automatic zone creation and management @@ -168,11 +170,77 @@ console.log(result.status); // 'ready' console.log(result.rowCount); ``` -**Available platforms:** `linkedin`, `amazon`, `instagram`, `tiktok`, `youtube`, `reddit`, `facebook`, `chatGPT`, `digikey`, `perplexity` +**Available platforms:** `linkedin`, `amazon`, `instagram`, `tiktok`, `youtube`, `reddit`, `facebook`, `pinterest`, `chatGPT`, `digikey`, `perplexity` + +### Discover API + +AI-powered web search with relevance ranking based on intent. + +```javascript +// Basic search +const result = await client.discover('artificial intelligence trends 2026'); +console.log(result.data); // [{ link, title, description, relevance_score }, ...] + +// With intent for semantic ranking +const result = await client.discover('Tesla battery technology', { + intent: 'recent breakthroughs in EV battery chemistry', +}); + +// With filtering and localization +const result = await client.discover('sustainable fashion brands', { + intent: 'eco-friendly clothing companies', + filterKeywords: ['sustainability', 'eco-friendly', 'organic'], + country: 'us', + numResults: 10, +}); + +// Include full page content +const result = await client.discover('python asyncio tutorial', { + includeContent: true, + numResults: 3, +}); + +// Manual trigger/poll/fetch +const job = await client.discoverTrigger('market research SaaS', { + intent: 'competitor pricing strategies', +}); +await job.wait({ timeout: 60_000 }); +const data = await job.fetch(); +``` + +### Scraper Studio + +Trigger and fetch results from your custom scrapers built in [Scraper Studio](https://brightdata.com/cp/data_collector). 
+ +```javascript +// Orchestrated — trigger + poll + return results +const results = await client.scraperStudio.run('c_your_collector_id', { + input: { url: 'https://example.com/product/1' }, +}); +// results: RunResult[] — one per input with { input, data, error, responseId, elapsedMs } + +// Multiple inputs (processed sequentially) +const results = await client.scraperStudio.run('c_your_collector_id', { + input: [ + { url: 'https://example.com/product/1' }, + { url: 'https://example.com/product/2' }, + ], +}); + +// Manual control — trigger, then poll yourself +const job = await client.scraperStudio.trigger('c_your_collector_id', { + url: 'https://example.com/product/1', +}); +const data = await job.waitAndFetch(); + +// Check job status (by job ID from the dashboard) +const status = await client.scraperStudio.status('j_abc123'); +console.log(status.status); // 'queued' | 'running' | 'done' | 'failed' +``` ### Datasets -Access pre-built datasets for querying and downloading structured data snapshots. +Access 126 pre-built datasets for querying and downloading structured data snapshots. 
```javascript const ds = client.datasets; diff --git a/src/api/datasets/base.ts b/src/api/datasets/base.ts index b835b4d..361738d 100644 --- a/src/api/datasets/base.ts +++ b/src/api/datasets/base.ts @@ -2,8 +2,7 @@ import { Transport, assertResponse } from '../../core/transport'; import { API_ENDPOINT } from '../../utils/constants'; import { parseJSON } from '../../utils/misc'; import { getLogger } from '../../utils/logger'; -import { wrapAPIError } from '../../utils/error-utils'; -import { pollUntilReady } from '../../utils/polling'; +import { pollUntilStatus } from '../../utils/polling'; import type { DatasetMetadata, DatasetSnapshotStatus, @@ -34,13 +33,9 @@ export abstract class BaseDataset { '{dataset_id}', this.datasetId, ); - try { - const response = await this.transport.request(url); - const text = await assertResponse(response); - return parseJSON(text); - } catch (e: unknown) { - wrapAPIError(e, `datasets.${this.name}.getMetadata`); - } + const response = await this.transport.request(url); + const text = await assertResponse(response); + return parseJSON(text); } async query( @@ -48,27 +43,23 @@ export abstract class BaseDataset { opts?: DatasetQueryOptions, ): Promise { this.logger.debug('query', { filter }); - try { - const body: Record = { - dataset_id: this.datasetId, - filter, - }; - if (opts?.records_limit) { - body.records_limit = opts.records_limit; - } - const response = await this.transport.request( - API_ENDPOINT.DATASET_FILTER, - { - method: 'POST', - body: JSON.stringify(body), - }, - ); - const text = await assertResponse(response); - const result = parseJSON<{ snapshot_id: string }>(text); - return result.snapshot_id; - } catch (e: unknown) { - wrapAPIError(e, `datasets.${this.name}.query`); + const body: Record = { + dataset_id: this.datasetId, + filter, + }; + if (opts?.records_limit) { + body.records_limit = opts.records_limit; } + const response = await this.transport.request( + API_ENDPOINT.DATASET_FILTER, + { + method: 'POST', + body: 
JSON.stringify(body), + }, + ); + const text = await assertResponse(response); + const result = parseJSON<{ snapshot_id: string }>(text); + return result.snapshot_id; } async sample(opts?: DatasetQueryOptions): Promise { @@ -82,13 +73,9 @@ export abstract class BaseDataset { '{snapshot_id}', snapshotId, ); - try { - const response = await this.transport.request(url); - const text = await assertResponse(response); - return parseJSON(text); - } catch (e: unknown) { - wrapAPIError(e, `datasets.${this.name}.getStatus`); - } + const response = await this.transport.request(url); + const text = await assertResponse(response); + return parseJSON(text); } async download( @@ -96,19 +83,15 @@ export abstract class BaseDataset { opts?: DatasetDownloadOptions, ): Promise { this.logger.debug('download', { snapshotId }); - try { - await pollUntilReady(snapshotId, (id) => this.getStatus(id)); - const url = API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD.replace( - '{snapshot_id}', - snapshotId, - ); - const query: Record = {}; - if (opts?.format) query.format = opts.format; - const response = await this.transport.request(url, { query }); - const text = await assertResponse(response); - return parseJSON(text); - } catch (e: unknown) { - wrapAPIError(e, `datasets.${this.name}.download`); - } + await pollUntilStatus(snapshotId, (id) => this.getStatus(id)); + const url = API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD.replace( + '{snapshot_id}', + snapshotId, + ); + const query: Record = {}; + if (opts?.format) query.format = opts.format; + const response = await this.transport.request(url, { query }); + const text = await assertResponse(response); + return parseJSON(text); } } diff --git a/src/api/datasets/client.ts b/src/api/datasets/client.ts index ae91bea..befccce 100644 --- a/src/api/datasets/client.ts +++ b/src/api/datasets/client.ts @@ -2,12 +2,14 @@ import { Transport, assertResponse } from '../../core/transport'; import { API_ENDPOINT } from '../../utils/constants'; import { parseJSON } from 
'../../utils/misc'; import { getLogger } from '../../utils/logger'; -import { wrapAPIError } from '../../utils/error-utils'; import { BaseDataset } from './base'; import type { DatasetInfo } from './types'; import { LinkedinProfilesDataset, LinkedinCompaniesDataset, + LinkedinJobListingsDataset, + LinkedinPostsDataset, + LinkedinProfilesJobListingsDataset, } from './platforms/linkedin'; import { AmazonProductsDataset, @@ -34,6 +36,198 @@ import { XTwitterPostsDataset, XTwitterProfilesDataset, } from './platforms/x_twitter'; +import { AgodaPropertiesDataset } from './platforms/agoda'; +import { AirbnbPropertiesDataset } from './platforms/airbnb'; +import { AmericanEagleProductsDataset } from './platforms/american_eagle'; +import { + AppleAppStoreDataset, + AppleAppStoreReviewsDataset, +} from './platforms/apple'; +import { AsosProductsDataset } from './platforms/asos'; +import { AshleyFurnitureProductsDataset } from './platforms/ashley_furniture'; +import { AustraliaRealEstateDataset } from './platforms/australia_real_estate'; +import { AutozoneProductsDataset } from './platforms/autozone'; +import { BalenciagaProductsDataset } from './platforms/balenciaga'; +import { BbcNewsDataset } from './platforms/bbc'; +import { BerlutiProductsDataset } from './platforms/berluti'; +import { BestBuyProductsDataset } from './platforms/bestbuy'; +import { BhProductsDataset } from './platforms/bh'; +import { + BlueskyPostsDataset, + BlueskyTopProfilesDataset, +} from './platforms/bluesky'; +import { + BookingHotelListingsDataset, + BookingListingsSearchDataset, +} from './platforms/booking'; +import { BottegaVenetaProductsDataset } from './platforms/bottega_veneta'; +import { CarsalesListingsDataset } from './platforms/carsales'; +import { CartersProductsDataset } from './platforms/carters'; +import { CelineProductsDataset } from './platforms/celine'; +import { ChanelProductsDataset } from './platforms/chanel'; +import { ChileautosChileDataset } from './platforms/chileautos'; 
+import { CnnNewsDataset } from './platforms/cnn'; +import { CompaniesEnrichedDataset } from './platforms/companies_enriched'; +import { CostcoProductsDataset } from './platforms/costco'; +import { CrateAndBarrelProductsDataset } from './platforms/crate_and_barrel'; +import { + CreativeCommons3dModelsDataset, + CreativeCommonsImagesDataset, +} from './platforms/creative_commons'; +import { CrunchbaseCompaniesDataset } from './platforms/crunchbase'; +import { DelvauxProductsDataset } from './platforms/delvaux'; +import { DigikeyProductsDataset } from './platforms/digikey'; +import { DiorProductsDataset } from './platforms/dior'; +import { EbayProductsDataset } from './platforms/ebay'; +import { EmployeesEnrichedDataset } from './platforms/employees_enriched'; +import { EtsyProductsDataset } from './platforms/etsy'; +import { + FacebookCommentsDataset, + FacebookCompanyReviewsDataset, + FacebookEventsDataset, + FacebookGroupPostsDataset, + FacebookMarketplaceDataset, + FacebookPagesPostsDataset, + FacebookPagesProfilesDataset, + FacebookPostsByUrlDataset, + FacebookProfilesDataset, + FacebookReelsDataset, +} from './platforms/facebook'; +import { FanaticsProductsDataset } from './platforms/fanatics'; +import { FendiProductsDataset } from './platforms/fendi'; +import { G2ProductsDataset, G2ReviewsDataset } from './platforms/g2'; +import { GithubRepositoriesDataset } from './platforms/github'; +import { + GlassdoorCompaniesDataset, + GlassdoorJobsDataset, + GlassdoorReviewsDataset, +} from './platforms/glassdoor'; +import { GoodreadsBooksDataset } from './platforms/goodreads'; +import { + GoogleMapsFullInfoDataset, + GoogleMapsReviewsDataset, +} from './platforms/google_maps'; +import { GoogleNewsDataset } from './platforms/google_news'; +import { + GooglePlayReviewsDataset, + GooglePlayStoreDataset, +} from './platforms/google_play'; +import { + GoogleShoppingProductsDataset, + GoogleShoppingSearchUsDataset, +} from './platforms/google_shopping'; +import { 
HermesProductsDataset } from './platforms/hermes'; +import { HmProductsDataset } from './platforms/hm'; +import { + HomeDepotCaProductsDataset, + HomeDepotUsProductsDataset, +} from './platforms/home_depot'; +import { IkeaProductsDataset } from './platforms/ikea'; +import { ImdbMoviesDataset } from './platforms/imdb'; +import { + IndeedCompaniesDataset, + IndeedJobsDataset, +} from './platforms/indeed'; +import { InfocasasUruguayDataset } from './platforms/infocasas'; +import { Inmuebles24MexicoDataset } from './platforms/inmuebles24'; +import { KrogerProductsDataset } from './platforms/kroger'; +import { + LazadaProductsDataset, + LazadaProductsSearchDataset, + LazadaReviewsDataset, +} from './platforms/lazada'; +import { LaZBoyProductsDataset } from './platforms/lazboy'; +import { LegoProductsDataset } from './platforms/lego'; +import { LlBeanProductsDataset } from './platforms/llbean'; +import { LoeweProductsDataset } from './platforms/loewe'; +import { LowesProductsDataset } from './platforms/lowes'; +import { MacysProductsDataset } from './platforms/macys'; +import { MangoProductsDataset } from './platforms/mango'; +import { MantaBusinessesDataset } from './platforms/manta'; +import { MassimoDuttiProductsDataset } from './platforms/massimo_dutti'; +import { MattressFirmProductsDataset } from './platforms/mattress_firm'; +import { MediamarktProductsDataset } from './platforms/mediamarkt'; +import { MercadolivreProductsDataset } from './platforms/mercadolivre'; +import { MetrocuadradoPropertiesDataset } from './platforms/metrocuadrado'; +import { MicroCenterProductsDataset } from './platforms/microcenter'; +import { MontblancProductsDataset } from './platforms/montblanc'; +import { MouserProductsDataset } from './platforms/mouser'; +import { MoynatProductsDataset } from './platforms/moynat'; +import { MybobsProductsDataset } from './platforms/mybobs'; +import { MyntraProductsDataset } from './platforms/myntra'; +import { NaverProductsDataset } from 
'./platforms/naver'; +import { NbaPlayersStatsDataset } from './platforms/nba'; +import { OlxBrazilDataset } from './platforms/olx'; +import { OtodomPolandDataset } from './platforms/otodom'; +import { OwlerCompaniesDataset } from './platforms/owler'; +import { OzonProductsDataset } from './platforms/ozon'; +import { + PinterestPostsDataset, + PinterestProfilesDataset, +} from './platforms/pinterest'; +import { PitchBookCompaniesDataset } from './platforms/pitchbook'; +import { PradaProductsDataset } from './platforms/prada'; +import { ProperatiPropertiesDataset } from './platforms/properati'; +import { QuoraPostsDataset } from './platforms/quora'; +import { RaymourFlaniganProductsDataset } from './platforms/raymour_flanigan'; +import { RealtorInternationalDataset } from './platforms/realtor'; +import { + RedditCommentsDataset, + RedditPostsDataset, +} from './platforms/reddit'; +import { RonaProductsDataset } from './platforms/rona'; +import { SephoraProductsDataset } from './platforms/sephora'; +import { SheinProductsDataset } from './platforms/shein'; +import { ShopeeProductsDataset } from './platforms/shopee'; +import { SleepNumberProductsDataset } from './platforms/sleep_number'; +import { SlintelCompaniesDataset } from './platforms/slintel'; +import { SnapchatPostsDataset } from './platforms/snapchat'; +import { TargetProductsDataset } from './platforms/target'; +import { ToctocPropertiesDataset } from './platforms/toctoc'; +import { TokopediaProductsDataset } from './platforms/tokopedia'; +import { ToysRUsProductsDataset } from './platforms/toysrus'; +import { TrustpilotReviewsDataset } from './platforms/trustpilot'; +import { TrustRadiusReviewsDataset } from './platforms/trustradius'; +import { UsLawyersDataset } from './platforms/us_lawyers'; +import { VentureRadarCompaniesDataset } from './platforms/ventureradar'; +import { VimeoVideosDataset } from './platforms/vimeo'; +import { + WalmartProductsDataset, + WalmartSellersInfoDataset, +} from 
'./platforms/walmart'; +import { WayfairProductsDataset } from './platforms/wayfair'; +import { WebmotorsBrasilDataset } from './platforms/webmotors'; +import { WikipediaArticlesDataset } from './platforms/wikipedia'; +import { WildberriesProductsDataset } from './platforms/wildberries'; +import { + WorldPopulationDataset, + WorldZipcodesDataset, +} from './platforms/world_data'; +import { XingProfilesDataset } from './platforms/xing'; +import { YahooFinanceBusinessesDataset } from './platforms/yahoo_finance'; +import { YapoChileDataset } from './platforms/yapo'; +import { + YelpBusinessesDataset, + YelpReviewsDataset, +} from './platforms/yelp'; +import { + YoutubeCommentsDataset, + YoutubeProfilesDataset, + YoutubeVideosDataset, +} from './platforms/youtube'; +import { YslProductsDataset } from './platforms/ysl'; +import { ZalandoProductsDataset } from './platforms/zalando'; +import { + ZaraProductsDataset, + ZaraHomeProductsDataset, +} from './platforms/zara'; +import { + ZillowPriceHistoryDataset, + ZillowPropertiesDataset, +} from './platforms/zillow'; +import { ZonapropArgentinaDataset } from './platforms/zonaprop'; +import { ZoomInfoCompaniesDataset } from './platforms/zoominfo'; +import { ZooplaPropertiesDataset } from './platforms/zoopla'; export class DatasetsClient { private transport: Transport; @@ -46,15 +240,11 @@ export class DatasetsClient { async list(): Promise { this.logger.debug('list'); - try { - const response = await this.transport.request( - API_ENDPOINT.DATASET_LIST, - ); - const text = await assertResponse(response); - return parseJSON(text); - } catch (e: unknown) { - wrapAPIError(e, 'datasets.list'); - } + const response = await this.transport.request( + API_ENDPOINT.DATASET_LIST, + ); + const text = await assertResponse(response); + return parseJSON(text); } get linkedinProfiles(): LinkedinProfilesDataset { @@ -264,4 +454,2052 @@ export class DatasetsClient { } return this.cache.get('x_twitter_profiles') as XTwitterProfilesDataset; } + 
// Lazy dataset accessors (Agoda … PitchBook).
//
// Every getter below follows the same steps: look the dataset up in
// `this.cache` under a fixed string key, construct it with the client's
// transport when the key is absent, then hand back whatever the cache
// holds for that key.

// ── Agoda ──

get agodaProperties(): AgodaPropertiesDataset {
  const key = 'agoda_properties';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AgodaPropertiesDataset(options));
  }
  return this.cache.get(key) as AgodaPropertiesDataset;
}

// ── Airbnb ──

get airbnbProperties(): AirbnbPropertiesDataset {
  const key = 'airbnb_properties';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AirbnbPropertiesDataset(options));
  }
  return this.cache.get(key) as AirbnbPropertiesDataset;
}

// ── American Eagle ──

get americanEagleProducts(): AmericanEagleProductsDataset {
  const key = 'american_eagle_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AmericanEagleProductsDataset(options));
  }
  return this.cache.get(key) as AmericanEagleProductsDataset;
}

// ── Apple ──

get appleAppStore(): AppleAppStoreDataset {
  const key = 'apple_app_store';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AppleAppStoreDataset(options));
  }
  return this.cache.get(key) as AppleAppStoreDataset;
}

get appleAppStoreReviews(): AppleAppStoreReviewsDataset {
  const key = 'apple_app_store_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AppleAppStoreReviewsDataset(options));
  }
  return this.cache.get(key) as AppleAppStoreReviewsDataset;
}

// ── ASOS ──

get asosProducts(): AsosProductsDataset {
  const key = 'asos_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AsosProductsDataset(options));
  }
  return this.cache.get(key) as AsosProductsDataset;
}

// ── Ashley Furniture ──

get ashleyFurnitureProducts(): AshleyFurnitureProductsDataset {
  const key = 'ashley_furniture_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AshleyFurnitureProductsDataset(options));
  }
  return this.cache.get(key) as AshleyFurnitureProductsDataset;
}

// ── Australia Real Estate ──

get australiaRealEstate(): AustraliaRealEstateDataset {
  const key = 'australia_real_estate';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AustraliaRealEstateDataset(options));
  }
  return this.cache.get(key) as AustraliaRealEstateDataset;
}

// ── Autozone ──

get autozoneProducts(): AutozoneProductsDataset {
  const key = 'autozone_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new AutozoneProductsDataset(options));
  }
  return this.cache.get(key) as AutozoneProductsDataset;
}

// ── Balenciaga ──

get balenciagaProducts(): BalenciagaProductsDataset {
  const key = 'balenciaga_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BalenciagaProductsDataset(options));
  }
  return this.cache.get(key) as BalenciagaProductsDataset;
}

// ── BBC ──

get bbcNews(): BbcNewsDataset {
  const key = 'bbc_news';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BbcNewsDataset(options));
  }
  return this.cache.get(key) as BbcNewsDataset;
}

// ── Berluti ──

get berlutiProducts(): BerlutiProductsDataset {
  const key = 'berluti_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BerlutiProductsDataset(options));
  }
  return this.cache.get(key) as BerlutiProductsDataset;
}

// ── Best Buy ──

get bestBuyProducts(): BestBuyProductsDataset {
  const key = 'bestbuy_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BestBuyProductsDataset(options));
  }
  return this.cache.get(key) as BestBuyProductsDataset;
}

// ── B&H ──

get bhProducts(): BhProductsDataset {
  const key = 'bh_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BhProductsDataset(options));
  }
  return this.cache.get(key) as BhProductsDataset;
}

// ── Bluesky ──

get blueskyPosts(): BlueskyPostsDataset {
  const key = 'bluesky_posts';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BlueskyPostsDataset(options));
  }
  return this.cache.get(key) as BlueskyPostsDataset;
}

get blueskyTopProfiles(): BlueskyTopProfilesDataset {
  const key = 'bluesky_top_profiles';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BlueskyTopProfilesDataset(options));
  }
  return this.cache.get(key) as BlueskyTopProfilesDataset;
}

// ── Booking ──

get bookingHotelListings(): BookingHotelListingsDataset {
  const key = 'booking_hotel_listings';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BookingHotelListingsDataset(options));
  }
  return this.cache.get(key) as BookingHotelListingsDataset;
}

get bookingListingsSearch(): BookingListingsSearchDataset {
  const key = 'booking_listings_search';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BookingListingsSearchDataset(options));
  }
  return this.cache.get(key) as BookingListingsSearchDataset;
}

// ── Bottega Veneta ──

get bottegaVenetaProducts(): BottegaVenetaProductsDataset {
  const key = 'bottegaveneta_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new BottegaVenetaProductsDataset(options));
  }
  return this.cache.get(key) as BottegaVenetaProductsDataset;
}

// ── Carsales ──

get carsalesListings(): CarsalesListingsDataset {
  const key = 'carsales_listings';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CarsalesListingsDataset(options));
  }
  return this.cache.get(key) as CarsalesListingsDataset;
}

// ── Carter's ──

get cartersProducts(): CartersProductsDataset {
  const key = 'carters_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CartersProductsDataset(options));
  }
  return this.cache.get(key) as CartersProductsDataset;
}

// ── Celine ──

get celineProducts(): CelineProductsDataset {
  const key = 'celine_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CelineProductsDataset(options));
  }
  return this.cache.get(key) as CelineProductsDataset;
}

// ── Chanel ──

get chanelProducts(): ChanelProductsDataset {
  const key = 'chanel_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new ChanelProductsDataset(options));
  }
  return this.cache.get(key) as ChanelProductsDataset;
}

// ── Chileautos ──

get chileautosChile(): ChileautosChileDataset {
  const key = 'chileautos_chile';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new ChileautosChileDataset(options));
  }
  return this.cache.get(key) as ChileautosChileDataset;
}

// ── CNN ──

get cnnNews(): CnnNewsDataset {
  const key = 'cnn_news';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CnnNewsDataset(options));
  }
  return this.cache.get(key) as CnnNewsDataset;
}

// ── Companies Enriched ──

get companiesEnriched(): CompaniesEnrichedDataset {
  const key = 'companies_enriched';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CompaniesEnrichedDataset(options));
  }
  return this.cache.get(key) as CompaniesEnrichedDataset;
}

// ── Costco ──

get costcoProducts(): CostcoProductsDataset {
  const key = 'costco_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CostcoProductsDataset(options));
  }
  return this.cache.get(key) as CostcoProductsDataset;
}

// ── Crate & Barrel ──

get crateAndBarrelProducts(): CrateAndBarrelProductsDataset {
  const key = 'crateandbarrel_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CrateAndBarrelProductsDataset(options));
  }
  return this.cache.get(key) as CrateAndBarrelProductsDataset;
}

// ── Creative Commons ──

get creativeCommons3dModels(): CreativeCommons3dModelsDataset {
  const key = 'creative_commons_3d_models';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CreativeCommons3dModelsDataset(options));
  }
  return this.cache.get(key) as CreativeCommons3dModelsDataset;
}

get creativeCommonsImages(): CreativeCommonsImagesDataset {
  const key = 'creative_commons_images';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CreativeCommonsImagesDataset(options));
  }
  return this.cache.get(key) as CreativeCommonsImagesDataset;
}

// ── Crunchbase ──

get crunchbaseCompanies(): CrunchbaseCompaniesDataset {
  const key = 'crunchbase_companies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new CrunchbaseCompaniesDataset(options));
  }
  return this.cache.get(key) as CrunchbaseCompaniesDataset;
}

// ── Delvaux ──

get delvauxProducts(): DelvauxProductsDataset {
  const key = 'delvaux_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new DelvauxProductsDataset(options));
  }
  return this.cache.get(key) as DelvauxProductsDataset;
}

// ── Digikey ──

get digikeyProducts(): DigikeyProductsDataset {
  const key = 'digikey_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new DigikeyProductsDataset(options));
  }
  return this.cache.get(key) as DigikeyProductsDataset;
}

// ── Dior ──

get diorProducts(): DiorProductsDataset {
  const key = 'dior_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new DiorProductsDataset(options));
  }
  return this.cache.get(key) as DiorProductsDataset;
}

// ── eBay ──

get ebayProducts(): EbayProductsDataset {
  const key = 'ebay_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new EbayProductsDataset(options));
  }
  return this.cache.get(key) as EbayProductsDataset;
}

// ── Employees Enriched ──

get employeesEnriched(): EmployeesEnrichedDataset {
  const key = 'employees_enriched';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new EmployeesEnrichedDataset(options));
  }
  return this.cache.get(key) as EmployeesEnrichedDataset;
}

// ── Etsy ──

get etsyProducts(): EtsyProductsDataset {
  const key = 'etsy_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new EtsyProductsDataset(options));
  }
  return this.cache.get(key) as EtsyProductsDataset;
}

// ── Facebook ──

get facebookComments(): FacebookCommentsDataset {
  const key = 'facebook_comments';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookCommentsDataset(options));
  }
  return this.cache.get(key) as FacebookCommentsDataset;
}

get facebookCompanyReviews(): FacebookCompanyReviewsDataset {
  const key = 'facebook_company_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookCompanyReviewsDataset(options));
  }
  return this.cache.get(key) as FacebookCompanyReviewsDataset;
}

get facebookEvents(): FacebookEventsDataset {
  const key = 'facebook_events';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookEventsDataset(options));
  }
  return this.cache.get(key) as FacebookEventsDataset;
}

get facebookGroupPosts(): FacebookGroupPostsDataset {
  const key = 'facebook_group_posts';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookGroupPostsDataset(options));
  }
  return this.cache.get(key) as FacebookGroupPostsDataset;
}

get facebookMarketplace(): FacebookMarketplaceDataset {
  const key = 'facebook_marketplace';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookMarketplaceDataset(options));
  }
  return this.cache.get(key) as FacebookMarketplaceDataset;
}

get facebookPagesPosts(): FacebookPagesPostsDataset {
  const key = 'facebook_pages_posts';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookPagesPostsDataset(options));
  }
  return this.cache.get(key) as FacebookPagesPostsDataset;
}

get facebookPagesProfiles(): FacebookPagesProfilesDataset {
  const key = 'facebook_pages_profiles';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookPagesProfilesDataset(options));
  }
  return this.cache.get(key) as FacebookPagesProfilesDataset;
}

get facebookPostsByUrl(): FacebookPostsByUrlDataset {
  const key = 'facebook_posts_by_url';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookPostsByUrlDataset(options));
  }
  return this.cache.get(key) as FacebookPostsByUrlDataset;
}

get facebookProfiles(): FacebookProfilesDataset {
  const key = 'facebook_profiles';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookProfilesDataset(options));
  }
  return this.cache.get(key) as FacebookProfilesDataset;
}

get facebookReels(): FacebookReelsDataset {
  const key = 'facebook_reels';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FacebookReelsDataset(options));
  }
  return this.cache.get(key) as FacebookReelsDataset;
}

// ── Fanatics ──

get fanaticsProducts(): FanaticsProductsDataset {
  const key = 'fanatics_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FanaticsProductsDataset(options));
  }
  return this.cache.get(key) as FanaticsProductsDataset;
}

// ── Fendi ──

get fendiProducts(): FendiProductsDataset {
  const key = 'fendi_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new FendiProductsDataset(options));
  }
  return this.cache.get(key) as FendiProductsDataset;
}

// ── G2 ──

get g2Products(): G2ProductsDataset {
  const key = 'g2_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new G2ProductsDataset(options));
  }
  return this.cache.get(key) as G2ProductsDataset;
}

get g2Reviews(): G2ReviewsDataset {
  const key = 'g2_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new G2ReviewsDataset(options));
  }
  return this.cache.get(key) as G2ReviewsDataset;
}

// ── GitHub ──

get githubRepositories(): GithubRepositoriesDataset {
  const key = 'github_repositories';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GithubRepositoriesDataset(options));
  }
  return this.cache.get(key) as GithubRepositoriesDataset;
}

// ── Glassdoor ──

get glassdoorCompanies(): GlassdoorCompaniesDataset {
  const key = 'glassdoor_companies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GlassdoorCompaniesDataset(options));
  }
  return this.cache.get(key) as GlassdoorCompaniesDataset;
}

get glassdoorJobs(): GlassdoorJobsDataset {
  const key = 'glassdoor_jobs';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GlassdoorJobsDataset(options));
  }
  return this.cache.get(key) as GlassdoorJobsDataset;
}

get glassdoorReviews(): GlassdoorReviewsDataset {
  const key = 'glassdoor_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GlassdoorReviewsDataset(options));
  }
  return this.cache.get(key) as GlassdoorReviewsDataset;
}

// ── Goodreads ──

get goodreadsBooks(): GoodreadsBooksDataset {
  const key = 'goodreads_books';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoodreadsBooksDataset(options));
  }
  return this.cache.get(key) as GoodreadsBooksDataset;
}

// ── Google Maps ──

get googleMapsFullInfo(): GoogleMapsFullInfoDataset {
  const key = 'google_maps_full_info';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoogleMapsFullInfoDataset(options));
  }
  return this.cache.get(key) as GoogleMapsFullInfoDataset;
}

get googleMapsReviews(): GoogleMapsReviewsDataset {
  const key = 'google_maps_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoogleMapsReviewsDataset(options));
  }
  return this.cache.get(key) as GoogleMapsReviewsDataset;
}

// ── Google News ──

get googleNews(): GoogleNewsDataset {
  const key = 'google_news';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoogleNewsDataset(options));
  }
  return this.cache.get(key) as GoogleNewsDataset;
}

// ── Google Play ──

get googlePlayReviews(): GooglePlayReviewsDataset {
  const key = 'google_play_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GooglePlayReviewsDataset(options));
  }
  return this.cache.get(key) as GooglePlayReviewsDataset;
}

get googlePlayStore(): GooglePlayStoreDataset {
  const key = 'google_play_store';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GooglePlayStoreDataset(options));
  }
  return this.cache.get(key) as GooglePlayStoreDataset;
}

// ── Google Shopping ──

get googleShoppingProducts(): GoogleShoppingProductsDataset {
  const key = 'google_shopping_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoogleShoppingProductsDataset(options));
  }
  return this.cache.get(key) as GoogleShoppingProductsDataset;
}

get googleShoppingSearchUs(): GoogleShoppingSearchUsDataset {
  const key = 'google_shopping_search_us';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new GoogleShoppingSearchUsDataset(options));
  }
  return this.cache.get(key) as GoogleShoppingSearchUsDataset;
}

// ── Hermes ──

get hermesProducts(): HermesProductsDataset {
  const key = 'hermes_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new HermesProductsDataset(options));
  }
  return this.cache.get(key) as HermesProductsDataset;
}

// ── H&M ──

get hmProducts(): HmProductsDataset {
  const key = 'hm_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new HmProductsDataset(options));
  }
  return this.cache.get(key) as HmProductsDataset;
}

// ── Home Depot ──

get homeDepotCaProducts(): HomeDepotCaProductsDataset {
  const key = 'homedepot_ca_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new HomeDepotCaProductsDataset(options));
  }
  return this.cache.get(key) as HomeDepotCaProductsDataset;
}

get homeDepotUsProducts(): HomeDepotUsProductsDataset {
  const key = 'homedepot_us_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new HomeDepotUsProductsDataset(options));
  }
  return this.cache.get(key) as HomeDepotUsProductsDataset;
}

// ── IKEA ──

get ikeaProducts(): IkeaProductsDataset {
  const key = 'ikea_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new IkeaProductsDataset(options));
  }
  return this.cache.get(key) as IkeaProductsDataset;
}

// ── IMDB ──

get imdbMovies(): ImdbMoviesDataset {
  const key = 'imdb_movies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new ImdbMoviesDataset(options));
  }
  return this.cache.get(key) as ImdbMoviesDataset;
}

// ── Indeed ──

get indeedCompanies(): IndeedCompaniesDataset {
  const key = 'indeed_companies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new IndeedCompaniesDataset(options));
  }
  return this.cache.get(key) as IndeedCompaniesDataset;
}

get indeedJobs(): IndeedJobsDataset {
  const key = 'indeed_jobs';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new IndeedJobsDataset(options));
  }
  return this.cache.get(key) as IndeedJobsDataset;
}

// ── Infocasas ──

get infocasasUruguay(): InfocasasUruguayDataset {
  const key = 'infocasas_uruguay';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new InfocasasUruguayDataset(options));
  }
  return this.cache.get(key) as InfocasasUruguayDataset;
}

// ── Inmuebles24 ──

get inmuebles24Mexico(): Inmuebles24MexicoDataset {
  const key = 'inmuebles24_mexico';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new Inmuebles24MexicoDataset(options));
  }
  return this.cache.get(key) as Inmuebles24MexicoDataset;
}

// ── Kroger ──

get krogerProducts(): KrogerProductsDataset {
  const key = 'kroger_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new KrogerProductsDataset(options));
  }
  return this.cache.get(key) as KrogerProductsDataset;
}

// ── Lazada ──

get lazadaProducts(): LazadaProductsDataset {
  const key = 'lazada_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LazadaProductsDataset(options));
  }
  return this.cache.get(key) as LazadaProductsDataset;
}

get lazadaProductsSearch(): LazadaProductsSearchDataset {
  const key = 'lazada_products_search';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LazadaProductsSearchDataset(options));
  }
  return this.cache.get(key) as LazadaProductsSearchDataset;
}

get lazadaReviews(): LazadaReviewsDataset {
  const key = 'lazada_reviews';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LazadaReviewsDataset(options));
  }
  return this.cache.get(key) as LazadaReviewsDataset;
}

// ── La-Z-Boy ──

get laZBoyProducts(): LaZBoyProductsDataset {
  const key = 'lazboy_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LaZBoyProductsDataset(options));
  }
  return this.cache.get(key) as LaZBoyProductsDataset;
}

// ── Lego ──

get legoProducts(): LegoProductsDataset {
  const key = 'lego_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LegoProductsDataset(options));
  }
  return this.cache.get(key) as LegoProductsDataset;
}

// ── LinkedIn (continued) ──

get linkedinJobListings(): LinkedinJobListingsDataset {
  const key = 'linkedin_job_listings';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LinkedinJobListingsDataset(options));
  }
  return this.cache.get(key) as LinkedinJobListingsDataset;
}

get linkedinPosts(): LinkedinPostsDataset {
  const key = 'linkedin_posts';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LinkedinPostsDataset(options));
  }
  return this.cache.get(key) as LinkedinPostsDataset;
}

get linkedinProfilesJobListings(): LinkedinProfilesJobListingsDataset {
  const key = 'linkedin_profiles_job_listings';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LinkedinProfilesJobListingsDataset(options));
  }
  return this.cache.get(key) as LinkedinProfilesJobListingsDataset;
}

// ── L.L.Bean ──

get llBeanProducts(): LlBeanProductsDataset {
  const key = 'llbean_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LlBeanProductsDataset(options));
  }
  return this.cache.get(key) as LlBeanProductsDataset;
}

// ── Loewe ──

get loeweProducts(): LoeweProductsDataset {
  const key = 'loewe_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LoeweProductsDataset(options));
  }
  return this.cache.get(key) as LoeweProductsDataset;
}

// ── Lowe's ──

get lowesProducts(): LowesProductsDataset {
  const key = 'lowes_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new LowesProductsDataset(options));
  }
  return this.cache.get(key) as LowesProductsDataset;
}

// ── Macy's ──

get macysProducts(): MacysProductsDataset {
  const key = 'macys_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MacysProductsDataset(options));
  }
  return this.cache.get(key) as MacysProductsDataset;
}

// ── Mango ──

get mangoProducts(): MangoProductsDataset {
  const key = 'mango_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MangoProductsDataset(options));
  }
  return this.cache.get(key) as MangoProductsDataset;
}

// ── Manta ──

get mantaBusinesses(): MantaBusinessesDataset {
  const key = 'manta_businesses';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MantaBusinessesDataset(options));
  }
  return this.cache.get(key) as MantaBusinessesDataset;
}

// ── Massimo Dutti ──

get massimoDuttiProducts(): MassimoDuttiProductsDataset {
  const key = 'massimo_dutti_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MassimoDuttiProductsDataset(options));
  }
  return this.cache.get(key) as MassimoDuttiProductsDataset;
}

// ── Mattress Firm ──

get mattressFirmProducts(): MattressFirmProductsDataset {
  const key = 'mattressfirm_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MattressFirmProductsDataset(options));
  }
  return this.cache.get(key) as MattressFirmProductsDataset;
}

// ── MediaMarkt ──

get mediamarktProducts(): MediamarktProductsDataset {
  const key = 'mediamarkt_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MediamarktProductsDataset(options));
  }
  return this.cache.get(key) as MediamarktProductsDataset;
}

// ── Mercado Livre ──

get mercadolivreProducts(): MercadolivreProductsDataset {
  const key = 'mercadolivre_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MercadolivreProductsDataset(options));
  }
  return this.cache.get(key) as MercadolivreProductsDataset;
}

// ── Metrocuadrado ──

get metrocuadradoProperties(): MetrocuadradoPropertiesDataset {
  const key = 'metrocuadrado_properties';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MetrocuadradoPropertiesDataset(options));
  }
  return this.cache.get(key) as MetrocuadradoPropertiesDataset;
}

// ── Micro Center ──

get microCenterProducts(): MicroCenterProductsDataset {
  const key = 'microcenter_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MicroCenterProductsDataset(options));
  }
  return this.cache.get(key) as MicroCenterProductsDataset;
}

// ── Montblanc ──

get montblancProducts(): MontblancProductsDataset {
  const key = 'montblanc_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MontblancProductsDataset(options));
  }
  return this.cache.get(key) as MontblancProductsDataset;
}

// ── Mouser ──

get mouserProducts(): MouserProductsDataset {
  const key = 'mouser_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MouserProductsDataset(options));
  }
  return this.cache.get(key) as MouserProductsDataset;
}

// ── Moynat ──

get moynatProducts(): MoynatProductsDataset {
  const key = 'moynat_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MoynatProductsDataset(options));
  }
  return this.cache.get(key) as MoynatProductsDataset;
}

// ── MyBobs ──

get mybobsProducts(): MybobsProductsDataset {
  const key = 'mybobs_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MybobsProductsDataset(options));
  }
  return this.cache.get(key) as MybobsProductsDataset;
}

// ── Myntra ──

get myntraProducts(): MyntraProductsDataset {
  const key = 'myntra_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new MyntraProductsDataset(options));
  }
  return this.cache.get(key) as MyntraProductsDataset;
}

// ── Naver ──

get naverProducts(): NaverProductsDataset {
  const key = 'naver_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new NaverProductsDataset(options));
  }
  return this.cache.get(key) as NaverProductsDataset;
}

// ── NBA ──

get nbaPlayersStats(): NbaPlayersStatsDataset {
  const key = 'nba_players_stats';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new NbaPlayersStatsDataset(options));
  }
  return this.cache.get(key) as NbaPlayersStatsDataset;
}

// ── OLX ──

get olxBrazil(): OlxBrazilDataset {
  const key = 'olx_brazil';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new OlxBrazilDataset(options));
  }
  return this.cache.get(key) as OlxBrazilDataset;
}

// ── Otodom ──

get otodomPoland(): OtodomPolandDataset {
  const key = 'otodom_poland';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new OtodomPolandDataset(options));
  }
  return this.cache.get(key) as OtodomPolandDataset;
}

// ── Owler ──

get owlerCompanies(): OwlerCompaniesDataset {
  const key = 'owler_companies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new OwlerCompaniesDataset(options));
  }
  return this.cache.get(key) as OwlerCompaniesDataset;
}

// ── Ozon ──

get ozonProducts(): OzonProductsDataset {
  const key = 'ozon_products';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new OzonProductsDataset(options));
  }
  return this.cache.get(key) as OzonProductsDataset;
}

// ── Pinterest ──

get pinterestPosts(): PinterestPostsDataset {
  const key = 'pinterest_posts';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new PinterestPostsDataset(options));
  }
  return this.cache.get(key) as PinterestPostsDataset;
}

get pinterestProfiles(): PinterestProfilesDataset {
  const key = 'pinterest_profiles';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new PinterestProfilesDataset(options));
  }
  return this.cache.get(key) as PinterestProfilesDataset;
}

// ── PitchBook ──

get pitchBookCompanies(): PitchBookCompaniesDataset {
  const key = 'pitchbook_companies';
  if (!this.cache.has(key)) {
    const options = { transport: this.transport };
    this.cache.set(key, new PitchBookCompaniesDataset(options));
  }
  return this.cache.get(key) as PitchBookCompaniesDataset;
}

// ── Prada 
───────────────────────────────────────────────────────── + + get pradaProducts(): PradaProductsDataset { + if (!this.cache.has('prada_products')) { + this.cache.set( + 'prada_products', + new PradaProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get('prada_products') as PradaProductsDataset; + } + + // ── Properati ───────────────────────────────────────────────────── + + get properatiProperties(): ProperatiPropertiesDataset { + if (!this.cache.has('properati_properties')) { + this.cache.set( + 'properati_properties', + new ProperatiPropertiesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'properati_properties', + ) as ProperatiPropertiesDataset; + } + + // ── Quora ───────────────────────────────────────────────────────── + + get quoraPosts(): QuoraPostsDataset { + if (!this.cache.has('quora_posts')) { + this.cache.set( + 'quora_posts', + new QuoraPostsDataset({ transport: this.transport }), + ); + } + return this.cache.get('quora_posts') as QuoraPostsDataset; + } + + // ── Raymour & Flanigan ──────────────────────────────────────────── + + get raymourFlaniganProducts(): RaymourFlaniganProductsDataset { + if (!this.cache.has('raymourflanigan_products')) { + this.cache.set( + 'raymourflanigan_products', + new RaymourFlaniganProductsDataset({ + transport: this.transport, + }), + ); + } + return this.cache.get( + 'raymourflanigan_products', + ) as RaymourFlaniganProductsDataset; + } + + // ── Realtor ─────────────────────────────────────────────────────── + + get realtorInternational(): RealtorInternationalDataset { + if (!this.cache.has('realtor_international_properties')) { + this.cache.set( + 'realtor_international_properties', + new RealtorInternationalDataset({ + transport: this.transport, + }), + ); + } + return this.cache.get( + 'realtor_international_properties', + ) as RealtorInternationalDataset; + } + + // ── Reddit ──────────────────────────────────────────────────────── + + get redditComments(): 
RedditCommentsDataset { + if (!this.cache.has('reddit_comments')) { + this.cache.set( + 'reddit_comments', + new RedditCommentsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'reddit_comments', + ) as RedditCommentsDataset; + } + + get redditPosts(): RedditPostsDataset { + if (!this.cache.has('reddit_posts')) { + this.cache.set( + 'reddit_posts', + new RedditPostsDataset({ transport: this.transport }), + ); + } + return this.cache.get('reddit_posts') as RedditPostsDataset; + } + + // ── Rona ────────────────────────────────────────────────────────── + + get ronaProducts(): RonaProductsDataset { + if (!this.cache.has('rona_products')) { + this.cache.set( + 'rona_products', + new RonaProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get('rona_products') as RonaProductsDataset; + } + + // ── Sephora ─────────────────────────────────────────────────────── + + get sephoraProducts(): SephoraProductsDataset { + if (!this.cache.has('sephora_products')) { + this.cache.set( + 'sephora_products', + new SephoraProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'sephora_products', + ) as SephoraProductsDataset; + } + + // ── SHEIN ───────────────────────────────────────────────────────── + + get sheinProducts(): SheinProductsDataset { + if (!this.cache.has('shein_products')) { + this.cache.set( + 'shein_products', + new SheinProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get('shein_products') as SheinProductsDataset; + } + + // ── Shopee ──────────────────────────────────────────────────────── + + get shopeeProducts(): ShopeeProductsDataset { + if (!this.cache.has('shopee_products')) { + this.cache.set( + 'shopee_products', + new ShopeeProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get('shopee_products') as ShopeeProductsDataset; + } + + // ── Sleep Number ────────────────────────────────────────────────── + + get 
sleepNumberProducts(): SleepNumberProductsDataset { + if (!this.cache.has('sleepnumber_products')) { + this.cache.set( + 'sleepnumber_products', + new SleepNumberProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'sleepnumber_products', + ) as SleepNumberProductsDataset; + } + + // ── Slintel ─────────────────────────────────────────────────────── + + get slintelCompanies(): SlintelCompaniesDataset { + if (!this.cache.has('slintel_companies')) { + this.cache.set( + 'slintel_companies', + new SlintelCompaniesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'slintel_companies', + ) as SlintelCompaniesDataset; + } + + // ── Snapchat ────────────────────────────────────────────────────── + + get snapchatPosts(): SnapchatPostsDataset { + if (!this.cache.has('snapchat_posts')) { + this.cache.set( + 'snapchat_posts', + new SnapchatPostsDataset({ transport: this.transport }), + ); + } + return this.cache.get('snapchat_posts') as SnapchatPostsDataset; + } + + // ── Target ────────────────────────────────────────────────────────── + + get targetProducts(): TargetProductsDataset { + if (!this.cache.has('target_products')) { + this.cache.set( + 'target_products', + new TargetProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'target_products', + ) as TargetProductsDataset; + } + + // ── TocToc ──────────────────────────────────────────────────────── + + get toctocProperties(): ToctocPropertiesDataset { + if (!this.cache.has('toctoc_properties')) { + this.cache.set( + 'toctoc_properties', + new ToctocPropertiesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'toctoc_properties', + ) as ToctocPropertiesDataset; + } + + // ── Tokopedia ───────────────────────────────────────────────────── + + get tokopediaProducts(): TokopediaProductsDataset { + if (!this.cache.has('tokopedia_products')) { + this.cache.set( + 'tokopedia_products', + new 
TokopediaProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'tokopedia_products', + ) as TokopediaProductsDataset; + } + + // ── Toys"R"Us ───────────────────────────────────────────────────── + + get toysRUsProducts(): ToysRUsProductsDataset { + if (!this.cache.has('toysrus_products')) { + this.cache.set( + 'toysrus_products', + new ToysRUsProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'toysrus_products', + ) as ToysRUsProductsDataset; + } + + // ── Trustpilot ──────────────────────────────────────────────────── + + get trustpilotReviews(): TrustpilotReviewsDataset { + if (!this.cache.has('trustpilot_reviews')) { + this.cache.set( + 'trustpilot_reviews', + new TrustpilotReviewsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'trustpilot_reviews', + ) as TrustpilotReviewsDataset; + } + + // ── TrustRadius ─────────────────────────────────────────────────── + + get trustRadiusReviews(): TrustRadiusReviewsDataset { + if (!this.cache.has('trustradius_reviews')) { + this.cache.set( + 'trustradius_reviews', + new TrustRadiusReviewsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'trustradius_reviews', + ) as TrustRadiusReviewsDataset; + } + + // ── US Lawyers ──────────────────────────────────────────────────── + + get usLawyers(): UsLawyersDataset { + if (!this.cache.has('us_lawyers')) { + this.cache.set( + 'us_lawyers', + new UsLawyersDataset({ transport: this.transport }), + ); + } + return this.cache.get('us_lawyers') as UsLawyersDataset; + } + + // ── VentureRadar ────────────────────────────────────────────────── + + get ventureRadarCompanies(): VentureRadarCompaniesDataset { + if (!this.cache.has('ventureradar_companies')) { + this.cache.set( + 'ventureradar_companies', + new VentureRadarCompaniesDataset({ + transport: this.transport, + }), + ); + } + return this.cache.get( + 'ventureradar_companies', + ) as VentureRadarCompaniesDataset; + } 
+ + // ── Vimeo ───────────────────────────────────────────────────────── + + get vimeoVideos(): VimeoVideosDataset { + if (!this.cache.has('vimeo_videos')) { + this.cache.set( + 'vimeo_videos', + new VimeoVideosDataset({ transport: this.transport }), + ); + } + return this.cache.get('vimeo_videos') as VimeoVideosDataset; + } + + // ── Walmart ─────────────────────────────────────────────────────── + + get walmartProducts(): WalmartProductsDataset { + if (!this.cache.has('walmart_products')) { + this.cache.set( + 'walmart_products', + new WalmartProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'walmart_products', + ) as WalmartProductsDataset; + } + + get walmartSellersInfo(): WalmartSellersInfoDataset { + if (!this.cache.has('walmart_sellers_info')) { + this.cache.set( + 'walmart_sellers_info', + new WalmartSellersInfoDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'walmart_sellers_info', + ) as WalmartSellersInfoDataset; + } + + // ── Wayfair ─────────────────────────────────────────────────────── + + get wayfairProducts(): WayfairProductsDataset { + if (!this.cache.has('wayfair_products')) { + this.cache.set( + 'wayfair_products', + new WayfairProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'wayfair_products', + ) as WayfairProductsDataset; + } + + // ── Webmotors ───────────────────────────────────────────────────── + + get webmotorsBrasil(): WebmotorsBrasilDataset { + if (!this.cache.has('webmotors_brasil')) { + this.cache.set( + 'webmotors_brasil', + new WebmotorsBrasilDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'webmotors_brasil', + ) as WebmotorsBrasilDataset; + } + + // ── Wikipedia ───────────────────────────────────────────────────── + + get wikipediaArticles(): WikipediaArticlesDataset { + if (!this.cache.has('wikipedia_articles')) { + this.cache.set( + 'wikipedia_articles', + new WikipediaArticlesDataset({ transport: 
this.transport }), + ); + } + return this.cache.get( + 'wikipedia_articles', + ) as WikipediaArticlesDataset; + } + + // ── Wildberries ─────────────────────────────────────────────────── + + get wildberriesProducts(): WildberriesProductsDataset { + if (!this.cache.has('wildberries_products')) { + this.cache.set( + 'wildberries_products', + new WildberriesProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'wildberries_products', + ) as WildberriesProductsDataset; + } + + // ── World Data ──────────────────────────────────────────────────── + + get worldPopulation(): WorldPopulationDataset { + if (!this.cache.has('world_population')) { + this.cache.set( + 'world_population', + new WorldPopulationDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'world_population', + ) as WorldPopulationDataset; + } + + get worldZipcodes(): WorldZipcodesDataset { + if (!this.cache.has('world_zipcodes')) { + this.cache.set( + 'world_zipcodes', + new WorldZipcodesDataset({ transport: this.transport }), + ); + } + return this.cache.get('world_zipcodes') as WorldZipcodesDataset; + } + + // ── Xing ──────────────────────────────────────────────────────────── + + get xingProfiles(): XingProfilesDataset { + if (!this.cache.has('xing_profiles')) { + this.cache.set( + 'xing_profiles', + new XingProfilesDataset({ transport: this.transport }), + ); + } + return this.cache.get('xing_profiles') as XingProfilesDataset; + } + + // ── Yahoo Finance ───────────────────────────────────────────────── + + get yahooFinanceBusinesses(): YahooFinanceBusinessesDataset { + if (!this.cache.has('yahoo_finance_businesses')) { + this.cache.set( + 'yahoo_finance_businesses', + new YahooFinanceBusinessesDataset({ + transport: this.transport, + }), + ); + } + return this.cache.get( + 'yahoo_finance_businesses', + ) as YahooFinanceBusinessesDataset; + } + + // ── Yapo ────────────────────────────────────────────────────────── + + get yapoChile(): 
YapoChileDataset { + if (!this.cache.has('yapo_chile')) { + this.cache.set( + 'yapo_chile', + new YapoChileDataset({ transport: this.transport }), + ); + } + return this.cache.get('yapo_chile') as YapoChileDataset; + } + + // ── Yelp ────────────────────────────────────────────────────────── + + get yelpBusinesses(): YelpBusinessesDataset { + if (!this.cache.has('yelp_businesses')) { + this.cache.set( + 'yelp_businesses', + new YelpBusinessesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'yelp_businesses', + ) as YelpBusinessesDataset; + } + + get yelpReviews(): YelpReviewsDataset { + if (!this.cache.has('yelp_reviews')) { + this.cache.set( + 'yelp_reviews', + new YelpReviewsDataset({ transport: this.transport }), + ); + } + return this.cache.get('yelp_reviews') as YelpReviewsDataset; + } + + // ── YouTube ──────────────────────────────────────────────────────── + + get youtubeComments(): YoutubeCommentsDataset { + if (!this.cache.has('youtube_comments')) { + this.cache.set( + 'youtube_comments', + new YoutubeCommentsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'youtube_comments', + ) as YoutubeCommentsDataset; + } + + get youtubeProfiles(): YoutubeProfilesDataset { + if (!this.cache.has('youtube_profiles')) { + this.cache.set( + 'youtube_profiles', + new YoutubeProfilesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'youtube_profiles', + ) as YoutubeProfilesDataset; + } + + get youtubeVideos(): YoutubeVideosDataset { + if (!this.cache.has('youtube_videos')) { + this.cache.set( + 'youtube_videos', + new YoutubeVideosDataset({ transport: this.transport }), + ); + } + return this.cache.get('youtube_videos') as YoutubeVideosDataset; + } + + // ── YSL ──────────────────────────────────────────────────────────── + + get yslProducts(): YslProductsDataset { + if (!this.cache.has('ysl_products')) { + this.cache.set( + 'ysl_products', + new YslProductsDataset({ transport: this.transport 
}), + ); + } + return this.cache.get('ysl_products') as YslProductsDataset; + } + + // ── Zalando ──────────────────────────────────────────────────────── + + get zalandoProducts(): ZalandoProductsDataset { + if (!this.cache.has('zalando_products')) { + this.cache.set( + 'zalando_products', + new ZalandoProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zalando_products', + ) as ZalandoProductsDataset; + } + + // ── Zara ─────────────────────────────────────────────────────────── + + get zaraHomeProducts(): ZaraHomeProductsDataset { + if (!this.cache.has('zara_home_products')) { + this.cache.set( + 'zara_home_products', + new ZaraHomeProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zara_home_products', + ) as ZaraHomeProductsDataset; + } + + get zaraProducts(): ZaraProductsDataset { + if (!this.cache.has('zara_products')) { + this.cache.set( + 'zara_products', + new ZaraProductsDataset({ transport: this.transport }), + ); + } + return this.cache.get('zara_products') as ZaraProductsDataset; + } + + // ── Zillow ───────────────────────────────────────────────────────── + + get zillowPriceHistory(): ZillowPriceHistoryDataset { + if (!this.cache.has('zillow_price_history')) { + this.cache.set( + 'zillow_price_history', + new ZillowPriceHistoryDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zillow_price_history', + ) as ZillowPriceHistoryDataset; + } + + get zillowProperties(): ZillowPropertiesDataset { + if (!this.cache.has('zillow_properties')) { + this.cache.set( + 'zillow_properties', + new ZillowPropertiesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zillow_properties', + ) as ZillowPropertiesDataset; + } + + // ── Zonaprop ─────────────────────────────────────────────────────── + + get zonapropArgentina(): ZonapropArgentinaDataset { + if (!this.cache.has('zonaprop_argentina')) { + this.cache.set( + 'zonaprop_argentina', + new 
ZonapropArgentinaDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zonaprop_argentina', + ) as ZonapropArgentinaDataset; + } + + // ── ZoomInfo ─────────────────────────────────────────────────────── + + get zoomInfoCompanies(): ZoomInfoCompaniesDataset { + if (!this.cache.has('zoominfo_companies')) { + this.cache.set( + 'zoominfo_companies', + new ZoomInfoCompaniesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zoominfo_companies', + ) as ZoomInfoCompaniesDataset; + } + + // ── Zoopla ───────────────────────────────────────────────────────── + + get zooplaProperties(): ZooplaPropertiesDataset { + if (!this.cache.has('zoopla_properties')) { + this.cache.set( + 'zoopla_properties', + new ZooplaPropertiesDataset({ transport: this.transport }), + ); + } + return this.cache.get( + 'zoopla_properties', + ) as ZooplaPropertiesDataset; + } } diff --git a/src/api/datasets/platforms/agoda.ts b/src/api/datasets/platforms/agoda.ts new file mode 100644 index 0000000..09a375f --- /dev/null +++ b/src/api/datasets/platforms/agoda.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AgodaPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_m837ssst155rq3a1xo'; + readonly name = 'agoda_properties'; +} diff --git a/src/api/datasets/platforms/airbnb.ts b/src/api/datasets/platforms/airbnb.ts new file mode 100644 index 0000000..e94e6e1 --- /dev/null +++ b/src/api/datasets/platforms/airbnb.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AirbnbPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_ld7ll037kqy322v05'; + readonly name = 'airbnb_properties'; +} diff --git a/src/api/datasets/platforms/american_eagle.ts b/src/api/datasets/platforms/american_eagle.ts new file mode 100644 index 0000000..46f344c --- /dev/null +++ b/src/api/datasets/platforms/american_eagle.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class 
AmericanEagleProductsDataset extends BaseDataset { + readonly datasetId = 'gd_le6plu065keypwyir'; + readonly name = 'american_eagle_products'; +} diff --git a/src/api/datasets/platforms/apple.ts b/src/api/datasets/platforms/apple.ts new file mode 100644 index 0000000..2315ae8 --- /dev/null +++ b/src/api/datasets/platforms/apple.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class AppleAppStoreDataset extends BaseDataset { + readonly datasetId = 'gd_lsk9ki3u2iishmwrui'; + readonly name = 'apple_app_store'; +} + +export class AppleAppStoreReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_m734msue16e0adkbit'; + readonly name = 'apple_app_store_reviews'; +} diff --git a/src/api/datasets/platforms/ashley_furniture.ts b/src/api/datasets/platforms/ashley_furniture.ts new file mode 100644 index 0000000..37f14a9 --- /dev/null +++ b/src/api/datasets/platforms/ashley_furniture.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AshleyFurnitureProductsDataset extends BaseDataset { + readonly datasetId = 'gd_le1ddqrs16uevi5vc4'; + readonly name = 'ashley_furniture_products'; +} diff --git a/src/api/datasets/platforms/asos.ts b/src/api/datasets/platforms/asos.ts new file mode 100644 index 0000000..f1689b3 --- /dev/null +++ b/src/api/datasets/platforms/asos.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AsosProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ldbg7we91cp53nr2z4'; + readonly name = 'asos_products'; +} diff --git a/src/api/datasets/platforms/australia_real_estate.ts b/src/api/datasets/platforms/australia_real_estate.ts new file mode 100644 index 0000000..ddf5155 --- /dev/null +++ b/src/api/datasets/platforms/australia_real_estate.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AustraliaRealEstateDataset extends BaseDataset { + readonly datasetId = 'gd_l3cvjh111l943r4awk'; + readonly name = 'australia_real_estate'; +} diff --git 
a/src/api/datasets/platforms/autozone.ts b/src/api/datasets/platforms/autozone.ts new file mode 100644 index 0000000..aa8bcc8 --- /dev/null +++ b/src/api/datasets/platforms/autozone.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class AutozoneProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mkcnp8yy1kf8oxpvij'; + readonly name = 'autozone_products'; +} diff --git a/src/api/datasets/platforms/balenciaga.ts b/src/api/datasets/platforms/balenciaga.ts new file mode 100644 index 0000000..b7862b4 --- /dev/null +++ b/src/api/datasets/platforms/balenciaga.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BalenciagaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7oemkb2f9h596dfn'; + readonly name = 'balenciaga_products'; +} diff --git a/src/api/datasets/platforms/bbc.ts b/src/api/datasets/platforms/bbc.ts new file mode 100644 index 0000000..292541f --- /dev/null +++ b/src/api/datasets/platforms/bbc.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BbcNewsDataset extends BaseDataset { + readonly datasetId = 'gd_ly5lkfzd1h8c85feyh'; + readonly name = 'bbc_news'; +} diff --git a/src/api/datasets/platforms/berluti.ts b/src/api/datasets/platforms/berluti.ts new file mode 100644 index 0000000..cf4a493 --- /dev/null +++ b/src/api/datasets/platforms/berluti.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BerlutiProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7sef5p16tcupyuy3'; + readonly name = 'berluti_products'; +} diff --git a/src/api/datasets/platforms/bestbuy.ts b/src/api/datasets/platforms/bestbuy.ts new file mode 100644 index 0000000..63cac7c --- /dev/null +++ b/src/api/datasets/platforms/bestbuy.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BestBuyProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltre1jqe1jfr7cccf'; + readonly name = 'bestbuy_products'; +} diff --git 
a/src/api/datasets/platforms/bh.ts b/src/api/datasets/platforms/bh.ts new file mode 100644 index 0000000..943c202 --- /dev/null +++ b/src/api/datasets/platforms/bh.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BhProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mkce0sox1mchrlpp8g'; + readonly name = 'bh_products'; +} diff --git a/src/api/datasets/platforms/bluesky.ts b/src/api/datasets/platforms/bluesky.ts new file mode 100644 index 0000000..52f68e4 --- /dev/null +++ b/src/api/datasets/platforms/bluesky.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class BlueskyPostsDataset extends BaseDataset { + readonly datasetId = 'gd_m6hn4r5s27zfhc7w4'; + readonly name = 'bluesky_posts'; +} + +export class BlueskyTopProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_m45p78dl1m017wi5lj'; + readonly name = 'bluesky_top_profiles'; +} diff --git a/src/api/datasets/platforms/booking.ts b/src/api/datasets/platforms/booking.ts new file mode 100644 index 0000000..cf6f9ac --- /dev/null +++ b/src/api/datasets/platforms/booking.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class BookingHotelListingsDataset extends BaseDataset { + readonly datasetId = 'gd_m5mbdl081229ln6t4a'; + readonly name = 'booking_hotel_listings'; +} + +export class BookingListingsSearchDataset extends BaseDataset { + readonly datasetId = 'gd_m4bf7a917zfezv9d5'; + readonly name = 'booking_listings_search'; +} diff --git a/src/api/datasets/platforms/bottega_veneta.ts b/src/api/datasets/platforms/bottega_veneta.ts new file mode 100644 index 0000000..ec1f0b8 --- /dev/null +++ b/src/api/datasets/platforms/bottega_veneta.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class BottegaVenetaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7os5q91y20h69xj'; + readonly name = 'bottegaveneta_products'; +} diff --git a/src/api/datasets/platforms/carsales.ts 
b/src/api/datasets/platforms/carsales.ts new file mode 100644 index 0000000..bbc0efe --- /dev/null +++ b/src/api/datasets/platforms/carsales.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CarsalesListingsDataset extends BaseDataset { + readonly datasetId = 'gd_m8h7qkn317z9rvlngb'; + readonly name = 'carsales_listings'; +} diff --git a/src/api/datasets/platforms/carters.ts b/src/api/datasets/platforms/carters.ts new file mode 100644 index 0000000..f8c5b01 --- /dev/null +++ b/src/api/datasets/platforms/carters.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CartersProductsDataset extends BaseDataset { + readonly datasetId = 'gd_le60f5v0dj17xgv6u'; + readonly name = 'carters_products'; +} diff --git a/src/api/datasets/platforms/celine.ts b/src/api/datasets/platforms/celine.ts new file mode 100644 index 0000000..8c7d866 --- /dev/null +++ b/src/api/datasets/platforms/celine.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CelineProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7qnf8rwtn2c2uuc'; + readonly name = 'celine_products'; +} diff --git a/src/api/datasets/platforms/chanel.ts b/src/api/datasets/platforms/chanel.ts new file mode 100644 index 0000000..ca6e2df --- /dev/null +++ b/src/api/datasets/platforms/chanel.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ChanelProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ldwwuwqe1oh3zav3js'; + readonly name = 'chanel_products'; +} diff --git a/src/api/datasets/platforms/chileautos.ts b/src/api/datasets/platforms/chileautos.ts new file mode 100644 index 0000000..cd41939 --- /dev/null +++ b/src/api/datasets/platforms/chileautos.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ChileautosChileDataset extends BaseDataset { + readonly datasetId = 'gd_lfsbqgb01iiit5ppju'; + readonly name = 'chileautos_chile'; +} diff --git a/src/api/datasets/platforms/cnn.ts 
b/src/api/datasets/platforms/cnn.ts new file mode 100644 index 0000000..dcd7ce4 --- /dev/null +++ b/src/api/datasets/platforms/cnn.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CnnNewsDataset extends BaseDataset { + readonly datasetId = 'gd_lycz8783197ch4wvwg'; + readonly name = 'cnn_news'; +} diff --git a/src/api/datasets/platforms/companies_enriched.ts b/src/api/datasets/platforms/companies_enriched.ts new file mode 100644 index 0000000..2d018f9 --- /dev/null +++ b/src/api/datasets/platforms/companies_enriched.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CompaniesEnrichedDataset extends BaseDataset { + readonly datasetId = 'gd_m3fl0mwzmfpfn4cw4'; + readonly name = 'companies_enriched'; +} diff --git a/src/api/datasets/platforms/costco.ts b/src/api/datasets/platforms/costco.ts new file mode 100644 index 0000000..206345c --- /dev/null +++ b/src/api/datasets/platforms/costco.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CostcoProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mkcbmac44j178pook'; + readonly name = 'costco_products'; +} diff --git a/src/api/datasets/platforms/crate_and_barrel.ts b/src/api/datasets/platforms/crate_and_barrel.ts new file mode 100644 index 0000000..a31a888 --- /dev/null +++ b/src/api/datasets/platforms/crate_and_barrel.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CrateAndBarrelProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lemtcp2p2qdyd24vq5'; + readonly name = 'crateandbarrel_products'; +} diff --git a/src/api/datasets/platforms/creative_commons.ts b/src/api/datasets/platforms/creative_commons.ts new file mode 100644 index 0000000..4dbccde --- /dev/null +++ b/src/api/datasets/platforms/creative_commons.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class CreativeCommons3dModelsDataset extends BaseDataset { + readonly datasetId = 'gd_m4jr2hyr2kfhtvba6e'; + readonly 
name = 'creative_commons_3d_models'; +} + +export class CreativeCommonsImagesDataset extends BaseDataset { + readonly datasetId = 'gd_m23cxdw82ct6k022y3'; + readonly name = 'creative_commons_images'; +} diff --git a/src/api/datasets/platforms/crunchbase.ts b/src/api/datasets/platforms/crunchbase.ts new file mode 100644 index 0000000..d3c1c25 --- /dev/null +++ b/src/api/datasets/platforms/crunchbase.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class CrunchbaseCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vijqt9jfj7olije'; + readonly name = 'crunchbase_companies'; +} diff --git a/src/api/datasets/platforms/delvaux.ts b/src/api/datasets/platforms/delvaux.ts new file mode 100644 index 0000000..297562d --- /dev/null +++ b/src/api/datasets/platforms/delvaux.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class DelvauxProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lhahvbli142qv9r0v1'; + readonly name = 'delvaux_products'; +} diff --git a/src/api/datasets/platforms/digikey.ts b/src/api/datasets/platforms/digikey.ts new file mode 100644 index 0000000..a4c06b2 --- /dev/null +++ b/src/api/datasets/platforms/digikey.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class DigikeyProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lj74waf72416ro0k65'; + readonly name = 'digikey_products'; +} diff --git a/src/api/datasets/platforms/dior.ts b/src/api/datasets/platforms/dior.ts new file mode 100644 index 0000000..866d352 --- /dev/null +++ b/src/api/datasets/platforms/dior.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class DiorProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7o3kqu6wp7qmqkl'; + readonly name = 'dior_products'; +} diff --git a/src/api/datasets/platforms/ebay.ts b/src/api/datasets/platforms/ebay.ts new file mode 100644 index 0000000..e71f038 --- /dev/null +++ b/src/api/datasets/platforms/ebay.ts @@ -0,0 +1,6 @@ 
+import { BaseDataset } from '../base'; + +export class EbayProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltr9mjt81n0zzdk1fb'; + readonly name = 'ebay_products'; +} diff --git a/src/api/datasets/platforms/employees_enriched.ts b/src/api/datasets/platforms/employees_enriched.ts new file mode 100644 index 0000000..c6827b4 --- /dev/null +++ b/src/api/datasets/platforms/employees_enriched.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class EmployeesEnrichedDataset extends BaseDataset { + readonly datasetId = 'gd_m18zt6ec11wfqohyrs'; + readonly name = 'employees_enriched'; +} diff --git a/src/api/datasets/platforms/etsy.ts b/src/api/datasets/platforms/etsy.ts new file mode 100644 index 0000000..9bf7a12 --- /dev/null +++ b/src/api/datasets/platforms/etsy.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class EtsyProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltppk0jdv1jqz25mz'; + readonly name = 'etsy_products'; +} diff --git a/src/api/datasets/platforms/facebook.ts b/src/api/datasets/platforms/facebook.ts new file mode 100644 index 0000000..9257e13 --- /dev/null +++ b/src/api/datasets/platforms/facebook.ts @@ -0,0 +1,51 @@ +import { BaseDataset } from '../base'; + +export class FacebookCommentsDataset extends BaseDataset { + readonly datasetId = 'gd_lkay758p1eanlolqw8'; + readonly name = 'facebook_comments'; +} + +export class FacebookCompanyReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_m0dtqpiu1mbcyc2g86'; + readonly name = 'facebook_company_reviews'; +} + +export class FacebookEventsDataset extends BaseDataset { + readonly datasetId = 'gd_m14sd0to1jz48ppm51'; + readonly name = 'facebook_events'; +} + +export class FacebookGroupPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lz11l67o2cb3r0lkj3'; + readonly name = 'facebook_group_posts'; +} + +export class FacebookMarketplaceDataset extends BaseDataset { + readonly datasetId = 'gd_lvt9iwuh6fbcwmx1a'; + 
readonly name = 'facebook_marketplace'; +} + +export class FacebookPagesPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lkaxegm826bjpoo9m5'; + readonly name = 'facebook_pages_posts'; +} + +export class FacebookPagesProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_mf124a0511bauquyow'; + readonly name = 'facebook_pages_profiles'; +} + +export class FacebookPostsByUrlDataset extends BaseDataset { + readonly datasetId = 'gd_lyclm1571iy3mv57zw'; + readonly name = 'facebook_posts_by_url'; +} + +export class FacebookProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_mf0urb782734ik94dz'; + readonly name = 'facebook_profiles'; +} + +export class FacebookReelsDataset extends BaseDataset { + readonly datasetId = 'gd_lyclm3ey2q6rww027t'; + readonly name = 'facebook_reels'; +} diff --git a/src/api/datasets/platforms/fanatics.ts b/src/api/datasets/platforms/fanatics.ts new file mode 100644 index 0000000..3359764 --- /dev/null +++ b/src/api/datasets/platforms/fanatics.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class FanaticsProductsDataset extends BaseDataset { + readonly datasetId = 'gd_le124kuq1uoj7zj8hb'; + readonly name = 'fanatics_products'; +} diff --git a/src/api/datasets/platforms/fendi.ts b/src/api/datasets/platforms/fendi.ts new file mode 100644 index 0000000..15b5aae --- /dev/null +++ b/src/api/datasets/platforms/fendi.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class FendiProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lbqsfpfk71ubir3pi'; + readonly name = 'fendi_products'; +} diff --git a/src/api/datasets/platforms/g2.ts b/src/api/datasets/platforms/g2.ts new file mode 100644 index 0000000..fa317a0 --- /dev/null +++ b/src/api/datasets/platforms/g2.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class G2ProductsDataset extends BaseDataset { + readonly datasetId = 'gd_l88xp4k01qnhvyqlvw'; + readonly name = 'g2_products'; +} + +export 
class G2ReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_l88xvdka1uao86xvlb'; + readonly name = 'g2_reviews'; +} diff --git a/src/api/datasets/platforms/github.ts b/src/api/datasets/platforms/github.ts new file mode 100644 index 0000000..d23d176 --- /dev/null +++ b/src/api/datasets/platforms/github.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class GithubRepositoriesDataset extends BaseDataset { + readonly datasetId = 'gd_lyrexgxc24b3d4imjt'; + readonly name = 'github_repositories'; +} diff --git a/src/api/datasets/platforms/glassdoor.ts b/src/api/datasets/platforms/glassdoor.ts new file mode 100644 index 0000000..5cbfcdd --- /dev/null +++ b/src/api/datasets/platforms/glassdoor.ts @@ -0,0 +1,16 @@ +import { BaseDataset } from '../base'; + +export class GlassdoorCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l7j0bx501ockwldaqf'; + readonly name = 'glassdoor_companies'; +} + +export class GlassdoorJobsDataset extends BaseDataset { + readonly datasetId = 'gd_lpfbbndm1xnopbrcr0'; + readonly name = 'glassdoor_jobs'; +} + +export class GlassdoorReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_l7j1po0921hbu0ri1z'; + readonly name = 'glassdoor_reviews'; +} diff --git a/src/api/datasets/platforms/goodreads.ts b/src/api/datasets/platforms/goodreads.ts new file mode 100644 index 0000000..faa6436 --- /dev/null +++ b/src/api/datasets/platforms/goodreads.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class GoodreadsBooksDataset extends BaseDataset { + readonly datasetId = 'gd_lreq6ho72fhvovjj7a'; + readonly name = 'goodreads_books'; +} diff --git a/src/api/datasets/platforms/google_maps.ts b/src/api/datasets/platforms/google_maps.ts new file mode 100644 index 0000000..4004094 --- /dev/null +++ b/src/api/datasets/platforms/google_maps.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class GoogleMapsFullInfoDataset extends BaseDataset { + readonly datasetId = 
'gd_m8ebnr0q2qlklc02fz'; + readonly name = 'google_maps_full_info'; +} + +export class GoogleMapsReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_luzfs1dn2oa0teb81'; + readonly name = 'google_maps_reviews'; +} diff --git a/src/api/datasets/platforms/google_news.ts b/src/api/datasets/platforms/google_news.ts new file mode 100644 index 0000000..74d0176 --- /dev/null +++ b/src/api/datasets/platforms/google_news.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class GoogleNewsDataset extends BaseDataset { + readonly datasetId = 'gd_lnsxoxzi1omrwnka5r'; + readonly name = 'google_news'; +} diff --git a/src/api/datasets/platforms/google_play.ts b/src/api/datasets/platforms/google_play.ts new file mode 100644 index 0000000..20115fe --- /dev/null +++ b/src/api/datasets/platforms/google_play.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class GooglePlayReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_m6zagkt024uwvvwuyu'; + readonly name = 'google_play_reviews'; +} + +export class GooglePlayStoreDataset extends BaseDataset { + readonly datasetId = 'gd_lsk382l8xei8vzm4u'; + readonly name = 'google_play_store'; +} diff --git a/src/api/datasets/platforms/google_shopping.ts b/src/api/datasets/platforms/google_shopping.ts new file mode 100644 index 0000000..47c57d0 --- /dev/null +++ b/src/api/datasets/platforms/google_shopping.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class GoogleShoppingProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltppk50q18kdw67omz'; + readonly name = 'google_shopping_products'; +} + +export class GoogleShoppingSearchUsDataset extends BaseDataset { + readonly datasetId = 'gd_m31f2k0d2m1bah4f3b'; + readonly name = 'google_shopping_search_us'; +} diff --git a/src/api/datasets/platforms/hermes.ts b/src/api/datasets/platforms/hermes.ts new file mode 100644 index 0000000..9b970da --- /dev/null +++ b/src/api/datasets/platforms/hermes.ts @@ -0,0 
+1,6 @@ +import { BaseDataset } from '../base'; + +export class HermesProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7sn8rz1g95zt4lwk'; + readonly name = 'hermes_products'; +} diff --git a/src/api/datasets/platforms/hm.ts b/src/api/datasets/platforms/hm.ts new file mode 100644 index 0000000..a910f0e --- /dev/null +++ b/src/api/datasets/platforms/hm.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class HmProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lebec5ir293umvxh5g'; + readonly name = 'hm_products'; +} diff --git a/src/api/datasets/platforms/home_depot.ts b/src/api/datasets/platforms/home_depot.ts new file mode 100644 index 0000000..1f5dbaf --- /dev/null +++ b/src/api/datasets/platforms/home_depot.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class HomeDepotCaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lmyvvktscoojdor83'; + readonly name = 'homedepot_ca_products'; +} + +export class HomeDepotUsProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lmusivh019i7g97q2n'; + readonly name = 'homedepot_us_products'; +} diff --git a/src/api/datasets/platforms/ikea.ts b/src/api/datasets/platforms/ikea.ts new file mode 100644 index 0000000..92d3495 --- /dev/null +++ b/src/api/datasets/platforms/ikea.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class IkeaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_le2lfu10qrjmrqo60'; + readonly name = 'ikea_products'; +} diff --git a/src/api/datasets/platforms/imdb.ts b/src/api/datasets/platforms/imdb.ts new file mode 100644 index 0000000..9540505 --- /dev/null +++ b/src/api/datasets/platforms/imdb.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ImdbMoviesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vikf2h1a4t6x8qzu'; + readonly name = 'imdb_movies'; +} diff --git a/src/api/datasets/platforms/indeed.ts b/src/api/datasets/platforms/indeed.ts 
new file mode 100644 index 0000000..5c25e83 --- /dev/null +++ b/src/api/datasets/platforms/indeed.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class IndeedCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l7qekxkv2i7ve6hx1s'; + readonly name = 'indeed_companies'; +} + +export class IndeedJobsDataset extends BaseDataset { + readonly datasetId = 'gd_l4dx9j9sscpvs7no2'; + readonly name = 'indeed_jobs'; +} diff --git a/src/api/datasets/platforms/infocasas.ts b/src/api/datasets/platforms/infocasas.ts new file mode 100644 index 0000000..77c6801 --- /dev/null +++ b/src/api/datasets/platforms/infocasas.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class InfocasasUruguayDataset extends BaseDataset { + readonly datasetId = 'gd_lftpmbga1jwon80ddh'; + readonly name = 'infocasas_uruguay'; +} diff --git a/src/api/datasets/platforms/inmuebles24.ts b/src/api/datasets/platforms/inmuebles24.ts new file mode 100644 index 0000000..5c0fe1e --- /dev/null +++ b/src/api/datasets/platforms/inmuebles24.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class Inmuebles24MexicoDataset extends BaseDataset { + readonly datasetId = 'gd_lfsa1vgv183347v45m'; + readonly name = 'inmuebles24_mexico'; +} diff --git a/src/api/datasets/platforms/kroger.ts b/src/api/datasets/platforms/kroger.ts new file mode 100644 index 0000000..1695484 --- /dev/null +++ b/src/api/datasets/platforms/kroger.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class KrogerProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mhlod8vh2kwgoi9yw3'; + readonly name = 'kroger_products'; +} diff --git a/src/api/datasets/platforms/lazada.ts b/src/api/datasets/platforms/lazada.ts new file mode 100644 index 0000000..bb934b5 --- /dev/null +++ b/src/api/datasets/platforms/lazada.ts @@ -0,0 +1,16 @@ +import { BaseDataset } from '../base'; + +export class LazadaProductsDataset extends BaseDataset { + readonly datasetId = 
'gd_lk14r4zxuiw2uxpk6'; + readonly name = 'lazada_products'; +} + +export class LazadaProductsSearchDataset extends BaseDataset { + readonly datasetId = 'gd_lwd9icor28eg4srnxi'; + readonly name = 'lazada_products_search'; +} + +export class LazadaReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_lub6mys21lzcklkq1z'; + readonly name = 'lazada_reviews'; +} diff --git a/src/api/datasets/platforms/lazboy.ts b/src/api/datasets/platforms/lazboy.ts new file mode 100644 index 0000000..48b2aca --- /dev/null +++ b/src/api/datasets/platforms/lazboy.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class LaZBoyProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lg0nhuxkvxagfannn'; + readonly name = 'lazboy_products'; +} diff --git a/src/api/datasets/platforms/lego.ts b/src/api/datasets/platforms/lego.ts new file mode 100644 index 0000000..70a68e9 --- /dev/null +++ b/src/api/datasets/platforms/lego.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class LegoProductsDataset extends BaseDataset { + readonly datasetId = 'gd_leenwt162rg85apy87'; + readonly name = 'lego_products'; +} diff --git a/src/api/datasets/platforms/linkedin.ts b/src/api/datasets/platforms/linkedin.ts index 072a7ef..bbb7e51 100644 --- a/src/api/datasets/platforms/linkedin.ts +++ b/src/api/datasets/platforms/linkedin.ts @@ -9,3 +9,18 @@ export class LinkedinCompaniesDataset extends BaseDataset { readonly datasetId = 'gd_l1vikfnt1wgvvqz95w'; readonly name = 'linkedin_companies'; } + +export class LinkedinJobListingsDataset extends BaseDataset { + readonly datasetId = 'gd_lpfll7v5hcqtkxl6l'; + readonly name = 'linkedin_job_listings'; +} + +export class LinkedinPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lyy3tktm25m4avu764'; + readonly name = 'linkedin_posts'; +} + +export class LinkedinProfilesJobListingsDataset extends BaseDataset { + readonly datasetId = 'gd_m487ihp32jtc4ujg45'; + readonly name = 
'linkedin_profiles_job_listings'; +} diff --git a/src/api/datasets/platforms/llbean.ts b/src/api/datasets/platforms/llbean.ts new file mode 100644 index 0000000..39d157c --- /dev/null +++ b/src/api/datasets/platforms/llbean.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class LlBeanProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lemtwv4s1mglzlzh57'; + readonly name = 'llbean_products'; +} diff --git a/src/api/datasets/platforms/loewe.ts b/src/api/datasets/platforms/loewe.ts new file mode 100644 index 0000000..5630278 --- /dev/null +++ b/src/api/datasets/platforms/loewe.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class LoeweProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7rkj4wwka9q19t'; + readonly name = 'loewe_products'; +} diff --git a/src/api/datasets/platforms/lowes.ts b/src/api/datasets/platforms/lowes.ts new file mode 100644 index 0000000..8cc16be --- /dev/null +++ b/src/api/datasets/platforms/lowes.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class LowesProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lnvl79pfftqh18u2o'; + readonly name = 'lowes_products'; +} diff --git a/src/api/datasets/platforms/macys.ts b/src/api/datasets/platforms/macys.ts new file mode 100644 index 0000000..50a80df --- /dev/null +++ b/src/api/datasets/platforms/macys.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MacysProductsDataset extends BaseDataset { + readonly datasetId = 'gd_miebqh4a18ivg65bpa'; + readonly name = 'macys_products'; +} diff --git a/src/api/datasets/platforms/mango.ts b/src/api/datasets/platforms/mango.ts new file mode 100644 index 0000000..7e4cfb2 --- /dev/null +++ b/src/api/datasets/platforms/mango.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MangoProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lcyua5iy1go06own9d'; + readonly name = 'mango_products'; +} diff --git 
a/src/api/datasets/platforms/manta.ts b/src/api/datasets/platforms/manta.ts new file mode 100644 index 0000000..c6b969e --- /dev/null +++ b/src/api/datasets/platforms/manta.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MantaBusinessesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vil1d81g0u8763b2'; + readonly name = 'manta_businesses'; +} diff --git a/src/api/datasets/platforms/massimo_dutti.ts b/src/api/datasets/platforms/massimo_dutti.ts new file mode 100644 index 0000000..725e1fe --- /dev/null +++ b/src/api/datasets/platforms/massimo_dutti.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MassimoDuttiProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lcxf9r252p7e46ul5b'; + readonly name = 'massimo_dutti_products'; +} diff --git a/src/api/datasets/platforms/mattress_firm.ts b/src/api/datasets/platforms/mattress_firm.ts new file mode 100644 index 0000000..cc3f181 --- /dev/null +++ b/src/api/datasets/platforms/mattress_firm.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MattressFirmProductsDataset extends BaseDataset { + readonly datasetId = 'gd_legw5t6c2bvw9d7e4k'; + readonly name = 'mattressfirm_products'; +} diff --git a/src/api/datasets/platforms/mediamarkt.ts b/src/api/datasets/platforms/mediamarkt.ts new file mode 100644 index 0000000..64b630d --- /dev/null +++ b/src/api/datasets/platforms/mediamarkt.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MediamarktProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lbl2lo6y11m37z3gwq'; + readonly name = 'mediamarkt_products'; +} diff --git a/src/api/datasets/platforms/mercadolivre.ts b/src/api/datasets/platforms/mercadolivre.ts new file mode 100644 index 0000000..d169b1e --- /dev/null +++ b/src/api/datasets/platforms/mercadolivre.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MercadolivreProductsDataset extends BaseDataset { + readonly 
datasetId = 'gd_m7re62tb1w88ymy86r'; + readonly name = 'mercadolivre_products'; +} diff --git a/src/api/datasets/platforms/metrocuadrado.ts b/src/api/datasets/platforms/metrocuadrado.ts new file mode 100644 index 0000000..ba897eb --- /dev/null +++ b/src/api/datasets/platforms/metrocuadrado.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MetrocuadradoPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_lfsblgpf2oq16yrbny'; + readonly name = 'metrocuadrado_properties'; +} diff --git a/src/api/datasets/platforms/microcenter.ts b/src/api/datasets/platforms/microcenter.ts new file mode 100644 index 0000000..5fc0b79 --- /dev/null +++ b/src/api/datasets/platforms/microcenter.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MicroCenterProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mkckexq2uquhupguv'; + readonly name = 'microcenter_products'; +} diff --git a/src/api/datasets/platforms/montblanc.ts b/src/api/datasets/platforms/montblanc.ts new file mode 100644 index 0000000..f3af597 --- /dev/null +++ b/src/api/datasets/platforms/montblanc.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MontblancProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lhahz3n9dr6srx4cm'; + readonly name = 'montblanc_products'; +} diff --git a/src/api/datasets/platforms/mouser.ts b/src/api/datasets/platforms/mouser.ts new file mode 100644 index 0000000..1cbdf04 --- /dev/null +++ b/src/api/datasets/platforms/mouser.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MouserProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lfjty8942ogxzhmp8t'; + readonly name = 'mouser_products'; +} diff --git a/src/api/datasets/platforms/moynat.ts b/src/api/datasets/platforms/moynat.ts new file mode 100644 index 0000000..3b2b22b --- /dev/null +++ b/src/api/datasets/platforms/moynat.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class 
MoynatProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lh7rh0d12qkaid87e1'; + readonly name = 'moynat_products'; +} diff --git a/src/api/datasets/platforms/mybobs.ts b/src/api/datasets/platforms/mybobs.ts new file mode 100644 index 0000000..bfaee9f --- /dev/null +++ b/src/api/datasets/platforms/mybobs.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MybobsProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lf14k1zw1l3zcxs9m4'; + readonly name = 'mybobs_products'; +} diff --git a/src/api/datasets/platforms/myntra.ts b/src/api/datasets/platforms/myntra.ts new file mode 100644 index 0000000..c677aba --- /dev/null +++ b/src/api/datasets/platforms/myntra.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class MyntraProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lptvxr8b1qx1d9thgp'; + readonly name = 'myntra_products'; +} diff --git a/src/api/datasets/platforms/naver.ts b/src/api/datasets/platforms/naver.ts new file mode 100644 index 0000000..ebad597 --- /dev/null +++ b/src/api/datasets/platforms/naver.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class NaverProductsDataset extends BaseDataset { + readonly datasetId = 'gd_m9qqjxxr1hab7okefj'; + readonly name = 'naver_products'; +} diff --git a/src/api/datasets/platforms/nba.ts b/src/api/datasets/platforms/nba.ts new file mode 100644 index 0000000..05c1675 --- /dev/null +++ b/src/api/datasets/platforms/nba.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class NbaPlayersStatsDataset extends BaseDataset { + readonly datasetId = 'gd_lrqirmftwxxatiorf'; + readonly name = 'nba_players_stats'; +} diff --git a/src/api/datasets/platforms/olx.ts b/src/api/datasets/platforms/olx.ts new file mode 100644 index 0000000..4473223 --- /dev/null +++ b/src/api/datasets/platforms/olx.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class OlxBrazilDataset extends BaseDataset { + readonly 
datasetId = 'gd_lguvsr0wp4rx7fjfo'; + readonly name = 'olx_brazil'; +} diff --git a/src/api/datasets/platforms/otodom.ts b/src/api/datasets/platforms/otodom.ts new file mode 100644 index 0000000..895aeb9 --- /dev/null +++ b/src/api/datasets/platforms/otodom.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class OtodomPolandDataset extends BaseDataset { + readonly datasetId = 'gd_ld739mwou49s5y9ko'; + readonly name = 'otodom_poland'; +} diff --git a/src/api/datasets/platforms/owler.ts b/src/api/datasets/platforms/owler.ts new file mode 100644 index 0000000..1cce1cc --- /dev/null +++ b/src/api/datasets/platforms/owler.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class OwlerCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vilaxi10wutoage7'; + readonly name = 'owler_companies'; +} diff --git a/src/api/datasets/platforms/ozon.ts b/src/api/datasets/platforms/ozon.ts new file mode 100644 index 0000000..2df02d9 --- /dev/null +++ b/src/api/datasets/platforms/ozon.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class OzonProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lutq85sl13rlndbzai'; + readonly name = 'ozon_products'; +} diff --git a/src/api/datasets/platforms/pinterest.ts b/src/api/datasets/platforms/pinterest.ts new file mode 100644 index 0000000..87927b7 --- /dev/null +++ b/src/api/datasets/platforms/pinterest.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class PinterestPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lk0sjs4d21kdr7cnlv'; + readonly name = 'pinterest_posts'; +} + +export class PinterestProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_lk0zv93c2m9qdph46z'; + readonly name = 'pinterest_profiles'; +} diff --git a/src/api/datasets/platforms/pitchbook.ts b/src/api/datasets/platforms/pitchbook.ts new file mode 100644 index 0000000..9ff8709 --- /dev/null +++ b/src/api/datasets/platforms/pitchbook.ts @@ -0,0 
+1,6 @@ +import { BaseDataset } from '../base'; + +export class PitchBookCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_m4ijiqfp2n9oe3oluj'; + readonly name = 'pitchbook_companies'; +} diff --git a/src/api/datasets/platforms/prada.ts b/src/api/datasets/platforms/prada.ts new file mode 100644 index 0000000..81ae02d --- /dev/null +++ b/src/api/datasets/platforms/prada.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class PradaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lhahqiq52egng5v35i'; + readonly name = 'prada_products'; +} diff --git a/src/api/datasets/platforms/properati.ts b/src/api/datasets/platforms/properati.ts new file mode 100644 index 0000000..35bf30c --- /dev/null +++ b/src/api/datasets/platforms/properati.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ProperatiPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_lg3nvn6ibrhbotstw'; + readonly name = 'properati_properties'; +} diff --git a/src/api/datasets/platforms/quora.ts b/src/api/datasets/platforms/quora.ts new file mode 100644 index 0000000..96a5ce9 --- /dev/null +++ b/src/api/datasets/platforms/quora.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class QuoraPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lvz1rbj81afv3m6n5y'; + readonly name = 'quora_posts'; +} diff --git a/src/api/datasets/platforms/raymour_flanigan.ts b/src/api/datasets/platforms/raymour_flanigan.ts new file mode 100644 index 0000000..919d4d8 --- /dev/null +++ b/src/api/datasets/platforms/raymour_flanigan.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class RaymourFlaniganProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lf8cwb8wxoiqarizb'; + readonly name = 'raymourflanigan_products'; +} diff --git a/src/api/datasets/platforms/realtor.ts b/src/api/datasets/platforms/realtor.ts new file mode 100644 index 0000000..8fe667a --- /dev/null +++ 
b/src/api/datasets/platforms/realtor.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class RealtorInternationalDataset extends BaseDataset { + readonly datasetId = 'gd_m517agnc1jppzwgtmw'; + readonly name = 'realtor_international_properties'; +} diff --git a/src/api/datasets/platforms/reddit.ts b/src/api/datasets/platforms/reddit.ts new file mode 100644 index 0000000..86c6b2e --- /dev/null +++ b/src/api/datasets/platforms/reddit.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class RedditCommentsDataset extends BaseDataset { + readonly datasetId = 'gd_lvzdpsdlw09j6t702'; + readonly name = 'reddit_comments'; +} + +export class RedditPostsDataset extends BaseDataset { + readonly datasetId = 'gd_lvz8ah06191smkebj4'; + readonly name = 'reddit_posts'; +} diff --git a/src/api/datasets/platforms/rona.ts b/src/api/datasets/platforms/rona.ts new file mode 100644 index 0000000..b7128f8 --- /dev/null +++ b/src/api/datasets/platforms/rona.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class RonaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_mf53alwg1a35fv69pw'; + readonly name = 'rona_products'; +} diff --git a/src/api/datasets/platforms/sephora.ts b/src/api/datasets/platforms/sephora.ts new file mode 100644 index 0000000..3e4baab --- /dev/null +++ b/src/api/datasets/platforms/sephora.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class SephoraProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lbz49igcthopwaygd'; + readonly name = 'sephora_products'; +} diff --git a/src/api/datasets/platforms/shein.ts b/src/api/datasets/platforms/shein.ts new file mode 100644 index 0000000..2277239 --- /dev/null +++ b/src/api/datasets/platforms/shein.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class SheinProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lemu5ceq1jxjo7vzit'; + readonly name = 'shein_products'; +} diff --git 
a/src/api/datasets/platforms/shopee.ts b/src/api/datasets/platforms/shopee.ts new file mode 100644 index 0000000..2b6bc96 --- /dev/null +++ b/src/api/datasets/platforms/shopee.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ShopeeProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lk122xxgf86xf97py'; + readonly name = 'shopee_products'; +} diff --git a/src/api/datasets/platforms/sleep_number.ts b/src/api/datasets/platforms/sleep_number.ts new file mode 100644 index 0000000..59fb548 --- /dev/null +++ b/src/api/datasets/platforms/sleep_number.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class SleepNumberProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lf8ctgxj1dpkzvl862'; + readonly name = 'sleepnumber_products'; +} diff --git a/src/api/datasets/platforms/slintel.ts b/src/api/datasets/platforms/slintel.ts new file mode 100644 index 0000000..f93247a --- /dev/null +++ b/src/api/datasets/platforms/slintel.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class SlintelCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vilg5a1decoahvgq'; + readonly name = 'slintel_companies'; +} diff --git a/src/api/datasets/platforms/snapchat.ts b/src/api/datasets/platforms/snapchat.ts new file mode 100644 index 0000000..101528c --- /dev/null +++ b/src/api/datasets/platforms/snapchat.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class SnapchatPostsDataset extends BaseDataset { + readonly datasetId = 'gd_ma0ydx431w6stl16ge'; + readonly name = 'snapchat_posts'; +} diff --git a/src/api/datasets/platforms/target.ts b/src/api/datasets/platforms/target.ts new file mode 100644 index 0000000..dd28a0f --- /dev/null +++ b/src/api/datasets/platforms/target.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class TargetProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltppk5mx2lp0v1k0vo'; + readonly name = 'target_products'; +} 
diff --git a/src/api/datasets/platforms/toctoc.ts b/src/api/datasets/platforms/toctoc.ts new file mode 100644 index 0000000..c4dc3dc --- /dev/null +++ b/src/api/datasets/platforms/toctoc.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ToctocPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_lgfdx3l01behlrboh7'; + readonly name = 'toctoc_properties'; +} diff --git a/src/api/datasets/platforms/tokopedia.ts b/src/api/datasets/platforms/tokopedia.ts new file mode 100644 index 0000000..08cbefb --- /dev/null +++ b/src/api/datasets/platforms/tokopedia.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class TokopediaProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lxk24yba297r8qd3tp'; + readonly name = 'tokopedia_products'; +} diff --git a/src/api/datasets/platforms/toysrus.ts b/src/api/datasets/platforms/toysrus.ts new file mode 100644 index 0000000..811b639 --- /dev/null +++ b/src/api/datasets/platforms/toysrus.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ToysRUsProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lemuapao1lkjggvn05'; + readonly name = 'toysrus_products'; +} diff --git a/src/api/datasets/platforms/trustpilot.ts b/src/api/datasets/platforms/trustpilot.ts new file mode 100644 index 0000000..a7c52b5 --- /dev/null +++ b/src/api/datasets/platforms/trustpilot.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class TrustpilotReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_lm5zmhwd2sni130p'; + readonly name = 'trustpilot_reviews'; +} diff --git a/src/api/datasets/platforms/trustradius.ts b/src/api/datasets/platforms/trustradius.ts new file mode 100644 index 0000000..a7597e8 --- /dev/null +++ b/src/api/datasets/platforms/trustradius.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class TrustRadiusReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_lztojazw1389985ops'; + readonly 
name = 'trustradius_reviews'; +} diff --git a/src/api/datasets/platforms/us_lawyers.ts b/src/api/datasets/platforms/us_lawyers.ts new file mode 100644 index 0000000..8f78fcf --- /dev/null +++ b/src/api/datasets/platforms/us_lawyers.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class UsLawyersDataset extends BaseDataset { + readonly datasetId = 'gd_l1vil5n11okchcbvax'; + readonly name = 'us_lawyers'; +} diff --git a/src/api/datasets/platforms/ventureradar.ts b/src/api/datasets/platforms/ventureradar.ts new file mode 100644 index 0000000..59c580c --- /dev/null +++ b/src/api/datasets/platforms/ventureradar.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class VentureRadarCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_l1vilsfd1xpsndbtpr'; + readonly name = 'ventureradar_companies'; +} diff --git a/src/api/datasets/platforms/vimeo.ts b/src/api/datasets/platforms/vimeo.ts new file mode 100644 index 0000000..edd8a16 --- /dev/null +++ b/src/api/datasets/platforms/vimeo.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class VimeoVideosDataset extends BaseDataset { + readonly datasetId = 'gd_lxk88z3v1ketji4pn'; + readonly name = 'vimeo_videos'; +} diff --git a/src/api/datasets/platforms/walmart.ts b/src/api/datasets/platforms/walmart.ts new file mode 100644 index 0000000..f02b90d --- /dev/null +++ b/src/api/datasets/platforms/walmart.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class WalmartProductsDataset extends BaseDataset { + readonly datasetId = 'gd_l95fol7l1ru6rlo116'; + readonly name = 'walmart_products'; +} + +export class WalmartSellersInfoDataset extends BaseDataset { + readonly datasetId = 'gd_m7ke48w81ocyu4hhz0'; + readonly name = 'walmart_sellers_info'; +} diff --git a/src/api/datasets/platforms/wayfair.ts b/src/api/datasets/platforms/wayfair.ts new file mode 100644 index 0000000..a4391fe --- /dev/null +++ b/src/api/datasets/platforms/wayfair.ts @@ -0,0 
+1,6 @@ +import { BaseDataset } from '../base'; + +export class WayfairProductsDataset extends BaseDataset { + readonly datasetId = 'gd_ltr9ne3p24zrhrbu28'; + readonly name = 'wayfair_products'; +} diff --git a/src/api/datasets/platforms/webmotors.ts b/src/api/datasets/platforms/webmotors.ts new file mode 100644 index 0000000..25bf5aa --- /dev/null +++ b/src/api/datasets/platforms/webmotors.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class WebmotorsBrasilDataset extends BaseDataset { + readonly datasetId = 'gd_ld73zt91j10sphddj'; + readonly name = 'webmotors_brasil'; +} diff --git a/src/api/datasets/platforms/wikipedia.ts b/src/api/datasets/platforms/wikipedia.ts new file mode 100644 index 0000000..4f29d31 --- /dev/null +++ b/src/api/datasets/platforms/wikipedia.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class WikipediaArticlesDataset extends BaseDataset { + readonly datasetId = 'gd_lr9978962kkjr3nx49'; + readonly name = 'wikipedia_articles'; +} diff --git a/src/api/datasets/platforms/wildberries.ts b/src/api/datasets/platforms/wildberries.ts new file mode 100644 index 0000000..5bc7d5a --- /dev/null +++ b/src/api/datasets/platforms/wildberries.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class WildberriesProductsDataset extends BaseDataset { + readonly datasetId = 'gd_luz4fboh2dicd27hhm'; + readonly name = 'wildberries_products'; +} diff --git a/src/api/datasets/platforms/world_data.ts b/src/api/datasets/platforms/world_data.ts new file mode 100644 index 0000000..69a0307 --- /dev/null +++ b/src/api/datasets/platforms/world_data.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class WorldPopulationDataset extends BaseDataset { + readonly datasetId = 'gd_lrqeq7u3bil0pmelk'; + readonly name = 'world_population'; +} + +export class WorldZipcodesDataset extends BaseDataset { + readonly datasetId = 'gd_licvqc95ta2552qxu'; + readonly name = 'world_zipcodes'; +} diff --git 
a/src/api/datasets/platforms/xing.ts b/src/api/datasets/platforms/xing.ts new file mode 100644 index 0000000..9d61ff7 --- /dev/null +++ b/src/api/datasets/platforms/xing.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class XingProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_l3lh4ev31oqrvvblv6'; + readonly name = 'xing_profiles'; +} diff --git a/src/api/datasets/platforms/yahoo_finance.ts b/src/api/datasets/platforms/yahoo_finance.ts new file mode 100644 index 0000000..8fc55de --- /dev/null +++ b/src/api/datasets/platforms/yahoo_finance.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class YahooFinanceBusinessesDataset extends BaseDataset { + readonly datasetId = 'gd_lmrpz3vxmz972ghd7'; + readonly name = 'yahoo_finance_businesses'; +} diff --git a/src/api/datasets/platforms/yapo.ts b/src/api/datasets/platforms/yapo.ts new file mode 100644 index 0000000..417f132 --- /dev/null +++ b/src/api/datasets/platforms/yapo.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class YapoChileDataset extends BaseDataset { + readonly datasetId = 'gd_lgfcz12mk6og7lvhs'; + readonly name = 'yapo_chile'; +} diff --git a/src/api/datasets/platforms/yelp.ts b/src/api/datasets/platforms/yelp.ts new file mode 100644 index 0000000..f67ffd9 --- /dev/null +++ b/src/api/datasets/platforms/yelp.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class YelpBusinessesDataset extends BaseDataset { + readonly datasetId = 'gd_lgugwl0519h1p14rwk'; + readonly name = 'yelp_businesses'; +} + +export class YelpReviewsDataset extends BaseDataset { + readonly datasetId = 'gd_lgzhlu9323u3k24jkv'; + readonly name = 'yelp_reviews'; +} diff --git a/src/api/datasets/platforms/youtube.ts b/src/api/datasets/platforms/youtube.ts new file mode 100644 index 0000000..1aa5636 --- /dev/null +++ b/src/api/datasets/platforms/youtube.ts @@ -0,0 +1,16 @@ +import { BaseDataset } from '../base'; + +export class 
YoutubeCommentsDataset extends BaseDataset { + readonly datasetId = 'gd_lk9q0ew71spt1mxywf'; + readonly name = 'youtube_comments'; +} + +export class YoutubeProfilesDataset extends BaseDataset { + readonly datasetId = 'gd_lk538t2k2p1k3oos71'; + readonly name = 'youtube_profiles'; +} + +export class YoutubeVideosDataset extends BaseDataset { + readonly datasetId = 'gd_lk56epmy2i5g7lzu0k'; + readonly name = 'youtube_videos'; +} diff --git a/src/api/datasets/platforms/ysl.ts b/src/api/datasets/platforms/ysl.ts new file mode 100644 index 0000000..8c6464f --- /dev/null +++ b/src/api/datasets/platforms/ysl.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class YslProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lhai2io04wilkad5z'; + readonly name = 'ysl_products'; +} diff --git a/src/api/datasets/platforms/zalando.ts b/src/api/datasets/platforms/zalando.ts new file mode 100644 index 0000000..b17e8f6 --- /dev/null +++ b/src/api/datasets/platforms/zalando.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ZalandoProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lbqj6l5s28ofha6mlk'; + readonly name = 'zalando_products'; +} diff --git a/src/api/datasets/platforms/zara.ts b/src/api/datasets/platforms/zara.ts new file mode 100644 index 0000000..906b954 --- /dev/null +++ b/src/api/datasets/platforms/zara.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + +export class ZaraProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lct4vafw1tgx27d4o0'; + readonly name = 'zara_products'; +} + +export class ZaraHomeProductsDataset extends BaseDataset { + readonly datasetId = 'gd_lcx5utgek9mxrsiie'; + readonly name = 'zara_home_products'; +} diff --git a/src/api/datasets/platforms/zillow.ts b/src/api/datasets/platforms/zillow.ts new file mode 100644 index 0000000..fc3d21b --- /dev/null +++ b/src/api/datasets/platforms/zillow.ts @@ -0,0 +1,11 @@ +import { BaseDataset } from '../base'; + 
+export class ZillowPriceHistoryDataset extends BaseDataset { + readonly datasetId = 'gd_lxu1cz9r88uiqsosl'; + readonly name = 'zillow_price_history'; +} + +export class ZillowPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_lfqkr8wm13ixtbd8f5'; + readonly name = 'zillow_properties'; +} diff --git a/src/api/datasets/platforms/zonaprop.ts b/src/api/datasets/platforms/zonaprop.ts new file mode 100644 index 0000000..63b674c --- /dev/null +++ b/src/api/datasets/platforms/zonaprop.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ZonapropArgentinaDataset extends BaseDataset { + readonly datasetId = 'gd_lfsbhfgo2bglgrecm6'; + readonly name = 'zonaprop_argentina'; +} diff --git a/src/api/datasets/platforms/zoominfo.ts b/src/api/datasets/platforms/zoominfo.ts new file mode 100644 index 0000000..fc2e823 --- /dev/null +++ b/src/api/datasets/platforms/zoominfo.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ZoomInfoCompaniesDataset extends BaseDataset { + readonly datasetId = 'gd_m0ci4a4ivx3j5l6nx'; + readonly name = 'zoominfo_companies'; +} diff --git a/src/api/datasets/platforms/zoopla.ts b/src/api/datasets/platforms/zoopla.ts new file mode 100644 index 0000000..7a45d54 --- /dev/null +++ b/src/api/datasets/platforms/zoopla.ts @@ -0,0 +1,6 @@ +import { BaseDataset } from '../base'; + +export class ZooplaPropertiesDataset extends BaseDataset { + readonly datasetId = 'gd_lnabksndfp1pegwzh'; + readonly name = 'zoopla_properties'; +} diff --git a/src/api/discover/index.ts b/src/api/discover/index.ts new file mode 100644 index 0000000..b1def2b --- /dev/null +++ b/src/api/discover/index.ts @@ -0,0 +1,5 @@ +export { DiscoverService } from './service'; +export { DiscoverResult } from './result'; +export { DiscoverJob } from './job'; +export type { DiscoverResultItem, DiscoverResultFields } from './result'; +export type { DiscoverPollOptions } from './job'; diff --git a/src/api/discover/job.ts 
b/src/api/discover/job.ts new file mode 100644 index 0000000..fc53036 --- /dev/null +++ b/src/api/discover/job.ts @@ -0,0 +1,142 @@ +import { getLogger } from '../../utils/logger'; +import { sleep } from '../../utils/misc'; +import { TimeoutError, APIError } from '../../utils/errors'; +import { Deadline } from '../../utils/deadline'; +import { DiscoverResult, type DiscoverResultItem } from './result'; +import type { DiscoverOperations } from '../../types/discover'; + +const logger = getLogger('discover.job'); + +export interface DiscoverPollOptions { + /** Maximum milliseconds to wait (default: 60_000) */ + timeout?: number; + /** Milliseconds between status checks (default: 2_000) */ + pollInterval?: number; +} + +export class DiscoverJob { + readonly taskId: string; + readonly query: string; + readonly intent: string | null; + readonly triggeredAt: Date; + + private readonly ops: DiscoverOperations; + private cachedStatus: string | null = null; + private cachedResults: unknown[] | null = null; + private cachedDuration: number | null = null; + + constructor( + taskId: string, + ops: DiscoverOperations, + options?: { query?: string; intent?: string }, + ) { + this.taskId = taskId; + this.ops = ops; + this.query = options?.query ?? ''; + this.intent = options?.intent ?? null; + this.triggeredAt = new Date(); + } + + toString(): string { + return ``; + } + + async status(refresh = true): Promise { + if (!refresh && this.cachedStatus) { + return this.cachedStatus; + } + const response = await this.ops.pollOnce(this.taskId); + this.cachedStatus = response.status; + if (response.results) this.cachedResults = response.results; + if (response.duration_seconds != null) this.cachedDuration = response.duration_seconds; + return this.cachedStatus; + } + + async wait(options?: DiscoverPollOptions): Promise { + const interval = options?.pollInterval ?? 2_000; + const deadline = new Deadline(options?.timeout ?? 
60_000); + + logger.debug(`${this}: waiting for completion`); + + for (;;) { + if (deadline.expired) { + throw new TimeoutError( + `Discover task timed out after ${Math.round(deadline.elapsed / 1000)}s for ${this.taskId}`, + ); + } + + const response = await this.ops.pollOnce(this.taskId); + this.cachedStatus = response.status; + + logger.debug(`${this}: status=${response.status} elapsed=${Math.round(deadline.elapsed / 1000)}s`); + + if (response.status === 'done') { + if (response.results) this.cachedResults = response.results; + if (response.duration_seconds != null) this.cachedDuration = response.duration_seconds; + return 'done'; + } + + if (response.status === 'error' || response.status === 'failed') { + throw new APIError( + `Discover task ${this.taskId} failed with status: ${response.status}`, + ); + } + + await sleep(interval); + } + } + + async fetch(): Promise { + if (this.cachedResults) { + return this.cachedResults as DiscoverResultItem[]; + } + const response = await this.ops.pollOnce(this.taskId); + this.cachedStatus = response.status; + if (response.results) this.cachedResults = response.results; + if (response.duration_seconds != null) this.cachedDuration = response.duration_seconds; + return (response.results ?? 
[]) as DiscoverResultItem[]; + } + + async toResult(options?: DiscoverPollOptions): Promise { + logger.debug(`${this}: starting toResult()`); + + try { + await this.wait(options); + } catch (e: unknown) { + return new DiscoverResult({ + success: false, + error: (e as Error).message, + query: this.query, + intent: this.intent, + taskId: this.taskId, + triggerSentAt: this.triggeredAt, + dataFetchedAt: new Date(), + }); + } + + try { + const data = await this.fetch(); + return new DiscoverResult({ + success: true, + data, + query: this.query, + intent: this.intent, + taskId: this.taskId, + durationSeconds: this.cachedDuration, + totalResults: data.length, + triggerSentAt: this.triggeredAt, + dataFetchedAt: new Date(), + }); + } catch (e: unknown) { + return new DiscoverResult({ + success: false, + error: (e as Error).message, + query: this.query, + intent: this.intent, + taskId: this.taskId, + triggerSentAt: this.triggeredAt, + dataFetchedAt: new Date(), + }); + } + } +} diff --git a/src/api/discover/result.ts b/src/api/discover/result.ts new file mode 100644 index 0000000..6ca1a37 --- /dev/null +++ b/src/api/discover/result.ts @@ -0,0 +1,53 @@ +import { BaseResult, type BaseResultFields } from '../../models/result'; + +export interface DiscoverResultItem { + link: string; + title: string; + description: string; + relevance_score: number; + content?: string; +} + +export interface DiscoverResultFields extends BaseResultFields { + query: string; + intent?: string | null; + durationSeconds?: number | null; + totalResults?: number | null; + taskId?: string | null; +} + +export class DiscoverResult extends BaseResult { + readonly query: string; + readonly intent: string | null; + readonly durationSeconds: number | null; + readonly totalResults: number | null; + readonly taskId: string | null; + + constructor(fields: DiscoverResultFields) { + super(fields); + this.query = fields.query; + this.intent = fields.intent ?? 
null; + this.durationSeconds = fields.durationSeconds ?? null; + this.totalResults = fields.totalResults ?? null; + this.taskId = fields.taskId ?? null; + } + + override toJSON(): Record { + return { + ...super.toJSON(), + query: this.query, + intent: this.intent, + durationSeconds: this.durationSeconds, + totalResults: this.totalResults, + taskId: this.taskId, + }; + } + + override toString(): string { + const base = super.toString(); + const queryPreview = this.query.length > 50 + ? this.query.slice(0, 50) + '...' + : this.query; + return ``; + } +} diff --git a/src/api/discover/service.ts b/src/api/discover/service.ts new file mode 100644 index 0000000..8c65000 --- /dev/null +++ b/src/api/discover/service.ts @@ -0,0 +1,89 @@ +import type { z } from 'zod'; +import { Transport, assertResponse } from '../../core/transport'; +import { API_ENDPOINT } from '../../utils/constants'; +import { parseResponse } from '../../utils/misc'; +import { getLogger } from '../../utils/logger'; +import { assertSchema } from '../../schemas/utils'; +import { + DiscoverOptionsSchema, + DiscoverQuerySchema, + DiscoverTriggerResponseSchema, + DiscoverPollResponseSchema, + type DiscoverOptions, +} from '../../schemas/discover'; +import { DiscoverJob } from './job'; +import type { DiscoverResult } from './result'; + +const logger = getLogger('discover'); + +export class DiscoverService { + private transport: Transport; + + constructor(opts: { transport: Transport }) { + this.transport = opts.transport; + } + + async search(query: string, opts?: DiscoverOptions): Promise { + const safeQuery = assertSchema(DiscoverQuerySchema, query, 'discover.query'); + const safeOpts = assertSchema(DiscoverOptionsSchema, opts ?? 
{}, 'discover.options'); + + logger.info(`discover search: "${safeQuery}"`); + + const job = await this._trigger(safeQuery, safeOpts); + return job.toResult({ + timeout: safeOpts.timeout, + pollInterval: safeOpts.pollInterval, + }); + } + + async trigger(query: string, opts?: DiscoverOptions): Promise { + const safeQuery = assertSchema(DiscoverQuerySchema, query, 'discover.query'); + const safeOpts = assertSchema(DiscoverOptionsSchema, opts ?? {}, 'discover.options'); + + logger.info(`discover trigger: "${safeQuery}"`); + + return this._trigger(safeQuery, safeOpts); + } + + async pollOnce(taskId: string) { + const response = await this.transport.request(API_ENDPOINT.DISCOVER, { + method: 'GET', + query: { task_id: taskId } as Record, + }); + + const responseTxt = await assertResponse(response); + return parseResponse(responseTxt, DiscoverPollResponseSchema, 'discover (poll)'); + } + + private async _trigger( + query: string, + opts: z.output, + ): Promise { + const body: Record = { query }; + if (opts.intent) body.intent = opts.intent; + if (opts.includeContent) body.include_content = opts.includeContent; + if (opts.country) body.country = opts.country; + if (opts.city) body.city = opts.city; + if (opts.language) body.language = opts.language; + if (opts.filterKeywords) body.filter_keywords = opts.filterKeywords; + if (opts.numResults) body.num_results = opts.numResults; + if (opts.format) body.format = opts.format; + + const response = await this.transport.request(API_ENDPOINT.DISCOVER, { + method: 'POST', + body: JSON.stringify(body), + }); + + const responseTxt = await assertResponse(response); + const result = parseResponse( + responseTxt, + DiscoverTriggerResponseSchema, + 'discover (trigger)', + ); + + return new DiscoverJob(result.task_id, this, { + query, + intent: opts.intent, + }); + } +} diff --git a/src/api/scrape/base.ts b/src/api/scrape/base.ts index 05856bb..b3c2127 100644 --- a/src/api/scrape/base.ts +++ b/src/api/scrape/base.ts @@ -1,15 +1,14 @@ 
import { API_ENDPOINT } from '../../utils/constants'; import { getLogger } from '../../utils/logger'; -import { wrapAPIError } from '../../utils/error-utils'; import { Transport, assertResponse } from '../../core/transport'; -import { dropEmptyKeys, parseJSON } from '../../utils/misc'; +import { dropEmptyKeys, parseJSON, parseResponse } from '../../utils/misc'; +import { SnapshotMetaResponseSchema } from '../../schemas/responses'; import { ScrapeJob } from './job'; import type { DatasetOptions, OrchestrateOptions, UnknownRecord, SnapshotFormat, - SnapshotMeta, SnapshotOperations, } from '../../types/datasets'; import type { ScrapeResult } from '../../models/result'; @@ -121,38 +120,34 @@ export class BaseAPI { ? API_ENDPOINT.SCRAPE_ASYNC : API_ENDPOINT.SCRAPE_SYNC; - try { - const response = await this.transport.request(endpoint, { - method: 'POST', - query: this.#getRequestQuery( - datasetId, - opt, - ) as unknown as Record, - body: JSON.stringify(body), - }); + const response = await this.transport.request(endpoint, { + method: 'POST', + query: this.#getRequestQuery( + datasetId, + opt, + ) as unknown as Record, + body: JSON.stringify(body), + }); - const responseTxt = await assertResponse(response); - - if (opt.async || response.statusCode === 202) { - if (response.statusCode === 202 && !opt.async) { - this.logger.info( - 'request exceeded sync request timeout, converted to async', - ); - } - const meta = parseJSON(responseTxt); - return new ScrapeJob(meta.snapshot_id, this.snapshotOps!, { - platform: this.name, - }); - } + const responseTxt = await assertResponse(response); - if (opt.format === 'json') { - return parseJSON(responseTxt); + if (opt.async || response.statusCode === 202) { + if (response.statusCode === 202 && !opt.async) { + this.logger.info( + 'request exceeded sync request timeout, converted to async', + ); } + const meta = parseResponse(responseTxt, SnapshotMetaResponseSchema, 'datasets/v3/trigger'); + return new ScrapeJob(meta.snapshot_id, 
this.snapshotOps!, { + platform: this.name, + }); + } - return responseTxt; - } catch (e: unknown) { - wrapAPIError(e, 'scrape.run'); + if (opt.format === 'json') { + return parseJSON(responseTxt); } + + return responseTxt; } /** diff --git a/src/api/scrape/digikey.ts b/src/api/scrape/digikey.ts index 6c044b4..7f45dc1 100644 --- a/src/api/scrape/digikey.ts +++ b/src/api/scrape/digikey.ts @@ -1,5 +1,6 @@ import type { DatasetOptions, + DiscoverOptions, OrchestrateOptions, UnknownRecord, } from '../../types/datasets'; @@ -48,4 +49,26 @@ export class DigikeyAPI extends BaseAPI { const [safeInput] = assertInput(input, {}, 'products'); return this.orchestrate(safeInput, DATASET_ID.PRODUCT, opts); } + /** + * Discover DigiKey products by category URL. + * @param input - an array of category URLs + * @param opt - discover options to control the request behavior + * @returns a promise that resolves with a ScrapeJob for the triggered snapshot (the request is always sent async) + */ + discoverByCategory(input: string[], opt: DiscoverOptions) { + this.logger.info( + `discoverByCategory for ${input.length} urls`, + ); + const [safeInput, safeOpt] = assertInput( + input, + opt, + 'discoverByCategory', + ); + return this.run(safeInput, DATASET_ID.PRODUCT, { + ...safeOpt, + async: true, + type: 'discover_new', + discoverBy: 'category', + }); + } } diff --git a/src/api/scrape/job.ts b/src/api/scrape/job.ts index 926c043..050fc74 100644 --- a/src/api/scrape/job.ts +++ b/src/api/scrape/job.ts @@ -1,4 +1,4 @@ -import { pollUntilReady, type PollOptions } from '../../utils/polling'; +import { pollUntilStatus, type PollOptions } from '../../utils/polling'; import { DataNotReadyError, TimeoutError } from '../../utils/errors'; import { sleep } from '../../utils/misc'; import { getLogger } from '../../utils/logger'; @@ -62,7 +62,7 @@ export class ScrapeJob { * @throws BRDError if job fails */ async wait(options?: PollOptions): Promise { - await pollUntilReady( + await pollUntilStatus( this.snapshotId, (id) => this.snapshotOps.getStatus(id), 
options, diff --git a/src/api/scrape/perplexity.ts b/src/api/scrape/perplexity.ts index 64f37bf..585c9e4 100644 --- a/src/api/scrape/perplexity.ts +++ b/src/api/scrape/perplexity.ts @@ -11,8 +11,7 @@ import { assertSchema } from '../../schemas/utils'; import { BaseAPI, type BaseAPIOptions } from './base'; const DATASET_ID = { - // TODO: confirm dataset ID from Bright Data API registry - SEARCH: 'gd_m0ci4ikq4icr52snty', + SEARCH: 'gd_m7dhdot1vw9a7gc1n', }; const assertInput = ( diff --git a/src/api/scrape/pinterest.ts b/src/api/scrape/pinterest.ts new file mode 100644 index 0000000..97ec4a5 --- /dev/null +++ b/src/api/scrape/pinterest.ts @@ -0,0 +1,66 @@ +import type { + DatasetOptions, + OrchestrateOptions, + UnknownRecord, +} from '../../types/datasets'; +import { + DatasetOptionsSchema, + DatasetMixedInputSchema, +} from '../../schemas/datasets'; +import { assertSchema } from '../../schemas/utils'; +import { BaseAPI, type BaseAPIOptions } from './base'; + +const DATASET_ID = { + POST: 'gd_lk0sjs4d21kdr7cnlv', + PROFILE: 'gd_lk0zv93c2m9qdph46z', +}; + +const assertInput = ( + input: UnknownRecord[] | string[], + opts: DatasetOptions = {}, + fn: string, +) => { + const prefix = `pinterest.${fn}: `; + return [ + assertSchema(DatasetMixedInputSchema, input, `${prefix}invalid input`), + assertSchema(DatasetOptionsSchema, opts, `${prefix}invalid options`), + ] as const; +}; + +export class PinterestAPI extends BaseAPI { + constructor(opts: BaseAPIOptions) { + super(opts); + this.name = 'pinterest'; + this.init(); + } + + collectPosts(input: string[], opt: DatasetOptions) { + this.logger.info(`collectPosts for ${input.length} urls`); + const [safeInput, safeOpt] = assertInput(input, opt, 'collectPosts'); + return this.run(safeInput, DATASET_ID.POST, safeOpt); + } + + collectProfiles(input: string[], opt: DatasetOptions) { + this.logger.info(`collectProfiles for ${input.length} urls`); + const [safeInput, safeOpt] = assertInput( + input, + opt, + 'collectProfiles', + ); 
+ return this.run(safeInput, DATASET_ID.PROFILE, safeOpt); + } + + async posts(input: string[], opts?: OrchestrateOptions) { + this.logger.info(`posts (orchestrated) for ${input.length} urls`); + const [safeInput] = assertInput(input, {}, 'posts'); + return this.orchestrate(safeInput, DATASET_ID.POST, opts); + } + + async profiles(input: string[], opts?: OrchestrateOptions) { + this.logger.info( + `profiles (orchestrated) for ${input.length} urls`, + ); + const [safeInput] = assertInput(input, {}, 'profiles'); + return this.orchestrate(safeInput, DATASET_ID.PROFILE, opts); + } +} diff --git a/src/api/scrape/router.ts b/src/api/scrape/router.ts index 803b59b..7ea12cf 100644 --- a/src/api/scrape/router.ts +++ b/src/api/scrape/router.ts @@ -9,6 +9,7 @@ import { PerplexityAPI } from './perplexity'; import { TiktokAPI } from './tiktok'; import { YoutubeAPI } from './youtube'; import { DigikeyAPI } from './digikey'; +import { PinterestAPI } from './pinterest'; import { RedditAPI } from './reddit'; export class ScrapeRouter { @@ -22,6 +23,7 @@ export class ScrapeRouter { tiktok: TiktokAPI; youtube: YoutubeAPI; digikey: DigikeyAPI; + pinterest: PinterestAPI; reddit: RedditAPI; constructor(opts: BaseAPIOptions) { @@ -37,6 +39,7 @@ export class ScrapeRouter { this.tiktok = new TiktokAPI(platformOpts); this.youtube = new YoutubeAPI(platformOpts); this.digikey = new DigikeyAPI(platformOpts); + this.pinterest = new PinterestAPI(platformOpts); this.reddit = new RedditAPI(platformOpts); } } diff --git a/src/api/scrape/snapshot.ts b/src/api/scrape/snapshot.ts index b46c724..7265ffb 100644 --- a/src/api/scrape/snapshot.ts +++ b/src/api/scrape/snapshot.ts @@ -1,15 +1,14 @@ import type { Dispatcher } from 'undici'; import { API_ENDPOINT } from '../../utils/constants'; import { DataNotReadyError } from '../../utils/errors'; -import { wrapAPIError } from '../../utils/error-utils'; import { assertResponse, throwInvalidStatus } from '../../core/transport'; import { routeDownloadStream, 
getFilename, getAbsAndEnsureDir, } from '../../utils/files'; -import { parseJSON } from '../../utils/misc'; -import { pollUntilReady } from '../../utils/polling'; +import { parseJSON, parseResponse } from '../../utils/misc'; +import { pollUntilStatus } from '../../utils/polling'; import type { z } from 'zod'; import { SnapshotIdSchema, @@ -18,7 +17,7 @@ import { } from '../../schemas/datasets'; import type { SnapshotDownloadOptions, SnapshotFetchOptions } from '../../schemas/datasets'; import { assertSchema } from '../../schemas/utils'; -import type { SnapshotStatusResponse } from '../../types/datasets'; +import { SnapshotStatusResponseSchema } from '../../schemas/responses'; import { BaseAPI, BaseAPIOptions } from './base'; const assertDownloadStatus = (status: number) => { @@ -111,13 +110,9 @@ export class SnapshotAPI extends BaseAPI { snapshotId, ); - try { - const response = await this.transport.request(url, {}); - const responseTxt = await assertResponse(response); - return parseJSON(responseTxt); - } catch (e: unknown) { - wrapAPIError(e, 'snapshot.getStatus'); - } + const response = await this.transport.request(url, {}); + const responseTxt = await assertResponse(response); + return parseResponse(responseTxt, SnapshotStatusResponseSchema, 'datasets/v3/progress'); } async #fetch( @@ -133,33 +128,29 @@ export class SnapshotAPI extends BaseAPI { snapshotId, ); - try { - const response = await this.transport.request(url, { - method: 'GET', - query: { - format: options.format, - } as Record, - }); - - // Must consume body before throwing so the connection is - // released back to undici's pool - if (response.statusCode === 202) { - await response.body.text(); - throw new DataNotReadyError( - 'snapshot is not ready yet, please try again later', - ); - } + const response = await this.transport.request(url, { + method: 'GET', + query: { + format: options.format, + } as Record, + }); - const responseTxt = await assertResponse(response); + // Must consume body before 
throwing so the connection is + // released back to undici's pool + if (response.statusCode === 202) { + await response.body.text(); + throw new DataNotReadyError( + 'snapshot is not ready yet, please try again later', + ); + } - if (options.format === 'json') { - return parseJSON(responseTxt); - } + const responseTxt = await assertResponse(response); - return responseTxt; - } catch (e: unknown) { - wrapAPIError(e, 'snapshot.fetch', 'parsing response'); + if (options.format === 'json') { + return parseJSON(responseTxt); } + + return responseTxt; } async #download( @@ -173,44 +164,40 @@ export class SnapshotAPI extends BaseAPI { snapshotId, ); - try { - if (options.statusPolling) { - await this.#awaitReady(snapshotId); - } + if (options.statusPolling) { + await this.#awaitReady(snapshotId); + } - const filename = getFilename(options.filename, options.format); - const target = await getAbsAndEnsureDir(filename); + const filename = getFilename(options.filename, options.format); + const target = await getAbsAndEnsureDir(filename); - this.logger.info( - `starting streaming snapshot ${snapshotId} data to ${target}`, - ); + this.logger.info( + `starting streaming snapshot ${snapshotId} data to ${target}`, + ); - await this.transport.stream( - url, - { - method: 'GET', - query: { - format: options.format, - compress: options.compress, - } as Record, - opaque: { - filename: target, - assertStatus: assertDownloadStatus, - }, + await this.transport.stream( + url, + { + method: 'GET', + query: { + format: options.format, + compress: options.compress, + } as Record, + opaque: { + filename: target, + assertStatus: assertDownloadStatus, }, - routeDownloadStream as unknown as Dispatcher.StreamFactory, - ); + }, + routeDownloadStream as unknown as Dispatcher.StreamFactory, + ); - return target; - } catch (e: unknown) { - wrapAPIError(e, 'snapshot.download'); - } + return target; } async #awaitReady(snapshotId: string): Promise { this.logger.info(`polling snapshot status for id 
${snapshotId}`); - await pollUntilReady(snapshotId, (id) => this.#getStatus(id), { + await pollUntilStatus(snapshotId, (id) => this.#getStatus(id), { pollInterval: 10_000, onStatus: (status, elapsed) => { this.logger.info( @@ -227,14 +214,10 @@ export class SnapshotAPI extends BaseAPI { snapshotId, ); - try { - const response = await this.transport.request(url, { - method: 'POST', - }); + const response = await this.transport.request(url, { + method: 'POST', + }); - await assertResponse(response); - } catch (e: unknown) { - wrapAPIError(e, 'snapshot.cancel'); - } + await assertResponse(response); } } diff --git a/src/api/scrape/tiktok.ts b/src/api/scrape/tiktok.ts index 89cb860..8fc2485 100644 --- a/src/api/scrape/tiktok.ts +++ b/src/api/scrape/tiktok.ts @@ -14,6 +14,9 @@ const DATASET_ID = { POST: 'gd_lu702nij2f790tmv9h', PROFILE: 'gd_l1villgoiiidt09ci', COMMENTS: 'gd_lkf2st302ap89utw5k', + POSTS_BY_PROFILE_FAST: 'gd_m7n5v2gq296pex2f5m', + POSTS_BY_URL_FAST: 'gd_m736hjp71lejc5dc0l', + POSTS_BY_SEARCH_URL_FAST: 'gd_m7n5ixlw1gc4no56kx', }; const assertInput = ( @@ -78,4 +81,87 @@ export class TiktokAPI extends BaseAPI { const [safeInput] = assertInput(input, {}, 'comments'); return this.orchestrate(safeInput, DATASET_ID.COMMENTS, opts); } + + collectPostsByProfileFast(input: string[], opt: DatasetOptions) { + this.logger.info( + `collectPostsByProfileFast for ${input.length} urls`, + ); + const [safeInput, safeOpt] = assertInput( + input, + opt, + 'collectPostsByProfileFast', + ); + return this.run( + safeInput, + DATASET_ID.POSTS_BY_PROFILE_FAST, + safeOpt, + ); + } + + collectPostsByUrlFast(input: string[], opt: DatasetOptions) { + this.logger.info( + `collectPostsByUrlFast for ${input.length} urls`, + ); + const [safeInput, safeOpt] = assertInput( + input, + opt, + 'collectPostsByUrlFast', + ); + return this.run(safeInput, DATASET_ID.POSTS_BY_URL_FAST, safeOpt); + } + + collectPostsBySearchUrlFast(input: string[], opt: DatasetOptions) { + this.logger.info( + 
`collectPostsBySearchUrlFast for ${input.length} urls`, + ); + const [safeInput, safeOpt] = assertInput( + input, + opt, + 'collectPostsBySearchUrlFast', + ); + return this.run( + safeInput, + DATASET_ID.POSTS_BY_SEARCH_URL_FAST, + safeOpt, + ); + } + + async postsByProfileFast(input: string[], opts?: OrchestrateOptions) { + this.logger.info( + `postsByProfileFast (orchestrated) for ${input.length} urls`, + ); + const [safeInput] = assertInput(input, {}, 'postsByProfileFast'); + return this.orchestrate( + safeInput, + DATASET_ID.POSTS_BY_PROFILE_FAST, + opts, + ); + } + + async postsByUrlFast(input: string[], opts?: OrchestrateOptions) { + this.logger.info( + `postsByUrlFast (orchestrated) for ${input.length} urls`, + ); + const [safeInput] = assertInput(input, {}, 'postsByUrlFast'); + return this.orchestrate( + safeInput, + DATASET_ID.POSTS_BY_URL_FAST, + opts, + ); + } + + async postsBySearchUrlFast( + input: string[], + opts?: OrchestrateOptions, + ) { + this.logger.info( + `postsBySearchUrlFast (orchestrated) for ${input.length} urls`, + ); + const [safeInput] = assertInput(input, {}, 'postsBySearchUrlFast'); + return this.orchestrate( + safeInput, + DATASET_ID.POSTS_BY_SEARCH_URL_FAST, + opts, + ); + } } diff --git a/src/api/scraperstudio/index.ts b/src/api/scraperstudio/index.ts new file mode 100644 index 0000000..f7f3f71 --- /dev/null +++ b/src/api/scraperstudio/index.ts @@ -0,0 +1,3 @@ +export { ScraperStudioService } from './service'; +export { ScraperStudioJob } from './job'; +export type { ScraperStudioPollOptions } from './job'; diff --git a/src/api/scraperstudio/job.ts b/src/api/scraperstudio/job.ts new file mode 100644 index 0000000..31d6c8e --- /dev/null +++ b/src/api/scraperstudio/job.ts @@ -0,0 +1,107 @@ +import { + DataNotReadyError, + NetworkError, + TimeoutError, +} from '../../utils/errors'; +import { sleep, parseJSON } from '../../utils/misc'; +import { Deadline } from '../../utils/deadline'; +import { getLogger } from '../../utils/logger'; 
+import { Transport, assertResponse } from '../../core/transport'; +import { API_ENDPOINT } from '../../utils/constants'; + +const logger = getLogger('scraperstudio.job'); +const MAX_NETWORK_RETRIES = 3; + +export interface ScraperStudioPollOptions { + /** Maximum milliseconds to wait (default: 180_000) */ + timeout?: number; + /** Milliseconds between fetch attempts (default: 10_000) */ + pollInterval?: number; +} + +export class ScraperStudioJob { + readonly responseId: string; + readonly triggeredAt: Date; + + // Takes Transport directly — no operations interface needed. + // Unlike ScrapeJob→SnapshotOperations, there's no circular import to break. + private readonly transport: Transport; + + constructor(responseId: string, transport: Transport) { + this.responseId = responseId; + this.transport = transport; + this.triggeredAt = new Date(); + } + + toString(): string { + return ``; + } + + async fetch(): Promise { + const response = await this.transport.request( + API_ENDPOINT.DCA_GET_RESULT, + { + method: 'GET', + query: { + response_id: this.responseId, + } as Record, + }, + ); + + // Must check 202 before assertResponse — 202 is "not ready", + // and the body must be consumed to release the undici connection. + if (response.statusCode === 202) { + await response.body.text(); + throw new DataNotReadyError( + 'scraper studio result not ready yet', + ); + } + + const responseTxt = await assertResponse(response); + return parseJSON(responseTxt); + } + + async waitAndFetch(options?: ScraperStudioPollOptions): Promise { + const deadline = new Deadline(options?.timeout ?? 500_000); + const interval = options?.pollInterval ?? 
10_000; + let networkRetries = 0; + + logger.debug(`${this}: waiting for results`); + + for (;;) { + if (deadline.expired) { + throw new TimeoutError( + `Scraper Studio job timed out after ${Math.round(deadline.elapsed / 1000)}s for ${this.responseId}`, + ); + } + + try { + const data = await this.fetch(); + logger.debug( + `${this}: got ${data.length} results after ${Math.round(deadline.elapsed / 1000)}s`, + ); + return data; + } catch (e) { + if (e instanceof DataNotReadyError) { + logger.debug( + `${this}: not ready, retrying in ${interval}ms`, + ); + await sleep(interval); + continue; + } + if ( + e instanceof NetworkError && + networkRetries < MAX_NETWORK_RETRIES + ) { + networkRetries++; + logger.debug( + `${this}: transient network error (${networkRetries}/${MAX_NETWORK_RETRIES}), retrying`, + ); + await sleep(interval); + continue; + } + throw e; + } + } + } +} diff --git a/src/api/scraperstudio/service.ts b/src/api/scraperstudio/service.ts new file mode 100644 index 0000000..1da057d --- /dev/null +++ b/src/api/scraperstudio/service.ts @@ -0,0 +1,146 @@ +import { Transport, assertResponse } from '../../core/transport'; +import { API_ENDPOINT } from '../../utils/constants'; +import { parseResponse } from '../../utils/misc'; +import { getLogger } from '../../utils/logger'; +import { assertSchema } from '../../schemas/utils'; +import { + CollectorIdSchema, + ScraperStudioRunOptionsSchema, + TriggerResponseSchema, + JobStatusResponseSchema, + type ScraperStudioRunOptions, + type JobStatus, + type RunResult, +} from '../../schemas/scraperstudio'; +import { ScraperStudioJob } from './job'; + +const logger = getLogger('scraperstudio'); + +export class ScraperStudioService { + private transport: Transport; + + constructor(opts: { transport: Transport }) { + this.transport = opts.transport; + } + + async run( + collector: string, + options: ScraperStudioRunOptions, + ): Promise { + const safeCollector = assertSchema( + CollectorIdSchema, + collector, + 
'scraperStudio.run.collector', + ); + const safeOpts = assertSchema( + ScraperStudioRunOptionsSchema, + options, + 'scraperStudio.run.options', + ); + + const inputs = Array.isArray(safeOpts.input) + ? safeOpts.input + : [safeOpts.input]; + + logger.info( + `run: ${inputs.length} input(s) for collector ${safeCollector}`, + ); + + const results: RunResult[] = []; + + for (const input of inputs) { + const start = Date.now(); + try { + const job = await this._trigger(safeCollector, input); + const data = await job.waitAndFetch({ + timeout: safeOpts.timeout, + pollInterval: safeOpts.pollInterval, + }); + results.push({ + input, + data, + error: null, + responseId: job.responseId, + elapsedMs: Date.now() - start, + }); + } catch (e: unknown) { + results.push({ + input, + data: null, + error: (e as Error).message, + responseId: null, + elapsedMs: Date.now() - start, + }); + } + } + + const succeeded = results.filter((r) => r.data !== null).length; + logger.info( + `run complete: ${succeeded}/${results.length} succeeded`, + ); + + return results; + } + + async trigger( + collector: string, + input: Record, + ): Promise { + const safeCollector = assertSchema( + CollectorIdSchema, + collector, + 'scraperStudio.trigger.collector', + ); + + logger.info(`trigger: collector ${safeCollector}`); + + return this._trigger(safeCollector, input); + } + + async status(jobId: string): Promise { + assertSchema( + CollectorIdSchema, + jobId, + 'scraperStudio.status.jobId', + ); + + logger.info(`status: job ${jobId}`); + + const url = `${API_ENDPOINT.DCA_LOG}/${jobId}`; + const response = await this.transport.request(url, {}); + const responseTxt = await assertResponse(response); + return parseResponse( + responseTxt, + JobStatusResponseSchema, + 'dca/log', + ); + } + + async fetch(responseId: string): Promise { + const job = new ScraperStudioJob(responseId, this.transport); + return job.fetch(); + } + + private async _trigger( + collector: string, + input: Record, + ): Promise { + const 
response = await this.transport.request( + API_ENDPOINT.DCA_TRIGGER, + { + method: 'POST', + query: { collector } as Record, + body: JSON.stringify(input), + }, + ); + + const responseTxt = await assertResponse(response); + const result = parseResponse( + responseTxt, + TriggerResponseSchema, + 'dca/trigger_immediate', + ); + + return new ScraperStudioJob(result.response_id, this.transport); + } +} diff --git a/src/api/unlocker/request.ts b/src/api/unlocker/request.ts index 663b45e..851adea 100644 --- a/src/api/unlocker/request.ts +++ b/src/api/unlocker/request.ts @@ -2,7 +2,6 @@ import { PromisePool } from '@supercharge/promise-pool'; import { API_ENDPOINT, DEFAULT_CONCURRENCY } from '../../utils/constants'; import { getLogger } from '../../utils/logger'; import { BRDError } from '../../utils/errors'; -import { wrapAPIError } from '../../utils/error-utils'; import { Transport, assertResponse } from '../../core/transport'; import { dropEmptyKeys, parseJSON } from '../../utils/misc'; import { ZoneNameSchema } from '../../schemas/shared'; @@ -119,24 +118,20 @@ export class RequestAPI { ): Promise { const body = this.getRequestBody(val, zone, opt); - try { - const response = await this.transport.request( - API_ENDPOINT.REQUEST, - { - method: 'POST', - body: JSON.stringify(body), - timeout: opt.timeout, - }, - ); + const response = await this.transport.request( + API_ENDPOINT.REQUEST, + { + method: 'POST', + body: JSON.stringify(body), + timeout: opt.timeout, + }, + ); - const responseTxt = await assertResponse(response); - if (opt.format === 'json') { - return parseJSON(responseTxt); - } - return responseTxt; - } catch (e: unknown) { - wrapAPIError(e, 'request.handle'); + const responseTxt = await assertResponse(response); + if (opt.format === 'json') { + return parseJSON(responseTxt); } + return responseTxt; } // prettier-ignore private async handleBatch(inputs: string[], zone: string, opt: RequestJSONOptions): Promise; @@ -153,32 +148,27 @@ export class RequestAPI { 
`processing ${inputs.length} items, concurrency is ${limit}`, ); - try { - const { results } = await PromisePool.for(inputs) - .withConcurrency(limit) - .useCorrespondingResults() - .process(async (url) => { - try { - return await this.handleSingle(url, zone, opt); - } catch (e: unknown) { - return e as BRDError; - } - }); - - const res = results.map((v) => { - if (v === PromisePool.failed || v === PromisePool.notRun) - return new BRDError('unknown error occurred'); - return v as Exclude; + const { results } = await PromisePool.for(inputs) + .withConcurrency(limit) + .useCorrespondingResults() + .process(async (url) => { + try { + return await this.handleSingle(url, zone, opt); + } catch (e: unknown) { + return e as BRDError; + } }); - this.logger.info( - `completed batch operation: ${res.length} results`, - ); + const res = results.map((v) => { + if (v === PromisePool.failed || v === PromisePool.notRun) + return new BRDError('unknown error occurred'); + return v as Exclude; + }); - return res; - } catch (error: unknown) { - this.logger.error(`batch operation failed: ${(error as Error).message}`); - wrapAPIError(error, 'request.handleBatch'); - } + this.logger.info( + `completed batch operation: ${res.length} results`, + ); + + return res; } } diff --git a/src/api/zones.ts b/src/api/zones.ts index 0def732..480e9bb 100644 --- a/src/api/zones.ts +++ b/src/api/zones.ts @@ -3,7 +3,6 @@ import { API_ENDPOINT } from '../utils/constants'; import { parseJSON } from '../utils/misc'; import { Transport, assertResponse } from '../core/transport'; import { ZoneError, BRDError } from '../utils/errors'; -import { wrapAPIError } from '../utils/error-utils'; import type { ZoneInfo, ZoneInfoResponse } from '../types/zones'; const logger = getLogger('api.zones'); @@ -52,30 +51,24 @@ export class ZonesAPI { async listZones(): Promise { logger.info('fetching list of active zones'); - try { - const response = await this.transport.request( - API_ENDPOINT.ZONE_LIST, - {}, - ); - - const 
responseTxt = await assertResponse(response); - const zones = parseJSON(responseTxt); + const response = await this.transport.request( + API_ENDPOINT.ZONE_LIST, + {}, + ); - logger.info(`found ${zones.length} active zones`); + const responseTxt = await assertResponse(response); + const zones = parseJSON(responseTxt); - const res = zones.map((zone) => ({ - name: zone.zone || zone.name, - type: zone.zone_type || zone.type, - status: zone.status, - ips: zone.ips || 0, - bandwidth: zone.bandwidth || 0, - created: zone.created_at || zone.created, - })); + logger.info(`found ${zones.length} active zones`); - return res; - } catch (e: unknown) { - wrapAPIError(e, 'listZones'); - } + return zones.map((zone) => ({ + name: zone.zone || zone.name, + type: zone.zone_type || zone.type, + status: zone.status, + ips: zone.ips || 0, + bandwidth: zone.bandwidth || 0, + created: zone.created_at || zone.created, + })); } async ensureZone(name: string, opts: EnsureZoneOpts) { diff --git a/src/client.ts b/src/client.ts index a3a4be5..b552c6d 100644 --- a/src/client.ts +++ b/src/client.ts @@ -3,6 +3,11 @@ import { ZonesAPI } from './api/zones'; import { ScrapeRouter } from './api/scrape/router'; import { SearchRouter } from './api/search/router'; import { DatasetsClient } from './api/datasets/client'; +import { DiscoverService } from './api/discover/service'; +import type { DiscoverResult } from './api/discover/result'; +import type { DiscoverJob } from './api/discover/job'; +import type { DiscoverOptions } from './schemas/discover'; +import { ScraperStudioService } from './api/scraperstudio/service'; import { setup as setupLogger, getLogger } from './utils/logger'; import { DEFAULT_WEB_UNLOCKER_ZONE, @@ -77,6 +82,7 @@ function defineLazy(obj: object, key: string, factory: () => T): void { */ export class bdclient { private _scrapeAPI: ScrapeAPI | null = null; + private _discoverService: DiscoverService | null = null; private zonesAPI: ZonesAPI; private transport: Transport; private 
autoCreateZones: boolean; @@ -86,6 +92,7 @@ export class bdclient { declare scrape: ScrapeRouter; declare search: SearchRouter; declare datasets: DatasetsClient; + declare scraperStudio: ScraperStudioService; constructor(options?: BdClientOptions) { const opt = assertSchema( @@ -154,6 +161,10 @@ export class bdclient { defineLazy(this, 'datasets', () => new DatasetsClient({ transport: this.transport }), ); + + defineLazy(this, 'scraperStudio', () => + new ScraperStudioService({ transport: this.transport }), + ); } private get scrapeAPI(): ScrapeAPI { @@ -168,6 +179,15 @@ export class bdclient { return this._scrapeAPI; } + private get discoverService(): DiscoverService { + if (!this._discoverService) { + this._discoverService = new DiscoverService({ + transport: this.transport, + }); + } + return this._discoverService; + } + /** * Scrape a single URL using Bright Data Web Unlocker API * @@ -311,6 +331,41 @@ export class bdclient { return await this.zonesAPI.listZones(); } + /** + * Search the web with AI-powered relevance ranking. + * Triggers a search, polls until complete, returns results. + * + * @example + * ```javascript + * const result = await client.discover('AI trends 2026', { + * intent: 'latest technology developments', + * includeContent: true, + * }); + * for (const item of result.data) { + * console.log(`[${item.relevance_score}] ${item.title}`); + * } + * ``` + */ + async discover(query: string, opts?: DiscoverOptions): Promise { + return this.discoverService.search(query, opts); + } + + /** + * Trigger a discover search and return a job for manual polling. 
+ * + * @example + * ```javascript + * const job = await client.discoverTrigger('market research SaaS', { + * intent: 'competitor pricing', + * }); + * await job.wait({ timeout: 60_000 }); + * const data = await job.fetch(); + * ``` + */ + async discoverTrigger(query: string, opts?: DiscoverOptions): Promise { + return this.discoverService.trigger(query, opts); + } + async close(): Promise { await this.transport.close(); } diff --git a/src/core/transport.ts b/src/core/transport.ts index 5955a25..06b55f3 100644 --- a/src/core/transport.ts +++ b/src/core/transport.ts @@ -14,6 +14,8 @@ import { MAX_RETRIES, RETRY_BACKOFF_FACTOR, RETRY_STATUSES, + RETRY_METHODS, + RETRY_ERROR_CODES, } from '../utils/constants'; import { APIError, @@ -35,6 +37,13 @@ const TIMEOUT_ERROR_NAMES = new Set([ 'ConnectTimeoutError', ]); +function isAbortTimeout(err: Error): boolean { + return ( + (err.name === 'TimeoutError' && !(err instanceof BRDError)) || + (err.name === 'AbortError' && err.message?.includes('timeout')) + ); +} + export interface TransportOptions { apiKey: string; timeout?: number; @@ -96,6 +105,8 @@ export class Transport { maxRetries: MAX_RETRIES, timeoutFactor: RETRY_BACKOFF_FACTOR, statusCodes: RETRY_STATUSES, + methods: RETRY_METHODS as unknown as Dispatcher.HttpMethod[], + errorCodes: RETRY_ERROR_CODES, }), ); process.on('beforeExit', this.onBeforeExit); @@ -163,19 +174,7 @@ export class Transport { error: (e as Error).message, }, ); - if (e instanceof BRDError) throw e; - const err = e as Error; - if (TIMEOUT_ERROR_NAMES.has(err.name)) { - throw new NetworkTimeoutError( - `Request timed out: ${err.message}`, - { - cause: err, - }, - ); - } - throw new NetworkError(`Network error: ${err.message}`, { - cause: err, - }); + this.classifyError(e); } } @@ -229,19 +228,7 @@ export class Transport { error: (e as Error).message, }, ); - if (e instanceof BRDError) throw e; - const err = e as Error; - if (TIMEOUT_ERROR_NAMES.has(err.name)) { - throw new NetworkTimeoutError( - 
`Request timed out: ${err.message}`, - { - cause: err, - }, - ); - } - throw new NetworkError(`Network error: ${err.message}`, { - cause: err, - }); + this.classifyError(e); } } @@ -272,6 +259,23 @@ export class Transport { logRequest(method, JSON.stringify(url), meta); } + + private classifyError(err: unknown): never { + if (err instanceof BRDError) throw err; + const e = err as Error; + if (TIMEOUT_ERROR_NAMES.has(e.name) || isAbortTimeout(e)) { + throw new NetworkTimeoutError(`Request timed out: ${e.message}`, { + cause: e, + }); + } + if (e.name === 'RequestRetryError' && 'statusCode' in e) { + throwInvalidStatus( + (e as Error & { statusCode: number }).statusCode, + `retries exhausted: ${e.message}`, + ); + } + throw new NetworkError(`Network error: ${e.message}`, { cause: e }); + } } export function throwInvalidStatus(status: number, responseTxt: string): never { @@ -301,8 +305,21 @@ export async function assertResponse( parse = true, ): Promise { if (response.statusCode < 400) { - return parse ? 
await response.body.text() : response.body; + if (!parse) return response.body; + try { + return await response.body.text(); + } catch (e) { + throw new NetworkError( + `Failed to read response body: ${(e as Error).message}`, + { cause: e as Error }, + ); + } } - - throwInvalidStatus(response.statusCode, await response.body.text()); + let bodyText: string; + try { + bodyText = await response.body.text(); + } catch { + bodyText = '(response body unreadable)'; + } + throwInvalidStatus(response.statusCode, bodyText); } diff --git a/src/index.ts b/src/index.ts index f08cde5..5b2277b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -20,6 +20,24 @@ export type { BaseResultFields } from './models/result'; export type * from './types/client'; export type * from './types/request'; export type * from './types/zones'; +export type * from './types/discover'; + +// ── Discover ───────────────────────────────────────────────────── +export { DiscoverResult } from './api/discover/result'; +export { DiscoverJob } from './api/discover/job'; +export type { DiscoverResultItem, DiscoverResultFields } from './api/discover/result'; +export type { DiscoverPollOptions } from './api/discover/job'; + +// ── Scraper Studio ────────────────────────────────────────────── +export { ScraperStudioService } from './api/scraperstudio/service'; +export { ScraperStudioJob } from './api/scraperstudio/job'; +export type { ScraperStudioPollOptions } from './api/scraperstudio/job'; +export type { + ScraperStudioRunOptions, + ScraperStudioInput, + JobStatus, + RunResult, +} from './schemas/scraperstudio'; // ── Subpath re-exports (backward compat) ───────────────────────── // Consumers can also import these from '@brightdata/sdk/scrapers', diff --git a/src/models/datasets.ts b/src/models/datasets.ts new file mode 100644 index 0000000..e2ae7f5 --- /dev/null +++ b/src/models/datasets.ts @@ -0,0 +1,64 @@ +export interface AmazonBestSellerRecord { + title: string; + seller_name: string | null; + brand: string; 
+ description: string | null; + initial_price: number | null; + final_price: number | null; + final_price_high: number | null; + currency: string; + availability: string; + reviews_count: number; + categories: string[][] | null; + asin: string; + buybox_seller: string; + number_of_sellers: number; + root_bs_rank: number; + ISBN10: string | null; + answered_questions: number; + domain: string; + images_count: number; + url: string; + video_count: number; + image_url: string; + item_weight: string | null; + rating: number; + product_dimensions: string | null; + seller_id: string; + image: string; + date_first_available: string | null; + discount: string | null; + model_number: string | null; + manufacturer: string | null; + department: string | null; + plus_content: boolean; + upc: string | null; + video: boolean; + top_review: string | null; + variations: AmazonVariation[] | null; + delivery: string[] | null; + features: string[] | null; + buybox_prices: AmazonBuyboxPrices | null; + origin_url: string | null; + bs_rank: number | null; + bs_rank_category: string | null; + sponsered: boolean | null; +} + +export interface AmazonVariation { + asin: string; + name: string; +} + +export interface AmazonBuyboxPrices { + final_price: number | null; + initial_price: number | null; + discount: string | null; + sns_price: AmazonSnsPrice | null; + monthly_cost: number | null; +} + +export interface AmazonSnsPrice { + base_price: number | null; + tiered_price: number | null; +} diff --git a/src/schemas/client.ts b/src/schemas/client.ts index 1d5acdd..729599e 100644 --- a/src/schemas/client.ts +++ b/src/schemas/client.ts @@ -7,13 +7,19 @@ export const ApiKeySchema = z export const VerboseSchema = z.stringbool().optional(); +export const LogLevelSchema = z.enum([ + 'DEBUG', + 'INFO', + 'WARNING', + 'ERROR', + 'CRITICAL', +]); + export const ClientOptionsSchema = z.object({ apiKey: ApiKeySchema.optional(), webUnlockerZone: ZoneNameSchema.optional(), serpZone: 
ZoneNameSchema.optional(), - logLevel: z - .enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']) - .optional(), + logLevel: LogLevelSchema.optional(), verbose: z.boolean().optional(), structuredLogging: z.boolean().default(true), autoCreateZones: z.boolean().default(true), diff --git a/src/schemas/discover.ts b/src/schemas/discover.ts new file mode 100644 index 0000000..0393cb1 --- /dev/null +++ b/src/schemas/discover.ts @@ -0,0 +1,34 @@ +import { z } from 'zod'; + +export const DiscoverQuerySchema = z.string().min(1, 'query must not be empty'); + +export const DiscoverOptionsSchema = z.object({ + intent: z.string().optional(), + includeContent: z.boolean().optional(), + country: z + .string() + .length(2, 'country code must be exactly 2 characters') + .transform((v) => v.toLowerCase()) + .optional(), + city: z.string().optional(), + language: z.string().optional(), + filterKeywords: z.array(z.string()).optional(), + numResults: z.int().positive().optional(), + format: z.enum(['json']).default('json'), + timeout: z.number().positive().default(60_000), + pollInterval: z.number().positive().default(2_000), +}); + +export type DiscoverOptions = z.input; + +export const DiscoverTriggerResponseSchema = z + .object({ + task_id: z.string().min(1), + }) + .passthrough(); + +export const DiscoverPollResponseSchema = z + .object({ + status: z.enum(['processing', 'done', 'error', 'failed']), + }) + .passthrough(); diff --git a/src/schemas/responses.ts b/src/schemas/responses.ts new file mode 100644 index 0000000..49caeef --- /dev/null +++ b/src/schemas/responses.ts @@ -0,0 +1,31 @@ +import { z } from 'zod'; + +/** + * Response from POST /datasets/v3/trigger + * Used by: BaseAPI.run() to construct ScrapeJob + * Critical field: snapshot_id (without it, job polls "undefined" forever) + */ +export const SnapshotMetaResponseSchema = z + .object({ + snapshot_id: z.string().min(1), + }) + .passthrough(); + +export type SnapshotMeta = z.infer; + +/** + * Response from GET 
/datasets/v3/progress/{id} + * Used by: SnapshotAPI.#getStatus(), polling loop + * Critical field: status (without it, poll never sees 'ready') + */ +export const SnapshotStatusResponseSchema = z + .object({ + snapshot_id: z.string(), + dataset_id: z.string(), + status: z.enum(['running', 'ready', 'failed', 'cancelled', 'error']), + }) + .passthrough(); + +export type SnapshotStatusResponse = z.infer< + typeof SnapshotStatusResponseSchema +>; diff --git a/src/schemas/scraperstudio.ts b/src/schemas/scraperstudio.ts new file mode 100644 index 0000000..f98d70b --- /dev/null +++ b/src/schemas/scraperstudio.ts @@ -0,0 +1,95 @@ +import { z } from 'zod'; + +// --- Input validation --- + +export const CollectorIdSchema = z + .string() + .min(1, 'collector ID is required'); + +export const ScraperStudioInputSchema = z.union([ + z.record(z.string(), z.any()), + z.array(z.record(z.string(), z.any())), +]); + +export type ScraperStudioInput = z.input; + +export const ScraperStudioRunOptionsSchema = z.object({ + input: ScraperStudioInputSchema, + timeout: z.number().positive().default(500_000), + pollInterval: z.number().positive().default(10_000), +}); + +export type ScraperStudioRunOptions = z.input; + +// --- Result type for run() --- + +export interface RunResult { + input: Record; + data: unknown[] | null; + error: string | null; + responseId: string | null; + elapsedMs: number; +} + +// --- Response validation --- + +export const TriggerResponseSchema = z + .object({ + response_id: z.string().min(1), + }) + .passthrough(); + +// Job status from GET /dca/log/{id} +// The Bright Data API returns mixed-case keys: Id, Status, Collector, Success_rate, Job_time, etc. +// Validate critical fields (accepting both casings), then normalize to camelCase via transform. 
+const StatusField = z.enum([ + 'queued', + 'running', + 'done', + 'failed', + 'cancelled', + 'unknown', +]); + +export const JobStatusResponseSchema = z + .object({ + id: z.string().optional(), + Id: z.string().optional(), + status: StatusField.optional(), + Status: StatusField.optional(), + }) + .passthrough() + .refine((d) => d.id || d.Id, 'response missing id field') + .refine((d) => d.status || d.Status, 'response missing status field') + .transform((data: Record) => { + const get = (key: string) => + data[key] ?? + data[key.toLowerCase()] ?? + data[key.charAt(0).toUpperCase() + key.slice(1)]; + return { + id: (get('id') ?? get('Id') ?? '') as string, + status: (get('status') ?? get('Status') ?? 'unknown') as string, + collector: (get('collector') ?? get('Collector') ?? '') as string, + inputs: (get('inputs') ?? get('Inputs') ?? 0) as number, + lines: (get('lines') ?? get('Lines') ?? 0) as number, + fails: (get('fails') ?? get('Fails') ?? 0) as number, + successRate: (get('success_rate') ?? + get('Success_rate') ?? + 0) as number, + created: (get('created') ?? get('Created') ?? '') as string, + started: (get('started') ?? get('Started') ?? null) as + | string + | null, + finished: (get('finished') ?? get('Finished') ?? null) as + | string + | null, + jobTime: (get('job_time') ?? get('Job_time') ?? null) as + | number + | null, + queueTime: (get('queue_time') ?? get('Queue_time') ?? 
null) as + | number + | null, + }; + }); + +export type JobStatus = z.output; diff --git a/src/types/client.ts b/src/types/client.ts index e1d1489..d76437a 100644 --- a/src/types/client.ts +++ b/src/types/client.ts @@ -1,4 +1,7 @@ -export type LOG_LEVEL = 'DEBUG' | 'INFO' | 'WARNING' | 'ERROR' | 'CRITICAL'; +import type { z } from 'zod'; +import type { LogLevelSchema } from '../schemas/client'; + +export type LOG_LEVEL = z.infer; export type { BdClientOptions } from '../schemas/client'; export type { SaveOptions, ContentFormat } from '../schemas/misc'; diff --git a/src/types/datasets.ts b/src/types/datasets.ts index 5af6a58..58b6d3a 100644 --- a/src/types/datasets.ts +++ b/src/types/datasets.ts @@ -1,4 +1,5 @@ import type { PollOptions } from '../utils/polling'; +import type { SnapshotStatusResponse } from '../schemas/responses'; export type { DatasetOptionsSync, @@ -11,16 +12,9 @@ export type { export type SnapshotFormat = 'json' | 'ndjson' | 'jsonl' | 'csv'; -export type SnapshotStatus = 'running' | 'ready' | 'failed' | 'cancelled'; +export type { SnapshotMeta, SnapshotStatusResponse } from '../schemas/responses'; -export interface SnapshotMeta { - snapshot_id: string; -} - -export interface SnapshotStatusResponse extends SnapshotMeta { - dataset_id: string; - status: SnapshotStatus; -} +export type SnapshotStatus = SnapshotStatusResponse['status']; /** * Interface for snapshot operations needed by ScrapeJob. diff --git a/src/types/discover.ts b/src/types/discover.ts new file mode 100644 index 0000000..5f0a7a8 --- /dev/null +++ b/src/types/discover.ts @@ -0,0 +1,12 @@ +/** + * Interface for discover operations needed by DiscoverJob. + * DiscoverService implements this via structural typing. + * Defined here (not in service.ts) to avoid circular imports. 
+ */ +export interface DiscoverOperations { + pollOnce(taskId: string): Promise<{ + status: string; + results?: unknown[]; + duration_seconds?: number; + }>; +} diff --git a/src/utils/constants.ts b/src/utils/constants.ts index d65e193..f1a407e 100644 --- a/src/utils/constants.ts +++ b/src/utils/constants.ts @@ -7,6 +7,27 @@ export const DEFAULT_TIMEOUT = 120_000; export const MAX_RETRIES = 3; export const RETRY_BACKOFF_FACTOR = 1.5; export const RETRY_STATUSES = [429, 500, 502, 503, 504]; +export const RETRY_METHODS = [ + 'GET', + 'HEAD', + 'OPTIONS', + 'PUT', + 'DELETE', + 'TRACE', + 'POST', +] as const; +export const RETRY_ERROR_CODES: string[] = [ + 'ECONNRESET', + 'ECONNREFUSED', + 'ENOTFOUND', + 'ENETDOWN', + 'ENETUNREACH', + 'EHOSTDOWN', + 'EHOSTUNREACH', + 'EPIPE', + 'UND_ERR_SOCKET', + 'UND_ERR_CONNECT_TIMEOUT', +]; export const DEFAULT_CONNECTIONS = DEFAULT_CONCURRENCY * (MAX_RETRIES + 1); // 40 export const DEFAULT_KEEP_ALIVE_TIMEOUT = 30_000; export const DEFAULT_KEEP_ALIVE_MAX_TIMEOUT = 120_000; @@ -24,6 +45,14 @@ export const API_ENDPOINT = { SNAPSHOT_DELIVER: `${API_BASE_URL}/datasets/v3/deliver/{snapshot_id}`, SNAPSHOT_CANCEL: `${API_BASE_URL}/datasets/v3/snapshot/{snapshot_id}/cancel`, + // Discover API (AI-powered web search) + DISCOVER: `${API_BASE_URL}/discover`, + + // Scraper Studio / DCA (Data Collection Automation) + DCA_TRIGGER: `${API_BASE_URL}/dca/trigger_immediate`, + DCA_GET_RESULT: `${API_BASE_URL}/dca/get_result`, + DCA_LOG: `${API_BASE_URL}/dca/log`, + // Datasets service (pre-collected data — separate from /datasets/v3/ scraper endpoints) DATASET_LIST: `${API_BASE_URL}/datasets/list`, DATASET_METADATA: `${API_BASE_URL}/datasets/{dataset_id}/metadata`, diff --git a/src/utils/error-utils.ts b/src/utils/error-utils.ts deleted file mode 100644 index 3bcd194..0000000 --- a/src/utils/error-utils.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { BRDError, APIError } from './errors'; - -export function wrapAPIError( - e: unknown, - location: 
string, - operation?: string, -): never { - if (e instanceof BRDError) throw e; - const prefix = operation ? `${location}: ${operation}` : location; - throw new APIError(`${prefix}: ${(e as Error).message}`); -} diff --git a/src/utils/misc.ts b/src/utils/misc.ts index efea798..bc5e036 100644 --- a/src/utils/misc.ts +++ b/src/utils/misc.ts @@ -1,3 +1,4 @@ +import { z } from 'zod'; import { getLogger } from './logger'; import { APIError } from './errors'; @@ -17,6 +18,28 @@ export function parseJSON(data: string): T { } } +export function parseResponse( + data: string, + schema: T, + label: string, +): z.infer { + const raw = parseJSON(data); + const result = schema.safeParse(raw); + if (!result.success) { + const preview = + data.length > 300 ? data.substring(0, 300) + '...' : data; + const logger = getLogger('utils.response'); + logger.warning(`unexpected API response shape for ${label}`, { + error: z.prettifyError(result.error), + data: preview, + }); + throw new APIError( + `Unexpected response from ${label}: ${z.prettifyError(result.error)}`, + ); + } + return result.data; +} + export const isStrArray = (maybeArr: unknown): maybeArr is string[] => Array.isArray(maybeArr) && maybeArr.every((item) => typeof item === 'string'); diff --git a/src/utils/polling.ts b/src/utils/polling.ts index 1ee4ee9..d9b973d 100644 --- a/src/utils/polling.ts +++ b/src/utils/polling.ts @@ -9,48 +9,51 @@ export interface PollOptions { pollInterval?: number; /** Maximum milliseconds to wait before timing out (default: 600_000) */ pollTimeout?: number; + /** Status value that means "done" (default: 'ready') */ + readyStatus?: string; /** Optional callback invoked after each status check */ onStatus?: (status: string, elapsedMs: number) => void; } -export async function pollUntilReady( - snapshotId: string, +export async function pollUntilStatus( + id: string, getStatus: (id: string) => Promise<{ status: string }>, options?: PollOptions, ): Promise { const interval = options?.pollInterval 
?? 10_000; const timeout = options?.pollTimeout ?? 600_000; + const ready = options?.readyStatus ?? 'ready'; const start = Date.now(); - logger.debug(`starting poll for snapshot ${snapshotId}`, { snapshotId }); + logger.debug(`starting poll for ${id}`, { id }); for (;;) { const elapsed = Date.now() - start; if (elapsed > timeout) { throw new TimeoutError( - `Polling timed out after ${Math.round(elapsed / 1000)}s for snapshot ${snapshotId}`, + `Polling timed out after ${Math.round(elapsed / 1000)}s for ${id}`, ); } - const { status } = await getStatus(snapshotId); + const { status } = await getStatus(id); logger.debug( - `poll ${snapshotId}: status=${status} elapsed=${Math.round(elapsed / 1000)}s`, - { snapshotId, status, elapsedMs: elapsed }, + `poll ${id}: status=${status} elapsed=${Math.round(elapsed / 1000)}s`, + { id, status, elapsedMs: elapsed }, ); options?.onStatus?.(status, elapsed); - if (status === 'ready') { + if (status === ready) { logger.debug( - `poll ${snapshotId}: ready after ${Math.round(elapsed / 1000)}s`, - { snapshotId, elapsedMs: elapsed }, + `poll ${id}: ${ready} after ${Math.round(elapsed / 1000)}s`, + { id, elapsedMs: elapsed }, ); return; } if (status === 'failed' || status === 'error') { throw new BRDError( - `Snapshot ${snapshotId} failed with status: ${status}`, + `${id} failed with status: ${status}`, ); } diff --git a/tests/discover.test.ts b/tests/discover.test.ts new file mode 100644 index 0000000..23ec5de --- /dev/null +++ b/tests/discover.test.ts @@ -0,0 +1,290 @@ +import type { Dispatcher } from 'undici'; +import { describe, expect, test, vi, beforeEach } from 'vitest'; +import { DiscoverService } from '../src/api/discover/service'; +import { DiscoverJob } from '../src/api/discover/job'; +import { DiscoverResult } from '../src/api/discover/result'; +import { Transport } from '../src/core/transport'; +import { ValidationError, APIError, TimeoutError } from '../src/utils/errors'; + +const mockTransport = { + request: vi.fn(), + 
stream: vi.fn(), +} as unknown as Transport; + +function mockRequest(statusCode: number, body: string) { + vi.mocked(mockTransport.request).mockResolvedValue({ + statusCode, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { + text: () => Promise.resolve(body), + }, + } as unknown as Dispatcher.ResponseData); +} + +function mockRequestSequence(responses: Array<{ statusCode: number; body: string }>) { + const mock = vi.mocked(mockTransport.request); + for (const r of responses) { + mock.mockResolvedValueOnce({ + statusCode: r.statusCode, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { + text: () => Promise.resolve(r.body), + }, + } as unknown as Dispatcher.ResponseData); + } +} + +// --- DiscoverService --- + +describe('DiscoverService.trigger', () => { + let service: DiscoverService; + + beforeEach(() => { + vi.clearAllMocks(); + service = new DiscoverService({ transport: mockTransport }); + }); + + test('returns DiscoverJob with taskId', async () => { + mockRequest(200, JSON.stringify({ task_id: 'task_abc123' })); + + const job = await service.trigger('AI trends'); + expect(job).toBeInstanceOf(DiscoverJob); + expect(job.taskId).toBe('task_abc123'); + expect(job.query).toBe('AI trends'); + }); + + test('passes options as snake_case in body', async () => { + mockRequest(200, JSON.stringify({ task_id: 'task_123' })); + + await service.trigger('test query', { + intent: 'research', + includeContent: true, + country: 'US', + numResults: 5, + }); + + const callBody = JSON.parse( + vi.mocked(mockTransport.request).mock.calls[0][1]?.body as string, + ); + expect(callBody.query).toBe('test query'); + expect(callBody.intent).toBe('research'); + expect(callBody.include_content).toBe(true); + expect(callBody.country).toBe('us'); + expect(callBody.num_results).toBe(5); + }); + + test('validates empty query', async () => { + await expect(service.trigger('')).rejects.toThrow(ValidationError); + }); + + test('throws on API error', async 
() => { + mockRequest(500, 'Internal Server Error'); + await expect(service.trigger('test')).rejects.toThrow(APIError); + }); +}); + +describe('DiscoverService.search', () => { + let service: DiscoverService; + + beforeEach(() => { + vi.clearAllMocks(); + service = new DiscoverService({ transport: mockTransport }); + }); + + test('triggers and returns DiscoverResult on success', async () => { + const results = [ + { link: 'https://example.com', title: 'Example', description: 'desc', relevance_score: 0.95 }, + ]; + mockRequestSequence([ + { statusCode: 200, body: JSON.stringify({ task_id: 'task_abc' }) }, + { statusCode: 200, body: JSON.stringify({ status: 'done', results, duration_seconds: 3.5 }) }, + ]); + + const result = await service.search('test query'); + expect(result).toBeInstanceOf(DiscoverResult); + expect(result.success).toBe(true); + expect(result.data).toEqual(results); + expect(result.query).toBe('test query'); + expect(result.durationSeconds).toBe(3.5); + expect(result.totalResults).toBe(1); + }); + + test('returns failed result on error status', async () => { + mockRequestSequence([ + { statusCode: 200, body: JSON.stringify({ task_id: 'task_fail' }) }, + { statusCode: 200, body: JSON.stringify({ status: 'error' }) }, + ]); + + const result = await service.search('test'); + expect(result.success).toBe(false); + expect(result.error).toContain('failed with status: error'); + }); +}); + +// --- DiscoverJob --- + +describe('DiscoverJob', () => { + const mockOps = { + pollOnce: vi.fn(), + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('status() calls pollOnce and returns status', async () => { + mockOps.pollOnce.mockResolvedValueOnce({ status: 'processing' }); + + const job = new DiscoverJob('task_123', mockOps, { query: 'test' }); + const status = await job.status(); + expect(status).toBe('processing'); + expect(mockOps.pollOnce).toHaveBeenCalledWith('task_123'); + }); + + test('status(false) returns cached status', async () => { + 
mockOps.pollOnce.mockResolvedValueOnce({ status: 'processing' }); + + const job = new DiscoverJob('task_123', mockOps); + await job.status(); + const cached = await job.status(false); + expect(cached).toBe('processing'); + expect(mockOps.pollOnce).toHaveBeenCalledTimes(1); + }); + + test('wait() polls until done', async () => { + mockOps.pollOnce + .mockResolvedValueOnce({ status: 'processing' }) + .mockResolvedValueOnce({ status: 'processing' }) + .mockResolvedValueOnce({ status: 'done', results: [{ link: 'https://x.com' }], duration_seconds: 2 }); + + const job = new DiscoverJob('task_123', mockOps); + const result = await job.wait({ pollInterval: 10 }); + expect(result).toBe('done'); + expect(mockOps.pollOnce).toHaveBeenCalledTimes(3); + }); + + test('wait() throws TimeoutError on timeout', async () => { + mockOps.pollOnce.mockResolvedValue({ status: 'processing' }); + + const job = new DiscoverJob('task_123', mockOps); + await expect( + job.wait({ timeout: 50, pollInterval: 10 }), + ).rejects.toThrow(TimeoutError); + }); + + test('wait() throws APIError on error status', async () => { + mockOps.pollOnce.mockResolvedValueOnce({ status: 'error' }); + + const job = new DiscoverJob('task_123', mockOps); + await expect(job.wait({ pollInterval: 10 })).rejects.toThrow(APIError); + }); + + test('fetch() returns cached results after wait', async () => { + const items = [{ link: 'https://a.com', title: 'A', description: 'd', relevance_score: 1 }]; + mockOps.pollOnce.mockResolvedValueOnce({ status: 'done', results: items }); + + const job = new DiscoverJob('task_123', mockOps); + await job.wait({ pollInterval: 10 }); + + // fetch should use cache, not call pollOnce again + const data = await job.fetch(); + expect(data).toEqual(items); + expect(mockOps.pollOnce).toHaveBeenCalledTimes(1); + }); + + test('toResult() returns DiscoverResult on success', async () => { + const items = [{ link: 'https://a.com', title: 'A', description: 'd', relevance_score: 0.9 }]; + 
mockOps.pollOnce.mockResolvedValueOnce({ status: 'done', results: items, duration_seconds: 1.5 }); + + const job = new DiscoverJob('task_123', mockOps, { query: 'test q' }); + const result = await job.toResult({ pollInterval: 10 }); + + expect(result).toBeInstanceOf(DiscoverResult); + expect(result.success).toBe(true); + expect(result.data).toEqual(items); + expect(result.query).toBe('test q'); + expect(result.taskId).toBe('task_123'); + expect(result.durationSeconds).toBe(1.5); + expect(result.totalResults).toBe(1); + }); + + test('toResult() returns failed result on timeout', async () => { + mockOps.pollOnce.mockResolvedValue({ status: 'processing' }); + + const job = new DiscoverJob('task_123', mockOps, { query: 'test' }); + const result = await job.toResult({ timeout: 50, pollInterval: 10 }); + + expect(result.success).toBe(false); + expect(result.error).toContain('timed out'); + }); + + test('toResult() returns failed result on error status', async () => { + mockOps.pollOnce.mockResolvedValueOnce({ status: 'failed' }); + + const job = new DiscoverJob('task_123', mockOps, { query: 'test' }); + const result = await job.toResult({ pollInterval: 10 }); + + expect(result.success).toBe(false); + expect(result.error).toContain('failed with status'); + }); +}); + +// --- DiscoverResult --- + +describe('DiscoverResult', () => { + test('extends BaseResult', () => { + const result = new DiscoverResult({ + success: true, + data: [], + query: 'test', + }); + expect(result.success).toBe(true); + expect(result.data).toEqual([]); + expect(result.elapsedMs()).toBeNull(); + }); + + test('toJSON includes discover-specific fields', () => { + const result = new DiscoverResult({ + success: true, + data: [{ link: 'https://x.com', title: 'X', description: 'd', relevance_score: 0.8 }], + query: 'test query', + intent: 'research', + durationSeconds: 2.5, + totalResults: 1, + taskId: 'task_abc', + }); + + const json = result.toJSON(); + expect(json.query).toBe('test query'); + 
expect(json.intent).toBe('research'); + expect(json.durationSeconds).toBe(2.5); + expect(json.totalResults).toBe(1); + expect(json.taskId).toBe('task_abc'); + }); + + test('toString shows query preview', () => { + const result = new DiscoverResult({ + success: true, + data: [], + query: 'a very long query that exceeds fifty characters and should be truncated', + }); + + const str = result.toString(); + expect(str).toContain('DiscoverResult'); + expect(str).toContain('...'); + }); + + test('defaults to null for optional fields', () => { + const result = new DiscoverResult({ success: false, query: 'q' }); + expect(result.intent).toBeNull(); + expect(result.durationSeconds).toBeNull(); + expect(result.totalResults).toBeNull(); + expect(result.taskId).toBeNull(); + }); +}); diff --git a/tests/error-classification.test.ts b/tests/error-classification.test.ts index 39b13c2..aab8782 100644 --- a/tests/error-classification.test.ts +++ b/tests/error-classification.test.ts @@ -1,18 +1,11 @@ import type { Dispatcher } from 'undici'; import { describe, it, expect } from 'vitest'; import { throwInvalidStatus, assertResponse } from '../src/core/transport'; -import { wrapAPIError } from '../src/utils/error-utils'; import { - BRDError, ValidationError, AuthenticationError, - ZoneError, NetworkError, - NetworkTimeoutError, - TimeoutError, - FSError, APIError, - DataNotReadyError, } from '../src/utils/errors'; describe('throwInvalidStatus', () => { @@ -100,68 +93,31 @@ describe('assertResponse', () => { }); }); -describe('wrapAPIError', () => { - describe('rethrows BRDError subtypes unchanged', () => { - const subtypes = [ - { name: 'AuthenticationError', make: () => new AuthenticationError('auth') }, - { name: 'ValidationError', make: () => new ValidationError('bad') }, - { name: 'ZoneError', make: () => new ZoneError('zone') }, - { name: 'NetworkError', make: () => new NetworkError('net') }, - { name: 'NetworkTimeoutError', make: () => new NetworkTimeoutError('timeout') }, - { 
name: 'TimeoutError', make: () => new TimeoutError() }, - { name: 'FSError', make: () => new FSError('fs') }, - { name: 'APIError', make: () => new APIError('api') }, - { name: 'DataNotReadyError', make: () => new DataNotReadyError() }, - { name: 'BRDError', make: () => new BRDError('base') }, - ]; - - for (const { name, make } of subtypes) { - it(`rethrows ${name}`, () => { - const original = make(); - try { - wrapAPIError(original, 'test'); - } catch (e) { - expect(e).toBe(original); - } - }); - } +describe('assertResponse — body read failures', () => { + it('success path: body.text() throws → NetworkError', async () => { + const response = { + statusCode: 200, + headers: {}, + body: { + text: () => Promise.reject(new Error('stream destroyed')), + }, + } as unknown as Dispatcher.ResponseData; + + await expect(assertResponse(response)).rejects.toThrow(NetworkError); + await expect(assertResponse(response)).rejects.toThrow( + /Failed to read response body/, + ); }); - describe('wraps non-BRDError as APIError with context', () => { - it('wraps TypeError', () => { - expect(() => wrapAPIError(new TypeError('boom'), 'snapshot.fetch')).toThrow( - APIError, - ); - try { - wrapAPIError(new TypeError('boom'), 'snapshot.fetch'); - } catch (e) { - expect((e as APIError).message).toContain('snapshot.fetch'); - expect((e as APIError).message).toContain('boom'); - } - }); - - it('wraps RangeError', () => { - expect(() => wrapAPIError(new RangeError('out'), 'test')).toThrow(APIError); - }); - - it('wraps generic Error', () => { - expect(() => wrapAPIError(new Error('fail'), 'test')).toThrow(APIError); - }); - - it('includes location in message', () => { - try { - wrapAPIError(new Error('oops'), 'listZones'); - } catch (e) { - expect((e as APIError).message).toBe('listZones: oops'); - } - }); - - it('includes location and operation in message', () => { - try { - wrapAPIError(new Error('unexpected token'), 'snapshot.fetch', 'parsing response'); - } catch (e) { - expect((e as 
APIError).message).toBe('snapshot.fetch: parsing response: unexpected token'); - } - }); + it('error path: body.text() throws → still throws status error with fallback text', async () => { + const response = { + statusCode: 500, + headers: {}, + body: { + text: () => Promise.reject(new Error('stream destroyed')), + }, + } as unknown as Dispatcher.ResponseData; + + await expect(assertResponse(response)).rejects.toThrow(APIError); }); }); diff --git a/tests/integration/serp.test.ts b/tests/integration/serp.test.ts new file mode 100644 index 0000000..06a80fc --- /dev/null +++ b/tests/integration/serp.test.ts @@ -0,0 +1,53 @@ +import 'dotenv/config'; +import { describe, test, expect, beforeAll, afterAll } from 'vitest'; +import { bdclient } from '../../src/index'; + +const API_KEY = process.env.BRIGHTDATA_API_TOKEN; + +describe.skipIf(!API_KEY)('SERP / Search (real API)', () => { + let client: bdclient; + + beforeAll(() => { + client = new bdclient({ + apiKey: API_KEY, + autoCreateZones: false, + }); + }); + + afterAll(async () => { + await client?.close(); + }); + + test('search.google returns results', async () => { + const result = await client.search.google('bright data web scraping'); + expect(typeof result).toBe('string'); + expect((result as string).length).toBeGreaterThan(0); + }, 30_000); + + test('search.google returns JSON when format is json', async () => { + const result = await client.search.google('nodejs tutorial', { + format: 'json', + }); + expect(result).toHaveProperty('status_code'); + expect(result).toHaveProperty('body'); + }, 30_000); + + test('search.bing returns results', async () => { + const result = await client.search.bing('bright data'); + expect(typeof result).toBe('string'); + expect((result as string).length).toBeGreaterThan(0); + }, 30_000); + + test('search.yandex returns results', async () => { + const result = await client.search.yandex('bright data'); + expect(typeof result).toBe('string'); + expect((result as 
string).length).toBeGreaterThan(0); + }, 60_000); + + test('search.google handles batch queries', async () => { + const queries = ['pizza restaurants', 'sushi restaurants']; + const results = await client.search.google(queries); + expect(Array.isArray(results)).toBe(true); + expect(results).toHaveLength(2); + }, 60_000); +}); diff --git a/tests/integration/web_unlocker.test.ts b/tests/integration/web_unlocker.test.ts new file mode 100644 index 0000000..b80b0bb --- /dev/null +++ b/tests/integration/web_unlocker.test.ts @@ -0,0 +1,49 @@ +import 'dotenv/config'; +import { describe, test, expect, beforeAll, afterAll } from 'vitest'; +import { bdclient } from '../../src/index'; + +const API_KEY = process.env.BRIGHTDATA_API_TOKEN; + +describe.skipIf(!API_KEY)('Web Unlocker (real API)', () => { + let client: bdclient; + + beforeAll(() => { + client = new bdclient({ + apiKey: API_KEY, + autoCreateZones: false, + }); + }); + + afterAll(async () => { + await client?.close(); + }); + + test('scrapeUrl returns HTML for a simple page', async () => { + const result = await client.scrapeUrl('https://example.com'); + expect(typeof result).toBe('string'); + expect(result).toContain('Example Domain'); + }, 30_000); + + test('scrapeUrl returns JSON when format is json', async () => { + const result = await client.scrapeUrl('https://example.com', { + format: 'json', + }); + expect(result).toHaveProperty('status_code'); + expect(result).toHaveProperty('body'); + }, 30_000); + + test('scrapeUrl returns markdown when dataFormat is markdown', async () => { + const result = await client.scrapeUrl('https://example.com', { + dataFormat: 'markdown', + }); + expect(typeof result).toBe('string'); + expect(result).toContain('Example Domain'); + }, 30_000); + + test('scrapeUrl handles batch URLs', async () => { + const urls = ['https://example.com', 'https://httpbin.org/html']; + const results = await client.scrapeUrl(urls); + expect(Array.isArray(results)).toBe(true); + 
expect(results).toHaveLength(2); + }, 60_000); +}); diff --git a/tests/polling.test.ts b/tests/polling.test.ts index 4c19ec9..bab6f66 100644 --- a/tests/polling.test.ts +++ b/tests/polling.test.ts @@ -1,8 +1,8 @@ import { describe, expect, test, vi } from 'vitest'; -import { pollUntilReady } from '../src/utils/polling'; +import { pollUntilStatus } from '../src/utils/polling'; import { APIError, BRDError, TimeoutError } from '../src/utils/errors'; -describe('pollUntilReady', () => { +describe('pollUntilStatus', () => { test('resolves when status is ready', async () => { const getStatus = vi .fn() @@ -10,14 +10,14 @@ describe('pollUntilReady', () => { .mockResolvedValueOnce({ status: 'running' }) .mockResolvedValueOnce({ status: 'ready' }); - await pollUntilReady('snap_123', getStatus, { pollInterval: 10 }); + await pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }); expect(getStatus).toHaveBeenCalledTimes(3); }); test('resolves immediately if already ready', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'ready' }); const start = Date.now(); - await pollUntilReady('snap_123', getStatus, { pollInterval: 10_000 }); + await pollUntilStatus('snap_123', getStatus, { pollInterval: 10_000 }); expect(Date.now() - start).toBeLessThan(100); expect(getStatus).toHaveBeenCalledTimes(1); }); @@ -25,7 +25,7 @@ describe('pollUntilReady', () => { test('throws TimeoutError when timeout exceeded', async () => { const getStatus = vi.fn().mockResolvedValue({ status: 'running' }); await expect( - pollUntilReady('snap_123', getStatus, { + pollUntilStatus('snap_123', getStatus, { pollInterval: 50, pollTimeout: 120, }), @@ -35,14 +35,14 @@ describe('pollUntilReady', () => { test('throws BRDError when status is failed', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'failed' }); await expect( - pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), + pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), 
).rejects.toThrow(BRDError); }); test('throws BRDError when status is error', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'error' }); await expect( - pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), + pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), ).rejects.toThrow(BRDError); }); @@ -53,7 +53,7 @@ describe('pollUntilReady', () => { .mockResolvedValueOnce({ status: 'ready' }); const onStatus = vi.fn(); - await pollUntilReady('snap_123', getStatus, { + await pollUntilStatus('snap_123', getStatus, { pollInterval: 10, onStatus, }); @@ -70,7 +70,7 @@ describe('pollUntilReady', () => { new APIError('API failure', 500, 'Internal Server Error'), ); await expect( - pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), + pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), ).rejects.toThrow(APIError); }); }); diff --git a/tests/response-schemas.test.ts b/tests/response-schemas.test.ts new file mode 100644 index 0000000..73f477e --- /dev/null +++ b/tests/response-schemas.test.ts @@ -0,0 +1,150 @@ +import { describe, expect, test } from 'vitest'; +import { + SnapshotMetaResponseSchema, + SnapshotStatusResponseSchema, +} from '../src/schemas/responses'; +import { parseResponse } from '../src/utils/misc'; +import { APIError } from '../src/utils/errors'; + +describe('parseResponse', () => { + const schema = SnapshotMetaResponseSchema; + + test('valid JSON with matching schema returns parsed data', () => { + const result = parseResponse( + '{"snapshot_id":"abc123"}', + schema, + 'test', + ); + expect(result.snapshot_id).toBe('abc123'); + }); + + test('valid JSON with missing critical field throws APIError', () => { + expect(() => parseResponse('{}', schema, 'test')).toThrow(APIError); + }); + + test('valid JSON with extra unknown fields passes (passthrough)', () => { + const result = parseResponse( + '{"snapshot_id":"abc123","extra":true}', + schema, + 'test', + ); + expect(result.snapshot_id).toBe('abc123'); + 
expect(result.extra).toBe(true); + }); + + test('malformed JSON throws APIError (from inner parseJSON)', () => { + expect(() => parseResponse('{bad json', schema, 'test')).toThrow( + APIError, + ); + }); + + test('error message includes label', () => { + expect(() => + parseResponse('{}', schema, 'datasets/v3/trigger'), + ).toThrow(/datasets\/v3\/trigger/); + }); +}); + +describe('SnapshotMetaResponseSchema', () => { + test('accepts {snapshot_id: "abc123"}', () => { + const result = SnapshotMetaResponseSchema.parse({ + snapshot_id: 'abc123', + }); + expect(result.snapshot_id).toBe('abc123'); + }); + + test('rejects {}', () => { + expect(() => SnapshotMetaResponseSchema.parse({})).toThrow(); + }); + + test('rejects {snapshot_id: ""}', () => { + expect(() => + SnapshotMetaResponseSchema.parse({ snapshot_id: '' }), + ).toThrow(); + }); + + test('accepts with extra fields', () => { + const result = SnapshotMetaResponseSchema.parse({ + snapshot_id: 'abc', + extra_field: true, + }); + expect(result.snapshot_id).toBe('abc'); + expect(result.extra_field).toBe(true); + }); +}); + +describe('SnapshotStatusResponseSchema', () => { + test('accepts valid status response', () => { + const result = SnapshotStatusResponseSchema.parse({ + status: 'ready', + snapshot_id: 's1', + dataset_id: 'd1', + }); + expect(result.status).toBe('ready'); + }); + + test('accepts all valid status values', () => { + for (const status of [ + 'running', + 'ready', + 'failed', + 'cancelled', + 'error', + ]) { + const result = SnapshotStatusResponseSchema.parse({ + status, + snapshot_id: 's1', + dataset_id: 'd1', + }); + expect(result.status).toBe(status); + } + }); + + test('rejects unknown status value', () => { + expect(() => + SnapshotStatusResponseSchema.parse({ + status: 'unknown', + snapshot_id: 's1', + dataset_id: 'd1', + }), + ).toThrow(); + }); + + test('rejects missing status', () => { + expect(() => + SnapshotStatusResponseSchema.parse({ + snapshot_id: 's1', + dataset_id: 'd1', + }), + 
).toThrow(); + }); + + test('rejects missing snapshot_id', () => { + expect(() => + SnapshotStatusResponseSchema.parse({ + status: 'ready', + dataset_id: 'd1', + }), + ).toThrow(); + }); + + test('rejects missing dataset_id', () => { + expect(() => + SnapshotStatusResponseSchema.parse({ + status: 'ready', + snapshot_id: 's1', + }), + ).toThrow(); + }); + + test('accepts with extra fields', () => { + const result = SnapshotStatusResponseSchema.parse({ + status: 'running', + snapshot_id: 's1', + dataset_id: 'd1', + progress: 0.5, + records_count: 100, + }); + expect(result.progress).toBe(0.5); + }); +}); diff --git a/tests/scraper-studio.test.ts b/tests/scraper-studio.test.ts new file mode 100644 index 0000000..d90e85c --- /dev/null +++ b/tests/scraper-studio.test.ts @@ -0,0 +1,378 @@ +import type { Dispatcher } from 'undici'; +import { describe, expect, test, vi, beforeEach } from 'vitest'; +import { ScraperStudioService } from '../src/api/scraperstudio/service'; +import { ScraperStudioJob } from '../src/api/scraperstudio/job'; +import { Transport } from '../src/core/transport'; +import { + ValidationError, + APIError, + DataNotReadyError, + NetworkError, + TimeoutError, +} from '../src/utils/errors'; + +const mockTransport = { + request: vi.fn(), + stream: vi.fn(), +} as unknown as Transport; + +function mockRequest(statusCode: number, body: string) { + vi.mocked(mockTransport.request).mockResolvedValue({ + statusCode, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { + text: () => Promise.resolve(body), + }, + } as unknown as Dispatcher.ResponseData); +} + +function mockRequestSequence( + responses: Array<{ statusCode: number; body: string }>, +) { + const mock = vi.mocked(mockTransport.request); + for (const r of responses) { + mock.mockResolvedValueOnce({ + statusCode: r.statusCode, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { + text: () => Promise.resolve(r.body), + }, + } as unknown as Dispatcher.ResponseData); 
+ } +} + +// --- ScraperStudioService.trigger --- + +describe('ScraperStudioService.trigger', () => { + let service: ScraperStudioService; + + beforeEach(() => { + vi.clearAllMocks(); + service = new ScraperStudioService({ transport: mockTransport }); + }); + + test('returns ScraperStudioJob with responseId', async () => { + mockRequest(200, JSON.stringify({ response_id: 'resp_abc123' })); + + const job = await service.trigger('c_test', { url: 'https://example.com' }); + expect(job).toBeInstanceOf(ScraperStudioJob); + expect(job.responseId).toBe('resp_abc123'); + }); + + test('sends collector as query param', async () => { + mockRequest(200, JSON.stringify({ response_id: 'resp_123' })); + + await service.trigger('c_mycollector', { url: 'https://example.com' }); + + expect(mockTransport.request).toHaveBeenCalledWith( + expect.stringContaining('trigger_immediate'), + expect.objectContaining({ + query: { collector: 'c_mycollector' }, + }), + ); + }); + + test('sends input as POST body', async () => { + mockRequest(200, JSON.stringify({ response_id: 'resp_123' })); + + await service.trigger('c_test', { url: 'https://example.com', custom: 'field' }); + + const callBody = JSON.parse( + vi.mocked(mockTransport.request).mock.calls[0][1]?.body as string, + ); + expect(callBody.url).toBe('https://example.com'); + expect(callBody.custom).toBe('field'); + }); + + test('validates empty collector ID', async () => { + await expect( + service.trigger('', { url: 'https://example.com' }), + ).rejects.toThrow(ValidationError); + }); + + test('throws on API error', async () => { + mockRequest(500, 'Internal Server Error'); + await expect( + service.trigger('c_test', { url: 'https://example.com' }), + ).rejects.toThrow(APIError); + }); +}); + +// --- ScraperStudioService.run --- + +describe('ScraperStudioService.run', () => { + let service: ScraperStudioService; + + beforeEach(() => { + vi.clearAllMocks(); + service = new ScraperStudioService({ transport: mockTransport }); + }); + + 
test('single input — triggers, polls, returns RunResult', async () => { + const data = [{ title: 'Product A', price: 29.99 }]; + mockRequestSequence([ + // trigger + { statusCode: 200, body: JSON.stringify({ response_id: 'resp_1' }) }, + // first fetch → 202 + { statusCode: 202, body: 'not ready' }, + // second fetch → 200 + { statusCode: 200, body: JSON.stringify(data) }, + ]); + + const results = await service.run('c_test', { + input: { url: 'https://example.com' }, + pollInterval: 10, + }); + + expect(results).toHaveLength(1); + expect(results[0].data).toEqual(data); + expect(results[0].error).toBeNull(); + expect(results[0].responseId).toBe('resp_1'); + expect(results[0].elapsedMs).toBeGreaterThan(0); + expect(results[0].input).toEqual({ url: 'https://example.com' }); + }); + + test('array input — triggers each, returns per-input results', async () => { + const data1 = [{ title: 'A' }]; + const data2 = [{ title: 'B' }]; + mockRequestSequence([ + // trigger 1 + { statusCode: 200, body: JSON.stringify({ response_id: 'resp_1' }) }, + // fetch 1 + { statusCode: 200, body: JSON.stringify(data1) }, + // trigger 2 + { statusCode: 200, body: JSON.stringify({ response_id: 'resp_2' }) }, + // fetch 2 + { statusCode: 200, body: JSON.stringify(data2) }, + ]); + + const results = await service.run('c_test', { + input: [{ url: 'https://a.com' }, { url: 'https://b.com' }], + pollInterval: 10, + }); + + expect(results).toHaveLength(2); + expect(results[0].data).toEqual(data1); + expect(results[0].responseId).toBe('resp_1'); + expect(results[1].data).toEqual(data2); + expect(results[1].responseId).toBe('resp_2'); + }); + + test('captures per-input errors without aborting', async () => { + mockRequestSequence([ + // trigger 1 → fails + { statusCode: 500, body: 'Internal Server Error' }, + // trigger 2 → succeeds + { statusCode: 200, body: JSON.stringify({ response_id: 'resp_2' }) }, + // fetch 2 + { statusCode: 200, body: JSON.stringify([{ title: 'B' }]) }, + ]); + + const 
results = await service.run('c_test', { + input: [{ url: 'https://a.com' }, { url: 'https://b.com' }], + pollInterval: 10, + }); + + expect(results).toHaveLength(2); + expect(results[0].data).toBeNull(); + expect(results[0].error).toBeTruthy(); + expect(results[1].data).toEqual([{ title: 'B' }]); + expect(results[1].error).toBeNull(); + }); +}); + +// --- ScraperStudioService.status --- + +describe('ScraperStudioService.status', () => { + let service: ScraperStudioService; + + beforeEach(() => { + vi.clearAllMocks(); + service = new ScraperStudioService({ transport: mockTransport }); + }); + + test('returns normalized JobStatus', async () => { + mockRequest( + 200, + JSON.stringify({ + id: 'j_abc', + status: 'done', + collector: 'c_test', + inputs: 1, + lines: 5, + fails: 0, + success_rate: 1, + created: '2026-04-01T00:00:00Z', + }), + ); + + const status = await service.status('j_abc'); + expect(status.id).toBe('j_abc'); + expect(status.status).toBe('done'); + expect(status.collector).toBe('c_test'); + expect(status.successRate).toBe(1); + }); + + test('handles mixed-case API response fields', async () => { + mockRequest( + 200, + JSON.stringify({ + Id: 'j_mixed', + Status: 'running', + Collector: 'c_mixed', + Inputs: 2, + Lines: 10, + Fails: 1, + Success_rate: 0.5, + Created: '2026-04-01T00:00:00Z', + Job_time: 5000, + Queue_time: 100, + }), + ); + + const status = await service.status('j_mixed'); + expect(status.id).toBe('j_mixed'); + expect(status.status).toBe('running'); + expect(status.collector).toBe('c_mixed'); + expect(status.successRate).toBe(0.5); + expect(status.jobTime).toBe(5000); + expect(status.queueTime).toBe(100); + }); + + test('rejects response missing status field', async () => { + mockRequest(200, JSON.stringify({ id: 'j_abc' })); + await expect(service.status('j_abc')).rejects.toThrow(APIError); + }); +}); + +// --- ScraperStudioService.fetch --- + +describe('ScraperStudioService.fetch', () => { + let service: ScraperStudioService; + + 
beforeEach(() => { + vi.clearAllMocks(); + service = new ScraperStudioService({ transport: mockTransport }); + }); + + test('returns data on 200', async () => { + const data = [{ title: 'Test' }]; + mockRequest(200, JSON.stringify(data)); + + const result = await service.fetch('resp_123'); + expect(result).toEqual(data); + }); + + test('throws DataNotReadyError on 202', async () => { + mockRequest(202, 'not ready'); + await expect(service.fetch('resp_123')).rejects.toThrow( + DataNotReadyError, + ); + }); +}); + +// --- ScraperStudioJob --- + +describe('ScraperStudioJob', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('fetch() returns data on 200', async () => { + const data = [{ title: 'Product' }]; + mockRequest(200, JSON.stringify(data)); + + const job = new ScraperStudioJob('resp_123', mockTransport); + const result = await job.fetch(); + expect(result).toEqual(data); + }); + + test('fetch() throws DataNotReadyError on 202', async () => { + mockRequest(202, 'not ready'); + + const job = new ScraperStudioJob('resp_123', mockTransport); + await expect(job.fetch()).rejects.toThrow(DataNotReadyError); + }); + + test('fetch() consumes body on 202', async () => { + const textFn = vi.fn().mockResolvedValue('not ready'); + vi.mocked(mockTransport.request).mockResolvedValue({ + statusCode: 202, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { text: textFn }, + } as unknown as Dispatcher.ResponseData); + + const job = new ScraperStudioJob('resp_123', mockTransport); + await expect(job.fetch()).rejects.toThrow(DataNotReadyError); + expect(textFn).toHaveBeenCalled(); + }); + + test('waitAndFetch() retries on DataNotReadyError', async () => { + const data = [{ title: 'Ready' }]; + mockRequestSequence([ + { statusCode: 202, body: 'not ready' }, + { statusCode: 202, body: 'not ready' }, + { statusCode: 200, body: JSON.stringify(data) }, + ]); + + const job = new ScraperStudioJob('resp_123', mockTransport); + const result = await 
job.waitAndFetch({ pollInterval: 10 }); + expect(result).toEqual(data); + expect(mockTransport.request).toHaveBeenCalledTimes(3); + }); + + test('waitAndFetch() retries on transient NetworkError', async () => { + const data = [{ title: 'OK' }]; + const mock = vi.mocked(mockTransport.request); + + // First call: network error + mock.mockRejectedValueOnce(new NetworkError('ECONNRESET')); + // Second call: success + mock.mockResolvedValueOnce({ + statusCode: 200, + headers: {}, + trailers: {}, + opaque: null, + context: {}, + body: { text: () => Promise.resolve(JSON.stringify(data)) }, + } as unknown as Dispatcher.ResponseData); + + const job = new ScraperStudioJob('resp_123', mockTransport); + const result = await job.waitAndFetch({ pollInterval: 10 }); + expect(result).toEqual(data); + expect(mock).toHaveBeenCalledTimes(2); + }); + + test('waitAndFetch() throws TimeoutError', async () => { + mockRequest(202, 'not ready'); + + const job = new ScraperStudioJob('resp_123', mockTransport); + await expect( + job.waitAndFetch({ timeout: 50, pollInterval: 10 }), + ).rejects.toThrow(TimeoutError); + }); + + test('waitAndFetch() passes response_id as query param', async () => { + const data = [{ id: 1 }]; + mockRequest(200, JSON.stringify(data)); + + const job = new ScraperStudioJob('resp_abc', mockTransport); + await job.waitAndFetch({ pollInterval: 10 }); + + expect(mockTransport.request).toHaveBeenCalledWith( + expect.stringContaining('get_result'), + expect.objectContaining({ + query: { response_id: 'resp_abc' }, + }), + ); + }); +}); diff --git a/tests/snapshot.test.ts b/tests/snapshot.test.ts index d4989bf..cbec825 100644 --- a/tests/snapshot.test.ts +++ b/tests/snapshot.test.ts @@ -91,7 +91,7 @@ describe('SnapshotAPI.getStatus', () => { }); test('returns parsed status response', async () => { - const statusData = { status: 'ready', snapshot_id: 'snap_123' }; + const statusData = { status: 'ready', snapshot_id: 'snap_123', dataset_id: 'ds_1' }; mockRequest(200, 
JSON.stringify(statusData)); const result = await api.getStatus('snap_123'); @@ -99,7 +99,7 @@ describe('SnapshotAPI.getStatus', () => { }); test('calls correct status endpoint', async () => { - mockRequest(200, JSON.stringify({ status: 'running' })); + mockRequest(200, JSON.stringify({ status: 'running', snapshot_id: 'snap_123', dataset_id: 'ds_1' })); await api.getStatus('snap_123'); expect(mockTransport.request).toHaveBeenCalledWith( diff --git a/tests/transport.test.ts b/tests/transport.test.ts index 656fe78..36414f4 100644 --- a/tests/transport.test.ts +++ b/tests/transport.test.ts @@ -1,9 +1,11 @@ import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'; -import { request as lib_request, stream as lib_stream } from 'undici'; +import { request as lib_request, stream as lib_stream, interceptors } from 'undici'; import { Transport } from '../src/core/transport'; import { BRDError, + APIError, AuthenticationError, + ValidationError, NetworkError, NetworkTimeoutError, } from '../src/utils/errors'; @@ -312,3 +314,165 @@ describe('Transport.stream()', () => { } }); }); + +describe('AbortSignal timeout detection', () => { + let transport: Transport; + + beforeEach(() => { + vi.clearAllMocks(); + transport = new Transport({ apiKey: API_KEY }); + }); + + afterEach(async () => { + try { await transport?.close(); } catch { /* ignore */ } + }); + + it('DOMException name=TimeoutError → NetworkTimeoutError', async () => { + const err = new Error('The operation was aborted'); + err.name = 'TimeoutError'; + vi.mocked(lib_request).mockRejectedValue(err); + + await expect( + transport.request('https://example.com'), + ).rejects.toThrow(NetworkTimeoutError); + }); + + it('AbortError with timeout message → NetworkTimeoutError', async () => { + const err = new Error('The operation was aborted due to timeout'); + err.name = 'AbortError'; + vi.mocked(lib_request).mockRejectedValue(err); + + await expect( + transport.request('https://example.com'), + 
).rejects.toThrow(NetworkTimeoutError); + }); + + it('AbortError without timeout message → NetworkError', async () => { + const err = new Error('The operation was aborted'); + err.name = 'AbortError'; + vi.mocked(lib_request).mockRejectedValue(err); + + await expect( + transport.request('https://example.com'), + ).rejects.toThrow(NetworkError); + }); + + it('stream: DOMException name=TimeoutError → NetworkTimeoutError', async () => { + const err = new Error('The operation was aborted'); + err.name = 'TimeoutError'; + vi.mocked(lib_stream).mockRejectedValue(err); + + await expect( + transport.stream('https://example.com', { method: 'GET' }, vi.fn()), + ).rejects.toThrow(NetworkTimeoutError); + }); +}); + +describe('RequestRetryError classification', () => { + let transport: Transport; + + beforeEach(() => { + vi.clearAllMocks(); + transport = new Transport({ apiKey: API_KEY }); + }); + + afterEach(async () => { + try { await transport?.close(); } catch { /* ignore */ } + }); + + function makeRetryError(statusCode: number) { + const err = new Error('Request retry error') as Error & { statusCode: number }; + err.name = 'RequestRetryError'; + err.statusCode = statusCode; + return err; + } + + it('statusCode 429 → APIError', async () => { + vi.mocked(lib_request).mockRejectedValue(makeRetryError(429)); + + await expect( + transport.request('https://example.com'), + ).rejects.toThrow(APIError); + }); + + it('statusCode 500 → APIError', async () => { + vi.mocked(lib_request).mockRejectedValue(makeRetryError(500)); + + await expect( + transport.request('https://example.com'), + ).rejects.toThrow(APIError); + }); + + it('statusCode 403 → AuthenticationError', async () => { + vi.mocked(lib_request).mockRejectedValue(makeRetryError(403)); + + await expect( + transport.request('https://example.com'), + ).rejects.toThrow(AuthenticationError); + }); + + it('statusCode 400 → ValidationError', async () => { + vi.mocked(lib_request).mockRejectedValue(makeRetryError(400)); + + await 
expect( + transport.request('https://example.com'), + ).rejects.toThrow(ValidationError); + }); + + it('stream: statusCode 429 → APIError', async () => { + vi.mocked(lib_stream).mockRejectedValue(makeRetryError(429)); + + await expect( + transport.stream('https://example.com', { method: 'GET' }, vi.fn()), + ).rejects.toThrow(APIError); + }); + + it('stream: statusCode 403 → AuthenticationError', async () => { + vi.mocked(lib_stream).mockRejectedValue(makeRetryError(403)); + + await expect( + transport.stream('https://example.com', { method: 'GET' }, vi.fn()), + ).rejects.toThrow(AuthenticationError); + }); +}); + +describe('retry interceptor configuration', () => { + it('retry interceptor receives correct options', () => { + const transport = new Transport({ apiKey: API_KEY }); + const retryCall = vi.mocked(interceptors.retry).mock.calls[0]![0] as Record; + + expect(retryCall).toMatchObject({ + maxRetries: 3, + timeoutFactor: 1.5, + statusCodes: [429, 500, 502, 503, 504], + }); + expect(retryCall.methods).toBeDefined(); + expect(retryCall.errorCodes).toBeDefined(); + + void transport.close(); + }); + + it('POST is in retry methods', () => { + const transport = new Transport({ apiKey: API_KEY }); + const retryCall = vi.mocked(interceptors.retry).mock.calls[0]![0] as Record; + const methods = retryCall.methods as string[]; + + expect(methods).toContain('POST'); + expect(methods).toContain('GET'); + + void transport.close(); + }); + + it('UND_ERR_CONNECT_TIMEOUT is in retry error codes', () => { + const transport = new Transport({ apiKey: API_KEY }); + const retryCall = vi.mocked(interceptors.retry).mock.calls[0]![0] as Record; + const errorCodes = retryCall.errorCodes as string[]; + + expect(errorCodes).toContain('UND_ERR_CONNECT_TIMEOUT'); + expect(errorCodes).toContain('ECONNREFUSED'); + expect(errorCodes).toContain('ECONNRESET'); + expect(errorCodes).toContain('ENOTFOUND'); + + void transport.close(); + }); +}); From fe7eaf501c73080e47c0b365b9cbebd1bbce3989 Mon 
Sep 17 00:00:00 2001 From: "user.mail" Date: Thu, 9 Apr 2026 10:56:17 +0300 Subject: [PATCH 2/4] fix bad naming --- src/api/datasets/base.ts | 4 ++-- src/api/scrape/job.ts | 4 ++-- src/api/scrape/snapshot.ts | 4 ++-- src/client.ts | 25 +++++++++++++++++++++++++ src/index.ts | 4 ++++ src/schemas/client.ts | 6 ++++++ src/schemas/scraperstudio.ts | 8 ++++---- src/types/datasets.ts | 5 ++++- src/utils/polling.ts | 6 ++---- tests/polling.test.ts | 18 +++++++++--------- 10 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/api/datasets/base.ts b/src/api/datasets/base.ts index 361738d..fbffbec 100644 --- a/src/api/datasets/base.ts +++ b/src/api/datasets/base.ts @@ -2,7 +2,7 @@ import { Transport, assertResponse } from '../../core/transport'; import { API_ENDPOINT } from '../../utils/constants'; import { parseJSON } from '../../utils/misc'; import { getLogger } from '../../utils/logger'; -import { pollUntilStatus } from '../../utils/polling'; +import { pollUntilReady } from '../../utils/polling'; import type { DatasetMetadata, DatasetSnapshotStatus, @@ -83,7 +83,7 @@ export abstract class BaseDataset { opts?: DatasetDownloadOptions, ): Promise { this.logger.debug('download', { snapshotId }); - await pollUntilStatus(snapshotId, (id) => this.getStatus(id)); + await pollUntilReady(snapshotId, (id) => this.getStatus(id)); const url = API_ENDPOINT.DATASET_SNAPSHOT_DOWNLOAD.replace( '{snapshot_id}', snapshotId, diff --git a/src/api/scrape/job.ts b/src/api/scrape/job.ts index 050fc74..926c043 100644 --- a/src/api/scrape/job.ts +++ b/src/api/scrape/job.ts @@ -1,4 +1,4 @@ -import { pollUntilStatus, type PollOptions } from '../../utils/polling'; +import { pollUntilReady, type PollOptions } from '../../utils/polling'; import { DataNotReadyError, TimeoutError } from '../../utils/errors'; import { sleep } from '../../utils/misc'; import { getLogger } from '../../utils/logger'; @@ -62,7 +62,7 @@ export class ScrapeJob { * @throws BRDError if job fails */ async 
wait(options?: PollOptions): Promise { - await pollUntilStatus( + await pollUntilReady( this.snapshotId, (id) => this.snapshotOps.getStatus(id), options, diff --git a/src/api/scrape/snapshot.ts b/src/api/scrape/snapshot.ts index 7265ffb..962759e 100644 --- a/src/api/scrape/snapshot.ts +++ b/src/api/scrape/snapshot.ts @@ -8,7 +8,7 @@ import { getAbsAndEnsureDir, } from '../../utils/files'; import { parseJSON, parseResponse } from '../../utils/misc'; -import { pollUntilStatus } from '../../utils/polling'; +import { pollUntilReady } from '../../utils/polling'; import type { z } from 'zod'; import { SnapshotIdSchema, @@ -197,7 +197,7 @@ export class SnapshotAPI extends BaseAPI { async #awaitReady(snapshotId: string): Promise { this.logger.info(`polling snapshot status for id ${snapshotId}`); - await pollUntilStatus(snapshotId, (id) => this.#getStatus(id), { + await pollUntilReady(snapshotId, (id) => this.#getStatus(id), { pollInterval: 10_000, onStatus: (status, elapsed) => { this.logger.info( diff --git a/src/client.ts b/src/client.ts index b552c6d..2f57f67 100644 --- a/src/client.ts +++ b/src/client.ts @@ -8,6 +8,7 @@ import type { DiscoverResult } from './api/discover/result'; import type { DiscoverJob } from './api/discover/job'; import type { DiscoverOptions } from './schemas/discover'; import { ScraperStudioService } from './api/scraperstudio/service'; +import { BrowserService } from './api/browser/service'; import { setup as setupLogger, getLogger } from './utils/logger'; import { DEFAULT_WEB_UNLOCKER_ZONE, @@ -93,6 +94,7 @@ export class bdclient { declare search: SearchRouter; declare datasets: DatasetsClient; declare scraperStudio: ScraperStudioService; + declare browser: BrowserService; constructor(options?: BdClientOptions) { const opt = assertSchema( @@ -165,6 +167,29 @@ export class bdclient { defineLazy(this, 'scraperStudio', () => new ScraperStudioService({ transport: this.transport }), ); + + defineLazy(this, 'browser', () => { + const username = + 
opt.browserUsername || + process.env.BRIGHTDATA_BROWSERAPI_USERNAME; + const password = + opt.browserPassword || + process.env.BRIGHTDATA_BROWSERAPI_PASSWORD; + + if (!username || !password) { + throw new ValidationError( + 'Browser API requires credentials. Pass browserUsername and browserPassword to the client, ' + + 'or set BRIGHTDATA_BROWSERAPI_USERNAME and BRIGHTDATA_BROWSERAPI_PASSWORD environment variables.', + ); + } + + return new BrowserService({ + username, + password, + host: opt.browserHost, + port: opt.browserPort, + }); + }); } private get scrapeAPI(): ScrapeAPI { diff --git a/src/index.ts b/src/index.ts index 5b2277b..516cfeb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -39,6 +39,10 @@ export type { RunResult, } from './schemas/scraperstudio'; +// ── Browser API ──────────────────────────────────────────────── +export { BrowserService } from './api/browser/service'; +export type { BrowserConnectOptions } from './schemas/browser'; + // ── Subpath re-exports (backward compat) ───────────────────────── // Consumers can also import these from '@brightdata/sdk/scrapers', // '@brightdata/sdk/search', or '@brightdata/sdk/datasets'. 
diff --git a/src/schemas/client.ts b/src/schemas/client.ts index 729599e..0c0c04d 100644 --- a/src/schemas/client.ts +++ b/src/schemas/client.ts @@ -26,6 +26,12 @@ export const ClientOptionsSchema = z.object({ rateLimit: z.number().min(0).optional(), ratePeriod: z.number().positive().optional(), timeout: z.number().min(1000).max(300_000).optional(), + + // Browser API credentials (optional — only needed for client.browser) + browserUsername: z.string().optional(), + browserPassword: z.string().optional(), + browserHost: z.string().optional(), + browserPort: z.number().int().min(1).max(65535).optional(), }); export type BdClientOptions = z.input; diff --git a/src/schemas/scraperstudio.ts b/src/schemas/scraperstudio.ts index f98d70b..e81a1d7 100644 --- a/src/schemas/scraperstudio.ts +++ b/src/schemas/scraperstudio.ts @@ -2,9 +2,7 @@ import { z } from 'zod'; // --- Input validation --- -export const CollectorIdSchema = z - .string() - .min(1, 'collector ID is required'); +export const CollectorIdSchema = z.string().min(1, 'collector ID is required'); export const ScraperStudioInputSchema = z.union([ z.record(z.string(), z.any()), @@ -19,7 +17,9 @@ export const ScraperStudioRunOptionsSchema = z.object({ pollInterval: z.number().positive().default(10_000), }); -export type ScraperStudioRunOptions = z.input; +export type ScraperStudioRunOptions = z.input< + typeof ScraperStudioRunOptionsSchema +>; // --- Result type for run() --- diff --git a/src/types/datasets.ts b/src/types/datasets.ts index 58b6d3a..7b3abe8 100644 --- a/src/types/datasets.ts +++ b/src/types/datasets.ts @@ -12,7 +12,10 @@ export type { export type SnapshotFormat = 'json' | 'ndjson' | 'jsonl' | 'csv'; -export type { SnapshotMeta, SnapshotStatusResponse } from '../schemas/responses'; +export type { + SnapshotMeta, + SnapshotStatusResponse, +} from '../schemas/responses'; export type SnapshotStatus = SnapshotStatusResponse['status']; diff --git a/src/utils/polling.ts b/src/utils/polling.ts index 
d9b973d..7ea6aeb 100644 --- a/src/utils/polling.ts +++ b/src/utils/polling.ts @@ -15,7 +15,7 @@ export interface PollOptions { onStatus?: (status: string, elapsedMs: number) => void; } -export async function pollUntilStatus( +export async function pollUntilReady( id: string, getStatus: (id: string) => Promise<{ status: string }>, options?: PollOptions, @@ -52,9 +52,7 @@ export async function pollUntilStatus( return; } if (status === 'failed' || status === 'error') { - throw new BRDError( - `${id} failed with status: ${status}`, - ); + throw new BRDError(`${id} failed with status: ${status}`); } await sleep(interval); diff --git a/tests/polling.test.ts b/tests/polling.test.ts index bab6f66..4c19ec9 100644 --- a/tests/polling.test.ts +++ b/tests/polling.test.ts @@ -1,8 +1,8 @@ import { describe, expect, test, vi } from 'vitest'; -import { pollUntilStatus } from '../src/utils/polling'; +import { pollUntilReady } from '../src/utils/polling'; import { APIError, BRDError, TimeoutError } from '../src/utils/errors'; -describe('pollUntilStatus', () => { +describe('pollUntilReady', () => { test('resolves when status is ready', async () => { const getStatus = vi .fn() @@ -10,14 +10,14 @@ describe('pollUntilStatus', () => { .mockResolvedValueOnce({ status: 'running' }) .mockResolvedValueOnce({ status: 'ready' }); - await pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }); + await pollUntilReady('snap_123', getStatus, { pollInterval: 10 }); expect(getStatus).toHaveBeenCalledTimes(3); }); test('resolves immediately if already ready', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'ready' }); const start = Date.now(); - await pollUntilStatus('snap_123', getStatus, { pollInterval: 10_000 }); + await pollUntilReady('snap_123', getStatus, { pollInterval: 10_000 }); expect(Date.now() - start).toBeLessThan(100); expect(getStatus).toHaveBeenCalledTimes(1); }); @@ -25,7 +25,7 @@ describe('pollUntilStatus', () => { test('throws TimeoutError when timeout 
exceeded', async () => { const getStatus = vi.fn().mockResolvedValue({ status: 'running' }); await expect( - pollUntilStatus('snap_123', getStatus, { + pollUntilReady('snap_123', getStatus, { pollInterval: 50, pollTimeout: 120, }), @@ -35,14 +35,14 @@ describe('pollUntilStatus', () => { test('throws BRDError when status is failed', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'failed' }); await expect( - pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), + pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), ).rejects.toThrow(BRDError); }); test('throws BRDError when status is error', async () => { const getStatus = vi.fn().mockResolvedValueOnce({ status: 'error' }); await expect( - pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), + pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), ).rejects.toThrow(BRDError); }); @@ -53,7 +53,7 @@ describe('pollUntilStatus', () => { .mockResolvedValueOnce({ status: 'ready' }); const onStatus = vi.fn(); - await pollUntilStatus('snap_123', getStatus, { + await pollUntilReady('snap_123', getStatus, { pollInterval: 10, onStatus, }); @@ -70,7 +70,7 @@ describe('pollUntilStatus', () => { new APIError('API failure', 500, 'Internal Server Error'), ); await expect( - pollUntilStatus('snap_123', getStatus, { pollInterval: 10 }), + pollUntilReady('snap_123', getStatus, { pollInterval: 10 }), ).rejects.toThrow(APIError); }); }); From 1676c3331ab92ede6477cf2609e28b8f0e5fa723 Mon Sep 17 00:00:00 2001 From: "user.mail" Date: Sun, 12 Apr 2026 11:01:57 +0300 Subject: [PATCH 3/4] fix broken import reference --- src/client.ts | 51 +++++++++++++++++++++++++++------------------------ src/index.ts | 5 +++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/client.ts b/src/client.ts index 2f57f67..bf04126 100644 --- a/src/client.ts +++ b/src/client.ts @@ -8,7 +8,8 @@ import type { DiscoverResult } from './api/discover/result'; import type { DiscoverJob } from 
'./api/discover/job'; import type { DiscoverOptions } from './schemas/discover'; import { ScraperStudioService } from './api/scraperstudio/service'; -import { BrowserService } from './api/browser/service'; +// TODO: uncomment when browser API files are pushed +// import { BrowserService } from './api/browser/service'; import { setup as setupLogger, getLogger } from './utils/logger'; import { DEFAULT_WEB_UNLOCKER_ZONE, @@ -94,7 +95,8 @@ export class bdclient { declare search: SearchRouter; declare datasets: DatasetsClient; declare scraperStudio: ScraperStudioService; - declare browser: BrowserService; + // TODO: uncomment when browser API files are pushed + // declare browser: BrowserService; constructor(options?: BdClientOptions) { const opt = assertSchema( @@ -168,28 +170,29 @@ export class bdclient { new ScraperStudioService({ transport: this.transport }), ); - defineLazy(this, 'browser', () => { - const username = - opt.browserUsername || - process.env.BRIGHTDATA_BROWSERAPI_USERNAME; - const password = - opt.browserPassword || - process.env.BRIGHTDATA_BROWSERAPI_PASSWORD; - - if (!username || !password) { - throw new ValidationError( - 'Browser API requires credentials. Pass browserUsername and browserPassword to the client, ' + - 'or set BRIGHTDATA_BROWSERAPI_USERNAME and BRIGHTDATA_BROWSERAPI_PASSWORD environment variables.', - ); - } - - return new BrowserService({ - username, - password, - host: opt.browserHost, - port: opt.browserPort, - }); - }); + // TODO: uncomment when browser API files are pushed + // defineLazy(this, 'browser', () => { + // const username = + // opt.browserUsername || + // process.env.BRIGHTDATA_BROWSERAPI_USERNAME; + // const password = + // opt.browserPassword || + // process.env.BRIGHTDATA_BROWSERAPI_PASSWORD; + // + // if (!username || !password) { + // throw new ValidationError( + // 'Browser API requires credentials. 
Pass browserUsername and browserPassword to the client, ' + + // 'or set BRIGHTDATA_BROWSERAPI_USERNAME and BRIGHTDATA_BROWSERAPI_PASSWORD environment variables.', + // ); + // } + // + // return new BrowserService({ + // username, + // password, + // host: opt.browserHost, + // port: opt.browserPort, + // }); + // }); } private get scrapeAPI(): ScrapeAPI { diff --git a/src/index.ts b/src/index.ts index 516cfeb..660214c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -40,8 +40,9 @@ export type { } from './schemas/scraperstudio'; // ── Browser API ──────────────────────────────────────────────── -export { BrowserService } from './api/browser/service'; -export type { BrowserConnectOptions } from './schemas/browser'; +// TODO: uncomment when browser API files are pushed +// export { BrowserService } from './api/browser/service'; +// export type { BrowserConnectOptions } from './schemas/browser'; // ── Subpath re-exports (backward compat) ───────────────────────── // Consumers can also import these from '@brightdata/sdk/scrapers', From 31bc9befa46f8194ef62bd3318fbc3e3dc28e3d5 Mon Sep 17 00:00:00 2001 From: Shahar Carmi Date: Tue, 14 Apr 2026 13:09:34 +0300 Subject: [PATCH 4/4] chore: trigger CI