diff --git a/.DS_Store b/.DS_Store index db7e987..36ce176 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/CHANGELOG.md b/CHANGELOG.md index 64c986f..4dbf2f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## [1.8.0](https://github.com/ScrapingBee/scrapingbee-node/compare/v1.7.5...v1.8.0) (2026-01-19) + +### Features + +- Added `htmlApi()` method for HTML API +- Added `googleSearch()` method for Google Search API +- Added `amazonSearch()` method for Amazon Search API +- Added `amazonProduct()` method for Amazon Product API +- Added `walmartSearch()` method for Walmart Search API +- Added `walmartProduct()` method for Walmart Product API +- Added `youtubeSearch()` method for YouTube Search API +- Added `youtubeMetadata()` method for YouTube Metadata API +- Added `youtubeTranscript()` method for YouTube Transcript API +- Added `youtubeTrainability()` method for YouTube Trainability API +- Added `chatGPT()` method for ChatGPT API +- Added `usage()` method for Usage endpoint +- Added new HTML API parameters: `ai_query`, `ai_selector`, `return_page_markdown`, `return_page_text`, `scraping_config` +- Refactored internal `request()` method to be API-agnostic + +### Deprecated + +- `get()` and `post()` methods are deprecated in favor of `htmlApi()` method + ## [1.7.5](https://github.com/ScrapingBee/scrapingbee-node/compare/v1.7.4...v1.7.5) (2023-06-06) ### Bugfix diff --git a/README.md b/README.md index 5ee7ca6..7822ce2 100644 --- a/README.md +++ b/README.md @@ -12,140 +12,447 @@ npm install scrapingbee ## Usage -The ScrapingBee Node SDK is a wrapper around the [axios](https://axios-http.com/docs/intro) library. ScrapingBee supports GET and POST requests. +The ScrapingBee Node SDK is a wrapper around the [axios](https://axios-http.com/docs/intro) library. Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/register) and some free credits to get started. 
-### Making a GET request +## Table of Contents + +- [HTML API](#html-api) +- [Google Search API](#google-search-api) +- [Amazon API](#amazon-api) +- [Walmart API](#walmart-api) +- [YouTube API](#youtube-api) +- [ChatGPT API](#chatgpt-api) +- [Usage API](#usage-api) + +--- + +## HTML API + +The HTML API allows you to scrape any webpage and get the HTML content. + +### GET Request ```javascript -const scrapingbee = require('scrapingbee'); +const { ScrapingBeeClient } = require('scrapingbee'); async function get(url) { - var client = new scrapingbee.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY'); - var response = await client.get({ - // The URL you want to scrape + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.htmlApi({ url: url, params: { - // Block ads on the page you want to scrape - block_ads: false, - // Block images and CSS on the page you want to scrape - block_resources: true, - // Premium proxy geolocation - country_code: '', - // Control the device the request will be sent from - device: 'desktop', - // Use some data extraction rules - extract_rules: { title: 'h1' }, - // Use AI data extraction rules - ai_extract_rules: { summary: '5 words summary of the post' }, - // Wrap response in JSON - json_response: false, - // JavaScript scenario to execute (clicking on button, scrolling ...) 
+ render_js: true, + extract_rules: { title: 'h1', links: 'a @href' }, js_scenario: { instructions: [ - { wait_for: '#slow_button' }, - { click: '#slow_button' }, - { scroll_x: 1000 }, - { wait: 1000 }, - { scroll_x: 1000 }, - { wait: 1000 }, - ], + { click: '#button' }, + { wait: 500 }, + { scroll_y: 1000 }, + ] }, - // Use premium proxies to bypass difficult to scrape websites (10-25 credits/request) - premium_proxy: false, - // Execute JavaScript code with a Headless Browser (5 credits/request) - render_js: true, - // Return the original HTML before the JavaScript rendering - return_page_source: false, - // Return page screenshot as a png image - screenshot: false, - // Take a full page screenshot without the window limitation - screenshot_full_page: false, - // Transparently return the same HTTP code of the page requested. - transparent_status_code: false, - // Wait, in miliseconds, before returning the response - wait: 0, - // Wait for CSS selector before returning the response, ex ".title" - wait_for: '', - // Set the browser window width in pixel - window_width: 1920, - // Set the browser window height in pixel - window_height: 1080, + } + }); + + const decoder = new TextDecoder(); + const text = decoder.decode(response.data); + console.log(text); +} + +get('https://example.com'); +``` + +### POST Request + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function post(url) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.htmlApi({ + url: url, + method: 'POST', + data: 'username=user&password=pass', + params: { + render_js: false, }, headers: { - // Forward custom headers to the target website - key: 'value', + 'Custom-Header': 'value', }, cookies: { - // Forward custom cookies to the target website - name: 'value', + session: 'abc123', }, - // `timeout` specifies the number of milliseconds before the request times out. 
- // If the request takes longer than `timeout`, the request will be aborted. - timeout: 10000, // here 10sec, default is `0` (no timeout) }); - var decoder = new TextDecoder(); - var text = decoder.decode(response.data); + const decoder = new TextDecoder(); + const text = decoder.decode(response.data); console.log(text); } -get('https://httpbin-scrapingbee.cleverapps.io/html').catch((e) => console.log('A problem occurs : ' + e.message)); - -/* -- output - ... -*/ +post('https://httpbin.org/post'); ``` -ScrapingBee takes various parameters to render JavaScript, execute a custom JavaScript script, use a premium proxy from a specific geolocation and more. - -You can find all the supported parameters on [ScrapingBee's documentation](https://www.scrapingbee.com/documentation/). - -You can send custom cookies and headers like you would normally do with the requests library. - -## Screenshot - -Here a little exemple on how to retrieve and store a screenshot from the ScrapingBee blog in its mobile resolution. 
+### Screenshot ```javascript const fs = require('fs'); -const scrapingbee = require('scrapingbee'); +const { ScrapingBeeClient } = require('scrapingbee'); async function screenshot(url, path) { - var client = new scrapingbee.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY'); - var response = await client.get({ + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.htmlApi({ url: url, params: { - screenshot: true, // Take a screenshot - screenshot_full_page: true, // Specify that we need the full height - window_width: 375, // Specify a mobile width in pixel + screenshot: true, + screenshot_full_page: true, + window_width: 375, }, }); fs.writeFileSync(path, response.data); } -screenshot('https://httpbin-scrapingbee.cleverapps.io/html', './httpbin.png').catch((e) => - console.log('A problem occurs : ' + e.message) -); +screenshot('https://example.com', './screenshot.png'); +``` + +--- + +## Google Search API + +Scrape Google search results in real-time. + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function googleSearch(query) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.googleSearch({ + search: query, + params: { + language: 'en', + country_code: 'us', + page: 1, + search_type: 'classic', + device: 'desktop', + light_request: true, + nfpr: false, + add_html: false, + } + }); + + console.log(response.data); +} + +googleSearch('web scraping tools'); +``` + +--- + +## Amazon API + +Scrape Amazon search results and product details. 
+ +### Amazon Search + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function amazonSearch(query) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.amazonSearch({ + query: query, + params: { + domain: 'com', + language: 'en', + country: 'us', + device: 'desktop', + pages: 1, + start_page: 1, + sort_by: 'featured', + currency: 'USD', + add_html: false, + screenshot: false, + } + }); + + console.log(response.data); +} + +amazonSearch('laptop'); +``` + +### Amazon Product + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function amazonProduct(asin) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.amazonProduct({ + query: asin, + params: { + domain: 'com', + language: 'en', + country: 'us', + device: 'desktop', + autoselect_variant: false, + add_html: false, + screenshot: false, + } + }); + + console.log(response.data); +} + +amazonProduct('B0D2Q9397Y'); +``` + +--- + +## Walmart API + +Scrape Walmart search results and product details. 
+ +### Walmart Search + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function walmartSearch(query) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.walmartSearch({ + query: query, + params: { + device: 'desktop', + sort_by: 'best_match', + min_price: 10, + max_price: 1000, + delivery_zip: '10001', + store_id: '', + fulfillment_speed: '', + add_html: false, + screenshot: false, + } + }); + + console.log(response.data); +} + +walmartSearch('laptop'); +``` + +### Walmart Product + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function walmartProduct(productId) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.walmartProduct({ + product_id: productId, + params: { + device: 'desktop', + delivery_zip: '10001', + store_id: '', + add_html: false, + screenshot: false, + } + }); + + console.log(response.data); +} + +walmartProduct('123456789'); +``` + +--- + +## YouTube API + +Scrape YouTube search results, video metadata, transcripts, and trainability data. 
+ +### YouTube Search + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function youtubeSearch(query) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.youtubeSearch({ + search: query, + params: { + sort_by: 'relevance', + type: 'video', + upload_date: '', + duration: '', + hd: false, + '4k': false, + subtitles: false, + live: false, + } + }); + + console.log(response.data); +} + +youtubeSearch('web scraping tutorial'); +``` + +### YouTube Metadata + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function youtubeMetadata(videoId) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.youtubeMetadata({ + video_id: videoId, + }); + + console.log(response.data); +} + +youtubeMetadata('dQw4w9WgXcQ'); +``` + +### YouTube Transcript + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function youtubeTranscript(videoId) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.youtubeTranscript({ + video_id: videoId, + params: { + language: 'en', + transcript_origin: 'auto_generated', + } + }); + + console.log(response.data); +} + +youtubeTranscript('dQw4w9WgXcQ'); +``` + +### YouTube Trainability + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function youtubeTrainability(videoId) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.youtubeTrainability({ + video_id: videoId, + }); + + console.log(response.data); +} + +youtubeTrainability('dQw4w9WgXcQ'); +``` + +--- + +## ChatGPT API + +Use ChatGPT with optional web search capabilities. 
+ +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function askChatGPT(prompt) { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.chatGPT({ + prompt: prompt, + params: { + search: true, + country_code: 'us', + add_html: false, + } + }); + + console.log(response.data); +} + +askChatGPT('What are the latest web scraping trends?'); +``` + +--- + +## Usage API + +Check your API credit usage and account limits. + +```javascript +const { ScrapingBeeClient } = require('scrapingbee'); + +async function checkUsage() { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.usage(); + + console.log(response.data); +} + +checkUsage(); ``` +--- + ## Retries -The client includes a retry mechanism for 5XX responses. +All API methods support automatic retry for 5XX responses. ```javascript -const spb = require('scrapingbee'); +const { ScrapingBeeClient } = require('scrapingbee'); -async function get(url) { - let client = new spb.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY'); - let resp = await client.get({ url: url, params: { render_js: false }, retries: 5 }); +async function scrapeWithRetry() { + const client = new ScrapingBeeClient('YOUR-API-KEY'); + const response = await client.htmlApi({ + url: 'https://example.com', + retries: 5, + timeout: 30000, + }); - let decoder = new TextDecoder(); - let text = decoder.decode(resp.data); - console.log(text); + console.log(response.data); } +``` + +--- + +## Promise and Async/Await + +All methods return Promises, so you can use either `.then()` or `async/await`: + +```javascript +// Using async/await +const response = await client.googleSearch({ search: 'test' }); -get('https://httpbin-scrapingbee.cleverapps.io/html').catch((e) => console.log('A problem occured: ' + e.message)); +// Using .then() +client.googleSearch({ search: 'test' }) + .then(response => console.log(response.data)) + .catch(error => console.error(error)); ``` + +--- + 
+## Legacy Methods (Deprecated) + +The `get()` and `post()` methods are deprecated and will be removed in a future version. Please use `htmlApi()` instead. + +```javascript +// Deprecated +await client.get({ url: '...' }); +await client.post({ url: '...' }); + +// Use instead +await client.htmlApi({ url: '...', method: 'GET' }); +await client.htmlApi({ url: '...', method: 'POST' }); +``` + +--- + +## Documentation + +For more details on all available parameters, visit [ScrapingBee's documentation](https://www.scrapingbee.com/documentation/). \ No newline at end of file diff --git a/dist/index.d.ts b/dist/index.d.ts index ccc24d6..e2db34b 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -1,5 +1,8 @@ import { AxiosPromise } from 'axios'; -export declare type SpbParams = { +export declare type HtmlApiParams = { + ai_extract_rules?: object | string; + ai_query?: string; + ai_selector?: string; block_ads?: boolean; block_resources?: boolean; cookies?: string | Record<string, string>; @@ -7,9 +10,6 @@ export declare type SpbParams = { custom_google?: boolean; device?: string; extract_rules?: object | string; - ai_extract_rules?: { - summary: string; - }; forward_headers?: boolean; forward_headers_pure?: boolean; js_scenario?: object | string; @@ -17,7 +17,10 @@ export declare type SpbParams = { own_proxy?: string; premium_proxy?: boolean; render_js?: boolean; + return_page_markdown?: boolean; return_page_source?: boolean; + return_page_text?: boolean; + scraping_config?: string; screenshot?: boolean; screenshot_full_page?: boolean; screenshot_selector?: string; @@ -26,26 +29,211 @@ export declare type SpbParams = { timeout?: number; transparent_status_code?: boolean; wait?: number; - wait_browser?: string | Array<string>; + wait_browser?: string; wait_for?: string; window_height?: number; window_width?: number; } & { [key: string]: any; }; -export interface SpbConfig { +export interface HtmlApiConfig { url: string; + method?: 'GET' | 'POST' | 'PUT'; headers?: Record<string, string>; cookies?: string | 
Record<string, string>; - params?: SpbParams; + params?: HtmlApiParams; data?: any; retries?: number; timeout?: number; } +export declare type GoogleSearchParams = { + add_html?: boolean; + country_code?: string; + device?: string; + extra_params?: string; + language?: string; + light_request?: boolean; + nfpr?: boolean; + page?: number; + search_type?: string; +} & { + [key: string]: any; +}; +export interface GoogleSearchConfig { + search: string; + params?: GoogleSearchParams; + retries?: number; + timeout?: number; +} +export declare type AmazonSearchParams = { + add_html?: boolean; + category_id?: string; + country?: string; + currency?: string; + device?: string; + domain?: string; + language?: string; + light_request?: boolean; + merchant_id?: string; + pages?: number; + screenshot?: boolean; + sort_by?: string; + start_page?: number; + zip_code?: string; +} & { + [key: string]: any; +}; +export interface AmazonSearchConfig { + query: string; + params?: AmazonSearchParams; + retries?: number; + timeout?: number; +} +export declare type AmazonProductParams = { + add_html?: boolean; + autoselect_variant?: boolean; + country?: string; + currency?: string; + device?: string; + domain?: string; + language?: string; + light_request?: boolean; + screenshot?: boolean; + zip_code?: string; +} & { + [key: string]: any; +}; +export interface AmazonProductConfig { + query: string; + params?: AmazonProductParams; + retries?: number; + timeout?: number; +} +export declare type WalmartSearchParams = { + add_html?: boolean; + delivery_zip?: string; + device?: string; + domain?: string; + fulfillment_speed?: string; + fulfillment_type?: string; + light_request?: boolean; + max_price?: number; + min_price?: number; + screenshot?: boolean; + sort_by?: string; + store_id?: string; +} & { + [key: string]: any; +}; +export interface WalmartSearchConfig { + query: string; + params?: WalmartSearchParams; + retries?: number; + timeout?: number; +} +export declare type WalmartProductParams = { + 
add_html?: boolean; + delivery_zip?: string; + device?: string; + domain?: string; + light_request?: boolean; + screenshot?: boolean; + store_id?: string; +} & { + [key: string]: any; +}; +export interface WalmartProductConfig { + product_id: string; + params?: WalmartProductParams; + retries?: number; + timeout?: number; +} +export declare type ChatGPTParams = { + add_html?: boolean; + country_code?: string; + search?: boolean; +} & { + [key: string]: any; +}; +export interface ChatGPTConfig { + prompt: string; + params?: ChatGPTParams; + retries?: number; + timeout?: number; +} +export declare type YouTubeSearchParams = { + '360'?: boolean; + '3d'?: boolean; + '4k'?: boolean; + creative_commons?: boolean; + duration?: string; + hd?: boolean; + hdr?: boolean; + live?: boolean; + location?: boolean; + purchased?: boolean; + sort_by?: string; + subtitles?: boolean; + type?: string; + upload_date?: string; + vr180?: boolean; +} & { + [key: string]: any; +}; +export interface YouTubeSearchConfig { + search: string; + params?: YouTubeSearchParams; + retries?: number; + timeout?: number; +} +export interface YouTubeMetadataConfig { + video_id: string; + retries?: number; + timeout?: number; +} +export declare type YouTubeTranscriptParams = { + language?: string; + transcript_origin?: string; +} & { + [key: string]: any; +}; +export interface YouTubeTranscriptConfig { + video_id: string; + params?: YouTubeTranscriptParams; + retries?: number; + timeout?: number; +} +export interface YouTubeTrainabilityConfig { + video_id: string; + retries?: number; + timeout?: number; +} +export interface UsageConfig { + retries?: number; + timeout?: number; +} export declare class ScrapingBeeClient { readonly api_key: string; constructor(api_key: string); private request; - get(config: SpbConfig): AxiosPromise; - post(config: SpbConfig): AxiosPromise; + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. 
+ */ + get: (config: HtmlApiConfig) => AxiosPromise; + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. + */ + post: (config: HtmlApiConfig) => AxiosPromise; + googleSearch(config: GoogleSearchConfig): AxiosPromise; + amazonSearch(config: AmazonSearchConfig): AxiosPromise; + amazonProduct(config: AmazonProductConfig): AxiosPromise; + walmartSearch(config: WalmartSearchConfig): AxiosPromise; + walmartProduct(config: WalmartProductConfig): AxiosPromise; + chatGPT(config: ChatGPTConfig): AxiosPromise; + youtubeSearch(config: YouTubeSearchConfig): AxiosPromise; + youtubeMetadata(config: YouTubeMetadataConfig): AxiosPromise; + youtubeTranscript(config: YouTubeTranscriptConfig): AxiosPromise; + youtubeTrainability(config: YouTubeTrainabilityConfig): AxiosPromise; + htmlApi(config: HtmlApiConfig): AxiosPromise; + usage(config?: UsageConfig): AxiosPromise; } diff --git a/dist/index.js b/dist/index.js index d94f9c0..d5d122e 100644 --- a/dist/index.js +++ b/dist/index.js @@ -5,46 +5,212 @@ var __importDefault = (this && this.__importDefault) || function (mod) { Object.defineProperty(exports, "__esModule", { value: true }); exports.ScrapingBeeClient = void 0; const axios_1 = __importDefault(require("axios")); +const util_1 = require("util"); const axios_retry_1 = __importDefault(require("axios-retry")); const utils_1 = require("./utils"); -const API_URL = 'https://app.scrapingbee.com/api/v1/'; +const HTML_API_URL = 'https://app.scrapingbee.com/api/v1/'; +const GOOGLE_API_URL = 'https://app.scrapingbee.com/api/v1/store/google'; +const AMAZON_SEARCH_API_URL = 'https://app.scrapingbee.com/api/v1/amazon/search'; +const AMAZON_PRODUCT_API_URL = 'https://app.scrapingbee.com/api/v1/amazon/product'; +const WALMART_SEARCH_API_URL = 'https://app.scrapingbee.com/api/v1/walmart/search'; +const WALMART_PRODUCT_API_URL = 'https://app.scrapingbee.com/api/v1/walmart/product'; +const CHATGPT_API_URL = 'https://app.scrapingbee.com/api/v1/chatgpt'; +const 
YOUTUBE_SEARCH_API_URL = 'https://app.scrapingbee.com/api/v1/youtube/search'; +const YOUTUBE_METADATA_API_URL = 'https://app.scrapingbee.com/api/v1/youtube/metadata'; +const YOUTUBE_TRANSCRIPT_API_URL = 'https://app.scrapingbee.com/api/v1/youtube/transcript'; +const YOUTUBE_TRAINABILITY_API_URL = 'https://app.scrapingbee.com/api/v1/youtube/trainability'; +const USAGE_API_URL = 'https://app.scrapingbee.com/api/v1/usage'; class ScrapingBeeClient { constructor(api_key) { + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. + */ + this.get = util_1.deprecate((config) => { + var _a; + let params = Object.assign(Object.assign({}, config.params), { url: config.url, cookies: config.cookies }); + let headers = utils_1.process_headers(config.headers); + if (Object.keys((_a = config.headers) !== null && _a !== void 0 ? _a : {}).length > 0) { + params.forward_headers = true; + } + return this.request({ + method: 'GET', + endpoint: HTML_API_URL, + params: utils_1.process_params(params), + headers: headers, + data: config.data, + retries: config.retries, + timeout: config.timeout, + }); + }, 'ScrapingBeeClient.get() is deprecated. Please use client.htmlApi() instead. This method will be removed in version 2.0.0.'); + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. + */ + this.post = util_1.deprecate((config) => { + var _a; + let params = Object.assign(Object.assign({}, config.params), { url: config.url, cookies: config.cookies }); + let headers = utils_1.process_headers(config.headers); + if (Object.keys((_a = config.headers) !== null && _a !== void 0 ? _a : {}).length > 0) { + params.forward_headers = true; + } + return this.request({ + method: 'POST', + endpoint: HTML_API_URL, + params: utils_1.process_params(params), + headers: headers, + data: config.data, + retries: config.retries, + timeout: config.timeout, + }); + }, 'ScrapingBeeClient.post() is deprecated. Please use client.htmlApi() instead. 
This method will be removed in version 2.0.0.'); this.api_key = api_key; } - request(method, config) { - var _a, _b; - let params = config.params || {}; - // Headers - let headers = utils_1.process_headers(config.headers); - if (Object.keys((_a = config.headers) !== null && _a !== void 0 ? _a : {}).length > 0) { - params.forward_headers = true; - } - // Cookies - params.cookies = config.cookies; - // Other query params - params['api_key'] = this.api_key; - params['url'] = config.url; - params = utils_1.process_params(params); - let axios_params = { - method: method, - headers: headers, - params: params, + request(config) { + var _a; + config.params['api_key'] = this.api_key; + const axiosConfig = { + method: config.method, + url: config.endpoint, + params: config.params, + headers: config.headers, data: config.data, responseType: 'arraybuffer', - timeout: (_b = config.timeout) !== null && _b !== void 0 ? _b : 0, + timeout: (_a = config.timeout) !== null && _a !== void 0 ? _a : 0, }; // Retry policy if (config.retries !== undefined) { axios_retry_1.default(axios_1.default, { retries: config.retries }); } - return axios_1.default(API_URL, axios_params); + return axios_1.default(axiosConfig); + } + googleSearch(config) { + const params = Object.assign({ search: config.search }, config.params); + return this.request({ + method: 'GET', + endpoint: GOOGLE_API_URL, + params: utils_1.process_params(params), + retries: config.retries, + timeout: config.timeout, + }); + } + amazonSearch(config) { + const params = Object.assign({ query: config.query }, config.params); + return this.request({ + method: 'GET', + endpoint: AMAZON_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + amazonProduct(config) { + const params = Object.assign({ query: config.query }, config.params); + return this.request({ + method: 'GET', + endpoint: AMAZON_PRODUCT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + 
walmartSearch(config) { + const params = Object.assign({ query: config.query }, config.params); + return this.request({ + method: 'GET', + endpoint: WALMART_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + walmartProduct(config) { + const params = Object.assign({ product_id: config.product_id }, config.params); + return this.request({ + method: 'GET', + endpoint: WALMART_PRODUCT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + chatGPT(config) { + const params = Object.assign({ prompt: config.prompt }, config.params); + return this.request({ + method: 'GET', + endpoint: CHATGPT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + youtubeSearch(config) { + const params = Object.assign({ search: config.search }, config.params); + return this.request({ + method: 'GET', + endpoint: YOUTUBE_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); } - get(config) { - return this.request('GET', config); + youtubeMetadata(config) { + const params = { + video_id: config.video_id, + }; + return this.request({ + method: 'GET', + endpoint: YOUTUBE_METADATA_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + youtubeTranscript(config) { + const params = Object.assign({ video_id: config.video_id }, config.params); + return this.request({ + method: 'GET', + endpoint: YOUTUBE_TRANSCRIPT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + youtubeTrainability(config) { + const params = { + video_id: config.video_id, + }; + return this.request({ + method: 'GET', + endpoint: YOUTUBE_TRAINABILITY_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + htmlApi(config) { + var _a; + let params = Object.assign(Object.assign({}, config.params), { url: config.url, cookies: config.cookies }); + let headers = utils_1.process_headers(config.headers); + if 
(Object.keys((_a = config.headers) !== null && _a !== void 0 ? _a : {}).length > 0) { + params.forward_headers = true; + } + return this.request({ + method: config.method || 'GET', + endpoint: HTML_API_URL, + params: utils_1.process_params(params), + headers: headers, + data: config.data, + retries: config.retries, + timeout: config.timeout, + }); } - post(config) { - return this.request('POST', config); + usage(config = {}) { + return this.request({ + method: 'GET', + endpoint: USAGE_API_URL, + params: {}, + retries: config.retries, + timeout: config.timeout, + }); } } exports.ScrapingBeeClient = ScrapingBeeClient; diff --git a/dist/version.d.ts b/dist/version.d.ts index a25b917..6b4927c 100644 --- a/dist/version.d.ts +++ b/dist/version.d.ts @@ -1 +1 @@ -export declare const LIB_VERSION = "1.7.5"; +export declare const LIB_VERSION = "1.8.0"; diff --git a/dist/version.js b/dist/version.js index 28c05b2..aae18d2 100644 --- a/dist/version.js +++ b/dist/version.js @@ -1,4 +1,4 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.LIB_VERSION = void 0; -exports.LIB_VERSION = "1.7.5"; +exports.LIB_VERSION = "1.8.0"; diff --git a/manual-test.js b/manual-test.js new file mode 100644 index 0000000..9903b64 --- /dev/null +++ b/manual-test.js @@ -0,0 +1,644 @@ +const { ScrapingBeeClient } = require('./dist/index'); + +const API_KEY = process.env.SCRAPINGBEE_API_KEY; +const client = new ScrapingBeeClient(API_KEY); + +// Helper function to parse response data +function parseResponse(response) { + if (Buffer.isBuffer(response.data)) { + return JSON.parse(response.data.toString()); + } else if (typeof response.data === 'string') { + return JSON.parse(response.data); + } + return response.data; +} + +// Helper function to assert +function assert(condition, message) { + if (!condition) { + throw new Error(message); + } +} + +// ============================================ +// Legacy HTML API Tests +// ============================================ + 
+async function testHtmlGet() { + console.log('=== Testing HTML API - GET ==='); + try { + const response = await client.get({ + url: 'https://httpbin.org/get', + params: { render_js: false } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + assert(response.data.toString().includes('httpbin'), 'Response does not contain expected content'); + + console.log('Status:', response.status); + console.log('✅ HTML GET test passed!\n'); + } catch (error) { + console.log('❌ HTML GET test failed:', error.message); + throw error; + } +} + +async function testHtmlPost() { + console.log('=== Testing HTML API - POST ==='); + try { + const response = await client.post({ + url: 'https://httpbin.org/post', + params: { render_js: false }, + data: 'test=data' + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + assert(response.data.toString().includes('test'), 'Response does not contain posted data'); + + console.log('Status:', response.status); + console.log('✅ HTML POST test passed!\n'); + } catch (error) { + console.log('❌ HTML POST test failed:', error.message); + throw error; + } +} + +// ============================================ +// New HTML API Tests +// ============================================ + +async function testHtmlApiGet() { + console.log('=== Testing HTML API (New) - GET ==='); + try { + const response = await client.htmlApi({ + url: 'https://httpbin.org/get', + method: 'GET', + params: { render_js: false } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + assert(response.data.toString().includes('httpbin'), 'Response does not contain expected content'); + + console.log('Status:', response.status); + console.log('✅ HTML API GET test passed!\n'); + } catch (error) { + console.log('❌ HTML API GET 
test failed:', error.message); + throw error; + } +} + +async function testHtmlApiPost() { + console.log('=== Testing HTML API (New) - POST ==='); + try { + const response = await client.htmlApi({ + url: 'https://httpbin.org/post', + method: 'POST', + params: { render_js: false }, + data: 'test=data' + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + assert(response.data.toString().includes('test'), 'Response does not contain posted data'); + + console.log('Status:', response.status); + console.log('✅ HTML API POST test passed!\n'); + } catch (error) { + console.log('❌ HTML API POST test failed:', error.message); + throw error; + } +} + +async function testHtmlApiExtractRules() { + console.log('=== Testing HTML API - Extract Rules ==='); + try { + const response = await client.htmlApi({ + url: 'https://www.scrapingbee.com/blog/', + params: { + render_js: false, + extract_rules: { + title: 'h1', + posts: { + selector: '.container > div > div > div', + type: 'list', + output: { + title: 'h4', + link: 'a@href' + } + } + } + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.title, 'Extracted title is missing'); + assert(Array.isArray(data.posts), 'Extracted posts is not an array'); + assert(data.posts.length > 0, 'No posts extracted'); + + console.log('Status:', response.status); + console.log('Extracted title:', data.title); + console.log('Extracted posts count:', data.posts.length); + console.log('✅ HTML API Extract Rules test passed!\n'); + } catch (error) { + console.log('❌ HTML API Extract Rules test failed:', error.message); + throw error; + } +} + +async function testHtmlApiJsScenario() { + console.log('=== Testing HTML API - JS Scenario ==='); + try { + const response = await client.htmlApi({ + url: 'https://www.scrapingbee.com', + params: { + render_js: true, + js_scenario: 
{ + instructions: [ + { wait: 1000 }, + { scroll_y: 500 }, + { wait: 500 } + ] + } + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + + console.log('Status:', response.status); + console.log('Content:', response.data.toString().substring(0, 300)); + console.log('✅ HTML API JS Scenario test passed!\n'); + } catch (error) { + console.log('❌ HTML API JS Scenario test failed:', error.message); + throw error; + } +} + +async function testHtmlApiScreenshot() { + console.log('=== Testing HTML API - Screenshot ==='); + try { + const response = await client.htmlApi({ + url: 'https://www.scrapingbee.com', + params: { + render_js: true, + screenshot: true, + window_width: 1920, + window_height: 1080 + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data, 'Response data is empty'); + assert(response.data.length > 10000, 'Screenshot seems too small'); + + // Check PNG signature (first 8 bytes) + const pngSignature = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]); + assert(response.data.slice(0, 8).equals(pngSignature), 'Response is not a valid PNG'); + + console.log('Status:', response.status); + console.log('Screenshot size:', response.data.length, 'bytes'); + console.log('✅ HTML API Screenshot test passed!\n'); + } catch (error) { + console.log('❌ HTML API Screenshot test failed:', error.message); + throw error; + } +} + +async function testHtmlApiJsonResponse() { + console.log('=== Testing HTML API - JSON Response ==='); + try { + const response = await client.htmlApi({ + url: 'https://httpbin.org/get', + params: { + render_js: false, + json_response: true + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.body, 'JSON response missing body field'); + assert(data.xhr, 'JSON response missing xhr field'); + + const 
bodyPreview = typeof data.body === 'string' + ? data.body.substring(0, 300) + : JSON.stringify(data.body).substring(0, 300); + + console.log('Status:', response.status); + console.log('Content:', bodyPreview); + console.log('✅ HTML API JSON Response test passed!\n'); + } catch (error) { + console.log('❌ HTML API JSON Response test failed:', error.message); + throw error; + } +} + +async function testHtmlApiWithHeaders() { + console.log('=== Testing HTML API - Custom Headers ==='); + try { + const response = await client.htmlApi({ + url: 'https://httpbin.org/headers', + params: { render_js: false }, + headers: { + 'X-Custom-Header': 'CustomValue123' + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data.toString().includes('CustomValue123'), 'Custom header not forwarded'); + + console.log('Status:', response.status); + console.log('✅ HTML API Custom Headers test passed!\n'); + } catch (error) { + console.log('❌ HTML API Custom Headers test failed:', error.message); + throw error; + } +} + +async function testHtmlApiWithCookies() { + console.log('=== Testing HTML API - Custom Cookies ==='); + try { + const response = await client.htmlApi({ + url: 'https://httpbin.org/cookies', + params: { render_js: false }, + cookies: { + session_id: 'abc123', + user_token: 'xyz789' + } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const responseText = response.data.toString(); + assert(responseText.includes('abc123') || responseText.includes('xyz789'), 'Cookies not forwarded'); + + console.log('Status:', response.status); + console.log('✅ HTML API Custom Cookies test passed!\n'); + } catch (error) { + console.log('❌ HTML API Custom Cookies test failed:', error.message); + throw error; + } +} + +async function testHtmlApiPostWithHeadersAndCookies() { + console.log('=== Testing HTML API - POST with Headers + Cookies ==='); + try { + const response = await client.htmlApi({ + 
url: 'https://httpbin.org/post', + method: 'POST', + params: { render_js: false }, + headers: { 'X-Test-Header': 'TestValue' }, + cookies: { session: 'mysession123' }, + data: JSON.stringify({ action: 'submit' }) + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + assert(response.data.toString().includes('submit'), 'Posted data not in response'); + + console.log('Status:', response.status); + console.log('✅ HTML API POST with Headers + Cookies test passed!\n'); + } catch (error) { + console.log('❌ HTML API POST with Headers + Cookies test failed:', error.message); + throw error; + } +} + +// ============================================ +// Google Search API +// ============================================ + +async function testGoogleSearch() { + console.log('=== Testing Google Search API ==='); + try { + const response = await client.googleSearch({ + search: 'scrapingbee', + params: { language: 'en', country_code: 'us' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.organic_results, 'Missing organic_results in response'); + assert(Array.isArray(data.organic_results), 'organic_results is not an array'); + assert(data.organic_results.length > 0, 'No organic results found'); + + console.log('Status:', response.status); + console.log('Results found:', data.organic_results.length); + console.log('✅ Google Search test passed!\n'); + } catch (error) { + console.log('❌ Google Search test failed:', error.message); + throw error; + } +} + +// ============================================ +// Amazon API +// ============================================ + +async function testAmazonSearch() { + console.log('=== Testing Amazon Search API ==='); + try { + const response = await client.amazonSearch({ + query: 'laptop', + params: { domain: 'com', pages: 1 } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); 
+ + const data = parseResponse(response); + assert(data.products, 'Missing products in response'); + assert(Array.isArray(data.products), 'products is not an array'); + assert(data.products.length > 0, 'No products found'); + + console.log('Status:', response.status); + console.log('Results found:', data.products.length); + console.log('✅ Amazon Search test passed!\n'); + } catch (error) { + console.log('❌ Amazon Search test failed:', error.message); + throw error; + } +} + +async function testAmazonProduct() { + console.log('=== Testing Amazon Product API ==='); + try { + const response = await client.amazonProduct({ + query: 'B0D2Q9397Y', + params: { domain: 'com' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.title, 'Missing product title in response'); + + console.log('Status:', response.status); + console.log('Product title:', data.title.substring(0, 50)); + console.log('✅ Amazon Product test passed!\n'); + } catch (error) { + console.log('❌ Amazon Product test failed:', error.message); + throw error; + } +} + +// ============================================ +// Walmart API +// ============================================ + +async function testWalmartSearch() { + console.log('=== Testing Walmart Search API ==='); + try { + const response = await client.walmartSearch({ + query: 'laptop', + params: { device: 'desktop', sort_by: 'best_match' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.products, 'Missing products in response'); + assert(Array.isArray(data.products), 'products is not an array'); + assert(data.products.length > 0, 'No products found'); + + console.log('Status:', response.status); + console.log('Results found:', data.products.length); + console.log('✅ Walmart Search test passed!\n'); + } catch (error) { + console.log('❌ Walmart Search test failed:', 
error.message); + throw error; + } +} + +async function testWalmartProduct() { + console.log('=== Testing Walmart Product API ==='); + try { + const response = await client.walmartProduct({ + product_id: '454408250', + params: { device: 'desktop' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.title, 'Missing product title in response'); + + console.log('Status:', response.status); + console.log('Product title:', data.title.substring(0, 50)); + console.log('✅ Walmart Product test passed!\n'); + } catch (error) { + console.log('❌ Walmart Product test failed:', error.message); + throw error; + } +} + +// ============================================ +// ChatGPT API +// ============================================ + +async function testChatGPT() { + console.log('=== Testing ChatGPT API ==='); + try { + const response = await client.chatGPT({ + prompt: 'What is web scraping? Answer in one sentence.', + params: { search: true } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.results_text || data.results_markdown, 'Missing response text'); + + console.log('Status:', response.status); + console.log('Response:', (data.results_text || data.results_markdown).substring(0, 100)); + console.log('✅ ChatGPT test passed!\n'); + } catch (error) { + console.log('❌ ChatGPT test failed:', error.message); + throw error; + } +} + +// ============================================ +// YouTube API +// ============================================ + +async function testYouTubeSearch() { + console.log('=== Testing YouTube Search API ==='); + try { + const response = await client.youtubeSearch({ + search: 'web scraping tutorial', + params: { sort_by: 'relevance', type: 'video' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = 
parseResponse(response); + assert(data.results, 'Missing results in response'); + assert(Array.isArray(data.results), 'results is not an array'); + assert(data.results.length > 0, 'No results found'); + + console.log('Status:', response.status); + console.log('Results found:', data.results.length); + console.log('✅ YouTube Search test passed!\n'); + } catch (error) { + console.log('❌ YouTube Search test failed:', error.message); + throw error; + } +} + +async function testYouTubeMetadata() { + console.log('=== Testing YouTube Metadata API ==='); + try { + const response = await client.youtubeMetadata({ + video_id: 'dQw4w9WgXcQ' + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.title || data.like_count !== undefined, 'Missing expected metadata fields'); + + console.log('Status:', response.status); + console.log('Like count:', data.like_count); + console.log('✅ YouTube Metadata test passed!\n'); + } catch (error) { + console.log('❌ YouTube Metadata test failed:', error.message); + throw error; + } +} + +async function testYouTubeTranscript() { + console.log('=== Testing YouTube Transcript API ==='); + try { + const response = await client.youtubeTranscript({ + video_id: 'sfyL4BswUeE', + params: { language: 'en' } + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.text || data.transcript, 'Missing transcript in response'); + + console.log('Status:', response.status); + console.log('Transcript preview:', (data.text || JSON.stringify(data.transcript)).substring(0, 100)); + console.log('✅ YouTube Transcript test passed!\n'); + } catch (error) { + console.log('❌ YouTube Transcript test failed:', error.message); + throw error; + } +} + +async function testYouTubeTrainability() { + console.log('=== Testing YouTube Trainability API ==='); + try { + const response = await 
client.youtubeTrainability({ + video_id: 'dQw4w9WgXcQ' + }); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.permitted !== undefined, 'Missing permitted field in response'); + + console.log('Status:', response.status); + console.log('Permitted:', data.permitted); + console.log('✅ YouTube Trainability test passed!\n'); + } catch (error) { + console.log('❌ YouTube Trainability test failed:', error.message); + throw error; + } +} + +// ============================================ +// Usage API +// ============================================ + +async function testUsage() { + console.log('=== Testing Usage API ==='); + try { + const response = await client.usage(); + + assert(response.status === 200, `Expected status 200, got ${response.status}`); + + const data = parseResponse(response); + assert(data.max_api_credit !== undefined, 'Missing max_api_credit'); + assert(data.used_api_credit !== undefined, 'Missing used_api_credit'); + assert(data.max_concurrency !== undefined, 'Missing max_concurrency'); + + console.log('Status:', response.status); + console.log('Max API credits:', data.max_api_credit); + console.log('Used API credits:', data.used_api_credit); + console.log('Max concurrency:', data.max_concurrency); + console.log('✅ Usage test passed!\n'); + } catch (error) { + console.log('❌ Usage test failed:', error.message); + throw error; + } +} + +// ============================================ +// Run All Tests +// ============================================ + +async function runTests() { + console.log('\n🚀 Starting ScrapingBee SDK Tests\n'); + + let passed = 0; + let failed = 0; + + const tests = [ + // Legacy HTML API + testHtmlGet, + testHtmlPost, + + // New HTML API + testHtmlApiGet, + testHtmlApiPost, + testHtmlApiExtractRules, + testHtmlApiJsScenario, + testHtmlApiScreenshot, + testHtmlApiJsonResponse, + testHtmlApiWithHeaders, + testHtmlApiWithCookies, + 
testHtmlApiPostWithHeadersAndCookies, + + // Other APIs + testGoogleSearch, + testAmazonSearch, + testAmazonProduct, + testWalmartSearch, + testWalmartProduct, + testChatGPT, + testYouTubeSearch, + testYouTubeMetadata, + testYouTubeTranscript, + testYouTubeTrainability, + testUsage, + ]; + + for (const test of tests) { + try { + await test(); + passed++; + } catch (error) { + failed++; + } + } + + console.log('🏁 All tests completed!'); + console.log(`✅ Passed: ${passed}`); + console.log(`❌ Failed: ${failed}`); + console.log(`📊 Total: ${tests.length}\n`); + + if (failed > 0) { + process.exit(1); + } +} + +runTests(); diff --git a/package-lock.json b/package-lock.json index 4a43328..80d785d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scrapingbee", - "version": "1.7.5", + "version": "1.8.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scrapingbee", - "version": "1.7.5", + "version": "1.8.0", "license": "ISC", "dependencies": { "axios": "^1.7", diff --git a/package.json b/package.json index f15e39f..e5375c4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scrapingbee", - "version": "1.7.6", + "version": "1.8.0", "description": "ScrapingBee Node SDK", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/index.ts b/src/index.ts index b18019a..eb650f7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,11 +1,28 @@ import axios, { AxiosPromise, AxiosRequestConfig, Method } from 'axios'; +import { deprecate } from 'util'; import axiosRetry from 'axios-retry'; import { process_params, process_headers } from './utils'; -const API_URL: string = 'https://app.scrapingbee.com/api/v1/'; +const HTML_API_URL: string = 'https://app.scrapingbee.com/api/v1/'; +const GOOGLE_API_URL: string = 'https://app.scrapingbee.com/api/v1/store/google'; +const AMAZON_SEARCH_API_URL: string = 'https://app.scrapingbee.com/api/v1/amazon/search'; +const AMAZON_PRODUCT_API_URL: string = 
'https://app.scrapingbee.com/api/v1/amazon/product'; +const WALMART_SEARCH_API_URL: string = 'https://app.scrapingbee.com/api/v1/walmart/search'; +const WALMART_PRODUCT_API_URL: string = 'https://app.scrapingbee.com/api/v1/walmart/product'; +const CHATGPT_API_URL: string = 'https://app.scrapingbee.com/api/v1/chatgpt'; +const YOUTUBE_SEARCH_API_URL: string = 'https://app.scrapingbee.com/api/v1/youtube/search'; +const YOUTUBE_METADATA_API_URL: string = 'https://app.scrapingbee.com/api/v1/youtube/metadata'; +const YOUTUBE_TRANSCRIPT_API_URL: string = 'https://app.scrapingbee.com/api/v1/youtube/transcript'; +const YOUTUBE_TRAINABILITY_API_URL: string = 'https://app.scrapingbee.com/api/v1/youtube/trainability'; +const USAGE_API_URL: string = 'https://app.scrapingbee.com/api/v1/usage'; -export type SpbParams = { + +// HTML API +export type HtmlApiParams = { + ai_extract_rules?: object | string; + ai_query?: string; + ai_selector?: string; block_ads?: boolean; block_resources?: boolean; cookies?: string | Record; @@ -13,7 +30,6 @@ export type SpbParams = { custom_google?: boolean; device?: string; extract_rules?: object | string; - ai_extract_rules?: { summary: string }; forward_headers?: boolean; forward_headers_pure?: boolean; js_scenario?: object | string; @@ -21,7 +37,10 @@ export type SpbParams = { own_proxy?: string; premium_proxy?: boolean; render_js?: boolean; + return_page_markdown?: boolean; return_page_source?: boolean; + return_page_text?: boolean; + scraping_config?: string; screenshot?: boolean; screenshot_full_page?: boolean; screenshot_selector?: string; @@ -30,7 +49,7 @@ export type SpbParams = { timeout?: number; transparent_status_code?: boolean; wait?: number; - wait_browser?: string | Array; + wait_browser?: string; wait_for?: string; window_height?: number; window_width?: number; @@ -38,16 +57,215 @@ export type SpbParams = { [key: string]: any; }; -export interface SpbConfig { +export interface HtmlApiConfig { url: string; + method?: 'GET' | 'POST' 
| 'PUT'; headers?: Record; cookies?: string | Record; - params?: SpbParams; + params?: HtmlApiParams; data?: any; retries?: number; timeout?: number; } +// GOOGLE + +export type GoogleSearchParams = { + add_html?: boolean; + country_code?: string; + device?: string; + extra_params?: string; + language?: string; + light_request?: boolean; + nfpr?: boolean; + page?: number; + search_type?: string; +} & { + [key: string]: any; +}; + +export interface GoogleSearchConfig { + search: string; + params?: GoogleSearchParams; + retries?: number; + timeout?: number; +} + +// AMAZON + +export type AmazonSearchParams = { + add_html?: boolean; + category_id?: string; + country?: string; + currency?: string; + device?: string; + domain?: string; + language?: string; + light_request?: boolean; + merchant_id?: string; + pages?: number; + screenshot?: boolean; + sort_by?: string; + start_page?: number; + zip_code?: string; +} & { + [key: string]: any; +}; + +export interface AmazonSearchConfig { + query: string; + params?: AmazonSearchParams; + retries?: number; + timeout?: number; +} + +export type AmazonProductParams = { + add_html?: boolean; + autoselect_variant?: boolean; + country?: string; + currency?: string; + device?: string; + domain?: string; + language?: string; + light_request?: boolean; + screenshot?: boolean; + zip_code?: string; +} & { + [key: string]: any; +}; + +export interface AmazonProductConfig { + query: string; + params?: AmazonProductParams; + retries?: number; + timeout?: number; +} + +// WALMART + +export type WalmartSearchParams = { + add_html?: boolean; + delivery_zip?: string; + device?: string; + domain?: string; + fulfillment_speed?: string; + fulfillment_type?: string; + light_request?: boolean; + max_price?: number; + min_price?: number; + screenshot?: boolean; + sort_by?: string; + store_id?: string; +} & { + [key: string]: any; +}; + +export interface WalmartSearchConfig { + query: string; + params?: WalmartSearchParams; + retries?: number; + 
timeout?: number; +} + +export type WalmartProductParams = { + add_html?: boolean; + delivery_zip?: string; + device?: string; + domain?: string; + light_request?: boolean; + screenshot?: boolean; + store_id?: string; +} & { + [key: string]: any; +}; + +export interface WalmartProductConfig { + product_id: string; + params?: WalmartProductParams; + retries?: number; + timeout?: number; +} + +// CHATGPT + +export type ChatGPTParams = { + add_html?: boolean; + country_code?: string; + search?: boolean; +} & { + [key: string]: any; +}; + +export interface ChatGPTConfig { + prompt: string; + params?: ChatGPTParams; + retries?: number; + timeout?: number; +} + +// YOUTUBE + +export type YouTubeSearchParams = { + '360'?: boolean; + '3d'?: boolean; + '4k'?: boolean; + creative_commons?: boolean; + duration?: string; + hd?: boolean; + hdr?: boolean; + live?: boolean; + location?: boolean; + purchased?: boolean; + sort_by?: string; + subtitles?: boolean; + type?: string; + upload_date?: string; + vr180?: boolean; +} & { + [key: string]: any; +}; + +export interface YouTubeSearchConfig { + search: string; + params?: YouTubeSearchParams; + retries?: number; + timeout?: number; +} + +export interface YouTubeMetadataConfig { + video_id: string; + retries?: number; + timeout?: number; +} + +export type YouTubeTranscriptParams = { + language?: string; + transcript_origin?: string; +} & { + [key: string]: any; +}; + +export interface YouTubeTranscriptConfig { + video_id: string; + params?: YouTubeTranscriptParams; + retries?: number; + timeout?: number; +} + +export interface YouTubeTrainabilityConfig { + video_id: string; + retries?: number; + timeout?: number; +} + +// USAGE + +export interface UsageConfig { + retries?: number; + timeout?: number; +} + + export class ScrapingBeeClient { readonly api_key: string; @@ -55,45 +273,258 @@ export class ScrapingBeeClient { this.api_key = api_key; } - private request(method: string, config: SpbConfig): AxiosPromise { - let params = 
config.params || {}; + private request(config: Record): AxiosPromise { + config.params['api_key'] = this.api_key; + + const axiosConfig: AxiosRequestConfig = { + method: config.method as Method, + url: config.endpoint, + params: config.params, + headers: config.headers, + data: config.data, + responseType: 'arraybuffer', + timeout: config.timeout ?? 0, + }; + + // Retry policy + if (config.retries !== undefined) { + axiosRetry(axios, { retries: config.retries }); + } + + return axios(axiosConfig); + } + + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. + */ + public get = deprecate((config: HtmlApiConfig): AxiosPromise => { + let params: Record = { + ...config.params, + url: config.url, + cookies: config.cookies + }; - // Headers let headers = process_headers(config.headers); if (Object.keys(config.headers ?? {}).length > 0) { params.forward_headers = true; } - // Cookies - params.cookies = config.cookies; + return this.request({ + method: 'GET', + endpoint: HTML_API_URL, + params: process_params(params), + headers: headers, + data: config.data, + retries: config.retries, + timeout: config.timeout, + }); + }, 'ScrapingBeeClient.get() is deprecated. Please use client.htmlApi() instead. This method will be removed in version 2.0.0.'); - // Other query params - params['api_key'] = this.api_key; - params['url'] = config.url; - params = process_params(params); + /** + * @deprecated Use htmlApi() instead. This method will be removed in version 2.0.0. + */ + public post = deprecate((config: HtmlApiConfig): AxiosPromise => { + let params: Record = { + ...config.params, + url: config.url, + cookies: config.cookies + }; + + let headers = process_headers(config.headers); + if (Object.keys(config.headers ?? 
{}).length > 0) { + params.forward_headers = true; + } - let axios_params: AxiosRequestConfig = { - method: method as Method, + return this.request({ + method: 'POST', + endpoint: HTML_API_URL, + params: process_params(params), headers: headers, - params: params, data: config.data, - responseType: 'arraybuffer', - timeout: config.timeout ?? 0, + retries: config.retries, + timeout: config.timeout, + }); + }, 'ScrapingBeeClient.post() is deprecated. Please use client.htmlApi() instead. This method will be removed in version 2.0.0.'); + + public googleSearch(config: GoogleSearchConfig): AxiosPromise { + const params: Record = { + search: config.search, + ...config.params, }; - // Retry policy - if (config.retries !== undefined) { - axiosRetry(axios, { retries: config.retries }); - } + return this.request({ + method: 'GET', + endpoint: GOOGLE_API_URL, + params: process_params(params), + retries: config.retries, + timeout: config.timeout, + }); + } + + public amazonSearch(config: AmazonSearchConfig): AxiosPromise { + const params: Record = { + query: config.query, + ...config.params, + }; - return axios(API_URL, axios_params); + return this.request({ + method: 'GET', + endpoint: AMAZON_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); } - public get(config: SpbConfig) { - return this.request('GET', config); + public amazonProduct(config: AmazonProductConfig): AxiosPromise { + const params: Record = { + query: config.query, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: AMAZON_PRODUCT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public walmartSearch(config: WalmartSearchConfig): AxiosPromise { + const params: Record = { + query: config.query, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: WALMART_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public walmartProduct(config: 
WalmartProductConfig): AxiosPromise { + const params: Record = { + product_id: config.product_id, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: WALMART_PRODUCT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public chatGPT(config: ChatGPTConfig): AxiosPromise { + const params: Record = { + prompt: config.prompt, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: CHATGPT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public youtubeSearch(config: YouTubeSearchConfig): AxiosPromise { + const params: Record = { + search: config.search, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: YOUTUBE_SEARCH_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public youtubeMetadata(config: YouTubeMetadataConfig): AxiosPromise { + const params: Record = { + video_id: config.video_id, + }; + + return this.request({ + method: 'GET', + endpoint: YOUTUBE_METADATA_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); } - public post(config: SpbConfig) { - return this.request('POST', config); + public youtubeTranscript(config: YouTubeTranscriptConfig): AxiosPromise { + const params: Record = { + video_id: config.video_id, + ...config.params, + }; + + return this.request({ + method: 'GET', + endpoint: YOUTUBE_TRANSCRIPT_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); } + + public youtubeTrainability(config: YouTubeTrainabilityConfig): AxiosPromise { + const params: Record = { + video_id: config.video_id, + }; + + return this.request({ + method: 'GET', + endpoint: YOUTUBE_TRAINABILITY_API_URL, + params, + retries: config.retries, + timeout: config.timeout, + }); + } + + public htmlApi(config: HtmlApiConfig): AxiosPromise { + let params: Record = { + ...config.params, + url: config.url, + cookies: config.cookies + }; + + let 
headers = process_headers(config.headers); + if (Object.keys(config.headers ?? {}).length > 0) { + params.forward_headers = true; + } + + return this.request({ + method: config.method || 'GET', + endpoint: HTML_API_URL, + params: process_params(params), + headers: headers, + data: config.data, + retries: config.retries, + timeout: config.timeout, + }); + } + + public usage(config: UsageConfig = {}): AxiosPromise { + return this.request({ + method: 'GET', + endpoint: USAGE_API_URL, + params: {}, + retries: config.retries, + timeout: config.timeout, + }); + } + } diff --git a/src/version.ts b/src/version.ts index 7866235..0121859 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const LIB_VERSION = "1.7.6"; +export const LIB_VERSION = "1.8.0"; diff --git a/tests/client.test.ts b/tests/client.test.ts index 3e82983..45b9ab2 100644 --- a/tests/client.test.ts +++ b/tests/client.test.ts @@ -6,6 +6,10 @@ import { LIB_VERSION } from '../src/version'; var mock = new MockAdapter(axios); +// ============================================ +// HTML API +// ============================================ + describe('test_ScrapingBeeClient.get', function () { var api_key = 'API_KEY'; var target_url = 'https://httpbin-scrapingbee.cleverapps.io/html'; @@ -88,3 +92,564 @@ describe('test_ScrapingBeeClient.post', function () { assert.match(res.config.headers['User-Agent'], /^ScrapingBee-Node\//); }); }); + +describe('test_ScrapingBeeClient.htmlApi', function () { + var api_key = 'API_KEY'; + var target_url = 'https://httpbin.org/get'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + mock.onPost().reply(201); + + it('should make a GET request by default', async function () { + var res = await client.htmlApi({ url: target_url }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['url'], target_url); + // @ts-ignore + 
assert.match(res.config.headers['User-Agent'], /^ScrapingBee-Node\//); + }); + + it('should make a GET request when explicitly specified', async function () { + var res = await client.htmlApi({ url: target_url, method: 'GET' }); + assert.deepStrictEqual(res.status, 200); + }); + + it('should make a POST request when specified', async function () { + var res = await client.htmlApi({ + url: 'https://httpbin.org/post', + method: 'POST', + data: 'test=data' + }); + assert.deepStrictEqual(res.status, 201); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + // @ts-ignore + assert.match(res.config.headers['User-Agent'], /^ScrapingBee-Node\//); + }); + + it('should add the render_js query param', async function () { + var res = await client.htmlApi({ url: target_url, params: { render_js: true } }); + assert.deepStrictEqual(res.config.params['render_js'], true); + }); + + it('should handle multiple params correctly', async function () { + var res = await client.htmlApi({ + url: target_url, + params: { render_js: true, premium_proxy: true, block_ads: true } + }); + assert.deepStrictEqual(res.config.params['render_js'], true); + assert.deepStrictEqual(res.config.params['premium_proxy'], true); + assert.deepStrictEqual(res.config.params['block_ads'], true); + }); + + it('should prefix header names with Spb- and set forward_headers', async function () { + var res = await client.htmlApi({ + url: target_url, + headers: { 'Content-Type': 'text/html; charset=utf-8' } + }); + // @ts-ignore + assert.deepStrictEqual(res.config.headers['Spb-Content-Type'], 'text/html; charset=utf-8'); + // @ts-ignore + assert.deepStrictEqual(res.config.headers['User-Agent'], `ScrapingBee-Node/${LIB_VERSION}`); + assert.deepStrictEqual(res.config.params['forward_headers'], true); + }); + + it('should format the cookies and add them to the query params', async function () { + var cookies = { name1: 'value1', name2: 'value2' }; + var res = await client.htmlApi({ url: target_url, cookies: 
cookies }); + assert.deepStrictEqual(res.config.params['cookies'], 'name1=value1;name2=value2'); + }); + + it('should pass string cookies directly', async function () { + var cookies = 'session=abc123;user=john'; + var res = await client.htmlApi({ url: target_url, cookies: cookies }); + assert.deepStrictEqual(res.config.params['cookies'], cookies); + }); + + it('should format the extract_rules and add them to the query params', async function () { + var res = await client.htmlApi({ + url: target_url, + params: { + extract_rules: { + title: 'h1', + subtitle: '#subtitle', + }, + }, + }); + assert.deepStrictEqual( + res.config.params['extract_rules'], + '{"title":"h1","subtitle":"#subtitle"}' + ); + }); + + it('should format the js_scenario and add them to the query params', async function () { + var res = await client.htmlApi({ + url: target_url, + params: { + js_scenario: { + instructions: [{ click: '#buttonId' }], + }, + }, + }); + assert.deepStrictEqual( + res.config.params['js_scenario'], + '{"instructions":[{"click":"#buttonId"}]}' + ); + }); + + it('should format the ai_extract_rules and add them to the query params', async function () { + var res = await client.htmlApi({ + url: target_url, + params: { + ai_extract_rules: { + summary: 'Extract the main content', + }, + }, + }); + assert.deepStrictEqual( + res.config.params['ai_extract_rules'], + '{"summary":"Extract the main content"}' + ); + }); + + it('should handle POST with data and params', async function () { + var res = await client.htmlApi({ + url: 'https://httpbin.org/post', + method: 'POST', + data: 'username=user&password=pass', + params: { render_js: true } + }); + assert.deepStrictEqual(res.status, 201); + assert.deepStrictEqual(res.config.params['render_js'], true); + assert.deepStrictEqual(res.config.data, 'username=user&password=pass'); + }); + + it('should handle POST with headers', async function () { + var res = await client.htmlApi({ + url: 'https://httpbin.org/post', + method: 'POST', + 
headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + data: 'test=data' + }); + // @ts-ignore + assert.deepStrictEqual(res.config.headers['Spb-Content-Type'], 'application/x-www-form-urlencoded'); + assert.deepStrictEqual(res.config.params['forward_headers'], true); + }); +}); + +// ============================================ +// Google Search API +// ============================================ + +describe('test_ScrapingBeeClient.googleSearch', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.googleSearch({ + search: 'test query', + params: { language: 'en', country_code: 'us' } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['search'], 'test query'); + assert.deepStrictEqual(res.config.params['language'], 'en'); + assert.deepStrictEqual(res.config.params['country_code'], 'us'); + }); + + it('should work with only required param', async function () { + var res = await client.googleSearch({ search: 'test' }); + assert.deepStrictEqual(res.config.params['search'], 'test'); + }); + + it('should handle all optional params', async function () { + var res = await client.googleSearch({ + search: 'test', + params: { + add_html: true, + device: 'mobile', + page: 2, + search_type: 'news', + light_request: false, + nfpr: true + } + }); + assert.deepStrictEqual(res.config.params['add_html'], true); + assert.deepStrictEqual(res.config.params['device'], 'mobile'); + assert.deepStrictEqual(res.config.params['page'], 2); + assert.deepStrictEqual(res.config.params['search_type'], 'news'); + assert.deepStrictEqual(res.config.params['light_request'], false); + assert.deepStrictEqual(res.config.params['nfpr'], true); + }); +}); + +// ============================================ +// Amazon Search API +// 
============================================ + +describe('test_ScrapingBeeClient.amazonSearch', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.amazonSearch({ + query: 'laptop', + params: { domain: 'com', language: 'en' } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['query'], 'laptop'); + assert.deepStrictEqual(res.config.params['domain'], 'com'); + assert.deepStrictEqual(res.config.params['language'], 'en'); + }); + + it('should work with only required param', async function () { + var res = await client.amazonSearch({ query: 'phone' }); + assert.deepStrictEqual(res.config.params['query'], 'phone'); + }); + + it('should handle all optional params', async function () { + var res = await client.amazonSearch({ + query: 'laptop', + params: { + add_html: true, + country: 'us', + currency: 'USD', + device: 'desktop', + pages: 2, + sort_by: 'price_low_to_high', + start_page: 1 + } + }); + assert.deepStrictEqual(res.config.params['add_html'], true); + assert.deepStrictEqual(res.config.params['country'], 'us'); + assert.deepStrictEqual(res.config.params['currency'], 'USD'); + assert.deepStrictEqual(res.config.params['pages'], 2); + assert.deepStrictEqual(res.config.params['sort_by'], 'price_low_to_high'); + }); +}); + +// ============================================ +// Amazon Product API +// ============================================ + +describe('test_ScrapingBeeClient.amazonProduct', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.amazonProduct({ + query: 'B0D2Q9397Y', + params: { domain: 'com' } + }); + 
assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['query'], 'B0D2Q9397Y'); + assert.deepStrictEqual(res.config.params['domain'], 'com'); + }); + + it('should work with only required param', async function () { + var res = await client.amazonProduct({ query: 'B0D2Q9397Y' }); + assert.deepStrictEqual(res.config.params['query'], 'B0D2Q9397Y'); + }); + + it('should handle all optional params', async function () { + var res = await client.amazonProduct({ + query: 'B0D2Q9397Y', + params: { + add_html: true, + autoselect_variant: true, + country: 'us', + currency: 'USD', + device: 'mobile', + language: 'en', + light_request: false, + screenshot: true + } + }); + assert.deepStrictEqual(res.config.params['autoselect_variant'], true); + assert.deepStrictEqual(res.config.params['screenshot'], true); + assert.deepStrictEqual(res.config.params['light_request'], false); + }); +}); + +// ============================================ +// Walmart Search API +// ============================================ + +describe('test_ScrapingBeeClient.walmartSearch', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.walmartSearch({ + query: 'laptop', + params: { sort_by: 'best_match', device: 'desktop' } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['query'], 'laptop'); + assert.deepStrictEqual(res.config.params['sort_by'], 'best_match'); + }); + + it('should work with only required param', async function () { + var res = await client.walmartSearch({ query: 'tv' }); + assert.deepStrictEqual(res.config.params['query'], 'tv'); + }); + + it('should handle all optional params', async function () { + var res = await 
client.walmartSearch({ + query: 'laptop', + params: { + add_html: true, + delivery_zip: '10001', + device: 'mobile', + fulfillment_speed: 'today', + max_price: 1000, + min_price: 100, + screenshot: true, + store_id: '12345' + } + }); + assert.deepStrictEqual(res.config.params['delivery_zip'], '10001'); + assert.deepStrictEqual(res.config.params['max_price'], 1000); + assert.deepStrictEqual(res.config.params['min_price'], 100); + assert.deepStrictEqual(res.config.params['fulfillment_speed'], 'today'); + }); +}); + +// ============================================ +// Walmart Product API +// ============================================ + +describe('test_ScrapingBeeClient.walmartProduct', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.walmartProduct({ + product_id: '123456789', + params: { device: 'desktop' } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['product_id'], '123456789'); + assert.deepStrictEqual(res.config.params['device'], 'desktop'); + }); + + it('should work with only required param', async function () { + var res = await client.walmartProduct({ product_id: '123456789' }); + assert.deepStrictEqual(res.config.params['product_id'], '123456789'); + }); + + it('should handle all optional params', async function () { + var res = await client.walmartProduct({ + product_id: '123456789', + params: { + add_html: true, + delivery_zip: '10001', + device: 'tablet', + domain: 'com', + light_request: false, + screenshot: true, + store_id: '12345' + } + }); + assert.deepStrictEqual(res.config.params['delivery_zip'], '10001'); + assert.deepStrictEqual(res.config.params['device'], 'tablet'); + assert.deepStrictEqual(res.config.params['screenshot'], true); + }); +}); + +// 
============================================ +// ChatGPT API +// ============================================ + +describe('test_ScrapingBeeClient.chatGPT', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.chatGPT({ + prompt: 'What is web scraping?', + params: { search: true } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['prompt'], 'What is web scraping?'); + assert.deepStrictEqual(res.config.params['search'], true); + }); + + it('should work with only required param', async function () { + var res = await client.chatGPT({ prompt: 'Hello' }); + assert.deepStrictEqual(res.config.params['prompt'], 'Hello'); + }); + + it('should handle all optional params', async function () { + var res = await client.chatGPT({ + prompt: 'Explain AI', + params: { + add_html: true, + country_code: 'us', + search: true + } + }); + assert.deepStrictEqual(res.config.params['add_html'], true); + assert.deepStrictEqual(res.config.params['country_code'], 'us'); + assert.deepStrictEqual(res.config.params['search'], true); + }); +}); + +// ============================================ +// YouTube Search API +// ============================================ + +describe('test_ScrapingBeeClient.youtubeSearch', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.youtubeSearch({ + search: 'web scraping tutorial', + params: { sort_by: 'relevance', type: 'video' } + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['search'], 'web scraping tutorial'); + 
assert.deepStrictEqual(res.config.params['sort_by'], 'relevance'); + }); + + it('should work with only required param', async function () { + var res = await client.youtubeSearch({ search: 'coding' }); + assert.deepStrictEqual(res.config.params['search'], 'coding'); + }); + + it('should handle all optional params', async function () { + var res = await client.youtubeSearch({ + search: 'music', + params: { + '4k': true, + hd: true, + duration: '4-20', + upload_date: 'this_week', + sort_by: 'view_count', + type: 'video', + subtitles: true, + live: false + } + }); + assert.deepStrictEqual(res.config.params['4k'], true); + assert.deepStrictEqual(res.config.params['hd'], true); + assert.deepStrictEqual(res.config.params['duration'], '4-20'); + assert.deepStrictEqual(res.config.params['upload_date'], 'this_week'); + assert.deepStrictEqual(res.config.params['subtitles'], true); + }); +}); + +// ============================================ +// YouTube Metadata API +// ============================================ + +describe('test_ScrapingBeeClient.youtubeMetadata', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.youtubeMetadata({ + video_id: 'dQw4w9WgXcQ' + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['video_id'], 'dQw4w9WgXcQ'); + }); +}); + +// ============================================ +// YouTube Transcript API +// ============================================ + +describe('test_ScrapingBeeClient.youtubeTranscript', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.youtubeTranscript({ + video_id: 'dQw4w9WgXcQ', + params: { language: 'en' } + 
}); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['video_id'], 'dQw4w9WgXcQ'); + assert.deepStrictEqual(res.config.params['language'], 'en'); + }); + + it('should work with only required param', async function () { + var res = await client.youtubeTranscript({ video_id: 'dQw4w9WgXcQ' }); + assert.deepStrictEqual(res.config.params['video_id'], 'dQw4w9WgXcQ'); + }); + + it('should handle all optional params', async function () { + var res = await client.youtubeTranscript({ + video_id: 'dQw4w9WgXcQ', + params: { + language: 'es', + transcript_origin: 'uploader_provided' + } + }); + assert.deepStrictEqual(res.config.params['language'], 'es'); + assert.deepStrictEqual(res.config.params['transcript_origin'], 'uploader_provided'); + }); +}); + +// ============================================ +// YouTube Trainability API +// ============================================ + +describe('test_ScrapingBeeClient.youtubeTrainability', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.youtubeTrainability({ + video_id: 'dQw4w9WgXcQ' + }); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + assert.deepStrictEqual(res.config.params['video_id'], 'dQw4w9WgXcQ'); + }); +}); + +// ============================================ +// Usage API +// ============================================ + +describe('test_ScrapingBeeClient.usage', function () { + var api_key = 'API_KEY'; + var client = new ScrapingBeeClient(api_key); + mock.onGet().reply(200); + + it('should make a request with correct params', async function () { + var res = await client.usage(); + assert.deepStrictEqual(res.status, 200); + assert.deepStrictEqual(res.config.params['api_key'], api_key); + }); + + 
it('should work with empty config', async function () { + var res = await client.usage({}); + assert.deepStrictEqual(res.status, 200); + }); + + it('should work with optional config', async function () { + var res = await client.usage({ retries: 3, timeout: 5000 }); + assert.deepStrictEqual(res.status, 200); + }); +}); diff --git a/tests/utils.test.ts b/tests/utils.test.ts index 1a82aba..9830468 100644 --- a/tests/utils.test.ts +++ b/tests/utils.test.ts @@ -62,6 +62,21 @@ describe('test_process_extract_rules', function () { }); }); +describe('test_process_ai_extract_rules', function () { + it('should stringify JSON', function () { + var ai_extract_rules = { summary: 'Extract main content' }; + var res = process_params({ ai_extract_rules: ai_extract_rules }); + assert.strictEqual(res['ai_extract_rules'], '{"summary":"Extract main content"}'); + }); + + it('should return the same string', function () { + var ai_extract_rules = '{"summary":"Extract main content"}'; + var res = process_params({ ai_extract_rules: ai_extract_rules }); + assert.strictEqual(res['ai_extract_rules'], '{"summary":"Extract main content"}'); + }); +}); + + describe('test_process_js_scenario', function () { it('should stringify JSON', function () { var js_scenario = {