From 6f1e0a215e3160b07d0d7213cb6aa380651c8e96 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 17 Feb 2026 11:49:28 +0100 Subject: [PATCH 1/2] feat: add typed API responses and fix sync/async endpoint classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give every endpoint a proper response type for its ApiResult payload (ApiResult stays as the result wrapper). Only crawl uses polling now — all other endpoints are single direct requests (POST, or GET for credits/healthz/history). Co-Authored-By: Claude Opus 4.6 --- README.md | 14 ++- src/commands/agentic-scraper.ts | 2 +- src/commands/generate-schema.ts | 2 +- src/commands/markdownify.ts | 2 +- src/commands/scrape.ts | 2 +- src/commands/search-scraper.ts | 2 +- src/commands/smart-scraper.ts | 2 +- src/lib/scrapegraphai.ts | 147 ++++++++++++++------------------ src/types/index.ts | 96 +++++++++++++++++++++ tests/scrapegraphai.test.ts | 65 ++++++-------- 10 files changed, 204 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index e4f8a2a..f0a100f 100644 --- a/README.md +++ b/README.md @@ -11,16 +11,26 @@ Command-line interface for [ScrapeGraph AI](https://scrapegraphai.com) — AI-po ``` just-scrape/ +├── docs/ # API response docs per endpoint +│ ├── smartscraper.md +│ ├── searchscraper.md +│ ├── markdownify.md +│ ├── crawl.md +│ ├── scrape.md +│ ├── agenticscraper.md +│ ├── generate-schema.md +│ ├── sitemap.md +│ └── credits.md ├── src/ │ ├── cli.ts # Entry point, citty main command + subcommands │ ├── lib/ │ │ ├── env.ts # Zod-parsed env config (API key, debug, timeout) │ │ ├── folders.ts # API key resolution + interactive prompt -│ │ ├── scrapegraphai.ts # SDK layer — all API functions +│ │ ├── scrapegraphai.ts # SDK layer — all API functions (typed responses) │ │ ├── schemas.ts # Zod validation schemas │ │ └── log.ts # Logger factory + syntax-highlighted JSON output │ ├── types/ -│ │ └── index.ts # Zod-derived types + ApiResult +│ │ └── index.ts # Zod-derived types + ApiResult + response types │ ├── commands/ │ │ ├── 
smart-scraper.ts │ │ ├── search-scraper.ts diff --git a/src/commands/agentic-scraper.ts b/src/commands/agentic-scraper.ts index d5d0993..ce1a146 100644 --- a/src/commands/agentic-scraper.ts +++ b/src/commands/agentic-scraper.ts @@ -43,7 +43,7 @@ export default defineCommand({ if (args["use-session"]) params.use_session = true; out.start("Running browser automation"); - const result = await scrapegraphai.agenticScraper(key, params, out.poll); + const result = await scrapegraphai.agenticScraper(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/commands/generate-schema.ts b/src/commands/generate-schema.ts index eef4795..b24a937 100644 --- a/src/commands/generate-schema.ts +++ b/src/commands/generate-schema.ts @@ -28,7 +28,7 @@ export default defineCommand({ if (args["existing-schema"]) params.existing_schema = JSON.parse(args["existing-schema"]); out.start("Generating schema"); - const result = await scrapegraphai.generateSchema(key, params, out.poll); + const result = await scrapegraphai.generateSchema(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/commands/markdownify.ts b/src/commands/markdownify.ts index 467cb08..40e62c7 100644 --- a/src/commands/markdownify.ts +++ b/src/commands/markdownify.ts @@ -31,7 +31,7 @@ export default defineCommand({ if (args.headers) params.headers = JSON.parse(args.headers); out.start("Converting to markdown"); - const result = await scrapegraphai.markdownify(key, params, out.poll); + const result = await scrapegraphai.markdownify(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts index 8fa5394..d57112a 100644 --- a/src/commands/scrape.ts +++ b/src/commands/scrape.ts @@ -31,7 +31,7 @@ export default defineCommand({ if (args["country-code"]) params.country_code = args["country-code"]; out.start("Scraping"); - const result = await 
scrapegraphai.scrape(key, params, out.poll); + const result = await scrapegraphai.scrape(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/commands/search-scraper.ts b/src/commands/search-scraper.ts index ab2a4ff..46f339e 100644 --- a/src/commands/search-scraper.ts +++ b/src/commands/search-scraper.ts @@ -43,7 +43,7 @@ export default defineCommand({ if (args.headers) params.headers = JSON.parse(args.headers); out.start("Searching"); - const result = await scrapegraphai.searchScraper(key, params, out.poll); + const result = await scrapegraphai.searchScraper(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/commands/smart-scraper.ts b/src/commands/smart-scraper.ts index a44d63f..40d3207 100644 --- a/src/commands/smart-scraper.ts +++ b/src/commands/smart-scraper.ts @@ -48,7 +48,7 @@ export default defineCommand({ if (args["plain-text"]) params.plain_text = true; out.start("Scraping"); - const result = await scrapegraphai.smartScraper(key, params, out.poll); + const result = await scrapegraphai.smartScraper(key, params); out.stop(result.elapsedMs); if (result.data) out.result(result.data); diff --git a/src/lib/scrapegraphai.ts b/src/lib/scrapegraphai.ts index 5cdfed6..3ed768c 100644 --- a/src/lib/scrapegraphai.ts +++ b/src/lib/scrapegraphai.ts @@ -1,14 +1,25 @@ import type { AgenticScraperParams, + AgenticScraperResponse, ApiResult, CrawlParams, + CrawlResponse, + CreditsResponse, GenerateSchemaParams, + GenerateSchemaResponse, + HealthResponse, HistoryParams, + HistoryResponse, MarkdownifyParams, + MarkdownifyResponse, ScrapeParams, + ScrapeResponse, SearchScraperParams, + SearchScraperResponse, SitemapParams, + SitemapResponse, SmartScraperParams, + SmartScraperResponse, } from "../types/index.js"; import { env } from "./env.js"; import { @@ -22,18 +33,28 @@ import { SitemapSchema, SmartScraperSchema, } from "./schemas.js"; - export type { AgenticScraperParams, + 
AgenticScraperResponse, ApiResult, CrawlParams, + CrawlResponse, + CreditsResponse, GenerateSchemaParams, + GenerateSchemaResponse, + HealthResponse, HistoryParams, + HistoryResponse, MarkdownifyParams, + MarkdownifyResponse, ScrapeParams, + ScrapeResponse, SearchScraperParams, + SearchScraperResponse, SitemapParams, + SitemapResponse, SmartScraperParams, + SmartScraperResponse, } from "../types/index.js"; const BASE_URL = process.env.JUST_SCRAPE_API_URL || "https://api.scrapegraphai.com/v1"; @@ -46,10 +67,6 @@ function debug(label: string, data?: unknown) { else console.error(`[${ts}] ${label}`); } -function getTimeoutMs(): number { - return env.timeoutS * 1000; -} - function ok(data: T, elapsedMs: number): ApiResult { return { status: "success", data, elapsedMs }; } @@ -98,7 +115,7 @@ async function request( ...(body ? { "Content-Type": "application/json" } : {}), }, body: body ? JSON.stringify(body) : undefined, - signal: AbortSignal.timeout(getTimeoutMs()), + signal: AbortSignal.timeout(env.timeoutS * 1000), }); if (!res.ok) { @@ -119,19 +136,21 @@ async function request( type PollResponse = { status: string; - request_id?: string; - crawl_id?: string; error?: string; [key: string]: unknown; }; +function isDone(status: string) { + return status === "completed" || status === "done"; +} + async function pollUntilDone( path: string, id: string, apiKey: string, onPoll?: (status: string) => void, ): Promise> { - const deadline = Date.now() + getTimeoutMs(); + const deadline = Date.now() + env.timeoutS * 1000; let totalMs = 0; while (Date.now() < deadline) { @@ -139,7 +158,7 @@ async function pollUntilDone( totalMs += elapsedMs; onPoll?.(data.status); - if (data.status === "completed" || data.status === "done") return { data, elapsedMs: totalMs }; + if (isDone(data.status)) return { data, elapsedMs: totalMs }; if (data.status === "failed") throw new Error(data.error ?? 
"Job failed"); await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); @@ -148,37 +167,28 @@ async function pollUntilDone( throw new Error("Polling timed out"); } -async function submitAndPoll( +async function submitAndPoll( path: string, apiKey: string, body: object, idField: string, onPoll?: (status: string) => void, -): Promise> { +): Promise> { const { data: res, elapsedMs } = await request("POST", path, apiKey, body); - if (res.status === "completed" || res.status === "done") return { data: res, elapsedMs }; + if (isDone(res.status)) return { data: res as unknown as T, elapsedMs }; const id = res[idField]; if (typeof id !== "string") throw new Error(`Missing ${idField} in response`); const poll = await pollUntilDone(path, id, apiKey, onPoll); - return { data: poll.data, elapsedMs: elapsedMs + poll.elapsedMs }; + return { data: poll.data as unknown as T, elapsedMs: elapsedMs + poll.elapsedMs }; } -// --- Async endpoints --- - export async function smartScraper( apiKey: string, params: SmartScraperParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { SmartScraperSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/smartscraper", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/smartscraper", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -188,17 +198,10 @@ export async function smartScraper( export async function searchScraper( apiKey: string, params: SearchScraperParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { SearchScraperSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/searchscraper", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/searchscraper", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -208,17 +211,10 @@ export async function searchScraper( export async 
function markdownify( apiKey: string, params: MarkdownifyParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { MarkdownifySchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/markdownify", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/markdownify", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -228,17 +224,10 @@ export async function markdownify( export async function scrape( apiKey: string, params: ScrapeParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { ScrapeSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/scrape", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/scrape", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -249,10 +238,10 @@ export async function crawl( apiKey: string, params: CrawlParams, onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { CrawlSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll("/crawl", apiKey, params, "crawl_id", onPoll); + const { data, elapsedMs } = await submitAndPoll("/crawl", apiKey, params, "crawl_id", onPoll); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -262,17 +251,10 @@ export async function crawl( export async function agenticScraper( apiKey: string, params: AgenticScraperParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { AgenticScraperSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/agentic-scrapper", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/agentic-scrapper", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -282,60 +264,61 @@ export async function agenticScraper( export async function generateSchema( apiKey: string, 
params: GenerateSchemaParams, - onPoll?: (status: string) => void, -): Promise> { +): Promise> { try { GenerateSchemaSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll( - "/generate_schema", - apiKey, - params, - "request_id", - onPoll, - ); + const { data, elapsedMs } = await request("POST", "/generate_schema", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -// --- Sync endpoints --- - -export async function sitemap(apiKey: string, params: SitemapParams): Promise> { +export async function sitemap( + apiKey: string, + params: SitemapParams, +): Promise> { try { SitemapSchema.parse(params); - const { data, elapsedMs } = await request("POST", "/sitemap", apiKey, params); + const { data, elapsedMs } = await request("POST", "/sitemap", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function getCredits(apiKey: string): Promise> { +export async function getCredits(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/credits", apiKey); + const { data, elapsedMs } = await request("GET", "/credits", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function checkHealth(apiKey: string): Promise> { +export async function checkHealth(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/healthz", apiKey); + const { data, elapsedMs } = await request("GET", "/healthz", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function history(apiKey: string, params: HistoryParams): Promise> { +export async function history( + apiKey: string, + params: HistoryParams, +): Promise> { try { const parsed = HistorySchema.parse(params); const qs = new URLSearchParams(); qs.set("page", String(parsed.page)); qs.set("page_size", String(parsed.page_size)); - const { data, elapsedMs } = await request("GET", `/history/${parsed.service}?${qs}`, apiKey); 
+ const { data, elapsedMs } = await request( + "GET", + `/history/${parsed.service}?${qs}`, + apiKey, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); diff --git a/src/types/index.ts b/src/types/index.ts index fa6760c..6e6dba6 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -27,3 +27,99 @@ export type ApiResult = { error?: string; elapsedMs: number; }; + +export type SmartScraperResponse = { + request_id: string; + status: string; + website_url: string; + user_prompt: string; + result: Record | null; + error?: string; +}; + +export type SearchScraperResponse = { + request_id: string; + status: string; + user_prompt: string; + result: Record | null; + reference_urls: string[]; + error?: string; +}; + +export type MarkdownifyResponse = { + request_id: string; + status: string; + website_url: string; + result: string | null; + error?: string; +}; + +export type CrawlPage = { + url: string; + markdown: string; +}; + +export type CrawlResponse = { + crawl_id: string; + status: string; + result?: Record | null; + crawled_urls?: string[]; + pages?: CrawlPage[]; + error?: string; +}; + +export type ScrapeResponse = { + request_id: string; + status: string; + html: string; + branding?: Record; + error?: string; +}; + +export type AgenticScraperResponse = { + request_id: string; + status: string; + result: Record | null; + error?: string; +}; + +export type GenerateSchemaResponse = { + request_id: string; + status: string; + user_prompt: string; + refined_prompt?: string | null; + generated_schema?: Record | null; + error?: string | null; + created_at?: string | null; + updated_at?: string | null; +}; + +export type SitemapResponse = { + request_id?: string; + status?: string; + website_url?: string; + urls: string[]; + error?: string; +}; + +export type CreditsResponse = { + remaining_credits: number; + total_credits_used: number; +}; + +export type HealthResponse = { + status: string; +}; + +export type HistoryResponse = { + requests: 
HistoryEntry[]; + total_count: number; + page: number; + page_size: number; +}; + +export type HistoryEntry = { + request_id: string; + status: string; + [key: string]: unknown; +}; diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index c6322d8..3b68525 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -41,13 +41,13 @@ function expectGet(callIndex: number, path: string) { } // --------------------------------------------------------------------------- -// smartScraper — exhaustive (tests all shared internals) +// smartScraper — sync endpoint, tests shared HTTP internals // --------------------------------------------------------------------------- describe("smartScraper", () => { const params = { user_prompt: "Extract prices", website_url: "https://example.com" }; - test("immediate completion", async () => { + test("success", async () => { const body = { status: "completed", result: { prices: [10, 20] } }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); @@ -60,43 +60,6 @@ describe("smartScraper", () => { expectPost(0, "/smartscraper", params); }); - test("polls when POST returns pending", async () => { - const pollResult = { status: "completed", request_id: "req-1", result: { data: "scraped" } }; - fetchSpy = spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", request_id: "req-1" })) - .mockResolvedValueOnce(json(pollResult)); - - const res = await scrapegraphai.smartScraper(API_KEY, params); - - expect(res.status).toBe("success"); - expect(res.data).toEqual(pollResult); - expect(fetchSpy).toHaveBeenCalledTimes(2); - expectPost(0, "/smartscraper", params); - expectGet(1, "/smartscraper/req-1"); - }); - - test("calls onPoll callback", async () => { - const statuses: string[] = []; - fetchSpy = spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", request_id: "req-1" })) - .mockResolvedValueOnce(json({ status: "completed", request_id: "req-1" 
})); - - await scrapegraphai.smartScraper(API_KEY, params, (s) => statuses.push(s)); - - expect(statuses).toEqual(["completed"]); - }); - - test("poll failure", async () => { - fetchSpy = spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", request_id: "req-1" })) - .mockResolvedValueOnce(json({ status: "failed", error: "Job exploded" })); - - const res = await scrapegraphai.smartScraper(API_KEY, params); - - expect(res.status).toBe("error"); - expect(res.error).toBe("Job exploded"); - }); - test("validation failure", async () => { const res = await scrapegraphai.smartScraper(API_KEY, { user_prompt: "" } as any); @@ -247,7 +210,7 @@ describe("scrape", () => { }); // --------------------------------------------------------------------------- -// crawl — uses crawl_id instead of request_id +// crawl — async endpoint, uses crawl_id, tests polling internals // --------------------------------------------------------------------------- describe("crawl", () => { @@ -276,6 +239,28 @@ describe("crawl", () => { expectGet(1, "/crawl/crawl-99"); }); + test("calls onPoll callback", async () => { + const statuses: string[] = []; + fetchSpy = spyOn(globalThis, "fetch") + .mockResolvedValueOnce(json({ status: "pending", crawl_id: "crawl-99" })) + .mockResolvedValueOnce(json({ status: "done", crawl_id: "crawl-99", pages: [] })); + + await scrapegraphai.crawl(API_KEY, params, (s) => statuses.push(s)); + + expect(statuses).toEqual(["done"]); + }); + + test("poll failure", async () => { + fetchSpy = spyOn(globalThis, "fetch") + .mockResolvedValueOnce(json({ status: "pending", crawl_id: "crawl-99" })) + .mockResolvedValueOnce(json({ status: "failed", error: "Crawl exploded" })); + + const res = await scrapegraphai.crawl(API_KEY, params); + + expect(res.status).toBe("error"); + expect(res.error).toBe("Crawl exploded"); + }); + test("validation failure", async () => { const res = await scrapegraphai.crawl(API_KEY, { url: "not-a-url" } as any); 
expect(res.status).toBe("error"); From dd567e548691b2f32cec446685ab248b753631be Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 17 Feb 2026 11:52:13 +0100 Subject: [PATCH 2/2] fixed lint errors --- .gitignore | 4 ++++ package.json | 2 +- src/lib/scrapegraphai.ts | 43 ++++++++++++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 2549593..0beae13 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,7 @@ temp/ # Bun bun.lockb + +# API comparison data +data/ +scripts/ \ No newline at end of file diff --git a/package.json b/package.json index 3e95a3d..de59a49 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "just-scrape", - "version": "0.1.8", + "version": "0.1.9", "description": "ScrapeGraph AI CLI tool", "type": "module", "main": "dist/cli.mjs", diff --git a/src/lib/scrapegraphai.ts b/src/lib/scrapegraphai.ts index 3ed768c..1aa06c8 100644 --- a/src/lib/scrapegraphai.ts +++ b/src/lib/scrapegraphai.ts @@ -188,7 +188,12 @@ export async function smartScraper( ): Promise> { try { SmartScraperSchema.parse(params); - const { data, elapsedMs } = await request("POST", "/smartscraper", apiKey, params); + const { data, elapsedMs } = await request( + "POST", + "/smartscraper", + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -201,7 +206,12 @@ export async function searchScraper( ): Promise> { try { SearchScraperSchema.parse(params); - const { data, elapsedMs } = await request("POST", "/searchscraper", apiKey, params); + const { data, elapsedMs } = await request( + "POST", + "/searchscraper", + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -214,7 +224,12 @@ export async function markdownify( ): Promise> { try { MarkdownifySchema.parse(params); - const { data, elapsedMs } = await request("POST", "/markdownify", apiKey, params); + const { data, elapsedMs } = await request( + "POST", + 
"/markdownify", + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -241,7 +256,13 @@ export async function crawl( ): Promise> { try { CrawlSchema.parse(params); - const { data, elapsedMs } = await submitAndPoll("/crawl", apiKey, params, "crawl_id", onPoll); + const { data, elapsedMs } = await submitAndPoll( + "/crawl", + apiKey, + params, + "crawl_id", + onPoll, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -254,7 +275,12 @@ export async function agenticScraper( ): Promise> { try { AgenticScraperSchema.parse(params); - const { data, elapsedMs } = await request("POST", "/agentic-scrapper", apiKey, params); + const { data, elapsedMs } = await request( + "POST", + "/agentic-scrapper", + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -267,7 +293,12 @@ export async function generateSchema( ): Promise> { try { GenerateSchemaSchema.parse(params); - const { data, elapsedMs } = await request("POST", "/generate_schema", apiKey, params); + const { data, elapsedMs } = await request( + "POST", + "/generate_schema", + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err);