From 906c1e60a8080cb1ee395b083869b039d0cc1ec0 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra
Date: Mon, 16 Feb 2026 12:19:11 +0100
Subject: [PATCH] feat: add wait_ms parameter to Crawler endpoint

Allows callers to configure rendering wait time per page during crawling,
matching the API-side change in sgai-api#402.

Co-Authored-By: Claude Opus 4.6
---
 scrapegraph-py/scrapegraph_py/async_client.py | 6 ++++++
 scrapegraph-py/scrapegraph_py/client.py       | 6 ++++++
 scrapegraph-py/scrapegraph_py/models/crawl.py | 5 +++++
 3 files changed, 17 insertions(+)

diff --git a/scrapegraph-py/scrapegraph_py/async_client.py b/scrapegraph-py/scrapegraph_py/async_client.py
index d2d5166..5111849 100644
--- a/scrapegraph-py/scrapegraph_py/async_client.py
+++ b/scrapegraph-py/scrapegraph_py/async_client.py
@@ -884,6 +884,7 @@ async def crawl(
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
         webhook_url: Optional[str] = None,
+        wait_ms: Optional[int] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -911,6 +912,7 @@
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
             webhook_url: URL to receive webhook notifications when the crawl completes
+            wait_ms: Number of milliseconds to wait before scraping each page
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -944,6 +946,8 @@
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
         if webhook_url:
             logger.debug(f"🔔 Webhook URL: {webhook_url}")
+        if wait_ms is not None:
+            logger.debug(f"⏱️ Wait ms: {wait_ms}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")
@@ -977,6 +981,8 @@
             request_data["exclude_paths"] = exclude_paths
         if webhook_url is not None:
             request_data["webhook_url"] = webhook_url
+        if wait_ms is not None:
+            request_data["wait_ms"] = wait_ms
         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py
index 1333342..28fc1bf 100644
--- a/scrapegraph-py/scrapegraph_py/client.py
+++ b/scrapegraph-py/scrapegraph_py/client.py
@@ -894,6 +894,7 @@ def crawl(
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
         webhook_url: Optional[str] = None,
+        wait_ms: Optional[int] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -921,6 +922,7 @@
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
             webhook_url: URL to receive webhook notifications when the crawl completes
+            wait_ms: Number of milliseconds to wait before scraping each page
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -954,6 +956,8 @@
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
         if webhook_url:
             logger.debug(f"🔔 Webhook URL: {webhook_url}")
+        if wait_ms is not None:
+            logger.debug(f"⏱️ Wait ms: {wait_ms}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")
@@ -987,6 +991,8 @@
             request_data["exclude_paths"] = exclude_paths
         if webhook_url is not None:
             request_data["webhook_url"] = webhook_url
+        if wait_ms is not None:
+            request_data["wait_ms"] = wait_ms
         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
diff --git a/scrapegraph-py/scrapegraph_py/models/crawl.py b/scrapegraph-py/scrapegraph_py/models/crawl.py
index e515b80..dd6cca9 100644
--- a/scrapegraph-py/scrapegraph_py/models/crawl.py
+++ b/scrapegraph-py/scrapegraph_py/models/crawl.py
@@ -108,6 +108,11 @@ class CrawlRequest(BaseModel):
         "The webhook will receive a POST request with the crawl results.",
         example="https://example.com/webhook"
     )
+    wait_ms: Optional[int] = Field(
+        default=None,
+        description="Number of milliseconds to wait before scraping each page. "
+        "Useful for pages with heavy JavaScript rendering that need extra time to load.",
+    )
 
     @model_validator(mode="after")
     def validate_url(self) -> "CrawlRequest":
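
Reviewer note: a minimal usage sketch of the new parameter, not part of the patch
itself. The Client import reflects this repository's package name, but the API key,
target URL, and prompt below are placeholders, and the exact crawl() positional
arguments are assumed rather than shown in this diff:

    from scrapegraph_py import Client

    # Placeholder credentials and target; replace with real values.
    client = Client(api_key="sgai-...")

    # Wait 2000 ms before scraping each page so JavaScript-heavy pages have
    # time to render. Omitting wait_ms keeps the previous behaviour, since the
    # new field defaults to None and is then left out of the request payload.
    result = client.crawl(
        url="https://example.com",
        prompt="Extract the page titles and main headings",
        wait_ms=2000,
    )
    print(result)

The async client takes the same keyword argument, so `await client.crawl(..., wait_ms=2000)`
works identically with AsyncClient.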