Merged
6 changes: 6 additions & 0 deletions scrapegraph-py/scrapegraph_py/async_client.py
@@ -884,6 +884,7 @@ async def crawl(
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
         webhook_url: Optional[str] = None,
+        wait_ms: Optional[int] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -911,6 +912,7 @@ async def crawl(
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
             webhook_url: URL to receive webhook notifications when the crawl completes
+            wait_ms: Number of milliseconds to wait before scraping each page
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -944,6 +946,8 @@
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
         if webhook_url:
             logger.debug(f"🔔 Webhook URL: {webhook_url}")
+        if wait_ms is not None:
+            logger.debug(f"⏱️ Wait ms: {wait_ms}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")

@@ -977,6 +981,8 @@
             request_data["exclude_paths"] = exclude_paths
         if webhook_url is not None:
             request_data["webhook_url"] = webhook_url
+        if wait_ms is not None:
+            request_data["wait_ms"] = wait_ms

         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
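Taken together, the async change is a simple pass-through: wait_ms is logged and copied into request_data only when a value is supplied, so existing callers are unaffected. A minimal usage sketch follows; the url/prompt arguments, the api_key value, and the close() call are assumed from the existing SDK surface rather than shown in this diff:

import asyncio

from scrapegraph_py import AsyncClient


async def main():
    client = AsyncClient(api_key="sgai-your-key")  # hypothetical key
    try:
        # wait_ms=2000 asks the crawler to pause ~2 seconds before scraping
        # each page, giving JavaScript-heavy pages time to finish rendering.
        result = await client.crawl(
            url="https://example.com",
            prompt="Extract the page titles",
            wait_ms=2000,
        )
        print(result)
    finally:
        await client.close()


asyncio.run(main())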
6 changes: 6 additions & 0 deletions scrapegraph-py/scrapegraph_py/client.py
@@ -894,6 +894,7 @@ def crawl(
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
         webhook_url: Optional[str] = None,
+        wait_ms: Optional[int] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -921,6 +922,7 @@ def crawl(
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
            webhook_url: URL to receive webhook notifications when the crawl completes
+            wait_ms: Number of milliseconds to wait before scraping each page
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -954,6 +956,8 @@
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
         if webhook_url:
             logger.debug(f"🔔 Webhook URL: {webhook_url}")
+        if wait_ms is not None:
+            logger.debug(f"⏱️ Wait ms: {wait_ms}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")

@@ -987,6 +991,8 @@
             request_data["exclude_paths"] = exclude_paths
         if webhook_url is not None:
             request_data["webhook_url"] = webhook_url
+        if wait_ms is not None:
+            request_data["wait_ms"] = wait_ms

         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
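The sync client mirrors the async path line for line. A sketch of the equivalent blocking call, with the same caveat that url and prompt come from the existing crawl() signature, which this diff only extends:

from scrapegraph_py import Client

client = Client(api_key="sgai-your-key")  # hypothetical key

# Supplying wait_ms adds it to the request payload; omitting it leaves the
# payload unchanged, since the client only sets the key when a value is given.
result = client.crawl(
    url="https://example.com",
    prompt="Extract the page titles",
    wait_ms=1500,  # wait 1.5 s before scraping each page
)
print(result)
client.close()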
5 changes: 5 additions & 0 deletions scrapegraph-py/scrapegraph_py/models/crawl.py
@@ -108,6 +108,11 @@ class CrawlRequest(BaseModel):
         "The webhook will receive a POST request with the crawl results.",
         example="https://example.com/webhook"
     )
+    wait_ms: Optional[int] = Field(
+        default=None,
+        description="Number of milliseconds to wait before scraping each page. "
+        "Useful for pages with heavy JavaScript rendering that need extra time to load.",
+    )

     @model_validator(mode="after")
     def validate_url(self) -> "CrawlRequest":
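Because wait_ms is an ordinary optional pydantic field, validation is permissive: unset stays None, and any int is accepted. A standalone toy model mirroring the new field (illustrative only; the real CrawlRequest has other required fields not shown here):

from typing import Optional

from pydantic import BaseModel, Field


class WaitMsSketch(BaseModel):
    # Mirrors the new field: optional, defaults to None, described for API docs.
    wait_ms: Optional[int] = Field(
        default=None,
        description="Number of milliseconds to wait before scraping each page.",
    )


print(WaitMsSketch().wait_ms)              # None; callers may omit it entirely
print(WaitMsSketch(wait_ms=2000).wait_ms)  # 2000

Note that the diff adds no numeric constraint (e.g. ge=0), so a negative wait_ms would pass model validation and be left for the API to reject.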