crestalnetwork · us · Jun 14, 2026
diff --git a/.env.example b/.env.example
@@ -109,6 +109,10 @@ ZAI_PLAN_API_KEY=
 TAVILY_API_KEY=
 JINA_API_KEY=
 FIRECRAWL_API_KEY=
+# fastCRW (Firecrawl-compatible web scraper). CRW_API_URL defaults to the
+# managed cloud; override it to point at a self-hosted server.
+CRW_API_KEY=
+CRW_API_URL=
 
 # Tracing backend (optional). Only one runs at a time: Langfuse takes
 # precedence when its keys are set, otherwise LangSmith is used. Values are

diff --git a/intentkit/config/config.py b/intentkit/config/config.py
@@ -228,6 +228,10 @@ def __init__(self) -> None:
         self.jina_api_key: str | None = self.load("JINA_API_KEY")
         self.cookiefun_api_key: str | None = self.load("COOKIEFUN_API_KEY")
         self.firecrawl_api_key: str | None = self.load("FIRECRAWL_API_KEY")
+        # fastCRW (Firecrawl-compatible web scraper; single binary, self-host or
+        # cloud). CRW_API_URL overrides the base for self-hosted deployments.
+        self.crw_api_key: str | None = self.load("CRW_API_KEY")
+        self.crw_api_url: str = self.load("CRW_API_URL", "https://fastcrw.com/api")
         self.cryptopanic_api_key: str | None = self.load("CRYPTOPANIC_API_KEY")
         self.unrealspeech_api_key: str | None = self.load("UNREALSPEECH_API_KEY")
         self.dune_api_key: str | None = self.load("DUNE_API_KEY")

diff --git a/intentkit/tools/crw/README.md b/intentkit/tools/crw/README.md
@@ -0,0 +1,79 @@
+# fastCRW Tools
+
+The fastCRW tools provide web scraping and content indexing using [fastCRW](https://fastcrw.com),
+a Firecrawl-compatible web data engine that ships as a single binary. Run it
+self-hosted (free, open core) or against the managed cloud. These tools mirror
+the Firecrawl provider and are additive — both providers can be enabled side by
+side.
+
+## Tools Overview
+
+### 1. crw_scrape
+Scrapes a single webpage and REPLACES any existing indexed content for that URL, preventing duplicates.
+
+**Parameters:**
+- `url` (required): The URL to scrape
+- `formats` (optional): Output formats - markdown, html, rawHtml, screenshot, links, json (default: ["markdown"])
+- `only_main_content` (optional): Extract only main content (default: true)
+- `include_tags` (optional): HTML tags to include (e.g., ["h1", "h2", "p"])
+- `exclude_tags` (optional): HTML tags to exclude
+- `wait_for` (optional): Wait time in milliseconds before scraping
+- `timeout` (optional): Maximum timeout in milliseconds (default: 30000)
+- `index_content` (optional): Whether to index content for querying (default: true)
+- `chunk_size` (optional): Size of text chunks for indexing (default: 1000)
+- `chunk_overlap` (optional): Overlap between chunks (default: 200)
+
+### 2. crw_crawl
+Crawls multiple pages from a website and indexes all content.
+
+**Parameters:**
+- `url` (required): The base URL to start crawling
+- `limit` (optional): Maximum number of pages to crawl (default: 10)
+- `include_paths` (optional): URL patterns to include (e.g., ["/docs/*"])
+- `exclude_paths` (optional): URL patterns to exclude
+- `max_depth` (optional): Maximum crawl depth
+- `index_content` (optional): Whether to index content for querying (default: true)
+- `chunk_size` (optional): Size of text chunks for indexing (default: 1000)
+- `chunk_overlap` (optional): Overlap between chunks (default: 200)
+
+### 3. crw_query_indexed_content
+Queries previously indexed fastCRW content using semantic search.
+
+**Parameters:**
+- `query` (required): The search query
+- `max_results` (optional): Maximum number of results to return (1-10, default: 4)
+
+### 4. crw_clear_indexed_content
+Clears all previously indexed fastCRW content from the vector store.
+
+**Parameters:**
+- `confirm` (optional, default: false): Must be set to true to confirm the deletion; any other value makes the tool return an error without deleting anything
+
+**Note:** This action is permanent and cannot be undone.
+
+## Configuration
+
+fastCRW is Firecrawl-compatible; the integration defaults to the managed cloud
+and lets you override the base URL for a self-hosted server.
+
+```bash
+# Managed cloud (default base URL https://fastcrw.com/api)
+export CRW_API_KEY=your-api-key-here
+
+# Self-hosted server (auth optional; CRW_API_KEY may be omitted)
+export CRW_API_URL=http://localhost:3000
+```
+
+- `CRW_API_KEY` — Bearer token. Optional for self-hosted instances that run without auth.
+- `CRW_API_URL` — Base URL, default `https://fastcrw.com/api`. Set this to point at a self-hosted server.
+
+Content indexing uses OpenAI embeddings, so `OPENAI_API_KEY` must also be configured.
+
+## Features and Benefits
+
+- **Firecrawl-compatible**: Same REST surface and data shapes as Firecrawl.
+- **Single binary**: Self-host the open core for free, or use the managed cloud.
+- **JavaScript Rendering**: Handles SPAs and dynamic content.
+- **Intelligent Chunking**: Optimized text splitting for better search.
+- **Content Replacement**: Replace mode prevents duplicate/stale content on re-scrape.
+- **Semantic Search**: Uses OpenAI embeddings for intelligent querying.
diff --git a/intentkit/tools/crw/__init__.py b/intentkit/tools/crw/__init__.py
@@ -0,0 +1,102 @@
+"""fastCRW tools for web scraping and crawling.
+
+fastCRW is a Firecrawl-compatible web scraper that ships as a single binary and
+runs self-hosted or on the managed cloud at https://fastcrw.com. This toolset
+mirrors the Firecrawl provider with a different base URL (CRW_API_URL) and key
+(CRW_API_KEY); it is additive and does not affect the Firecrawl toolset.
+"""
+
+import logging
+from typing import NotRequired, TypedDict
+
+from intentkit.config.config import config as system_config
+from intentkit.tools.base import ToolsetConfig, ToolState
+from intentkit.tools.crw.base import CrwBaseTool
+from intentkit.tools.crw.clear import CrwClearIndexedContent
+from intentkit.tools.crw.crawl import CrwCrawl
+from intentkit.tools.crw.query import CrwQueryIndexedContent
+from intentkit.tools.crw.scrape import CrwScrape
+
+# Module-level cache of tool instances keyed by tool name; tools are stateless
+_cache: dict[str, CrwBaseTool] = {}
+
+logger = logging.getLogger(__name__)
+
+
+class ToolStates(TypedDict):  # one entry per tool name known to get_crw_tool
+    crw_scrape: ToolState  # get_tools checks "disabled" / "public" / "private"
+    crw_crawl: ToolState
+    crw_query_indexed_content: ToolState
+    crw_clear_indexed_content: ToolState
+
+
+class Config(ToolsetConfig):
+    """Configuration for fastCRW tools."""
+
+    states: ToolStates  # per-tool state; the only key get_tools reads
+    rate_limit_number: NotRequired[int]  # optional; not read in this module
+    rate_limit_minutes: NotRequired[int]  # optional; not read in this module
+
+
+async def get_tools(
+    config: "Config",
+    is_private: bool,
+    **_,
+) -> list[CrwBaseTool]:
+    """Get all fastCRW tools.
+
+    Args:
+        config: The configuration for fastCRW tools.
+        is_private: Whether to include private tools.
+
+    Returns:
+        A list of fastCRW tools.
+    """
+    available_tools = []
+
+    # Include tools based on their state
+    for tool_name, state in config["states"].items():
+        if state == "disabled":
+            continue
+        elif state == "public" or (state == "private" and is_private):
+            available_tools.append(tool_name)
+
+    # Get each tool using the cached getter
+    return [s for name in available_tools if (s := get_crw_tool(name))]
+
+
+def get_crw_tool(
+    name: str,
+) -> CrwBaseTool | None:
+    """Get a fastCRW tool by name."""
+    if name == "crw_scrape":
+        if name not in _cache:
+            _cache[name] = CrwScrape()
+        return _cache[name]
+    elif name == "crw_crawl":
+        if name not in _cache:
+            _cache[name] = CrwCrawl()
+        return _cache[name]
+    elif name == "crw_query_indexed_content":
+        if name not in _cache:
+            _cache[name] = CrwQueryIndexedContent()
+        return _cache[name]
+    elif name == "crw_clear_indexed_content":
+        if name not in _cache:
+            _cache[name] = CrwClearIndexedContent()
+        return _cache[name]
+    else:
+        logger.warning("Unknown fastCRW tool: %s", name)
+        return None
+
+
+def available() -> bool:
+    """Check if this toolset is available based on system config.
+
+    fastCRW self-host may run without auth, and CRW_API_URL always has a default,
+    so the toolset is available when a key or a custom base URL is configured.
+    """
+    return bool(system_config.crw_api_key) or bool(
+        system_config.crw_api_url
+        and system_config.crw_api_url != "https://fastcrw.com/api"
+    )
diff --git a/intentkit/tools/crw/base.py b/intentkit/tools/crw/base.py
@@ -0,0 +1,24 @@
+from langchain_core.tools.base import ToolException
+
+from intentkit.config.config import config
+from intentkit.tools.base import IntentKitTool
+
+
+class CrwBaseTool(IntentKitTool):
+    """Base class for fastCRW tools.
+
+    fastCRW is a Firecrawl-compatible web scraper that ships as a single binary
+    and runs self-hosted or on the managed cloud. The REST surface mirrors
+    Firecrawl, so these tools mirror the Firecrawl provider with a different
+    base URL (CRW_API_URL, default https://fastcrw.com/api) and key (CRW_API_KEY).
+    """
+
+    def get_api_key(self):
+        # Self-hosted fastCRW may run without auth, so the key is optional there.
+        return config.crw_api_key
+
+    def get_api_url(self) -> str:
+        base = (config.crw_api_url or "https://fastcrw.com/api").rstrip("/")
+        return base
+
+    category: str = "crw"
diff --git a/intentkit/tools/crw/clear.py b/intentkit/tools/crw/clear.py
@@ -0,0 +1,79 @@
+import logging
+
+from langchain_core.tools import ArgsSchema
+from langchain_core.tools.base import ToolException
+from pydantic import BaseModel, Field
+
+from intentkit.models.tool import AgentToolData
+from intentkit.tools.crw.base import CrwBaseTool
+
+logger = logging.getLogger(__name__)
+
+
+class CrwClearInput(BaseModel):
+    """Input for fastCRW clear tool."""
+
+    confirm: bool = Field(
+        description="Must be true to confirm deletion.",
+        default=False,  # opt-in: the tool raises ToolException unless this is true
+    )
+
+
+class CrwClearIndexedContent(CrwBaseTool):
+    """Tool for clearing all indexed fastCRW content.
+
+    This tool removes all previously indexed content from the fastCRW vector store,
+    allowing for a fresh start with new content.
+    """
+
+    name: str = "crw_clear_indexed_content"
+    description: str = "Permanently clear all indexed fastCRW content from the vector store. Cannot be undone."
+    args_schema: ArgsSchema | None = CrwClearInput
+
+    async def _arun(
+        self,
+        confirm: bool = False,
+        **kwargs,
+    ) -> str:
+        """Clear all indexed fastCRW content for the agent.
+
+        Args:
+            confirm: Must be True to confirm the deletion
+            config: The configuration for the tool call
+
+        Returns:
+            str: Confirmation message
+        """
+        context = self.get_context()
+        agent_id = context.agent_id
+
+        if not agent_id:
+            raise ToolException("Error: Agent ID not available for clearing content.")
+        if not confirm:
+            raise ToolException(
+                "Error: You must set confirm=true to clear all indexed content."
+            )
+        logger.info(
+            f"crw_clear: Starting clear indexed content operation for agent {agent_id}"
+        )
+
+        try:
+            # Delete vector store data (using web_scraper storage format for compatibility)
+            vector_store_key = f"vector_store_{agent_id}"
+            await AgentToolData.delete(agent_id, "web_scraper", vector_store_key)
+
+            # Delete metadata
+            metadata_key = f"indexed_urls_{agent_id}"
+            await AgentToolData.delete(agent_id, "web_scraper", metadata_key)
+
+            logger.info(
+                f"crw_clear: Successfully cleared all indexed content for agent {agent_id}"
+            )
+            return "Successfully cleared all fastCRW indexed content. The vector store is now empty and ready for new content."
+
+        except Exception as e:
+            logger.error(
+                f"crw_clear: Error clearing indexed content for agent {agent_id}: {e}",
+                exc_info=True,
+            )
+            raise ToolException(f"Error clearing indexed content: {str(e)}")