From 1dbe397b344882a9d36906515fed47ee668b3e4d Mon Sep 17 00:00:00 2001 From: Ranuga <79456372+Programmer-RD-AI@users.noreply.github.com> Date: Tue, 22 Apr 2025 11:30:45 +0530 Subject: [PATCH 1/3] feat(tavily): add TavilyExtractorTool and TavilySearchTool with documentation --- .../tools/tavily_extractor_tool/README.md | 99 ++++++++ .../tavily_extractor_tool.py | 156 +++++++++++++ .../tools/tavily_search_tool/README.md | 115 ++++++++++ .../tavily_search_tool/tavily_search_tool.py | 213 ++++++++++++++++++ pyproject.toml | 4 +- 5 files changed, 586 insertions(+), 1 deletion(-) create mode 100644 crewai_tools/tools/tavily_extractor_tool/README.md create mode 100644 crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py create mode 100644 crewai_tools/tools/tavily_search_tool/README.md create mode 100644 crewai_tools/tools/tavily_search_tool/tavily_search_tool.py diff --git a/crewai_tools/tools/tavily_extractor_tool/README.md b/crewai_tools/tools/tavily_extractor_tool/README.md new file mode 100644 index 00000000..8e2794dd --- /dev/null +++ b/crewai_tools/tools/tavily_extractor_tool/README.md @@ -0,0 +1,99 @@ +# TavilyExtractorTool + +## Description + +The `TavilyExtractorTool` allows CrewAI agents to extract structured content from web pages using the Tavily API. It can process single URLs or lists of URLs and provides options for controlling the extraction depth and including images. 
+ +## Installation + +To use the `TavilyExtractorTool`, you need to install the `tavily-python` library: + +```shell +pip install 'crewai[tools]' tavily-python +``` + +You also need to set your Tavily API key as an environment variable: + +```bash +export TAVILY_API_KEY='your-tavily-api-key' +``` + +## Example + +Here's how to initialize and use the `TavilyExtractorTool` within a CrewAI agent: + +```python +import os +from crewai import Agent, Task, Crew +from crewai_tools import TavilyExtractorTool + +# Ensure TAVILY_API_KEY is set in your environment +# os.environ["TAVILY_API_KEY"] = "YOUR_API_KEY" + +# Initialize the tool +tavily_tool = TavilyExtractorTool() + +# Create an agent that uses the tool +extractor_agent = Agent( + role='Web Content Extractor', + goal='Extract key information from specified web pages', + backstory='You are an expert at extracting relevant content from websites using the Tavily API.', + tools=[tavily_tool], + verbose=True +) + +# Define a task for the agent +extract_task = Task( + description='Extract the main content from the URL https://example.com using basic extraction depth.', + expected_output='A JSON string containing the extracted content from the URL.', + agent=extractor_agent, + tool_inputs={ + 'urls': 'https://example.com', + 'extract_depth': 'basic' + } +) + +# Create and run the crew +crew = Crew( + agents=[extractor_agent], + tasks=[extract_task], + verbose=2 +) + +result = crew.kickoff() +print(result) + +# Example with multiple URLs and advanced extraction +extract_multiple_task = Task( + description='Extract content from https://example.com and https://anotherexample.org using advanced extraction.', + expected_output='A JSON string containing the extracted content from both URLs.', + agent=extractor_agent, + tool_inputs={ + 'urls': ['https://example.com', 'https://anotherexample.org'], + 'extract_depth': 'advanced', + 'include_images': True + } +) + +result_multiple = crew.kickoff(inputs={'urls': ['https://example.com', 
'https://anotherexample.org'], 'extract_depth': 'advanced', 'include_images': True}) # If task doesn't specify inputs directly +print(result_multiple) + +``` + +## Arguments + +The `TavilyExtractorTool` accepts the following arguments during initialization: + +- `api_key` (Optional[str]): Your Tavily API key. If not provided during initialization, it defaults to the `TAVILY_API_KEY` environment variable. +- `proxies` (Optional[dict[str, str]]): Proxies to use for the API requests. Defaults to `None`. + +When running the tool (`_run` or `_arun` methods, or via agent execution), it uses the `TavilyExtractorToolSchema` and expects the following inputs: + +- `urls` (Union[List[str], str]): **Required**. A single URL string or a list of URL strings to extract data from. +- `include_images` (Optional[bool]): Whether to include images in the extraction results. Defaults to `False`. +- `extract_depth` (Literal["basic", "advanced"]): The depth of extraction. Use `"basic"` for faster, surface-level extraction or `"advanced"` for more comprehensive extraction. Defaults to `"basic"`. +- `timeout` (int): The maximum time in seconds to wait for the extraction request to complete. Defaults to `60`. + +## Response Format + +The tool returns a JSON string representing the structured data extracted from the provided URL(s). The exact structure depends on the content of the pages and the `extract_depth` used. Refer to the [Tavily API documentation](https://docs.tavily.com/docs/tavily-api/python-sdk#extract) for details on the response structure. 
diff --git a/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py new file mode 100644 index 00000000..b6670326 --- /dev/null +++ b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py @@ -0,0 +1,156 @@ +from crewai.tools import BaseTool +from pydantic import BaseModel, Field +from typing import Optional, Type, Any, Union, List, Literal +from dotenv import load_dotenv +import os +import json + +load_dotenv() +try: + from tavily import TavilyClient, AsyncTavilyClient + + TAVILY_AVAILABLE = True +except ImportError: + TAVILY_AVAILABLE = False + TavilyClient = Any + AsyncTavilyClient = Any + + +class TavilyExtractorToolSchema(BaseModel): + """Input schema for TavilyExtractorTool.""" + + urls: Union[List[str], str] = Field( + ..., + description="The URL(s) to extract data from. Can be a single URL or a list of URLs.", + ) + include_images: Optional[bool] = Field( + default=False, + description="Whether to include images in the extraction.", + ) + extract_depth: Literal["basic", "advanced"] = Field( + default="basic", + description="The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.", + ) + timeout: int = Field( + default=60, + description="The timeout for the extraction request in seconds.", + ) + + +class TavilyExtractorTool(BaseTool): + """ + Tool that uses the Tavily API to extract content from web pages. + + Attributes: + client: Synchronous Tavily client. + async_client: Asynchronous Tavily client. + name: The name of the tool. + description: The description of the tool. + args_schema: The schema for the tool's arguments. + api_key: The Tavily API key. + proxies: Optional proxies for the API requests. + """ + + model_config = {} + client: TavilyClient = None + async_client: AsyncTavilyClient = None + name: str = "TavilyExtractorTool" + description: str = ( + "Extracts content from one or more web pages using the Tavily API. 
Returns structured data." + ) + args_schema: Type[BaseModel] = TavilyExtractorToolSchema + api_key: Optional[str] = Field( + default=os.getenv("TAVILY_API_KEY"), + description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", + ) + proxies: Optional[dict[str, str]] = Field( + default=None, + description="Optional proxies to use for the Tavily API requests.", + ) + + def __init__(self, **kwargs): + """ + Initializes the TavilyExtractorTool. + + Args: + **kwargs: Additional keyword arguments. + """ + super().__init__(**kwargs) + if TAVILY_AVAILABLE: + self.client = TavilyClient(api_key=self.api_key, proxies=self.proxies) + self.async_client = AsyncTavilyClient( + api_key=self.api_key, proxies=self.proxies + ) + else: + import click + + if click.confirm( + "The 'tavily-python' package is required to use the TavilyExtractorTool. " + "Would you like to install it?" + ): + import subprocess + + subprocess.run(["uv", "add", "tavily-python"], check=True) + else: + raise ImportError( + "The 'tavily-python' package is required to use the TavilyExtractorTool. " + "Please install it with: uv add tavily-python" + ) + + def _run( + self, + urls: Union[List[str], str], + include_images: bool = False, + extract_depth: Literal["basic", "advanced"] = "basic", + timeout: int = 60, + ) -> str: + """ + Synchronously extracts content from the given URL(s). + + Args: + urls: The URL(s) to extract data from. + include_images: Whether to include images in the extraction. + extract_depth: The depth of extraction ('basic' or 'advanced'). + timeout: The timeout for the request in seconds. + + Returns: + A JSON string containing the extracted data. 
+ """ + return json.dumps( + self.client.extract( + urls=urls, + extract_depth=extract_depth, + include_images=include_images, + timeout=timeout, + ), + indent=2, + ) + + async def _arun( + self, + urls: Union[List[str], str], + include_images: bool = False, + extract_depth: Literal["basic", "advanced"] = "basic", + timeout: int = 60, + ) -> str: + """ + Asynchronously extracts content from the given URL(s). + + Args: + urls: The URL(s) to extract data from. + include_images: Whether to include images in the extraction. + extract_depth: The depth of extraction ('basic' or 'advanced'). + timeout: The timeout for the request in seconds. + + Returns: + A JSON string containing the extracted data. + """ + return json.dumps( + self.async_client.extract( + urls=urls, + extract_depth=extract_depth, + include_images=include_images, + timeout=timeout, + ), + indent=2, + ) diff --git a/crewai_tools/tools/tavily_search_tool/README.md b/crewai_tools/tools/tavily_search_tool/README.md new file mode 100644 index 00000000..185b1988 --- /dev/null +++ b/crewai_tools/tools/tavily_search_tool/README.md @@ -0,0 +1,115 @@ +# Tavily Search Tool + +## Description + +The `TavilySearchTool` provides an interface to the Tavily Search API, enabling CrewAI agents to perform comprehensive web searches. It allows for specifying search depth, topics, time ranges, included/excluded domains, and whether to include direct answers, raw content, or images in the results. The tool returns the search results as a JSON string. 
+ +## Installation + +To use the `TavilySearchTool`, you need to install the `tavily-python` library: + +```shell +pip install 'crewai[tools]' tavily-python +``` + +## Environment Variables + +Ensure your Tavily API key is set as an environment variable: + +```bash +export TAVILY_API_KEY='your_tavily_api_key' +``` + +## Example + +Here's how to initialize and use the `TavilySearchTool` within a CrewAI agent: + +```python +import os +from crewai import Agent, Task, Crew +from crewai_tools import TavilySearchTool + +# Ensure the TAVILY_API_KEY environment variable is set +# os.environ["TAVILY_API_KEY"] = "YOUR_TAVILY_API_KEY" + +# Initialize the tool +tavily_tool = TavilySearchTool() + +# Create an agent that uses the tool +researcher = Agent( + role='Market Researcher', + goal='Find information about the latest AI trends', + backstory='An expert market researcher specializing in technology.', + tools=[tavily_tool], + verbose=True +) + +# Create a task for the agent +research_task = Task( + description='Search for the top 3 AI trends in 2024.', + expected_output='A JSON report summarizing the top 3 AI trends found.', + agent=researcher +) + +# Form the crew and kick it off +crew = Crew( + agents=[researcher], + tasks=[research_task], + verbose=2 +) + +result = crew.kickoff() +print(result) + +# Example of using specific parameters +detailed_search_result = tavily_tool.run( + query="What are the recent advancements in large language models?", + search_depth="advanced", + topic="general", + max_results=5, + include_answer=True +) +print(detailed_search_result) +``` + +## Arguments + +The `TavilySearchTool` accepts the following arguments during initialization or when calling the `run` method: + +- `query` (str): **Required**. The search query string. +- `search_depth` (Literal["basic", "advanced"], optional): The depth of the search. Defaults to `"basic"`. +- `topic` (Literal["general", "news", "finance"], optional): The topic to focus the search on. 
Defaults to `"general"`. +- `time_range` (Literal["day", "week", "month", "year"], optional): The time range for the search. Defaults to `None`. +- `days` (int, optional): The number of days to search back. Relevant if `time_range` is not set. Defaults to `7`. +- `max_results` (int, optional): The maximum number of search results to return. Defaults to `5`. +- `include_domains` (Sequence[str], optional): A list of domains to prioritize in the search. Defaults to `None`. +- `exclude_domains` (Sequence[str], optional): A list of domains to exclude from the search. Defaults to `None`. +- `include_answer` (Union[bool, Literal["basic", "advanced"]], optional): Whether to include a direct answer synthesized from the search results. Defaults to `False`. +- `include_raw_content` (bool, optional): Whether to include the raw HTML content of the searched pages. Defaults to `False`. +- `include_images` (bool, optional): Whether to include image results. Defaults to `False`. +- `timeout` (int, optional): The request timeout in seconds. Defaults to `60`. +- `api_key` (str, optional): Your Tavily API key. If not provided, it's read from the `TAVILY_API_KEY` environment variable. +- `proxies` (dict[str, str], optional): A dictionary of proxies to use for the API request. Defaults to `None`. + +## Custom Configuration + +You can configure the tool during initialization: + +```python +# Example: Initialize with a default max_results and specific API key +custom_tavily_tool = TavilySearchTool( + api_key="YOUR_SPECIFIC_TAVILY_KEY", + # Search defaults are passed directly as keyword arguments; + # each one is a field on TavilySearchTool. + max_results=10, + search_depth='advanced' +) + +# The agent will use these defaults unless overridden in the task input +agent_with_custom_tool = Agent( + # ... agent configuration ... + tools=[custom_tavily_tool] +) +``` + +Note: Keyword arguments passed at initialization set the default values for the search parameters (for example `max_results` and `search_depth`). 
These defaults can be overridden when the tool is executed if the specific parameters are provided in the agent's action input. diff --git a/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py new file mode 100644 index 00000000..3b9e437e --- /dev/null +++ b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py @@ -0,0 +1,213 @@ +from crewai.tools import BaseTool +from pydantic import BaseModel, Field +from typing import Optional, Type, Any, Union, Literal, Sequence +from dotenv import load_dotenv +import os +import json + +load_dotenv() +try: + from tavily import TavilyClient, AsyncTavilyClient + + TAVILY_AVAILABLE = True +except ImportError: + TAVILY_AVAILABLE = False + TavilyClient = Any + AsyncTavilyClient = Any + + +class TavilySearchToolSchema(BaseModel): + """Input schema for TavilySearchTool.""" + + query: str = Field(..., description="The search query string.") + search_depth: Literal["basic", "advanced"] = Field( + "basic", description="The depth of the search." + ) + topic: Literal["general", "news", "finance"] = Field( + "general", description="The topic to focus the search on." + ) + time_range: Optional[Literal["day", "week", "month", "year"]] = Field( + None, description="The time range for the search." + ) + days: int = Field(7, description="The number of days to search back.") + max_results: int = Field(5, description="The maximum number of results to return.") + include_domains: Optional[Sequence[str]] = Field( + None, description="A list of domains to include in the search." + ) + exclude_domains: Optional[Sequence[str]] = Field( + None, description="A list of domains to exclude from the search." + ) + include_answer: Union[bool, Literal["basic", "advanced"]] = Field( + False, description="Whether to include a direct answer to the query." + ) + include_raw_content: bool = Field( + False, description="Whether to include the raw content of the search results." 
+ ) + include_images: bool = Field( + False, description="Whether to include images in the search results." + ) + timeout: int = Field( + 60, description="The timeout for the search request in seconds." + ) + + +class TavilySearchTool(BaseTool): + """ + Tool that uses the Tavily Search API to perform web searches. + + Attributes: + client: An instance of TavilyClient. + async_client: An instance of AsyncTavilyClient. + name: The name of the tool. + description: A description of the tool's purpose. + args_schema: The schema for the tool's arguments. + api_key: The Tavily API key. + proxies: Optional proxies for the API requests. + """ + + model_config = {} + client: TavilyClient = None + async_client: AsyncTavilyClient = None + name: str = "Tavily Search" + description: str = ( + "A tool that performs web searches using the Tavily Search API. " + "It returns a JSON object containing the search results." + ) + args_schema: Type[BaseModel] = TavilySearchToolSchema + api_key: Optional[str] = Field( + default=os.getenv("TAVILY_API_KEY"), + description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", + ) + proxies: Optional[dict[str, str]] = Field( + default=None, + description="Optional proxies to use for the Tavily API requests.", + ) + + def __init__(self, **kwargs): + super().__init__(**kwargs) + if TAVILY_AVAILABLE: + self.client = TavilyClient(api_key=self.api_key, proxies=self.proxies) + self.async_client = AsyncTavilyClient( + api_key=self.api_key, proxies=self.proxies + ) + else: + import click + + if click.confirm( + "The 'tavily-python' package is required to use the TavilyExtractorTool. " + "Would you like to install it?" + ): + import subprocess + + subprocess.run(["uv", "add", "tavily-python"], check=True) + else: + raise ImportError( + "The 'tavily-python' package is required to use the TavilyExtractorTool. 
" + "Please install it with: uv add tavily-python" + ) + + def _run( + self, + query: str, + search_depth: Literal["basic", "advanced"] = "basic", + topic: Literal["general", "news", "finance"] = "general", + time_range: Optional[Literal["day", "week", "month", "year"]] = None, + days: int = 7, + max_results: int = 5, + include_domains: Optional[Sequence[str]] = None, + exclude_domains: Optional[Sequence[str]] = None, + include_answer: Union[bool, Literal["basic", "advanced"]] = False, + include_raw_content: bool = False, + include_images: bool = False, + timeout: int = 60, + ) -> str: + """ + Synchronously performs a search using the Tavily API. + + Args: + query: The search query string. + search_depth: The depth of the search ('basic' or 'advanced'). + topic: The topic to focus the search on ('general', 'news', 'finance'). + time_range: The time range for the search ('day', 'week', 'month', 'year'). + days: The number of days to search back. + max_results: The maximum number of results to return. + include_domains: A list of domains to include in the search. + exclude_domains: A list of domains to exclude from the search. + include_answer: Whether to include a direct answer to the query. + include_raw_content: Whether to include the raw content of the search results. + include_images: Whether to include images in the search results. + timeout: The timeout for the search request in seconds. + + Returns: + A JSON string containing the search results. 
+ """ + return json.dumps( + self.client.search( + query=query, + search_depth=search_depth, + topic=topic, + time_range=time_range, + days=days, + max_results=max_results, + include_domains=include_domains, + exclude_domains=exclude_domains, + include_answer=include_answer, + include_raw_content=include_raw_content, + include_images=include_images, + timeout=timeout, + ), + indent=2, + ) + + async def _arun( + self, + query: str, + search_depth: Literal["basic", "advanced"] = "basic", + topic: Literal["general", "news", "finance"] = "general", + time_range: Optional[Literal["day", "week", "month", "year"]] = None, + days: int = 7, + max_results: int = 5, + include_domains: Optional[Sequence[str]] = None, + exclude_domains: Optional[Sequence[str]] = None, + include_answer: Union[bool, Literal["basic", "advanced"]] = False, + include_raw_content: bool = False, + include_images: bool = False, + timeout: int = 60, + ) -> str: + """ + Asynchronously performs a search using the Tavily API. + + Args: + query: The search query string. + search_depth: The depth of the search ('basic' or 'advanced'). + topic: The topic to focus the search on ('general', 'news', 'finance'). + time_range: The time range for the search ('day', 'week', 'month', 'year'). + days: The number of days to search back. + max_results: The maximum number of results to return. + include_domains: A list of domains to include in the search. + exclude_domains: A list of domains to exclude from the search. + include_answer: Whether to include a direct answer to the query. + include_raw_content: Whether to include the raw content of the search results. + include_images: Whether to include images in the search results. + timeout: The timeout for the search request in seconds. + + Returns: + A JSON string containing the search results. 
+ """ + return json.dumps( + self.async_client.search( + query=query, + search_depth=search_depth, + topic=topic, + time_range=time_range, + days=days, + max_results=max_results, + include_domains=include_domains, + exclude_domains=exclude_domains, + include_answer=include_answer, + include_raw_content=include_raw_content, + include_images=include_images, + timeout=timeout, + ), + indent=2, + ) diff --git a/pyproject.toml b/pyproject.toml index f7343e01..05d2f4f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,9 @@ scrapegraph-py = [ linkup-sdk = [ "linkup-sdk>=0.2.2", ] - +tavily-python = [ + "tavily-python>=0.5.4", +] hyperbrowser = [ "hyperbrowser>=0.18.0", ] From dedbd8c67dc16afadd2d155e0b21d36b7b92aa30 Mon Sep 17 00:00:00 2001 From: Programmer-RD-AI <79456372+Programmer-RD-AI@users.noreply.github.com> Date: Wed, 14 May 2025 20:20:33 +0530 Subject: [PATCH 2/3] feat(tavily): enhance TavilyExtractorTool and TavilySearchTool with additional parameters and improved error handling --- .../tavily_extractor_tool.py | 106 ++++--- .../tavily_search_tool/tavily_search_tool.py | 272 ++++++++++-------- 2 files changed, 211 insertions(+), 167 deletions(-) diff --git a/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py index b6670326..88ab3ee0 100644 --- a/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py +++ b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py @@ -23,18 +23,6 @@ class TavilyExtractorToolSchema(BaseModel): ..., description="The URL(s) to extract data from. Can be a single URL or a list of URLs.", ) - include_images: Optional[bool] = Field( - default=False, - description="Whether to include images in the extraction.", - ) - extract_depth: Literal["basic", "advanced"] = Field( - default="basic", - description="The depth of extraction. 
'basic' for basic extraction, 'advanced' for advanced extraction.", - ) - timeout: int = Field( - default=60, - description="The timeout for the extraction request in seconds.", - ) class TavilyExtractorTool(BaseTool): @@ -49,26 +37,41 @@ class TavilyExtractorTool(BaseTool): args_schema: The schema for the tool's arguments. api_key: The Tavily API key. proxies: Optional proxies for the API requests. + include_images: Whether to include images in the extraction. + extract_depth: The depth of extraction. + timeout: The timeout for the extraction request in seconds. """ - model_config = {} - client: TavilyClient = None - async_client: AsyncTavilyClient = None + model_config = {"arbitrary_types_allowed": True} + client: Optional[TavilyClient] = None + async_client: Optional[AsyncTavilyClient] = None name: str = "TavilyExtractorTool" description: str = ( "Extracts content from one or more web pages using the Tavily API. Returns structured data." ) args_schema: Type[BaseModel] = TavilyExtractorToolSchema api_key: Optional[str] = Field( - default=os.getenv("TAVILY_API_KEY"), + default_factory=lambda: os.getenv("TAVILY_API_KEY"), description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", ) proxies: Optional[dict[str, str]] = Field( default=None, description="Optional proxies to use for the Tavily API requests.", ) + include_images: bool = Field( + default=False, + description="Whether to include images in the extraction.", + ) + extract_depth: Literal["basic", "advanced"] = Field( + default="basic", + description="The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.", + ) + timeout: int = Field( + default=60, + description="The timeout for the extraction request in seconds.", + ) - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """ Initializes the TavilyExtractorTool. 
@@ -82,46 +85,58 @@ def __init__(self, **kwargs): api_key=self.api_key, proxies=self.proxies ) else: - import click + try: + import click + import subprocess + except ImportError: + raise ImportError( + "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. " + "Please install 'tavily-python' manually (e.g., 'pip install tavily-python') and ensure 'click' and 'subprocess' are available." + ) if click.confirm( - "The 'tavily-python' package is required to use the TavilyExtractorTool. " - "Would you like to install it?" + "You are missing the 'tavily-python' package, which is required for TavilyExtractorTool. Would you like to install it?" ): - import subprocess - - subprocess.run(["uv", "add", "tavily-python"], check=True) + try: + subprocess.run(["pip", "install", "tavily-python"], check=True) + raise ImportError( + "'tavily-python' has been installed. Please restart your Python application to use the TavilyExtractorTool." + ) + except subprocess.CalledProcessError as e: + raise ImportError( + f"Attempted to install 'tavily-python' but failed: {e}. " + f"Please install it manually to use the TavilyExtractorTool." + ) else: raise ImportError( "The 'tavily-python' package is required to use the TavilyExtractorTool. " - "Please install it with: uv add tavily-python" + "Please install it with: pip install tavily-python" ) def _run( self, urls: Union[List[str], str], - include_images: bool = False, - extract_depth: Literal["basic", "advanced"] = "basic", - timeout: int = 60, ) -> str: """ Synchronously extracts content from the given URL(s). Args: urls: The URL(s) to extract data from. - include_images: Whether to include images in the extraction. - extract_depth: The depth of extraction ('basic' or 'advanced'). - timeout: The timeout for the request in seconds. Returns: A JSON string containing the extracted data. 
""" + if not self.client: + raise ValueError( + "Tavily client is not initialized. Ensure 'tavily-python' is installed and API key is set." + ) + return json.dumps( self.client.extract( urls=urls, - extract_depth=extract_depth, - include_images=include_images, - timeout=timeout, + extract_depth=self.extract_depth, + include_images=self.include_images, + timeout=self.timeout, ), indent=2, ) @@ -129,28 +144,25 @@ def _run( async def _arun( self, urls: Union[List[str], str], - include_images: bool = False, - extract_depth: Literal["basic", "advanced"] = "basic", - timeout: int = 60, ) -> str: """ Asynchronously extracts content from the given URL(s). Args: urls: The URL(s) to extract data from. - include_images: Whether to include images in the extraction. - extract_depth: The depth of extraction ('basic' or 'advanced'). - timeout: The timeout for the request in seconds. Returns: A JSON string containing the extracted data. """ - return json.dumps( - self.async_client.extract( - urls=urls, - extract_depth=extract_depth, - include_images=include_images, - timeout=timeout, - ), - indent=2, + if not self.async_client: + raise ValueError( + "Tavily async client is not initialized. Ensure 'tavily-python' is installed and API key is set." 
+ ) + + results = await self.async_client.extract( + urls=urls, + extract_depth=self.extract_depth, + include_images=self.include_images, + timeout=self.timeout, ) + return json.dumps(results, indent=2) diff --git a/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py index 3b9e437e..76a1ae00 100644 --- a/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py +++ b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py @@ -20,35 +20,6 @@ class TavilySearchToolSchema(BaseModel): """Input schema for TavilySearchTool.""" query: str = Field(..., description="The search query string.") - search_depth: Literal["basic", "advanced"] = Field( - "basic", description="The depth of the search." - ) - topic: Literal["general", "news", "finance"] = Field( - "general", description="The topic to focus the search on." - ) - time_range: Optional[Literal["day", "week", "month", "year"]] = Field( - None, description="The time range for the search." - ) - days: int = Field(7, description="The number of days to search back.") - max_results: int = Field(5, description="The maximum number of results to return.") - include_domains: Optional[Sequence[str]] = Field( - None, description="A list of domains to include in the search." - ) - exclude_domains: Optional[Sequence[str]] = Field( - None, description="A list of domains to exclude from the search." - ) - include_answer: Union[bool, Literal["basic", "advanced"]] = Field( - False, description="Whether to include a direct answer to the query." - ) - include_raw_content: bool = Field( - False, description="Whether to include the raw content of the search results." - ) - include_images: bool = Field( - False, description="Whether to include images in the search results." - ) - timeout: int = Field( - 60, description="The timeout for the search request in seconds." 
- ) class TavilySearchTool(BaseTool): @@ -63,11 +34,23 @@ class TavilySearchTool(BaseTool): args_schema: The schema for the tool's arguments. api_key: The Tavily API key. proxies: Optional proxies for the API requests. + search_depth: The depth of the search. + topic: The topic to focus the search on. + time_range: The time range for the search. + days: The number of days to search back. + max_results: The maximum number of results to return. + include_domains: A list of domains to include in the search. + exclude_domains: A list of domains to exclude from the search. + include_answer: Whether to include a direct answer to the query. + include_raw_content: Whether to include the raw content of the search results. + include_images: Whether to include images in the search results. + timeout: The timeout for the search request in seconds. + max_content_length_per_result: Maximum length for the 'content' of each search result. """ - model_config = {} - client: TavilyClient = None - async_client: AsyncTavilyClient = None + model_config = {"arbitrary_types_allowed": True} + client: Optional[TavilyClient] = None + async_client: Optional[AsyncTavilyClient] = None name: str = "Tavily Search" description: str = ( "A tool that performs web searches using the Tavily Search API. " @@ -75,15 +58,51 @@ class TavilySearchTool(BaseTool): ) args_schema: Type[BaseModel] = TavilySearchToolSchema api_key: Optional[str] = Field( - default=os.getenv("TAVILY_API_KEY"), + default_factory=lambda: os.getenv("TAVILY_API_KEY"), description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", ) proxies: Optional[dict[str, str]] = Field( default=None, description="Optional proxies to use for the Tavily API requests.", ) + search_depth: Literal["basic", "advanced"] = Field( + default="basic", description="The depth of the search." 
+ ) + topic: Literal["general", "news", "finance"] = Field( + default="general", description="The topic to focus the search on." + ) + time_range: Optional[Literal["day", "week", "month", "year"]] = Field( + default=None, description="The time range for the search." + ) + days: int = Field(default=7, description="The number of days to search back.") + max_results: int = Field( + default=5, description="The maximum number of results to return." + ) + include_domains: Optional[Sequence[str]] = Field( + default=None, description="A list of domains to include in the search." + ) + exclude_domains: Optional[Sequence[str]] = Field( + default=None, description="A list of domains to exclude from the search." + ) + include_answer: Union[bool, Literal["basic", "advanced"]] = Field( + default=False, description="Whether to include a direct answer to the query." + ) + include_raw_content: bool = Field( + default=False, + description="Whether to include the raw content of the search results.", + ) + include_images: bool = Field( + default=False, description="Whether to include images in the search results." + ) + timeout: int = Field( + default=60, description="The timeout for the search request in seconds." + ) + max_content_length_per_result: int = Field( + default=1000, + description="Maximum length for the 'content' of each search result to avoid context window issues.", + ) - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): super().__init__(**kwargs) if TAVILY_AVAILABLE: self.client = TavilyClient(api_key=self.api_key, proxies=self.proxies) @@ -91,123 +110,136 @@ def __init__(self, **kwargs): api_key=self.api_key, proxies=self.proxies ) else: - import click + try: + import click + import subprocess + except ImportError: + raise ImportError( + "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. 
" + "Please install 'tavily-python' manually (e.g., 'pip install tavily-python') and ensure 'click' and 'subprocess' are available." + ) if click.confirm( - "The 'tavily-python' package is required to use the TavilyExtractorTool. " - "Would you like to install it?" + "You are missing the 'tavily-python' package, which is required for TavilySearchTool. Would you like to install it?" ): - import subprocess - - subprocess.run(["uv", "add", "tavily-python"], check=True) + try: + subprocess.run(["pip", "install", "tavily-python"], check=True) + raise ImportError( + "'tavily-python' has been installed. Please restart your Python application to use the TavilySearchTool." + ) + except subprocess.CalledProcessError as e: + raise ImportError( + f"Attempted to install 'tavily-python' but failed: {e}. " + f"Please install it manually to use the TavilySearchTool." + ) else: raise ImportError( - "The 'tavily-python' package is required to use the TavilyExtractorTool. " - "Please install it with: uv add tavily-python" + "The 'tavily-python' package is required to use the TavilySearchTool. " + "Please install it with: pip install tavily-python" ) def _run( self, query: str, - search_depth: Literal["basic", "advanced"] = "basic", - topic: Literal["general", "news", "finance"] = "general", - time_range: Optional[Literal["day", "week", "month", "year"]] = None, - days: int = 7, - max_results: int = 5, - include_domains: Optional[Sequence[str]] = None, - exclude_domains: Optional[Sequence[str]] = None, - include_answer: Union[bool, Literal["basic", "advanced"]] = False, - include_raw_content: bool = False, - include_images: bool = False, - timeout: int = 60, ) -> str: """ Synchronously performs a search using the Tavily API. + Content of each result is truncated to `max_content_length_per_result`. Args: query: The search query string. - search_depth: The depth of the search ('basic' or 'advanced'). - topic: The topic to focus the search on ('general', 'news', 'finance'). 
- time_range: The time range for the search ('day', 'week', 'month', 'year'). - days: The number of days to search back. - max_results: The maximum number of results to return. - include_domains: A list of domains to include in the search. - exclude_domains: A list of domains to exclude from the search. - include_answer: Whether to include a direct answer to the query. - include_raw_content: Whether to include the raw content of the search results. - include_images: Whether to include images in the search results. - timeout: The timeout for the search request in seconds. Returns: - A JSON string containing the search results. + A JSON string containing the search results with truncated content. """ - return json.dumps( - self.client.search( - query=query, - search_depth=search_depth, - topic=topic, - time_range=time_range, - days=days, - max_results=max_results, - include_domains=include_domains, - exclude_domains=exclude_domains, - include_answer=include_answer, - include_raw_content=include_raw_content, - include_images=include_images, - timeout=timeout, - ), - indent=2, + if not self.client: + raise ValueError( + "Tavily client is not initialized. Ensure 'tavily-python' is installed and API key is set." 
+ ) + + raw_results = self.client.search( + query=query, + search_depth=self.search_depth, + topic=self.topic, + time_range=self.time_range, + days=self.days, + max_results=self.max_results, + include_domains=self.include_domains, + exclude_domains=self.exclude_domains, + include_answer=self.include_answer, + include_raw_content=self.include_raw_content, + include_images=self.include_images, + timeout=self.timeout, ) + if ( + isinstance(raw_results, dict) + and "results" in raw_results + and isinstance(raw_results["results"], list) + ): + for item in raw_results["results"]: + if ( + isinstance(item, dict) + and "content" in item + and isinstance(item["content"], str) + ): + if len(item["content"]) > self.max_content_length_per_result: + item["content"] = ( + item["content"][: self.max_content_length_per_result] + + "..." + ) + + return json.dumps(raw_results, indent=2) + async def _arun( self, query: str, - search_depth: Literal["basic", "advanced"] = "basic", - topic: Literal["general", "news", "finance"] = "general", - time_range: Optional[Literal["day", "week", "month", "year"]] = None, - days: int = 7, - max_results: int = 5, - include_domains: Optional[Sequence[str]] = None, - exclude_domains: Optional[Sequence[str]] = None, - include_answer: Union[bool, Literal["basic", "advanced"]] = False, - include_raw_content: bool = False, - include_images: bool = False, - timeout: int = 60, ) -> str: """ Asynchronously performs a search using the Tavily API. + Content of each result is truncated to `max_content_length_per_result`. Args: query: The search query string. - search_depth: The depth of the search ('basic' or 'advanced'). - topic: The topic to focus the search on ('general', 'news', 'finance'). - time_range: The time range for the search ('day', 'week', 'month', 'year'). - days: The number of days to search back. - max_results: The maximum number of results to return. - include_domains: A list of domains to include in the search. 
- exclude_domains: A list of domains to exclude from the search. - include_answer: Whether to include a direct answer to the query. - include_raw_content: Whether to include the raw content of the search results. - include_images: Whether to include images in the search results. - timeout: The timeout for the search request in seconds. Returns: - A JSON string containing the search results. + A JSON string containing the search results with truncated content. """ - return json.dumps( - self.async_client.search( - query=query, - search_depth=search_depth, - topic=topic, - time_range=time_range, - days=days, - max_results=max_results, - include_domains=include_domains, - exclude_domains=exclude_domains, - include_answer=include_answer, - include_raw_content=include_raw_content, - include_images=include_images, - timeout=timeout, - ), - indent=2, + if not self.async_client: + raise ValueError( + "Tavily async client is not initialized. Ensure 'tavily-python' is installed and API key is set." + ) + + raw_results = await self.async_client.search( + query=query, + search_depth=self.search_depth, + topic=self.topic, + time_range=self.time_range, + days=self.days, + max_results=self.max_results, + include_domains=self.include_domains, + exclude_domains=self.exclude_domains, + include_answer=self.include_answer, + include_raw_content=self.include_raw_content, + include_images=self.include_images, + timeout=self.timeout, ) + + if ( + isinstance(raw_results, dict) + and "results" in raw_results + and isinstance(raw_results["results"], list) + ): + for item in raw_results["results"]: + if ( + isinstance(item, dict) + and "content" in item + and isinstance(item["content"], str) + ): + if len(item["content"]) > self.max_content_length_per_result: + item["content"] = ( + item["content"][: self.max_content_length_per_result] + + "..." 
+ ) + + return json.dumps(raw_results, indent=2) From d3f18e7aa674c2658daaa44acfe7998866fac755 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Wed, 14 May 2025 09:18:42 -0700 Subject: [PATCH 3/3] fix(tavily): update installation instructions for 'tavily-python' package in TavilyExtractorTool and TavilySearchTool --- .../tools/tavily_extractor_tool/tavily_extractor_tool.py | 8 +++----- .../tools/tavily_search_tool/tavily_search_tool.py | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py index 88ab3ee0..0320ab10 100644 --- a/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py +++ b/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py @@ -46,9 +46,7 @@ class TavilyExtractorTool(BaseTool): client: Optional[TavilyClient] = None async_client: Optional[AsyncTavilyClient] = None name: str = "TavilyExtractorTool" - description: str = ( - "Extracts content from one or more web pages using the Tavily API. Returns structured data." - ) + description: str = "Extracts content from one or more web pages using the Tavily API. Returns structured data." args_schema: Type[BaseModel] = TavilyExtractorToolSchema api_key: Optional[str] = Field( default_factory=lambda: os.getenv("TAVILY_API_KEY"), @@ -91,7 +89,7 @@ def __init__(self, **kwargs: Any): except ImportError: raise ImportError( "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. " - "Please install 'tavily-python' manually (e.g., 'pip install tavily-python') and ensure 'click' and 'subprocess' are available." + "Please install 'tavily-python' manually (e.g., 'uv add tavily-python') and ensure 'click' and 'subprocess' are available." 
) if click.confirm( @@ -110,7 +108,7 @@ def __init__(self, **kwargs: Any): else: raise ImportError( "The 'tavily-python' package is required to use the TavilyExtractorTool. " - "Please install it with: pip install tavily-python" + "Please install it with: uv add tavily-python" ) def _run( diff --git a/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py index 76a1ae00..1179be90 100644 --- a/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py +++ b/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py @@ -123,7 +123,7 @@ def __init__(self, **kwargs: Any): "You are missing the 'tavily-python' package, which is required for TavilySearchTool. Would you like to install it?" ): try: - subprocess.run(["pip", "install", "tavily-python"], check=True) + subprocess.run(["uv", "add", "tavily-python"], check=True) raise ImportError( "'tavily-python' has been installed. Please restart your Python application to use the TavilySearchTool." ) @@ -135,7 +135,7 @@ def __init__(self, **kwargs: Any): else: raise ImportError( "The 'tavily-python' package is required to use the TavilySearchTool. " - "Please install it with: pip install tavily-python" + "Please install it with: uv add tavily-python" ) def _run(