diff --git a/app/agent/manus.py b/app/agent/manus.py
index a83e76a1e..07110fd07 100644
--- a/app/agent/manus.py
+++ b/app/agent/manus.py
@@ -82,8 +82,45 @@ async def think(self) -> bool:
 
         # Modify the next_step_prompt temporarily
         original_prompt = self.next_step_prompt
+        # Collect prompt fragments (seeded with the whole prompt) and join once below
+        current_next_step_prompt = [self.next_step_prompt]
+
         if browser_state and not browser_state.get("error"):
-            self.next_step_prompt += f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"
+            current_next_step_prompt.append(
+                f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"
+            )
+
+        # Check if the last tool call was ANPTool or if ANPTool is intended to be called
+        # This is a simplified check, more sophisticated logic might be needed
+        anp_tool_was_called = False
+        if self.memory.messages:
+            last_message = self.memory.messages[-1]
+            if (
+                last_message.role == "tool" and last_message.name == ANPTool().name
+            ):  # NOTE(review): constructs an ANPTool on every step just to read its name
+                anp_tool_was_called = True
+
+        # Potential: Check if LLM intends to call ANPTool in the current thought process
+        # This would require a preliminary call to LLM or parsing its initial thought if available
+        # For now, we'll rely on the history or a more explicit trigger.
+
+        if anp_tool_was_called:  # Or some other condition indicating ANPTool context
+            anp_detailed_description = """
+
+ANPTool Detailed Workflow:
+1. **Agent/Service Discovery**: If you don't know the specific service URL, you can first use ANPTool to query an agent directory (e.g., an initial URL provided by the user or planner, like 'https://agent-search.ai/ad.json').
+2. **Parse Agent Description (AD)**: The response from the discovery service or a direct agent URL will be an Agent Description (usually in JSON-LD or a similar format). Analyze it to understand the agent's capabilities, available API interfaces, and their descriptions.
+3. **Discover API Endpoint URL**: Extract specific API endpoint URLs from the AD (look for fields like `serviceEndpoint`, `url`, `@id`).
+4. **Fetch API Specification (OpenAPI/YAML)**: If its URL is provided in the AD, use ANPTool again to fetch the OpenAPI/YAML specification for the selected API endpoint. This specification details how to call the API (methods, parameters, request/response formats).
+5. **Execute Service Call**: Construct the correct request (method, headers, parameters, body) based on the API specification and use ANPTool to call the actual service API endpoint.
+6. **Process Response**: ANPTool will return the response from the external agent/service. This could be JSON, YAML (as text), or other text formats.
+7. **Iterative Process**: Complex tasks may involve multiple calls to ANPTool: discovery, fetching specifications, and then multiple interactions with the target service API.
+
+**Key Rule for ANPTool**: Any URL obtained via ANPTool (e.g., agent description URL, API specification URL, or API endpoint URL) MUST subsequently be accessed using ANPTool itself. Do not attempt to use BrowserUseTool or other methods for these URLs.
+"""
+            current_next_step_prompt.append(anp_detailed_description)
+
+        self.next_step_prompt = "".join(current_next_step_prompt)
 
         # Call parent implementation
         result = await super().think()
diff --git a/app/prompt/manus.py b/app/prompt/manus.py
index 3e1d0c9e5..d572777c1 100644
--- a/app/prompt/manus.py
+++ b/app/prompt/manus.py
@@ -10,7 +10,7 @@
 
 Terminate: End the current interaction when the task is complete or when you need additional information from the user. Use this tool to signal that you've finished addressing the user's request or need clarification before proceeding further.
 
-ANPTool: Interact with other agents using Agent Network Protocol (ANP).ANP provides services such as hotel and scenic spot ticket query and booking.
+ANPTool: Interact with other specialized agents and services using the Agent Network Protocol (ANP). This is crucial for tasks like hotel booking, flight reservations, ticket purchases, accessing domain-specific knowledge, etc.
 
 Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.
 
diff --git a/app/tool/anp_tool.py b/app/tool/anp_tool.py
index 73e808d5a..09c8434af 100644
--- a/app/tool/anp_tool.py
+++ b/app/tool/anp_tool.py
@@ -1,24 +1,41 @@
 import asyncio
 import json
-import yaml
-import aiohttp
 from pathlib import Path
-from typing import Dict, Any, Optional
+from typing import Any, Dict, Optional
 
+import aiohttp
+import yaml
+from agent_connect.authentication import DIDWbaAuthHeader
+
+from app.config import PROJECT_ROOT, config
 from app.logger import logger
 from app.tool.base import BaseTool
-from agent_connect.authentication import DIDWbaAuthHeader
-from app.config import config, PROJECT_ROOT
 
 
 class ANPTool(BaseTool):
     name: str = "anp_tool"
-    description: str = """Use Agent Network Protocol (ANP) to interact with other agents.
-1. For the first use, please enter the URL: https://agent-search.ai/ad.json, which is an agent search service. You can use the interfaces inside to query agents that can provide hotels, tickets, and attractions.
-2. After receiving the agent's description document, you can crawl data based on the data link URL in the agent's description document.
-3. During the process, you can call the API to complete the service until you think the task is completed.
-4. Note, any URL obtained using ANPTool must be called using ANPTool, do not call it directly yourself.
+    description: str = """Use Agent Network Protocol (ANP) to interact with other agents through a sophisticated multi-step process.
+
+ANPTool Complex Workflow:
+1. **Agent Discovery Phase**: When first used, access https://agent-search.ai/ad.json, which is an agent search service to query a directory of agents providing services like hotels, tickets, attractions, etc.
+2. **Agent Description Parsing**: After obtaining an agent's AD (Agent Description) file, parse the various interfaces and capability descriptions within it.
+3. **Interface URL Discovery**: Extract specific API interface URLs from the AD file. These URLs point to concrete service interfaces (e.g., hotel booking, attraction query).
+4. **API Specification Retrieval**: Obtain the OpenAPI/YAML specification file via the interface URL to understand how to call the specific API.
+5. **Service Call Execution**: Construct the correct request parameters based on the API specification and call the specific service interface to complete the task.
+6. **Multi-turn Interaction Management**: Supports complex multi-step interaction flows until the task is completed.
+
+Special Considerations:
+- All URLs obtained via ANPTool must continue to be called using ANPTool; do not access them directly.
+- Supports DID (Decentralized Identity) authentication to ensure secure inter-agent communication.
+- Automatically handles authentication failure retry mechanisms.
+- Supports automatic parsing of multiple content formats (JSON, YAML, text).
+- Possesses comprehensive error handling and state management capabilities.
+
+This tool enables true interoperability within an agent network, serving as a key component for agent collaboration in AGI systems.
 """
+    # NOTE(review): DIDManager is neither imported nor defined in this module, so using it
+    # in the annotation would raise NameError at import time; typed as Any until it is imported.
+    did_manager: Optional[Any] = None
     parameters: dict = {
         "type": "object",
         "properties": {