diff --git a/agent_memory_server/cli.py b/agent_memory_server/cli.py index 5be7ba4..648de72 100644 --- a/agent_memory_server/cli.py +++ b/agent_memory_server/cli.py @@ -319,8 +319,8 @@ def api(port: int, host: str, reload: bool, no_worker: bool, task_backend: str): @click.option( "--mode", default="stdio", - help="Run the MCP server in SSE or stdio mode", - type=click.Choice(["stdio", "sse"]), + help="Run the MCP server in SSE, streamable-http, or stdio mode", + type=click.Choice(["stdio", "sse", "streamable-http"]), ) @click.option( "--task-backend", @@ -362,6 +362,9 @@ async def setup_and_run(): if mode == "sse": logger.info(f"Starting MCP server on port {port}\n") await mcp_app.run_sse_async() + elif mode == "streamable-http": + logger.info(f"Starting MCP server (streamable HTTP) on port {port}\n") + await mcp_app.run_streamable_http_async() elif mode == "stdio": await mcp_app.run_stdio_async() else: diff --git a/agent_memory_server/config.py b/agent_memory_server/config.py index 002eae4..edd65f3 100644 --- a/agent_memory_server/config.py +++ b/agent_memory_server/config.py @@ -358,6 +358,7 @@ class Settings(BaseSettings): "gpt-5-mini" # Faster, smaller model for quick tasks like query optimization ) port: int = 8000 + mcp_host: str = "0.0.0.0" mcp_port: int = 9000 # Memory vector database factory configuration diff --git a/agent_memory_server/mcp.py b/agent_memory_server/mcp.py index 1fce971..610de76 100644 --- a/agent_memory_server/mcp.py +++ b/agent_memory_server/mcp.py @@ -5,11 +5,11 @@ import ulid from mcp.server.fastmcp import FastMCP as _FastMCPBase +from agent_memory_server import working_memory as working_memory_core from agent_memory_server.api import ( create_long_term_memory as core_create_long_term_memory, delete_long_term_memory as core_delete_long_term_memory, get_long_term_memory as core_get_long_term_memory, - get_working_memory as core_get_working_memory, memory_prompt as core_memory_prompt, put_working_memory_core as core_put_working_memory, search_long_term_memory as core_search_long_term_memory, @@ -88,11 +88,12 @@ def sse_app(self): from mcp.server.sse import SseServerTransport from starlette.applications import Starlette from starlette.requests import Request + from starlette.responses import Response from starlette.routing import Mount, Route sse = SseServerTransport(self.settings.message_path) - async def handle_sse(request: Request) -> None: + async def handle_sse(request: Request) -> Response: # Store the request in the FastMCP instance so call_tool can access it self._current_request = request @@ -110,6 +111,7 @@ async def handle_sse(request: Request) -> None: finally: # Clean up request reference self._current_request = None + return Response() return Starlette( debug=self.settings.debug, @@ -175,6 +177,15 @@ async def run_sse_async(self): uvicorn.Config(app, host="0.0.0.0", port=int(self.settings.port)) ).serve() + async def run_streamable_http_async(self): + """Start streamable HTTP MCP server.""" + from agent_memory_server.utils.redis import get_redis_conn + + await get_redis_conn() + # Enable stateless mode only for streamable-http transport + self.settings.stateless_http = True + return await super().run_streamable_http_async() + async def run_stdio_async(self): """Start STDIO MCP server.""" from agent_memory_server.utils.redis import get_redis_conn @@ -191,6 +202,7 @@ async def run_stdio_async(self): mcp_app = FastMCP( "Redis Agent Memory Server", + host=settings.mcp_host, port=settings.mcp_port, instructions=INSTRUCTIONS, default_namespace=settings.default_mcp_namespace, @@ -906,9 +918,12 @@ async def get_working_memory( Returns: Working memory containing messages, context, and structured memory records """ - return await core_get_working_memory( + result = await working_memory_core.get_working_memory( session_id=session_id, recent_messages_limit=recent_messages_limit ) + if result is None: + return WorkingMemory(session_id=session_id, messages=[], memories=[]) + return result @mcp_app.tool() diff --git a/docs/cli.md b/docs/cli.md index 962213e..50c6029 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -60,7 +60,7 @@ agent-memory mcp [OPTIONS] **Options:** - `--port INTEGER`: Port to run the MCP server on. (Default: value from `settings.mcp_port`, usually 9000) -- `--mode [stdio|sse]`: Run the MCP server in stdio or SSE mode. (Default: stdio) +- `--mode [stdio|sse|streamable-http]`: Run the MCP server in stdio, SSE, or streamable-http mode. (Default: stdio) - `--task-backend [asyncio|docket]`: Background task backend. `asyncio` (default) runs tasks inline in the MCP process with no separate worker. `docket` sends tasks to a Docket queue, which requires running `agent-memory task-worker`. **Examples:** @@ -72,11 +72,14 @@ agent-memory mcp # SSE mode for development (no separate worker needed) agent-memory mcp --mode sse --port 9001 +# Streamable HTTP mode for network deployments (e.g. Kubernetes) +agent-memory mcp --mode streamable-http --port 9000 + # SSE mode for production (requires separate worker process) agent-memory mcp --mode sse --port 9001 --task-backend docket ``` -**Note:** Stdio mode is designed for tools like Claude Desktop and, by default, uses the asyncio backend (no worker). Use `--task-backend docket` if you want MCP to enqueue background work into a shared Docket worker. +**Note:** Stdio mode is designed for tools like Claude Desktop and, by default, uses the asyncio backend (no worker). Streamable HTTP mode is suited for deploying the MCP server as a network service where HTTP clients (like Claude Code) connect over the network. Use `--task-backend docket` if you want MCP to enqueue background work into a shared Docket worker. ### `schedule-task` diff --git a/tests/test_cli.py b/tests/test_cli.py index 5c9ea48..689b6e7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -284,6 +284,62 @@ def test_mcp_command_sse_mode(self, mock_mcp_app, mock_settings): assert result.exit_code == 0 mock_mcp_app.run_sse_async.assert_called_once() + @patch("agent_memory_server.cli.settings") + @patch("agent_memory_server.mcp.mcp_app") + def test_mcp_command_streamable_http_mode(self, mock_mcp_app, mock_settings): + """Test mcp command in streamable-http mode.""" + mock_settings.mcp_port = 3001 + + mock_mcp_app.run_streamable_http_async = AsyncMock() + + runner = CliRunner() + result = runner.invoke(mcp, ["--mode", "streamable-http", "--port", "9000"]) + + assert result.exit_code == 0 + mock_mcp_app.run_streamable_http_async.assert_called_once() + + @patch("agent_memory_server.cli.configure_logging") + @patch("agent_memory_server.mcp.mcp_app") + def test_mcp_command_streamable_http_mode_uses_asyncio_by_default( + self, mock_mcp_app, mock_configure_logging + ): + """Test that streamable-http mode uses asyncio backend by default.""" + from agent_memory_server.config import settings + + # Set initial state + settings.use_docket = True + + mock_mcp_app.run_streamable_http_async = AsyncMock() + + runner = CliRunner() + result = runner.invoke(mcp, ["--mode", "streamable-http"]) + + assert result.exit_code == 0 + assert settings.use_docket is False + mock_mcp_app.run_streamable_http_async.assert_called_once() + + @patch("agent_memory_server.cli.configure_logging") + @patch("agent_memory_server.mcp.mcp_app") + def test_mcp_command_streamable_http_mode_with_task_backend_docket( + self, mock_mcp_app, mock_configure_logging + ): + """Test that streamable-http mode with --task-backend=docket sets use_docket=True.""" + from agent_memory_server.config import settings + + # Set initial state + settings.use_docket = False + + mock_mcp_app.run_streamable_http_async = AsyncMock() + + runner = CliRunner() + result = runner.invoke( + mcp, ["--mode", "streamable-http", "--task-backend", "docket"] + ) + + assert result.exit_code == 0 + assert settings.use_docket is True + mock_mcp_app.run_streamable_http_async.assert_called_once() + @patch("agent_memory_server.cli.configure_mcp_logging") @patch("agent_memory_server.cli.settings") @patch("agent_memory_server.mcp.mcp_app")