Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions agent_memory_server/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,8 @@ def api(port: int, host: str, reload: bool, no_worker: bool, task_backend: str):
@click.option(
"--mode",
default="stdio",
help="Run the MCP server in SSE or stdio mode",
type=click.Choice(["stdio", "sse"]),
help="Run the MCP server in SSE, streamable-http, or stdio mode",
type=click.Choice(["stdio", "sse", "streamable-http"]),
Comment on lines 319 to +323
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CLI help/choices are updated to include streamable-http, but the repo documentation still describes MCP --mode as [stdio|sse] (e.g., docs/cli.md, docs/getting-started.md, docs/quick-start.md). Please update those docs (and any examples) to include streamable-http so users don’t follow outdated instructions.

Copilot uses AI. Check for mistakes.
)
@click.option(
"--task-backend",
Expand Down Expand Up @@ -362,6 +362,9 @@ async def setup_and_run():
if mode == "sse":
logger.info(f"Starting MCP server on port {port}\n")
await mcp_app.run_sse_async()
elif mode == "streamable-http":
logger.info(f"Starting MCP server (streamable HTTP) on port {port}\n")
await mcp_app.run_streamable_http_async()
Comment on lines +365 to +367
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

streamable-http mode is added here, but there are existing CLI tests for stdio and sse modes (see tests/test_cli.py::TestMcpCommand). Please add a matching test case that --mode streamable-http calls mcp_app.run_streamable_http_async() and validates logging/backend behavior, to prevent regressions in the new mode selection logic.

Copilot uses AI. Check for mistakes.
elif mode == "stdio":
await mcp_app.run_stdio_async()
else:
Expand Down
1 change: 1 addition & 0 deletions agent_memory_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ class Settings(BaseSettings):
"gpt-5-mini" # Faster, smaller model for quick tasks like query optimization
)
port: int = 8000
mcp_host: str = "0.0.0.0"
mcp_port: int = 9000

# Vector store factory configuration
Expand Down
4 changes: 1 addition & 3 deletions agent_memory_server/long_term_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,10 +569,8 @@ async def merge_memories_with_llm(
The merged memory:
"""

model_name = "gpt-4o-mini"

response = await LLMClient.create_chat_completion(
model=model_name,
model=settings.generation_model,
messages=[{"role": "user", "content": prompt}],
)

Expand Down
21 changes: 18 additions & 3 deletions agent_memory_server/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import ulid
from mcp.server.fastmcp import FastMCP as _FastMCPBase

from agent_memory_server import working_memory as working_memory_core
from agent_memory_server.api import (
create_long_term_memory as core_create_long_term_memory,
delete_long_term_memory as core_delete_long_term_memory,
get_long_term_memory as core_get_long_term_memory,
get_working_memory as core_get_working_memory,
memory_prompt as core_memory_prompt,
put_working_memory_core as core_put_working_memory,
search_long_term_memory as core_search_long_term_memory,
Expand Down Expand Up @@ -77,6 +77,7 @@ class FastMCP(_FastMCPBase):
"""Extend FastMCP to support optional URL namespace and default STDIO namespace."""

def __init__(self, *args, default_namespace=None, **kwargs):
kwargs.setdefault("stateless_http", True)
super().__init__(*args, **kwargs)
self.default_namespace = default_namespace
self._current_request = None # Initialize the attribute
Expand All @@ -85,11 +86,12 @@ def sse_app(self):
from mcp.server.sse import SseServerTransport
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Mount, Route

sse = SseServerTransport(self.settings.message_path)

async def handle_sse(request: Request) -> None:
async def handle_sse(request: Request) -> Response:
# Store the request in the FastMCP instance so call_tool can access it
self._current_request = request

Expand All @@ -103,10 +105,12 @@ async def handle_sse(request: Request) -> None:
read_stream,
write_stream,
self._mcp_server.create_initialization_options(),
stateless=True,
)
finally:
# Clean up request reference
self._current_request = None
return Response()

return Starlette(
debug=self.settings.debug,
Expand Down Expand Up @@ -172,6 +176,13 @@ async def run_sse_async(self):
uvicorn.Config(app, host="0.0.0.0", port=int(self.settings.port))
).serve()

async def run_streamable_http_async(self):
    """Start the MCP server over the streamable HTTP transport.

    Establishes the Redis connection before delegating to the base
    FastMCP implementation, so the transport only starts serving once
    Redis is reachable (same startup pattern as ``run_stdio_async``).
    """
    # Imported lazily inside the method, matching run_stdio_async below.
    # NOTE(review): presumably deferred to avoid a module-level import
    # cycle with utils.redis — confirm.
    from agent_memory_server.utils.redis import get_redis_conn

    # Open the Redis connection up front; a misconfigured Redis fails
    # here rather than on the first client request.
    await get_redis_conn()
    return await super().run_streamable_http_async()
Comment on lines +179 to +184
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FastMCP advertises “optional URL namespace” support, but streamable-http currently just delegates to super().run_streamable_http_async() without an app override like sse_app(). As a result, the namespace-in-URL behavior (and call_tool()’s namespace injection via request.path_params) is likely not supported for streamable HTTP. Consider adding a custom Streamable HTTP Starlette app with /{namespace} routes (and setting request context per request) to keep behavior consistent across transports.

Copilot uses AI. Check for mistakes.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. The streamable-http mode currently delegates to FastMCP's built-in implementation which doesn't include the namespace-in-URL routing. For my use case (single namespace via env var) this works, but I agree parity with SSE's /{namespace} routes would be valuable. Happy to add a custom streamable_http_app() method with namespace routes in a follow-up if the maintainers want it in this PR.


async def run_stdio_async(self):
"""Start STDIO MCP server."""
from agent_memory_server.utils.redis import get_redis_conn
Expand All @@ -188,6 +199,7 @@ async def run_stdio_async(self):

mcp_app = FastMCP(
"Redis Agent Memory Server",
host=settings.mcp_host,
port=settings.mcp_port,
instructions=INSTRUCTIONS,
default_namespace=settings.default_mcp_namespace,
Expand Down Expand Up @@ -899,9 +911,12 @@ async def get_working_memory(
Returns:
Working memory containing messages, context, and structured memory records
"""
return await core_get_working_memory(
result = await working_memory_core.get_working_memory(
session_id=session_id, recent_messages_limit=recent_messages_limit
)
if result is None:
return WorkingMemory(session_id=session_id, messages=[], memories=[])
return result


@mcp_app.tool()
Expand Down
7 changes: 5 additions & 2 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ agent-memory mcp [OPTIONS]
**Options:**

- `--port INTEGER`: Port to run the MCP server on. (Default: value from `settings.mcp_port`, usually 9000)
- `--mode [stdio|sse]`: Run the MCP server in stdio or SSE mode. (Default: stdio)
- `--mode [stdio|sse|streamable-http]`: Run the MCP server in stdio, SSE, or streamable-http mode. (Default: stdio)
- `--task-backend [asyncio|docket]`: Background task backend. `asyncio` (default) runs tasks inline in the MCP process with no separate worker. `docket` sends tasks to a Docket queue, which requires running `agent-memory task-worker`.

**Examples:**
Expand All @@ -72,11 +72,14 @@ agent-memory mcp
# SSE mode for development (no separate worker needed)
agent-memory mcp --mode sse --port 9001

# Streamable HTTP mode for network deployments (e.g. Kubernetes)
agent-memory mcp --mode streamable-http --port 9000

# SSE mode for production (requires separate worker process)
agent-memory mcp --mode sse --port 9001 --task-backend docket
```

**Note:** Stdio mode is designed for tools like Claude Desktop and, by default, uses the asyncio backend (no worker). Use `--task-backend docket` if you want MCP to enqueue background work into a shared Docket worker.
**Note:** Stdio mode is designed for tools like Claude Desktop and, by default, uses the asyncio backend (no worker). Streamable HTTP mode is suited for deploying the MCP server as a network service where HTTP clients (like Claude Code) connect over the network. Use `--task-backend docket` if you want MCP to enqueue background work into a shared Docket worker.

### `schedule-task`

Expand Down
56 changes: 56 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,62 @@ def test_mcp_command_sse_mode(self, mock_mcp_app, mock_settings):
assert result.exit_code == 0
mock_mcp_app.run_sse_async.assert_called_once()

@patch("agent_memory_server.cli.settings")
@patch("agent_memory_server.mcp.mcp_app")
def test_mcp_command_streamable_http_mode(self, mock_mcp_app, mock_settings):
    """The --mode streamable-http flag should launch the streamable HTTP server."""
    # Give the patched settings a concrete default port; the explicit
    # --port option below takes precedence over it.
    mock_settings.mcp_port = 3001
    mock_mcp_app.run_streamable_http_async = AsyncMock()

    outcome = CliRunner().invoke(
        mcp, ["--mode", "streamable-http", "--port", "9000"]
    )

    assert outcome.exit_code == 0
    mock_mcp_app.run_streamable_http_async.assert_called_once()

@patch("agent_memory_server.cli.configure_logging")
@patch("agent_memory_server.mcp.mcp_app")
def test_mcp_command_streamable_http_mode_uses_asyncio_by_default(
    self, mock_mcp_app, mock_configure_logging
):
    """streamable-http mode should default to the asyncio task backend.

    With no ``--task-backend`` option, the CLI is expected to set
    ``settings.use_docket`` to False and start the streamable HTTP
    server exactly once.
    """
    from agent_memory_server.config import settings

    mock_mcp_app.run_streamable_http_async = AsyncMock()

    # This test mutates the real settings singleton; save and restore the
    # original value so the change cannot leak into other tests.
    original_use_docket = settings.use_docket
    settings.use_docket = True  # Opposite of the expected outcome.
    try:
        result = CliRunner().invoke(mcp, ["--mode", "streamable-http"])

        assert result.exit_code == 0
        assert settings.use_docket is False
        mock_mcp_app.run_streamable_http_async.assert_called_once()
    finally:
        settings.use_docket = original_use_docket

@patch("agent_memory_server.cli.configure_logging")
@patch("agent_memory_server.mcp.mcp_app")
def test_mcp_command_streamable_http_mode_with_task_backend_docket(
    self, mock_mcp_app, mock_configure_logging
):
    """--task-backend docket in streamable-http mode should enable Docket.

    The CLI is expected to set ``settings.use_docket`` to True and start
    the streamable HTTP server exactly once.
    """
    from agent_memory_server.config import settings

    mock_mcp_app.run_streamable_http_async = AsyncMock()

    # This test mutates the real settings singleton; save and restore the
    # original value so the change cannot leak into other tests.
    original_use_docket = settings.use_docket
    settings.use_docket = False  # Opposite of the expected outcome.
    try:
        result = CliRunner().invoke(
            mcp, ["--mode", "streamable-http", "--task-backend", "docket"]
        )

        assert result.exit_code == 0
        assert settings.use_docket is True
        mock_mcp_app.run_streamable_http_async.assert_called_once()
    finally:
        settings.use_docket = original_use_docket

@patch("agent_memory_server.cli.configure_mcp_logging")
@patch("agent_memory_server.cli.settings")
@patch("agent_memory_server.mcp.mcp_app")
Expand Down