diff --git a/.env.example b/.env.example index ed3f3143..1d4d8d15 100644 --- a/.env.example +++ b/.env.example @@ -180,3 +180,32 @@ RERANKER_BATCH_SIZE=10 BACKEND_IMAGE=ghcr.io/manavgup/rag_modulo/backend:latest FRONTEND_IMAGE=ghcr.io/manavgup/rag_modulo/frontend:latest TEST_IMAGE=ghcr.io/manavgup/rag_modulo/backend:latest + +# ================================ +# SPIFFE/SPIRE IDENTITY (Agent/Machine Identity) +# ================================ +# Enable SPIFFE workload identity for agents and services +# See: https://spiffe.io/docs/latest/spire-about/spire-concepts/ +SPIFFE_ENABLED=false + +# Authentication mode for migration (disabled|optional|preferred|required) +# - disabled: No SPIFFE support (current default) +# - optional: Accept both user JWT and SPIFFE JWT-SVID +# - preferred: Prefer SPIFFE, log warning on legacy JWT +# - required: Only SPIFFE JWT-SVIDs accepted for workloads +SPIFFE_AUTH_MODE=disabled + +# SPIRE Agent Workload API socket path +SPIFFE_ENDPOINT_SOCKET=unix:///run/spire/agent/api.sock + +# SPIFFE trust domain for this environment +SPIFFE_TRUST_DOMAIN=rag-modulo.local + +# Log warning when legacy JWT is used (for migration tracking) +SPIFFE_LEGACY_JWT_WARNING=false + +# Default SVID TTL in seconds (default: 3600 = 1 hour) +SPIFFE_SVID_TTL_SECONDS=3600 + +# Allowed JWT-SVID audiences (comma-separated) +SPIFFE_JWT_AUDIENCES=rag-modulo,mcp-gateway diff --git a/backend/core/authentication_middleware.py b/backend/core/authentication_middleware.py index d862458a..044078de 100644 --- a/backend/core/authentication_middleware.py +++ b/backend/core/authentication_middleware.py @@ -1,7 +1,15 @@ """Authentication middleware for FastAPI application. This module provides middleware for handling JWT-based authentication, -including support for development/testing modes and mock user creation. +including support for development/testing modes, mock user creation, +and SPIFFE JWT-SVID authentication for AI agents. + +SPIFFE Integration: + This middleware supports SPIFFE JWT-SVIDs for agent authentication. + When a Bearer token with a SPIFFE ID in the 'sub' claim is detected, + it validates the token and creates an agent principal instead of a user. + +Reference: docs/architecture/spire-integration-architecture.md """ import logging @@ -19,6 +27,10 @@ from core.config import get_settings from core.mock_auth import create_mock_user_data, ensure_mock_user_exists, is_bypass_mode_active, is_mock_token from core.request_context import RequestContext +from core.spiffe_auth import ( + get_spiffe_authenticator, + is_spiffe_jwt_svid, +) # Get settings safely for middleware settings = get_settings() @@ -206,9 +218,64 @@ def _handle_mock_token(self, request: Request, token: str) -> bool: # pylint: d logger.info("AuthMiddleware: Using mock test token") return True + def _handle_spiffe_jwt_svid(self, request: Request, token: str) -> bool: + """Handle SPIFFE JWT-SVID authentication for agents. + + This method validates a SPIFFE JWT-SVID and sets up the agent + principal in the request state. + + Args: + request: The FastAPI request object. + token: The SPIFFE JWT-SVID token. + + Returns: + True if SPIFFE JWT-SVID was handled successfully. + """ + try: + authenticator = get_spiffe_authenticator() + principal = authenticator.validate_jwt_svid(token) + + if principal is None: + logger.warning("AuthMiddleware: Invalid SPIFFE JWT-SVID") + return False + + # Check if the agent is expired + if principal.is_expired(): + logger.warning("AuthMiddleware: Expired SPIFFE JWT-SVID for %s", principal.spiffe_id) + return False + + # Set agent principal in request state + request.state.agent = principal + request.state.identity_type = "agent" + + # Also set a unified principal representation for compatibility + agent_data = { + "identity_type": "agent", + "spiffe_id": principal.spiffe_id, + "agent_type": principal.agent_type.value, + "agent_id": principal.agent_id, + "capabilities": [cap.value for cap in principal.capabilities], + "audiences": principal.audiences, + } + request.state.user = agent_data # For backward compatibility + RequestContext.set_user(agent_data) + + logger.info( + "AuthMiddleware: SPIFFE JWT-SVID validated successfully. Agent: %s (type: %s)", + principal.spiffe_id, + principal.agent_type.value, + ) + return True + except Exception as e: + logger.warning("AuthMiddleware: Error validating SPIFFE JWT-SVID - %s", e) + return False + def _handle_jwt_token(self, request: Request, token: str) -> bool: """Handle JWT token authentication and cache user data. + This method handles both traditional user JWTs and SPIFFE JWT-SVIDs. + It detects the token type and delegates to the appropriate handler. + Args: request: The FastAPI request object. token: The JWT token. @@ -221,9 +288,14 @@ def _handle_jwt_token(self, request: Request, token: str) -> bool: if is_mock_token(token): return self._handle_mock_token(request, token) - # Verify JWT using the verify_jwt_token function + # Check if this is a SPIFFE JWT-SVID (agent authentication) + if is_spiffe_jwt_svid(token): + return self._handle_spiffe_jwt_svid(request, token) + + # Verify JWT using the verify_jwt_token function (user authentication) payload = verify_jwt_token(token) user_data = { + "identity_type": "user", "id": payload.get("sub"), "email": payload.get("email"), "name": payload.get("name"), @@ -231,6 +303,7 @@ def _handle_jwt_token(self, request: Request, token: str) -> bool: "role": payload.get("role"), } request.state.user = user_data + request.state.identity_type = "user" # Cache user data in request context to eliminate N+1 queries RequestContext.set_user(user_data) logger.info("AuthMiddleware: JWT token validated successfully. User: %s", request.state.user) diff --git a/backend/core/spiffe_auth.py b/backend/core/spiffe_auth.py new file mode 100644 index 00000000..8690b7b7 --- /dev/null +++ b/backend/core/spiffe_auth.py @@ -0,0 +1,734 @@ +"""SPIFFE/SPIRE authentication module for agent workload identity. + +This module provides authentication support for AI agents using SPIFFE JWT-SVIDs. +It integrates with the py-spiffe library to fetch and validate SPIFFE identities, +enabling zero-trust agent authentication for the RAG Modulo platform. + +Key components: +- SPIFFEConfig: Configuration for SPIFFE/SPIRE integration +- SPIFFEAuthenticator: Handles JWT-SVID fetching and validation +- AgentPrincipal: Represents an authenticated agent identity + +Reference: https://spiffe.io/docs/latest/spire-about/spire-concepts/ +""" + +from __future__ import annotations + +import logging +import os +import re +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, field +from datetime import UTC, datetime +from enum import Enum +from functools import wraps +from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar + +import jwt +from pydantic import BaseModel, Field + +if TYPE_CHECKING: + pass + +# Type variables for generic decorator typing +P = ParamSpec("P") +T = TypeVar("T") + +logger = logging.getLogger(__name__) + +# SPIFFE ID pattern: spiffe://trust-domain/path +SPIFFE_ID_PATTERN = re.compile(r"^spiffe://([a-zA-Z0-9._-]+)/(.+)$") + +# Default SPIRE Workload API socket path +DEFAULT_SPIFFE_ENDPOINT_SOCKET = "unix:///var/run/spire/agent.sock" + + +class AgentType(str, Enum): + """Enumeration of supported agent types in RAG Modulo. + + Each agent type has specific capabilities and is assigned + a unique SPIFFE ID path. + """ + + SEARCH_ENRICHER = "search-enricher" + COT_REASONING = "cot-reasoning" + QUESTION_DECOMPOSER = "question-decomposer" + SOURCE_ATTRIBUTION = "source-attribution" + ENTITY_EXTRACTION = "entity-extraction" + ANSWER_SYNTHESIS = "answer-synthesis" + CUSTOM = "custom" + + +class AgentCapability(str, Enum): + """Enumeration of agent capabilities for access control. + + Capabilities define what actions an agent is allowed to perform. + """ + + MCP_TOOL_INVOKE = "mcp:tool:invoke" + SEARCH_READ = "search:read" + SEARCH_WRITE = "search:write" + LLM_INVOKE = "llm:invoke" + PIPELINE_EXECUTE = "pipeline:execute" + DOCUMENT_READ = "document:read" + DOCUMENT_WRITE = "document:write" + COT_INVOKE = "cot:invoke" + AGENT_SPAWN = "agent:spawn" + ADMIN = "admin" + + +# Default capabilities per agent type +AGENT_TYPE_CAPABILITIES: dict[AgentType, list[AgentCapability]] = { + AgentType.SEARCH_ENRICHER: [ + AgentCapability.MCP_TOOL_INVOKE, + AgentCapability.SEARCH_READ, + ], + AgentType.COT_REASONING: [ + AgentCapability.SEARCH_READ, + AgentCapability.LLM_INVOKE, + AgentCapability.PIPELINE_EXECUTE, + AgentCapability.COT_INVOKE, + ], + AgentType.QUESTION_DECOMPOSER: [ + AgentCapability.SEARCH_READ, + AgentCapability.LLM_INVOKE, + ], + AgentType.SOURCE_ATTRIBUTION: [ + AgentCapability.DOCUMENT_READ, + AgentCapability.SEARCH_READ, + ], + AgentType.ENTITY_EXTRACTION: [ + AgentCapability.DOCUMENT_READ, + AgentCapability.LLM_INVOKE, + ], + AgentType.ANSWER_SYNTHESIS: [ + AgentCapability.SEARCH_READ, + AgentCapability.LLM_INVOKE, + AgentCapability.COT_INVOKE, + ], + AgentType.CUSTOM: [], # Custom agents have no default capabilities +} + + +@dataclass +class SPIFFEConfig: + """Configuration for SPIFFE/SPIRE integration. + + Attributes: + enabled: Whether SPIFFE authentication is enabled + endpoint_socket: Path to SPIRE agent workload API socket + trust_domain: The SPIFFE trust domain (e.g., "rag-modulo.example.com") + default_audiences: Default audiences for JWT-SVID requests + svid_ttl_seconds: Time-to-live for SVIDs in seconds + fallback_to_jwt: Whether to fall back to legacy JWT if SPIRE unavailable + """ + + enabled: bool = False + endpoint_socket: str = DEFAULT_SPIFFE_ENDPOINT_SOCKET + trust_domain: str = "rag-modulo.example.com" + default_audiences: list[str] = field(default_factory=lambda: ["backend-api", "mcp-gateway"]) + svid_ttl_seconds: int = 3600 # 1 hour + fallback_to_jwt: bool = True + + @classmethod + def from_env(cls) -> SPIFFEConfig: + """Create SPIFFEConfig from environment variables. + + Environment variables (aligned with .env.example): + SPIFFE_ENABLED: Enable SPIFFE authentication (default: false) + SPIFFE_ENDPOINT_SOCKET: Workload API socket path + SPIFFE_TRUST_DOMAIN: Trust domain name + SPIFFE_JWT_AUDIENCES: Comma-separated list of audiences + SPIFFE_SVID_TTL_SECONDS: SVID TTL in seconds + SPIFFE_FALLBACK_TO_JWT: Enable JWT fallback (default: true) + """ + enabled = os.getenv("SPIFFE_ENABLED", "false").lower() == "true" + endpoint_socket = os.getenv("SPIFFE_ENDPOINT_SOCKET", DEFAULT_SPIFFE_ENDPOINT_SOCKET) + trust_domain = os.getenv("SPIFFE_TRUST_DOMAIN", "rag-modulo.example.com") + audiences_str = os.getenv("SPIFFE_JWT_AUDIENCES", "rag-modulo,mcp-gateway") + default_audiences = [a.strip() for a in audiences_str.split(",") if a.strip()] + svid_ttl = int(os.getenv("SPIFFE_SVID_TTL_SECONDS", "3600")) + fallback_to_jwt = os.getenv("SPIFFE_FALLBACK_TO_JWT", "true").lower() == "true" + + return cls( + enabled=enabled, + endpoint_socket=endpoint_socket, + trust_domain=trust_domain, + default_audiences=default_audiences, + svid_ttl_seconds=svid_ttl, + fallback_to_jwt=fallback_to_jwt, + ) + + +class AgentPrincipal(BaseModel): + """Represents an authenticated agent identity. + + This model captures the identity information extracted from a SPIFFE JWT-SVID + or from the local agent registration. + + Attributes: + spiffe_id: Full SPIFFE ID (e.g., "spiffe://rag-modulo.example.com/agent/search-enricher/abc123") + trust_domain: The trust domain portion of the SPIFFE ID + agent_type: The type of agent (from AgentType enum) + agent_id: Unique identifier for this agent instance + capabilities: List of capabilities this agent has + audiences: Audiences this SVID is valid for + issued_at: When the SVID was issued + expires_at: When the SVID expires + metadata: Additional metadata from the SVID or registration + """ + + spiffe_id: str = Field(..., description="Full SPIFFE ID") + trust_domain: str = Field(..., description="Trust domain from SPIFFE ID") + agent_type: AgentType = Field(..., description="Agent type classification") + agent_id: str = Field(..., description="Unique agent instance identifier") + capabilities: list[AgentCapability] = Field(default_factory=list, description="Agent capabilities") + audiences: list[str] = Field(default_factory=list, description="Valid audiences") + issued_at: datetime | None = Field(default=None, description="SVID issue time") + expires_at: datetime | None = Field(default=None, description="SVID expiration time") + metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata") + + @classmethod + def from_spiffe_id( + cls, + spiffe_id: str, + capabilities: list[AgentCapability] | None = None, + audiences: list[str] | None = None, + issued_at: datetime | None = None, + expires_at: datetime | None = None, + metadata: dict[str, Any] | None = None, + ) -> AgentPrincipal: + """Create an AgentPrincipal from a SPIFFE ID string. + + Args: + spiffe_id: Full SPIFFE ID (e.g., "spiffe://domain/agent/type/id") + capabilities: Optional list of capabilities (defaults to type-based) + audiences: Optional list of audiences + issued_at: Optional issue timestamp + expires_at: Optional expiration timestamp + metadata: Optional additional metadata + + Returns: + AgentPrincipal instance + + Raises: + ValueError: If SPIFFE ID format is invalid + """ + match = SPIFFE_ID_PATTERN.match(spiffe_id) + if not match: + raise ValueError(f"Invalid SPIFFE ID format: {spiffe_id}") + + trust_domain = match.group(1) + path = match.group(2) + + # Parse path: expected format is "agent/{type}/{id}" or "agent/{type}" + path_parts = path.split("/") + if len(path_parts) < 2 or path_parts[0] != "agent": + raise ValueError(f"Invalid agent SPIFFE ID path: {path}") + + agent_type_str = path_parts[1] + try: + agent_type = AgentType(agent_type_str) + except ValueError: + agent_type = AgentType.CUSTOM + + agent_id = path_parts[2] if len(path_parts) > 2 else agent_type_str + + # Use default capabilities for agent type if not provided + if capabilities is None: + capabilities = list(AGENT_TYPE_CAPABILITIES.get(agent_type, [])) + + return cls( + spiffe_id=spiffe_id, + trust_domain=trust_domain, + agent_type=agent_type, + agent_id=agent_id, + capabilities=capabilities, + audiences=audiences or [], + issued_at=issued_at, + expires_at=expires_at, + metadata=metadata or {}, + ) + + def has_capability(self, capability: AgentCapability) -> bool: + """Check if this agent has a specific capability. + + Args: + capability: The capability to check + + Returns: + True if the agent has the capability + """ + return capability in self.capabilities + + def has_any_capability(self, capabilities: list[AgentCapability]) -> bool: + """Check if this agent has any of the specified capabilities. + + Args: + capabilities: List of capabilities to check + + Returns: + True if the agent has at least one of the capabilities + """ + return any(cap in self.capabilities for cap in capabilities) + + def has_all_capabilities(self, capabilities: list[AgentCapability]) -> bool: + """Check if this agent has all of the specified capabilities. + + Args: + capabilities: List of capabilities to check + + Returns: + True if the agent has all of the capabilities + """ + return all(cap in self.capabilities for cap in capabilities) + + def is_valid_for_audience(self, audience: str) -> bool: + """Check if this agent's SVID is valid for a specific audience. + + Args: + audience: The audience to check + + Returns: + True if the SVID is valid for the audience + """ + return audience in self.audiences + + def is_expired(self) -> bool: + """Check if this agent's SVID has expired. + + Returns: + True if the SVID has expired + """ + if self.expires_at is None: + return False + return datetime.now(UTC) > self.expires_at + + +class SPIFFEAuthenticator: + """Authenticator for SPIFFE JWT-SVIDs. + + This class handles: + - Fetching JWT-SVIDs from the SPIRE agent via Workload API + - Validating incoming JWT-SVIDs + - Extracting agent identity from validated tokens + - Graceful fallback when SPIRE is unavailable + + Usage: + authenticator = SPIFFEAuthenticator() + + # Fetch SVID for outbound calls + token = authenticator.fetch_jwt_svid(audiences=["mcp-gateway"]) + + # Validate incoming SVID + principal = authenticator.validate_jwt_svid(token) + """ + + def __init__(self, config: SPIFFEConfig | None = None) -> None: + """Initialize the SPIFFE authenticator. + + Args: + config: Optional SPIFFEConfig. If not provided, loads from environment. + """ + self.config = config or SPIFFEConfig.from_env() + self._workload_client: Any = None + self._jwt_source: Any = None + self._initialized = False + self._spire_available = False + + def _initialize(self) -> bool: + """Initialize the SPIFFE workload API client. + + Returns: + True if initialization successful, False otherwise + """ + if self._initialized: + return self._spire_available + + if not self.config.enabled: + logger.info("SPIFFE authentication is disabled") + self._initialized = True + self._spire_available = False + return False + + try: + # Set environment variable for py-spiffe + os.environ.setdefault("SPIFFE_ENDPOINT_SOCKET", self.config.endpoint_socket) + + # Import py-spiffe (may not be available in all environments) + from spiffe import JwtSource, WorkloadApiClient # type: ignore[import-not-found] + + self._workload_client = WorkloadApiClient() + self._jwt_source = JwtSource() + self._spire_available = True + self._initialized = True + logger.info("SPIFFE authenticator initialized successfully") + return True + except ImportError as e: + # When SPIFFE is explicitly enabled, missing py-spiffe is a critical error + error_msg = ( + "SPIFFE authentication is enabled but py-spiffe library is not installed. " + "Install with: pip install spiffe" + ) + if self.config.enabled: + logger.error(error_msg) + raise RuntimeError(error_msg) from e + # If not explicitly enabled, just warn and disable + logger.warning("py-spiffe library not available, SPIFFE authentication disabled") + self._initialized = True + self._spire_available = False + return False + except Exception as e: + # Connection/runtime errors are recoverable with fallback + if self.config.fallback_to_jwt: + logger.warning("Failed to initialize SPIFFE authenticator (will use JWT fallback): %s", e) + self._initialized = True + self._spire_available = False + return False + # If no fallback allowed, this is critical + error_msg = f"Failed to initialize SPIFFE authenticator and fallback is disabled: {e}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e + + @property + def is_available(self) -> bool: + """Check if SPIFFE authentication is available. + + Returns: + True if SPIFFE is enabled and SPIRE agent is reachable + """ + if not self._initialized: + self._initialize() + return self._spire_available + + def fetch_jwt_svid(self, audiences: list[str] | None = None) -> str | None: + """Fetch a JWT-SVID from the SPIRE agent. + + Args: + audiences: List of audiences for the JWT-SVID + + Returns: + JWT-SVID token string, or None if unavailable + """ + if not self._initialize(): + return None + + try: + if audiences is None: + audiences = self.config.default_audiences + + # Use JwtSource for auto-refreshing tokens + with self._jwt_source as source: + svid = source.fetch_svid(audience=set(audiences)) + return svid.token + except Exception as e: + logger.error("Failed to fetch JWT-SVID: %s", e) + return None + + def validate_jwt_svid(self, token: str, required_audience: str | None = None) -> AgentPrincipal | None: + """Validate a JWT-SVID and extract the agent principal. + + This method validates the JWT-SVID signature against the SPIRE trust bundle + and extracts the agent identity information. + + SECURITY NOTE: By default, signature validation is REQUIRED. The fallback_to_jwt + config option only controls whether we fall back when SPIRE is UNAVAILABLE, + NOT when signature validation FAILS. Failed signature validation always rejects. + + Args: + token: The JWT-SVID token string + required_audience: Optional audience that must be present in the token + + Returns: + AgentPrincipal if validation successful, None otherwise + """ + try: + # First, decode without verification to check if it's a SPIFFE JWT-SVID + unverified = jwt.decode(token, options={"verify_signature": False}) + + # Check if this is a SPIFFE JWT-SVID (has 'sub' claim with spiffe:// prefix) + subject = unverified.get("sub", "") + if not subject.startswith("spiffe://"): + logger.debug("Token is not a SPIFFE JWT-SVID") + return None + + # Validate trust domain matches our configuration + match = SPIFFE_ID_PATTERN.match(subject) + if not match: + logger.warning("Invalid SPIFFE ID format in token: %s", subject) + return None + + token_trust_domain = match.group(1) + if token_trust_domain != self.config.trust_domain: + logger.warning( + "SPIFFE ID trust domain mismatch: expected %s, got %s", + self.config.trust_domain, + token_trust_domain, + ) + return None + + # Validate audience if required + audiences = unverified.get("aud", []) + if isinstance(audiences, str): + audiences = [audiences] + + if required_audience and required_audience not in audiences: + logger.warning("JWT-SVID missing required audience: %s", required_audience) + return None + + # Signature validation - CRITICAL SECURITY CHECK + signature_validated = False + + if self.is_available: + try: + # Use workload client to validate signature with trust bundle + with self._workload_client as client: + jwt_bundle = client.fetch_jwt_bundles() + bundle = jwt_bundle.get_bundle_for_trust_domain(token_trust_domain) + if bundle: + # Validate token signature with bundle + bundle.validate_jwt_svid(token, audiences=set(audiences) if audiences else None) + signature_validated = True + logger.debug("JWT-SVID signature validated successfully") + else: + logger.error("No trust bundle found for domain: %s", token_trust_domain) + return None + except Exception as e: + # SECURITY: Signature validation FAILED - always reject + logger.error( + "JWT-SVID signature validation FAILED: %s. Token rejected for security.", + e, + ) + return None + else: + # SPIRE is not available + if self.config.fallback_to_jwt: + # Allow fallback only when SPIRE is unavailable (not when validation fails) + logger.warning( + "SPIRE unavailable, accepting token without signature validation. " + "This is ONLY safe in development environments." + ) + else: + logger.error("SPIRE unavailable and fallback disabled. Token rejected.") + return None + + # Extract timestamps with UTC timezone + issued_at = None + expires_at = None + if "iat" in unverified: + issued_at = datetime.fromtimestamp(unverified["iat"], tz=UTC) + if "exp" in unverified: + expires_at = datetime.fromtimestamp(unverified["exp"], tz=UTC) + + # Create agent principal from SPIFFE ID + principal = AgentPrincipal.from_spiffe_id( + spiffe_id=subject, + audiences=audiences, + issued_at=issued_at, + expires_at=expires_at, + metadata={ + "raw_claims": unverified, + "signature_validated": signature_validated, + }, + ) + + # Check expiration + if principal.is_expired(): + logger.warning("JWT-SVID has expired") + return None + + return principal + + except jwt.InvalidTokenError as e: + logger.warning("Invalid JWT-SVID: %s", e) + return None + except ValueError as e: + logger.warning("Failed to parse SPIFFE ID from JWT-SVID: %s", e) + return None + except Exception as e: + logger.error("Unexpected error validating JWT-SVID: %s", e) + return None + + def get_auth_headers(self, audiences: list[str] | None = None) -> dict[str, str]: + """Get authentication headers with JWT-SVID for outbound requests. + + Args: + audiences: List of audiences for the JWT-SVID + + Returns: + Dictionary of headers to include in requests + """ + token = self.fetch_jwt_svid(audiences) + if token: + return {"Authorization": f"Bearer {token}"} + return {} + + +def is_spiffe_jwt_svid(token: str) -> bool: + """Check if a token is a SPIFFE JWT-SVID. + + Args: + token: JWT token string + + Returns: + True if the token is a SPIFFE JWT-SVID + """ + try: + unverified = jwt.decode(token, options={"verify_signature": False}) + subject = unverified.get("sub", "") + return subject.startswith("spiffe://") + except Exception: + return False + + +def parse_spiffe_id(spiffe_id: str) -> tuple[str, str] | None: + """Parse a SPIFFE ID into trust domain and path. + + Args: + spiffe_id: Full SPIFFE ID string + + Returns: + Tuple of (trust_domain, path) or None if invalid + """ + match = SPIFFE_ID_PATTERN.match(spiffe_id) + if match: + return (match.group(1), match.group(2)) + return None + + +def build_spiffe_id(trust_domain: str, agent_type: AgentType, agent_id: str | None = None) -> str: + """Build a SPIFFE ID for an agent. + + Args: + trust_domain: The trust domain (e.g., "rag-modulo.example.com") + agent_type: The type of agent + agent_id: Optional unique identifier for the agent instance + + Returns: + Full SPIFFE ID string + """ + if agent_id: + return f"spiffe://{trust_domain}/agent/{agent_type.value}/{agent_id}" + return f"spiffe://{trust_domain}/agent/{agent_type.value}" + + +# Global authenticator instance (lazy initialization) +_authenticator: SPIFFEAuthenticator | None = None + + +def get_spiffe_authenticator() -> SPIFFEAuthenticator: + """Get the global SPIFFE authenticator instance. + + Returns: + SPIFFEAuthenticator instance + """ + global _authenticator + if _authenticator is None: + _authenticator = SPIFFEAuthenticator() + return _authenticator + + +def require_capabilities( + *required_capabilities: AgentCapability, + require_all: bool = True, +) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]: + """Decorator to enforce capability requirements on endpoint handlers. + + This decorator checks if the authenticated agent has the required capabilities + before allowing access to the endpoint. It works with FastAPI's dependency + injection system. + + Args: + *required_capabilities: One or more capabilities required to access the endpoint + require_all: If True, all capabilities are required. If False, any one suffices. + + Returns: + FastAPI dependency that validates capabilities + + Example: + @router.post("/search") + @require_capabilities(AgentCapability.SEARCH_READ) + async def search_endpoint(request: Request): + ... + + @router.post("/admin") + @require_capabilities(AgentCapability.ADMIN, AgentCapability.SEARCH_WRITE, require_all=True) + async def admin_endpoint(request: Request): + ... + """ + from fastapi import HTTPException, Request, status + + def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]: + @wraps(func) + async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: + # Find the request object in args or kwargs + request: Request | None = None + for arg in args: + if isinstance(arg, Request): + request = arg + break + if request is None: + request = kwargs.get("request") + + if request is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Request object not found in handler", + ) + + # Check if request has agent principal + agent_principal: AgentPrincipal | None = getattr(request.state, "agent_principal", None) + + if agent_principal is None: + # Not an agent request - check if we should allow user requests + user = getattr(request.state, "user", None) + if user: + # User requests are allowed by default (they have implicit capabilities) + return await func(*args, **kwargs) + + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Authentication required", + ) + + # Verify agent has required capabilities + if require_all: + has_permission = agent_principal.has_all_capabilities(list(required_capabilities)) + else: + has_permission = agent_principal.has_any_capability(list(required_capabilities)) + + if not has_permission: + capability_names = [cap.value for cap in required_capabilities] + mode = "all of" if require_all else "any of" + logger.warning( + "Agent %s denied access: missing %s capabilities %s (has: %s)", + agent_principal.spiffe_id, + mode, + capability_names, + [cap.value for cap in agent_principal.capabilities], + ) + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Agent lacks required capabilities: {capability_names}", + ) + + return await func(*args, **kwargs) + + return wrapper + + return decorator + + +def get_agent_principal_from_request(request: Any) -> AgentPrincipal | None: + """Extract agent principal from request state if present. + + This is a utility function for handlers that need to check agent identity + without requiring it. + + Args: + request: FastAPI Request object + + Returns: + AgentPrincipal if request is from an authenticated agent, None otherwise + """ + return getattr(request.state, "agent_principal", None) diff --git a/backend/main.py b/backend/main.py index a9326cf9..819b0e1e 100644 --- a/backend/main.py +++ b/backend/main.py @@ -27,6 +27,7 @@ # Database from rag_solution.file_management.database import Base, engine, get_db +from rag_solution.router.agent_router import router as agent_router # Models from rag_solution.router.auth_router import router as auth_router @@ -176,6 +177,41 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: # ------------------------------------------- # šŸš€ APPLICATION INITIALIZATION # ------------------------------------------- + +# OpenAPI tags metadata for API documentation +tags_metadata = [ + { + "name": "agents", + "description": "AI Agent management with SPIFFE-based workload identity. " + "Register, manage, and authenticate AI agents using SPIRE for secure " + "machine-to-machine communication.", + "externalDocs": { + "description": "SPIFFE Integration Architecture", + "url": "https://spiffe.io/", + }, + }, + { + "name": "auth", + "description": "User authentication and authorization endpoints.", + }, + { + "name": "collections", + "description": "Document collection management operations.", + }, + { + "name": "search", + "description": "RAG search and query operations.", + }, + { + "name": "users", + "description": "User profile and settings management.", + }, + { + "name": "teams", + "description": "Team management and collaboration.", + }, +] + app = FastAPI( lifespan=lifespan, title="RAG Modulo API", @@ -183,6 +219,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: version="1.0.0", docs_url="/docs", redoc_url="/redoc", + openapi_tags=tags_metadata, ) # Middleware @@ -222,6 +259,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: app.include_router(token_warning_router) app.include_router(voice_router) app.include_router(websocket_router) +app.include_router(agent_router) # Root endpoint @@ -239,6 +277,80 @@ async def root() -> dict[str, str]: # ------------------------------------------- # šŸ“Š CUSTOM OPENAPI SCHEMA # ------------------------------------------- + +# OpenAPI tag metadata for improved documentation organization +OPENAPI_TAGS = [ + { + "name": "health", + "description": "Health check endpoints for monitoring service availability", + }, + { + "name": "auth", + "description": "Authentication and authorization endpoints", + }, + { + "name": "users", + "description": "User management and profile operations", + }, + { + "name": "teams", + "description": "Team management and membership operations", + }, + { + "name": "collections", + "description": "Document collection management and configuration", + }, + { + "name": "search", + "description": "RAG search operations with Chain of Thought reasoning", + }, + { + "name": "chat", + "description": "Conversational AI chat interface", + }, + { + "name": "conversations", + "description": "Conversation history and session management", + }, + { + "name": "agents", + "description": ( + "AI agent management with SPIFFE/SPIRE workload identity. " + "Provides registration, capability management, and JWT-SVID validation " + "for machine-to-machine authentication." + ), + "externalDocs": { + "description": "SPIFFE Integration Architecture", + "url": "https://spiffe.io/docs/latest/spire-about/spire-concepts/", + }, + }, + { + "name": "podcast", + "description": "AI-powered podcast generation from document collections", + }, + { + "name": "voice", + "description": "Voice synthesis and audio preview operations", + }, + { + "name": "dashboard", + "description": "Dashboard data and analytics endpoints", + }, + { + "name": "runtime-config", + "description": "Runtime configuration management", + }, + { + "name": "token-warning", + "description": "Token usage warnings and limits", + }, + { + "name": "websocket", + "description": "WebSocket connections for real-time updates", + }, +] + + def custom_openapi() -> dict[str, Any]: """Generate custom OpenAPI schema for the application. @@ -256,6 +368,7 @@ def custom_openapi() -> dict[str, Any]: version=app.version, description=app.description, routes=app.routes, + tags=OPENAPI_TAGS, ) app.openapi_schema = openapi_schema return app.openapi_schema diff --git a/backend/rag_solution/models/__init__.py b/backend/rag_solution/models/__init__.py index fc5c9d90..c3837195 100644 --- a/backend/rag_solution/models/__init__.py +++ b/backend/rag_solution/models/__init__.py @@ -2,6 +2,9 @@ from rag_solution.file_management.database import Base +# Agent model for SPIFFE-based workload identity +from rag_solution.models.agent import Agent + # Then Collection since it's referenced by UserCollection from rag_solution.models.collection import Collection @@ -28,6 +31,7 @@ # Register all models with Base.metadata __all__ = [ + "Agent", "Base", "Collection", "ConversationMessage", diff --git a/backend/rag_solution/models/agent.py b/backend/rag_solution/models/agent.py new file mode 100644 index 00000000..faa17a48 --- /dev/null +++ b/backend/rag_solution/models/agent.py @@ -0,0 +1,221 @@ +"""Agent model for SPIFFE-based workload identity. + +This module defines the Agent SQLAlchemy model for storing agent identities +that are authenticated via SPIFFE/SPIRE. Agents are AI workloads that perform +various tasks in the RAG pipeline. + +Reference: docs/architecture/spire-integration-architecture.md +""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from typing import TYPE_CHECKING + +from sqlalchemy import DateTime, ForeignKey, Index, String, Text +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from core.identity_service import IdentityService +from rag_solution.file_management.database import Base + +if TYPE_CHECKING: + from rag_solution.models.team import Team + from rag_solution.models.user import User + + +class AgentStatus(str): + """Agent status enumeration. + + Attributes: + ACTIVE: Agent is active and can authenticate + SUSPENDED: Agent is temporarily suspended + REVOKED: Agent credentials have been revoked + PENDING: Agent is pending approval/registration + """ + + ACTIVE = "active" + SUSPENDED = "suspended" + REVOKED = "revoked" + PENDING = "pending" + + +class Agent(Base): + """SQLAlchemy model for AI agent identities. + + Agents are workloads that authenticate using SPIFFE JWT-SVIDs. Each agent + has a unique SPIFFE ID and a set of capabilities that define what actions + it can perform. + + Attributes: + id: Unique identifier for the agent (UUID) + spiffe_id: Full SPIFFE ID (e.g., "spiffe://trust-domain/agent/type/id") + agent_type: Classification of agent (e.g., "search-enricher", "cot-reasoning") + name: Human-readable name for the agent + description: Description of the agent's purpose + owner_user_id: UUID of the user who owns this agent + team_id: Optional team association + capabilities: JSONB array of capability strings + agent_metadata: Additional JSONB metadata (named to avoid SQLAlchemy reserved word) + status: Current status (active, suspended, revoked, pending) + created_at: Timestamp of agent creation + updated_at: Timestamp of last update + last_seen_at: Timestamp of last successful authentication + """ + + __tablename__ = "agents" + + # Composite indexes for common query patterns + __table_args__ = ( + Index("ix_agents_owner_status", "owner_user_id", "status"), + Index("ix_agents_type_status", "agent_type", "status"), + Index("ix_agents_team_status", "team_id", "status"), + ) + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + primary_key=True, + default=IdentityService.generate_id, + ) + spiffe_id: Mapped[str] = mapped_column( + String(512), + unique=True, + index=True, + nullable=False, + comment="Full SPIFFE ID (spiffe://trust-domain/agent/type/id)", + ) + agent_type: Mapped[str] = mapped_column( + String(100), + index=True, + nullable=False, + comment="Agent type classification", + ) + name: Mapped[str] = mapped_column( + String(255), + nullable=False, + comment="Human-readable agent name", + ) + description: Mapped[str | None] = mapped_column( + Text, + nullable=True, + comment="Description of agent purpose and capabilities", + ) + owner_user_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + index=True, + comment="User who owns this agent", + ) + team_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), + ForeignKey("teams.id", ondelete="SET NULL"), + nullable=True, + index=True, + comment="Optional team association", + ) + capabilities: Mapped[list] = mapped_column( + JSONB, + nullable=False, + default=list, + comment="Array of capability strings", + ) + agent_metadata: Mapped[dict] = mapped_column( + "metadata", # Maps to 'metadata' column in database + JSONB, + nullable=False, + default=dict, + comment="Additional agent metadata", + ) + status: Mapped[str] = mapped_column( + String(50), + nullable=False, + default=AgentStatus.PENDING, + index=True, + comment="Agent status (active, suspended, revoked, pending)", + ) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + default=lambda: datetime.now(UTC), + nullable=False, + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + default=lambda: datetime.now(UTC), + onupdate=lambda: datetime.now(UTC), + nullable=False, + ) + last_seen_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + comment="Last successful authentication timestamp", + ) + + # Relationships + owner: Mapped[User] = relationship( + "User", + back_populates="agents", + foreign_keys=[owner_user_id], + ) + team: Mapped[Team | None] = relationship( + "Team", + back_populates="agents", + foreign_keys=[team_id], + ) + + def __repr__(self) -> str: + """String representation of the agent.""" + return ( + f"Agent(id='{self.id}', spiffe_id='{self.spiffe_id}', " + f"agent_type='{self.agent_type}', name='{self.name}', status='{self.status}')" + ) + + def is_active(self) -> bool: + """Check if the agent is active.""" + return self.status == AgentStatus.ACTIVE + + def has_capability(self, capability: str) -> bool: + """Check if the agent has a specific capability. + + Args: + capability: The capability string to check + + Returns: + True if the agent has the capability + """ + return capability in self.capabilities + + def add_capability(self, capability: str) -> None: + """Add a capability to the agent. + + Args: + capability: The capability string to add + """ + if capability not in self.capabilities: + self.capabilities = [*self.capabilities, capability] + + def remove_capability(self, capability: str) -> None: + """Remove a capability from the agent. + + Args: + capability: The capability string to remove + """ + if capability in self.capabilities: + self.capabilities = [c for c in self.capabilities if c != capability] + + def suspend(self) -> None: + """Suspend the agent.""" + self.status = AgentStatus.SUSPENDED + + def activate(self) -> None: + """Activate the agent.""" + self.status = AgentStatus.ACTIVE + + def revoke(self) -> None: + """Revoke the agent's credentials.""" + self.status = AgentStatus.REVOKED + + def update_last_seen(self) -> None: + """Update the last seen timestamp to now.""" + self.last_seen_at = datetime.now(UTC) diff --git a/backend/rag_solution/models/team.py b/backend/rag_solution/models/team.py index ca7e9bb8..996d4fd9 100644 --- a/backend/rag_solution/models/team.py +++ b/backend/rag_solution/models/team.py @@ -12,6 +12,7 @@ from rag_solution.file_management.database import Base if TYPE_CHECKING: + from rag_solution.models.agent import Agent from rag_solution.models.user_team import UserTeam @@ -25,6 +26,7 @@ class Team(Base): updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now) users: Mapped[list[UserTeam]] = relationship("UserTeam", back_populates="team") + agents: Mapped[list[Agent]] = relationship("Agent", back_populates="team") def __repr__(self) -> str: return f"Team(id='{self.id}', name='{self.name}')" diff --git a/backend/rag_solution/models/user.py b/backend/rag_solution/models/user.py index 887bbd45..c01d88b6 100644 --- a/backend/rag_solution/models/user.py +++ b/backend/rag_solution/models/user.py @@ -12,6 +12,7 @@ from rag_solution.file_management.database import Base if TYPE_CHECKING: + from rag_solution.models.agent import Agent from rag_solution.models.conversation import ConversationSession from rag_solution.models.file import File from rag_solution.models.llm_parameters import LLMParameters @@ -53,6 +54,7 @@ class User(Base): ) podcasts: Mapped[list[Podcast]] = relationship("Podcast", back_populates="user", cascade="all, delete-orphan") voices: Mapped[list[Voice]] = relationship("Voice", back_populates="user", cascade="all, delete-orphan") + agents: Mapped[list[Agent]] = relationship("Agent", back_populates="owner", cascade="all, delete-orphan") def __repr__(self) -> str: return ( diff --git a/backend/rag_solution/repository/agent_repository.py b/backend/rag_solution/repository/agent_repository.py new file mode 100644 index 00000000..f5486103 --- /dev/null +++ b/backend/rag_solution/repository/agent_repository.py @@ -0,0 +1,438 @@ +"""Repository for Agent entity database operations. + +This module provides data access for AI agents with SPIFFE-based +workload identity. + +Reference: docs/architecture/spire-integration-architecture.md +""" + +from datetime import UTC, datetime +from typing import Any + +from pydantic import UUID4 +from sqlalchemy import func +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, joinedload + +from core.custom_exceptions import RepositoryError +from core.logging_utils import get_logger +from rag_solution.core.exceptions import AlreadyExistsError, NotFoundError, ValidationError +from rag_solution.models.agent import Agent, AgentStatus +from rag_solution.schemas.agent_schema import AgentInput, AgentOutput, AgentUpdate + +logger = get_logger(__name__) + + +class AgentRepository: + """Repository for handling Agent entity database operations.""" + + def __init__(self: Any, db: Session) -> None: + """Initialize with database session. + + Args: + db: SQLAlchemy database session + """ + self.db = db + + def create( + self, + agent_input: AgentInput, + owner_user_id: UUID4, + spiffe_id: str, + ) -> AgentOutput: + """Create a new agent. + + Args: + agent_input: Agent creation data + owner_user_id: UUID of the owning user + spiffe_id: Generated SPIFFE ID for the agent + + Returns: + Created agent data + + Raises: + AlreadyExistsError: If SPIFFE ID already exists + ValidationError: For validation errors + RepositoryError: For other database errors + """ + try: + # Convert capabilities to list of strings + capabilities = [cap.value for cap in agent_input.capabilities] + + agent = Agent( + spiffe_id=spiffe_id, + agent_type=agent_input.agent_type.value, + name=agent_input.name, + description=agent_input.description, + owner_user_id=owner_user_id, + team_id=agent_input.team_id, + capabilities=capabilities, + agent_metadata=agent_input.metadata or {}, + status=AgentStatus.PENDING, + ) + self.db.add(agent) + self.db.commit() + self.db.refresh(agent) + return AgentOutput.model_validate(agent) + except IntegrityError as e: + self.db.rollback() + if "agents_spiffe_id_key" in str(e) or "ix_agents_spiffe_id" in str(e): + raise AlreadyExistsError("Agent", "spiffe_id", spiffe_id) from e + raise ValidationError("An error occurred while creating the agent") from e + except Exception as e: + self.db.rollback() + logger.error(f"Error creating agent: {e!s}") + raise RepositoryError(f"Failed to create agent: {e!s}") from e + + def get_by_id(self, agent_id: UUID4) -> AgentOutput: + """Fetch agent by ID with relationships. + + Args: + agent_id: UUID of the agent + + Returns: + Agent data + + Raises: + NotFoundError: If agent not found + RepositoryError: For database errors + """ + try: + agent = ( + self.db.query(Agent) + .filter(Agent.id == agent_id) + .options(joinedload(Agent.owner), joinedload(Agent.team)) + .first() + ) + if not agent: + raise NotFoundError("Agent", resource_id=str(agent_id)) + return AgentOutput.model_validate(agent, from_attributes=True) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error getting agent {agent_id}: {e!s}") + raise RepositoryError(f"Failed to get agent by ID: {e!s}") from e + + def get_by_spiffe_id(self, spiffe_id: str) -> AgentOutput: + """Fetch agent by SPIFFE ID. + + Args: + spiffe_id: Full SPIFFE ID string + + Returns: + Agent data + + Raises: + NotFoundError: If agent not found + RepositoryError: For database errors + """ + try: + agent = ( + self.db.query(Agent) + .filter(Agent.spiffe_id == spiffe_id) + .options(joinedload(Agent.owner), joinedload(Agent.team)) + .first() + ) + if not agent: + raise NotFoundError("Agent", identifier=f"spiffe_id={spiffe_id}") + return AgentOutput.model_validate(agent) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error getting agent by SPIFFE ID {spiffe_id}: {e!s}") + raise RepositoryError(f"Failed to get agent by SPIFFE ID: {e!s}") from e + + def get_model_by_spiffe_id(self, spiffe_id: str) -> Agent | None: + """Fetch agent model by SPIFFE ID (for internal use). + + Args: + spiffe_id: Full SPIFFE ID string + + Returns: + Agent model or None if not found + """ + try: + return self.db.query(Agent).filter(Agent.spiffe_id == spiffe_id).first() + except Exception as e: + logger.error(f"Error getting agent model by SPIFFE ID {spiffe_id}: {e!s}") + return None + + def update(self, agent_id: UUID4, agent_update: AgentUpdate) -> AgentOutput: + """Update agent data. + + Args: + agent_id: UUID of the agent + agent_update: Update data + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + RepositoryError: For database errors + """ + try: + agent = self.db.query(Agent).filter(Agent.id == agent_id).first() + if not agent: + raise NotFoundError("Agent", resource_id=str(agent_id)) + + # Update only provided fields + update_data = agent_update.model_dump(exclude_unset=True) + for key, value in update_data.items(): + if key == "capabilities" and value is not None: + # Convert capability enums to strings + value = [cap.value if hasattr(cap, "value") else cap for cap in value] + if key == "status" and value is not None: + value = value.value if hasattr(value, "value") else value + setattr(agent, key, value) + + self.db.commit() + self.db.refresh(agent) + return AgentOutput.model_validate(agent) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error updating agent {agent_id}: {e!s}") + self.db.rollback() + raise RepositoryError(f"Failed to update agent: {e!s}") from e + + def update_status(self, agent_id: UUID4, status: str) -> AgentOutput: + """Update agent status. + + Args: + agent_id: UUID of the agent + status: New status value + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + RepositoryError: For database errors + """ + try: + agent = self.db.query(Agent).filter(Agent.id == agent_id).first() + if not agent: + raise NotFoundError("Agent", resource_id=str(agent_id)) + + agent.status = status + self.db.commit() + self.db.refresh(agent) + return AgentOutput.model_validate(agent) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error updating agent status {agent_id}: {e!s}") + self.db.rollback() + raise RepositoryError(f"Failed to update agent status: {e!s}") from e + + def update_capabilities( + self, + agent_id: UUID4, + add_capabilities: list[str] | None = None, + remove_capabilities: list[str] | None = None, + ) -> AgentOutput: + """Update agent capabilities. + + Args: + agent_id: UUID of the agent + add_capabilities: Capabilities to add + remove_capabilities: Capabilities to remove + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + RepositoryError: For database errors + """ + try: + agent = self.db.query(Agent).filter(Agent.id == agent_id).first() + if not agent: + raise NotFoundError("Agent", resource_id=str(agent_id)) + + current_capabilities = set(agent.capabilities) + + if add_capabilities: + current_capabilities.update(add_capabilities) + if remove_capabilities: + current_capabilities.difference_update(remove_capabilities) + + agent.capabilities = list(current_capabilities) + self.db.commit() + self.db.refresh(agent) + return AgentOutput.model_validate(agent) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error updating agent capabilities {agent_id}: {e!s}") + self.db.rollback() + raise RepositoryError(f"Failed to update agent capabilities: {e!s}") from e + + def update_last_seen(self, agent_id: UUID4) -> None: + """Update the last seen timestamp for an agent. + + Args: + agent_id: UUID of the agent + """ + try: + agent = self.db.query(Agent).filter(Agent.id == agent_id).first() + if agent: + agent.last_seen_at = datetime.now(UTC) + self.db.commit() + except Exception as e: + logger.error(f"Error updating agent last_seen {agent_id}: {e!s}") + self.db.rollback() + + def update_last_seen_by_spiffe_id(self, spiffe_id: str) -> None: + """Update the last seen timestamp for an agent by SPIFFE ID. + + Args: + spiffe_id: SPIFFE ID of the agent + """ + try: + agent = self.db.query(Agent).filter(Agent.spiffe_id == spiffe_id).first() + if agent: + agent.last_seen_at = datetime.now(UTC) + self.db.commit() + except Exception as e: + logger.error(f"Error updating agent last_seen by SPIFFE ID {spiffe_id}: {e!s}") + self.db.rollback() + + def delete(self, agent_id: UUID4) -> bool: + """Delete an agent. + + Args: + agent_id: UUID of the agent + + Returns: + True if deleted, False if not found + """ + try: + result = self.db.query(Agent).filter(Agent.id == agent_id).delete() + self.db.commit() + return result > 0 + except Exception as e: + logger.error(f"Error deleting agent {agent_id}: {e!s}") + self.db.rollback() + raise RepositoryError(f"Failed to delete agent: {e!s}") from e + + def list_agents( + self, + skip: int = 0, + limit: int = 100, + owner_user_id: UUID4 | None = None, + team_id: UUID4 | None = None, + agent_type: str | None = None, + status: str | None = None, + ) -> tuple[list[AgentOutput], int]: + """List agents with optional filters and pagination. + + Args: + skip: Number of records to skip + limit: Maximum number of records to return + owner_user_id: Filter by owner user ID + team_id: Filter by team ID + agent_type: Filter by agent type + status: Filter by status + + Returns: + Tuple of (list of agents, total count) + """ + try: + query = self.db.query(Agent) + + # Apply filters + if owner_user_id: + query = query.filter(Agent.owner_user_id == owner_user_id) + if team_id: + query = query.filter(Agent.team_id == team_id) + if agent_type: + query = query.filter(Agent.agent_type == agent_type) + if status: + query = query.filter(Agent.status == status) + + # Get total count + total = query.count() + + # Apply pagination and fetch + agents = ( + query.options(joinedload(Agent.owner), joinedload(Agent.team)) + .order_by(Agent.created_at.desc()) + .offset(skip) + .limit(limit) + .all() + ) + + return ([AgentOutput.model_validate(agent) for agent in agents], total) + except Exception as e: + logger.error(f"Error listing agents: {e!s}") + raise RepositoryError(f"Failed to list agents: {e!s}") from e + + def list_by_owner(self, owner_user_id: UUID4, skip: int = 0, limit: int = 100) -> list[AgentOutput]: + """List agents owned by a specific user. + + Args: + owner_user_id: UUID of the owner + skip: Number of records to skip + limit: Maximum number of records to return + + Returns: + List of agents + """ + agents, _ = self.list_agents(skip=skip, limit=limit, owner_user_id=owner_user_id) + return agents + + def list_by_team(self, team_id: UUID4, skip: int = 0, limit: int = 100) -> list[AgentOutput]: + """List agents in a specific team. + + Args: + team_id: UUID of the team + skip: Number of records to skip + limit: Maximum number of records to return + + Returns: + List of agents + """ + agents, _ = self.list_agents(skip=skip, limit=limit, team_id=team_id) + return agents + + def list_active_by_type(self, agent_type: str, limit: int = 100) -> list[AgentOutput]: + """List active agents of a specific type. + + Args: + agent_type: Type of agent + limit: Maximum number of records to return + + Returns: + List of active agents + """ + agents, _ = self.list_agents(limit=limit, agent_type=agent_type, status=AgentStatus.ACTIVE) + return agents + + def count_by_owner(self, owner_user_id: UUID4) -> int: + """Count agents owned by a user. + + Args: + owner_user_id: UUID of the owner + + Returns: + Number of agents + """ + try: + return self.db.query(func.count(Agent.id)).filter(Agent.owner_user_id == owner_user_id).scalar() or 0 + except Exception as e: + logger.error(f"Error counting agents for owner {owner_user_id}: {e!s}") + return 0 + + def count_active(self) -> int: + """Count all active agents. + + Returns: + Number of active agents + """ + try: + return self.db.query(func.count(Agent.id)).filter(Agent.status == AgentStatus.ACTIVE).scalar() or 0 + except Exception as e: + logger.error(f"Error counting active agents: {e!s}") + return 0 diff --git a/backend/rag_solution/router/agent_router.py b/backend/rag_solution/router/agent_router.py new file mode 100644 index 00000000..9912ed27 --- /dev/null +++ b/backend/rag_solution/router/agent_router.py @@ -0,0 +1,605 @@ +"""Router for Agent API endpoints. + +This module provides REST API endpoints for managing AI agents with +SPIFFE-based workload identity. + +Endpoints: + POST /api/agents/register - Register a new agent + POST /api/agents - Create a new agent + GET /api/agents - List agents + GET /api/agents/{agent_id} - Get agent by ID + GET /api/agents/spiffe/{spiffe_id:path} - Get agent by SPIFFE ID + PUT /api/agents/{agent_id} - Update agent + DELETE /api/agents/{agent_id} - Delete agent + POST /api/agents/{agent_id}/status - Update agent status + POST /api/agents/{agent_id}/capabilities - Update agent capabilities + POST /api/agents/validate - Validate SPIFFE JWT-SVID + +Reference: docs/architecture/spire-integration-architecture.md +""" + +from fastapi import APIRouter, Depends, HTTPException, Query, Request, status +from pydantic import UUID4 +from sqlalchemy.orm import Session + +from core.logging_utils import get_logger +from rag_solution.core.exceptions import AlreadyExistsError, NotFoundError, ValidationError +from rag_solution.file_management.database import get_db +from rag_solution.schemas.agent_schema import ( + AgentCapabilityUpdate, + AgentInput, + AgentListResponse, + AgentOutput, + AgentRegistrationRequest, + AgentRegistrationResponse, + AgentStatusUpdate, + AgentUpdate, + SPIFFEValidationRequest, + SPIFFEValidationResponse, +) +from rag_solution.services.agent_service import AgentService + +logger = get_logger(__name__) + +router = APIRouter( + prefix="/api/agents", + tags=["agents"], + responses={ + 401: {"description": "Unauthorized"}, + 403: {"description": "Forbidden"}, + 404: {"description": "Agent not found"}, + }, +) + + +def get_current_user_id(request: Request) -> UUID4: + """Extract current user ID from request state. + + Args: + request: FastAPI request object + + Returns: + User UUID + + Raises: + HTTPException: If user not authenticated + """ + user = getattr(request.state, "user", None) + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Authentication required", + ) + + # Check if this is a user (not an agent) + identity_type = user.get("identity_type", "user") + if identity_type == "agent": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Agent authentication not allowed for this endpoint", + ) + + user_id = user.get("uuid") or user.get("id") + if not user_id: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User ID not found in token", + ) + + return UUID4(user_id) + + +@router.post( + "/register", + response_model=AgentRegistrationResponse, + status_code=status.HTTP_201_CREATED, + summary="Register a new agent", + description="Register a new AI agent and get SPIFFE ID with registration instructions.", +) +async def register_agent( + request: Request, + registration_request: AgentRegistrationRequest, + db: Session = Depends(get_db), +) -> AgentRegistrationResponse: + """Register a new agent with SPIFFE ID generation. + + This endpoint creates an agent record and returns the SPIFFE ID + along with instructions for completing SPIRE registration. + + Args: + request: FastAPI request object + registration_request: Agent registration data + db: Database session + + Returns: + Registration response with agent data and SPIFFE ID + """ + try: + owner_user_id = get_current_user_id(request) + service = AgentService(db) + return service.register_agent(registration_request, owner_user_id) + except AlreadyExistsError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) from e + except ValidationError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error registering agent: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to register agent", + ) from e + + +@router.post( + "", + response_model=AgentOutput, + status_code=status.HTTP_201_CREATED, + summary="Create a new agent", + description="Create a new AI agent with the specified configuration.", +) +async def create_agent( + request: Request, + agent_input: AgentInput, + db: Session = Depends(get_db), +) -> AgentOutput: + """Create a new agent. + + Args: + request: FastAPI request object + agent_input: Agent creation data + db: Database session + + Returns: + Created agent data + """ + try: + owner_user_id = get_current_user_id(request) + service = AgentService(db) + return service.create_agent(agent_input, owner_user_id) + except AlreadyExistsError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) from e + except ValidationError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error creating agent: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to create agent", + ) from e + + +@router.get( + "", + response_model=AgentListResponse, + summary="List agents", + description="List agents with optional filtering and pagination.", +) +async def list_agents( + request: Request, + skip: int = Query(0, ge=0, description="Number of records to skip"), + limit: int = Query(100, ge=1, le=1000, description="Maximum records to return"), + agent_type: str | None = Query(None, description="Filter by agent type"), + agent_status: str | None = Query(None, alias="status", description="Filter by status"), + team_id: UUID4 | None = Query(None, description="Filter by team ID"), + mine_only: bool = Query(False, description="Only show agents owned by current user"), + db: Session = Depends(get_db), +) -> AgentListResponse: + """List agents with filtering and pagination. + + Args: + request: FastAPI request object + skip: Pagination offset + limit: Maximum records + agent_type: Filter by type + agent_status: Filter by status + team_id: Filter by team + mine_only: Only show owned agents + db: Database session + + Returns: + Paginated agent list + """ + try: + owner_user_id = None + if mine_only: + owner_user_id = get_current_user_id(request) + + service = AgentService(db) + return service.list_agents( + skip=skip, + limit=limit, + owner_user_id=owner_user_id, + team_id=team_id, + agent_type=agent_type, + status=agent_status, + ) + except Exception as e: + logger.error("Error listing agents: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to list agents", + ) from e + + +@router.get( + "/{agent_id}", + response_model=AgentOutput, + summary="Get agent by ID", + description="Get a specific agent by its UUID.", +) +async def get_agent( + agent_id: UUID4, + db: Session = Depends(get_db), +) -> AgentOutput: + """Get an agent by ID. + + Args: + agent_id: UUID of the agent + db: Database session + + Returns: + Agent data + """ + try: + service = AgentService(db) + return service.get_agent(agent_id) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error getting agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to get agent", + ) from e + + +@router.get( + "/spiffe/{spiffe_id:path}", + response_model=AgentOutput, + summary="Get agent by SPIFFE ID", + description="Get a specific agent by its SPIFFE ID.", +) +async def get_agent_by_spiffe_id( + spiffe_id: str, + db: Session = Depends(get_db), +) -> AgentOutput: + """Get an agent by SPIFFE ID. + + Args: + spiffe_id: Full SPIFFE ID (e.g., spiffe://domain/agent/type/id) + db: Database session + + Returns: + Agent data + """ + try: + service = AgentService(db) + return service.get_agent_by_spiffe_id(spiffe_id) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error getting agent by SPIFFE ID %s: %s", spiffe_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to get agent", + ) from e + + +@router.put( + "/{agent_id}", + response_model=AgentOutput, + summary="Update agent", + description="Update an existing agent's configuration.", +) +async def update_agent( + agent_id: UUID4, + agent_update: AgentUpdate, + db: Session = Depends(get_db), +) -> AgentOutput: + """Update an agent. + + Args: + agent_id: UUID of the agent + agent_update: Update data + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.update_agent(agent_id, agent_update) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except ValidationError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error updating agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to update agent", + ) from e + + +@router.delete( + "/{agent_id}", + status_code=status.HTTP_204_NO_CONTENT, + summary="Delete agent", + description="Delete an agent and revoke its SPIFFE ID.", +) +async def delete_agent( + agent_id: UUID4, + db: Session = Depends(get_db), +) -> None: + """Delete an agent. + + Args: + agent_id: UUID of the agent + db: Database session + """ + try: + service = AgentService(db) + deleted = service.delete_agent(agent_id) + if not deleted: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Agent {agent_id} not found", + ) + except HTTPException: + raise + except Exception as e: + logger.error("Error deleting agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to delete agent", + ) from e + + +@router.post( + "/{agent_id}/status", + response_model=AgentOutput, + summary="Update agent status", + description="Update an agent's status (active, suspended, revoked).", +) +async def update_agent_status( + agent_id: UUID4, + status_update: AgentStatusUpdate, + db: Session = Depends(get_db), +) -> AgentOutput: + """Update agent status. + + Args: + agent_id: UUID of the agent + status_update: Status update request + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.update_agent_status(agent_id, status_update) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error updating agent status %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to update agent status", + ) from e + + +@router.post( + "/{agent_id}/capabilities", + response_model=AgentOutput, + summary="Update agent capabilities", + description="Add or remove capabilities from an agent.", +) +async def update_agent_capabilities( + agent_id: UUID4, + capability_update: AgentCapabilityUpdate, + db: Session = Depends(get_db), +) -> AgentOutput: + """Update agent capabilities. + + Args: + agent_id: UUID of the agent + capability_update: Capabilities to add/remove + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.update_agent_capabilities(agent_id, capability_update) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error updating agent capabilities %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to update agent capabilities", + ) from e + + +@router.post( + "/validate", + response_model=SPIFFEValidationResponse, + summary="Validate SPIFFE JWT-SVID", + description="Validate a SPIFFE JWT-SVID token and return agent identity. Requires authentication.", +) +async def validate_spiffe_token( + request: Request, + validation_request: SPIFFEValidationRequest, + db: Session = Depends(get_db), +) -> SPIFFEValidationResponse: + """Validate a SPIFFE JWT-SVID. + + This endpoint allows validating JWT-SVIDs and extracting agent identity. + Requires authentication to prevent SPIFFE ID enumeration attacks. + + Args: + request: FastAPI request object + validation_request: Token to validate + db: Database session + + Returns: + Validation response with agent identity + + Raises: + HTTPException: If user not authenticated + """ + # Require authentication to prevent enumeration attacks + _ = get_current_user_id(request) + + try: + service = AgentService(db) + return service.validate_jwt_svid(validation_request) + except Exception as e: + logger.error("Error validating SPIFFE token: %s", e) + return SPIFFEValidationResponse( + valid=False, + error=str(e), + ) + + +@router.post( + "/{agent_id}/suspend", + response_model=AgentOutput, + summary="Suspend agent", + description="Suspend an agent, preventing it from authenticating.", +) +async def suspend_agent( + agent_id: UUID4, + reason: str | None = Query(None, description="Reason for suspension"), + db: Session = Depends(get_db), +) -> AgentOutput: + """Suspend an agent. + + Args: + agent_id: UUID of the agent + reason: Optional suspension reason + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.suspend_agent(agent_id, reason) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error suspending agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to suspend agent", + ) from e + + +@router.post( + "/{agent_id}/activate", + response_model=AgentOutput, + summary="Activate agent", + description="Activate a suspended agent.", +) +async def activate_agent( + agent_id: UUID4, + reason: str | None = Query(None, description="Reason for activation"), + db: Session = Depends(get_db), +) -> AgentOutput: + """Activate an agent. + + Args: + agent_id: UUID of the agent + reason: Optional activation reason + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.activate_agent(agent_id, reason) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error activating agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to activate agent", + ) from e + + +@router.post( + "/{agent_id}/revoke", + response_model=AgentOutput, + summary="Revoke agent", + description="Revoke an agent's credentials permanently.", +) +async def revoke_agent( + agent_id: UUID4, + reason: str | None = Query(None, description="Reason for revocation"), + db: Session = Depends(get_db), +) -> AgentOutput: + """Revoke an agent. + + Args: + agent_id: UUID of the agent + reason: Optional revocation reason + db: Database session + + Returns: + Updated agent data + """ + try: + service = AgentService(db) + return service.revoke_agent(agent_id, reason) + except NotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from e + except Exception as e: + logger.error("Error revoking agent %s: %s", agent_id, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to revoke agent", + ) from e diff --git a/backend/rag_solution/schemas/agent_schema.py b/backend/rag_solution/schemas/agent_schema.py new file mode 100644 index 00000000..e7ebd18f --- /dev/null +++ b/backend/rag_solution/schemas/agent_schema.py @@ -0,0 +1,291 @@ +"""Pydantic schemas for Agent entity. + +This module defines the request/response schemas for the Agent API, +supporting SPIFFE-based workload identity for AI agents. + +Reference: docs/architecture/spire-integration-architecture.md +""" + +from datetime import datetime +from enum import Enum +from typing import Self + +from pydantic import UUID4, BaseModel, ConfigDict, Field, model_validator + + +class AgentType(str, Enum): + """Enumeration of supported agent types.""" + + SEARCH_ENRICHER = "search-enricher" + COT_REASONING = "cot-reasoning" + QUESTION_DECOMPOSER = "question-decomposer" + SOURCE_ATTRIBUTION = "source-attribution" + ENTITY_EXTRACTION = "entity-extraction" + ANSWER_SYNTHESIS = "answer-synthesis" + CUSTOM = "custom" + + +class AgentStatus(str, Enum): + """Enumeration of agent statuses.""" + + ACTIVE = "active" + SUSPENDED = "suspended" + REVOKED = "revoked" + PENDING = "pending" + + +class AgentCapability(str, Enum): + """Enumeration of agent capabilities for access control.""" + + MCP_TOOL_INVOKE = "mcp:tool:invoke" + SEARCH_READ = "search:read" + SEARCH_WRITE = "search:write" + LLM_INVOKE = "llm:invoke" + PIPELINE_EXECUTE = "pipeline:execute" + DOCUMENT_READ = "document:read" + DOCUMENT_WRITE = "document:write" + COT_INVOKE = "cot:invoke" + AGENT_SPAWN = "agent:spawn" + ADMIN = "admin" + + +# Define allowed capabilities per agent type for security +ALLOWED_CAPABILITIES_BY_TYPE: dict[AgentType, set[AgentCapability]] = { + AgentType.SEARCH_ENRICHER: { + AgentCapability.SEARCH_READ, + AgentCapability.SEARCH_WRITE, + AgentCapability.DOCUMENT_READ, + AgentCapability.LLM_INVOKE, + }, + AgentType.COT_REASONING: { + AgentCapability.COT_INVOKE, + AgentCapability.LLM_INVOKE, + AgentCapability.SEARCH_READ, + AgentCapability.DOCUMENT_READ, + }, + AgentType.QUESTION_DECOMPOSER: { + AgentCapability.LLM_INVOKE, + AgentCapability.SEARCH_READ, + }, + AgentType.SOURCE_ATTRIBUTION: { + AgentCapability.DOCUMENT_READ, + AgentCapability.SEARCH_READ, + }, + AgentType.ENTITY_EXTRACTION: { + AgentCapability.LLM_INVOKE, + AgentCapability.DOCUMENT_READ, + }, + AgentType.ANSWER_SYNTHESIS: { + AgentCapability.LLM_INVOKE, + AgentCapability.SEARCH_READ, + AgentCapability.DOCUMENT_READ, + }, + AgentType.CUSTOM: { + # Custom agents can have any non-admin capabilities + AgentCapability.MCP_TOOL_INVOKE, + AgentCapability.SEARCH_READ, + AgentCapability.SEARCH_WRITE, + AgentCapability.LLM_INVOKE, + AgentCapability.PIPELINE_EXECUTE, + AgentCapability.DOCUMENT_READ, + AgentCapability.DOCUMENT_WRITE, + AgentCapability.COT_INVOKE, + AgentCapability.AGENT_SPAWN, + # Note: ADMIN is never allowed for CUSTOM agents + }, +} + + +class AgentInput(BaseModel): + """Schema for creating a new agent. + + Attributes: + agent_type: Type of agent (from AgentType enum) + name: Human-readable name for the agent + description: Optional description of the agent's purpose + team_id: Optional team to associate the agent with + capabilities: List of capabilities to grant the agent + metadata: Optional additional metadata + """ + + agent_type: AgentType = Field(..., description="Type of agent") + name: str = Field(..., min_length=1, max_length=255, description="Human-readable agent name") + description: str | None = Field(default=None, max_length=2000, description="Agent description") + team_id: UUID4 | None = Field(default=None, description="Optional team association") + capabilities: list[AgentCapability] = Field( + default_factory=list, + description="Agent capabilities", + ) + metadata: dict | None = Field(default=None, description="Additional metadata") + + @model_validator(mode="after") + def validate_capabilities_for_agent_type(self) -> Self: + """Validate that requested capabilities are allowed for this agent type.""" + if not self.capabilities: + return self + + allowed = ALLOWED_CAPABILITIES_BY_TYPE.get(self.agent_type, set()) + requested = set(self.capabilities) + disallowed = requested - allowed + + if disallowed: + disallowed_names = [cap.value for cap in disallowed] + raise ValueError( + f"Capabilities {disallowed_names} are not allowed for agent type '{self.agent_type.value}'. " + f"Allowed capabilities: {[cap.value for cap in allowed]}" + ) + return self + + +class AgentUpdate(BaseModel): + """Schema for updating an existing agent. + + All fields are optional - only provided fields will be updated. + """ + + name: str | None = Field(default=None, min_length=1, max_length=255) + description: str | None = Field(default=None, max_length=2000) + team_id: UUID4 | None = Field(default=None) + capabilities: list[AgentCapability] | None = Field(default=None) + metadata: dict | None = Field(default=None) + status: AgentStatus | None = Field(default=None) + + +class AgentOutput(BaseModel): + """Schema for agent response data. + + Attributes: + id: Unique identifier for the agent + spiffe_id: Full SPIFFE ID + agent_type: Type of agent + name: Human-readable name + description: Agent description + owner_user_id: UUID of the owning user + team_id: Optional team association + capabilities: List of granted capabilities + metadata: Additional metadata + status: Current agent status + created_at: Creation timestamp + updated_at: Last update timestamp + last_seen_at: Last authentication timestamp + """ + + id: UUID4 + spiffe_id: str + agent_type: str + name: str + description: str | None + owner_user_id: UUID4 + team_id: UUID4 | None + capabilities: list[str] + metadata: dict = Field(default_factory=dict, validation_alias="agent_metadata") + status: str + created_at: datetime + updated_at: datetime + last_seen_at: datetime | None + + model_config = ConfigDict(from_attributes=True, populate_by_name=True) + + +class AgentListResponse(BaseModel): + """Schema for paginated agent list response.""" + + agents: list[AgentOutput] + total: int + skip: int + limit: int + + +class AgentStatusUpdate(BaseModel): + """Schema for updating agent status.""" + + status: AgentStatus = Field(..., description="New agent status") + reason: str | None = Field(default=None, max_length=500, description="Reason for status change") + + +class AgentCapabilityUpdate(BaseModel): + """Schema for updating agent capabilities.""" + + add_capabilities: list[AgentCapability] = Field( + default_factory=list, + description="Capabilities to add", + ) + remove_capabilities: list[AgentCapability] = Field( + default_factory=list, + description="Capabilities to remove", + ) + + +class AgentRegistrationRequest(BaseModel): + """Schema for registering a new agent with SPIFFE ID generation. + + This is used when creating an agent that will obtain SVIDs from SPIRE. + """ + + agent_type: AgentType = Field(..., description="Type of agent") + name: str = Field(..., min_length=1, max_length=255, description="Human-readable agent name") + description: str | None = Field(default=None, max_length=2000) + team_id: UUID4 | None = Field(default=None) + capabilities: list[AgentCapability] = Field(default_factory=list) + metadata: dict | None = Field(default=None) + # SPIFFE-specific fields + trust_domain: str | None = Field( + default=None, + description="Trust domain (must match server configuration or be omitted)", + ) + custom_path: str | None = Field( + default=None, + description="Custom SPIFFE ID path suffix (generated if not provided)", + ) + + @model_validator(mode="after") + def validate_capabilities_for_agent_type(self) -> Self: + """Validate that requested capabilities are allowed for this agent type.""" + if not self.capabilities: + return self + + allowed = ALLOWED_CAPABILITIES_BY_TYPE.get(self.agent_type, set()) + requested = set(self.capabilities) + disallowed = requested - allowed + + if disallowed: + disallowed_names = [cap.value for cap in disallowed] + raise ValueError( + f"Capabilities {disallowed_names} are not allowed for agent type '{self.agent_type.value}'. " + f"Allowed capabilities: {[cap.value for cap in allowed]}" + ) + return self + + +class AgentRegistrationResponse(BaseModel): + """Response schema for agent registration. + + Includes the SPIFFE ID that should be used for SPIRE registration entry. + """ + + agent: AgentOutput + spiffe_id: str = Field(..., description="SPIFFE ID for SPIRE registration") + registration_instructions: str = Field( + ..., + description="Instructions for completing SPIRE registration", + ) + + +class SPIFFEValidationRequest(BaseModel): + """Schema for validating a SPIFFE JWT-SVID.""" + + token: str = Field(..., description="JWT-SVID token to validate") + required_audience: str | None = Field(default=None, description="Required audience claim") + + +class SPIFFEValidationResponse(BaseModel): + """Response schema for SPIFFE JWT-SVID validation.""" + + valid: bool + spiffe_id: str | None = None + agent_type: str | None = None + agent_id: str | None = None + capabilities: list[str] = Field(default_factory=list) + audiences: list[str] = Field(default_factory=list) + expires_at: datetime | None = None + error: str | None = None diff --git a/backend/rag_solution/services/agent_service.py b/backend/rag_solution/services/agent_service.py new file mode 100644 index 00000000..96b9720a --- /dev/null +++ b/backend/rag_solution/services/agent_service.py @@ -0,0 +1,505 @@ +"""Service layer for Agent entity operations. + +This module provides business logic for managing AI agents with +SPIFFE-based workload identity. + +Reference: docs/architecture/spire-integration-architecture.md +""" + +import uuid + +from pydantic import UUID4 +from sqlalchemy.orm import Session + +from core.logging_utils import get_logger +from core.spiffe_auth import ( + AgentCapability, + AgentPrincipal, + AgentType, + SPIFFEConfig, + build_spiffe_id, + get_spiffe_authenticator, +) +from rag_solution.models.agent import AgentStatus +from rag_solution.repository.agent_repository import AgentRepository +from rag_solution.schemas.agent_schema import ( + AgentCapabilityUpdate, + AgentInput, + AgentListResponse, + AgentOutput, + AgentRegistrationRequest, + AgentRegistrationResponse, + AgentStatusUpdate, + AgentUpdate, + SPIFFEValidationRequest, + SPIFFEValidationResponse, +) + +logger = get_logger(__name__) + + +class AgentService: + """Service for managing AI agent identities. + + This service handles: + - Agent registration with SPIFFE ID generation + - Agent CRUD operations + - SPIFFE JWT-SVID validation + - Capability management + - Agent status management + """ + + def __init__(self, db: Session) -> None: + """Initialize the agent service. + + Args: + db: SQLAlchemy database session + """ + self.db = db + self.repository = AgentRepository(db) + self._config = SPIFFEConfig.from_env() + self._authenticator = get_spiffe_authenticator() + + def register_agent( + self, + request: AgentRegistrationRequest, + owner_user_id: UUID4, + ) -> AgentRegistrationResponse: + """Register a new agent with SPIFFE ID generation. + + This creates an agent record and generates the SPIFFE ID that should + be used when configuring the SPIRE registration entry. + + Args: + request: Agent registration request + owner_user_id: UUID of the owning user + + Returns: + Registration response with agent data and SPIFFE ID + """ + # Generate unique agent instance ID + agent_instance_id = str(uuid.uuid4())[:8] + + # Validate and determine trust domain - users cannot specify arbitrary trust domains + if request.trust_domain and request.trust_domain != self._config.trust_domain: + logger.warning( + f"Rejected trust domain '{request.trust_domain}' from user - " + f"must match configured domain '{self._config.trust_domain}'" + ) + raise ValueError( + f"Trust domain must be '{self._config.trust_domain}' or omitted. " + "Custom trust domains are not allowed for security reasons." + ) + trust_domain = self._config.trust_domain + + # Generate SPIFFE ID + if request.custom_path: + spiffe_id = f"spiffe://{trust_domain}/agent/{request.custom_path}" + else: + spiffe_id = build_spiffe_id( + trust_domain=trust_domain, + agent_type=AgentType(request.agent_type.value), + agent_id=agent_instance_id, + ) + + # Create agent input + agent_input = AgentInput( + agent_type=request.agent_type, + name=request.name, + description=request.description, + team_id=request.team_id, + capabilities=request.capabilities, + metadata=request.metadata, + ) + + # Create agent in database + agent = self.repository.create( + agent_input=agent_input, + owner_user_id=owner_user_id, + spiffe_id=spiffe_id, + ) + + # Generate registration instructions + instructions = self._generate_registration_instructions( + spiffe_id=spiffe_id, + agent_type=request.agent_type.value, + ) + + return AgentRegistrationResponse( + agent=agent, + spiffe_id=spiffe_id, + registration_instructions=instructions, + ) + + def _generate_registration_instructions(self, spiffe_id: str, agent_type: str) -> str: + """Generate SPIRE registration instructions for an agent. + + Args: + spiffe_id: The agent's SPIFFE ID + agent_type: Type of agent + + Returns: + Registration instructions string + """ + return f"""To complete agent registration, create a SPIRE registration entry: + +For Kubernetes: +``` +spire-server entry create \\ + -spiffeID {spiffe_id} \\ + -parentID spiffe://{self._config.trust_domain}/spire/agent/k8s/node \\ + -selector k8s:ns:rag-modulo \\ + -selector k8s:sa:{agent_type}-agent \\ + -selector k8s:pod-label:app:{agent_type} +``` + +For Docker/Unix: +``` +spire-server entry create \\ + -spiffeID {spiffe_id} \\ + -parentID spiffe://{self._config.trust_domain}/spire/agent/unix \\ + -selector unix:uid:1000 \\ + -selector docker:label:app:{agent_type} +``` + +After creating the registration entry, the agent workload can fetch SVIDs via: +```python +from spiffe import JwtSource + +with JwtSource() as source: + svid = source.fetch_svid(audience={{"backend-api", "mcp-gateway"}}) + # Use svid.token in Authorization header +``` +""" + + def create_agent( + self, + agent_input: AgentInput, + owner_user_id: UUID4, + spiffe_id: str | None = None, + ) -> AgentOutput: + """Create a new agent. + + Args: + agent_input: Agent creation data + owner_user_id: UUID of the owning user + spiffe_id: Optional pre-generated SPIFFE ID + + Returns: + Created agent data + """ + if not spiffe_id: + # Generate SPIFFE ID + agent_instance_id = str(uuid.uuid4())[:8] + spiffe_id = build_spiffe_id( + trust_domain=self._config.trust_domain, + agent_type=AgentType(agent_input.agent_type.value), + agent_id=agent_instance_id, + ) + + return self.repository.create( + agent_input=agent_input, + owner_user_id=owner_user_id, + spiffe_id=spiffe_id, + ) + + def get_agent(self, agent_id: UUID4) -> AgentOutput: + """Get an agent by ID. + + Args: + agent_id: UUID of the agent + + Returns: + Agent data + + Raises: + NotFoundError: If agent not found + """ + return self.repository.get_by_id(agent_id) + + def get_agent_by_spiffe_id(self, spiffe_id: str) -> AgentOutput: + """Get an agent by SPIFFE ID. + + Args: + spiffe_id: Full SPIFFE ID string + + Returns: + Agent data + + Raises: + NotFoundError: If agent not found + """ + return self.repository.get_by_spiffe_id(spiffe_id) + + def update_agent(self, agent_id: UUID4, agent_update: AgentUpdate) -> AgentOutput: + """Update an agent. + + Args: + agent_id: UUID of the agent + agent_update: Update data + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + """ + return self.repository.update(agent_id, agent_update) + + def delete_agent(self, agent_id: UUID4) -> bool: + """Delete an agent. + + Args: + agent_id: UUID of the agent + + Returns: + True if deleted + """ + return self.repository.delete(agent_id) + + def update_agent_status( + self, + agent_id: UUID4, + status_update: AgentStatusUpdate, + ) -> AgentOutput: + """Update agent status. + + Args: + agent_id: UUID of the agent + status_update: Status update request + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + """ + # Log status change with reason + logger.info( + "Updating agent %s status to %s. Reason: %s", + agent_id, + status_update.status.value, + status_update.reason or "No reason provided", + ) + return self.repository.update_status(agent_id, status_update.status.value) + + def update_agent_capabilities( + self, + agent_id: UUID4, + capability_update: AgentCapabilityUpdate, + ) -> AgentOutput: + """Update agent capabilities. + + Args: + agent_id: UUID of the agent + capability_update: Capability update request + + Returns: + Updated agent data + + Raises: + NotFoundError: If agent not found + """ + add_caps = [cap.value for cap in capability_update.add_capabilities] + remove_caps = [cap.value for cap in capability_update.remove_capabilities] + + return self.repository.update_capabilities( + agent_id=agent_id, + add_capabilities=add_caps, + remove_capabilities=remove_caps, + ) + + def list_agents( + self, + skip: int = 0, + limit: int = 100, + owner_user_id: UUID4 | None = None, + team_id: UUID4 | None = None, + agent_type: str | None = None, + status: str | None = None, + ) -> AgentListResponse: + """List agents with filtering and pagination. + + Args: + skip: Number of records to skip + limit: Maximum number of records to return + owner_user_id: Filter by owner + team_id: Filter by team + agent_type: Filter by type + status: Filter by status + + Returns: + Paginated agent list + """ + agents, total = self.repository.list_agents( + skip=skip, + limit=limit, + owner_user_id=owner_user_id, + team_id=team_id, + agent_type=agent_type, + status=status, + ) + return AgentListResponse( + agents=agents, + total=total, + skip=skip, + limit=limit, + ) + + def list_user_agents(self, owner_user_id: UUID4, skip: int = 0, limit: int = 100) -> list[AgentOutput]: + """List agents owned by a user. + + Args: + owner_user_id: UUID of the owner + skip: Number of records to skip + limit: Maximum number of records to return + + Returns: + List of agents + """ + return self.repository.list_by_owner(owner_user_id, skip, limit) + + def validate_jwt_svid(self, request: SPIFFEValidationRequest) -> SPIFFEValidationResponse: + """Validate a SPIFFE JWT-SVID. + + This validates the token and returns agent identity information. + + Args: + request: Validation request with token + + Returns: + Validation response + """ + try: + principal = self._authenticator.validate_jwt_svid( + token=request.token, + required_audience=request.required_audience, + ) + + if principal is None: + return SPIFFEValidationResponse( + valid=False, + error="Invalid or expired JWT-SVID", + ) + + # Update last seen for the agent + self.repository.update_last_seen_by_spiffe_id(principal.spiffe_id) + + return SPIFFEValidationResponse( + valid=True, + spiffe_id=principal.spiffe_id, + agent_type=principal.agent_type.value, + agent_id=principal.agent_id, + capabilities=[cap.value for cap in principal.capabilities], + audiences=principal.audiences, + expires_at=principal.expires_at, + ) + except Exception as e: + logger.error("Error validating JWT-SVID: %s", e) + return SPIFFEValidationResponse( + valid=False, + error=str(e), + ) + + def get_agent_principal_from_token(self, token: str) -> AgentPrincipal | None: + """Extract agent principal from a JWT-SVID token. + + This is used by the authentication middleware to identify agents. + + Args: + token: JWT-SVID token + + Returns: + AgentPrincipal if valid, None otherwise + """ + principal = self._authenticator.validate_jwt_svid(token) + + if principal is None: + return None + + # Check if agent exists and is active + agent_model = self.repository.get_model_by_spiffe_id(principal.spiffe_id) + if agent_model is None: + logger.warning("Agent with SPIFFE ID %s not found in database", principal.spiffe_id) + # Allow unknown agents if SPIFFE validation passed (for new agents) + return principal + + if not agent_model.is_active(): + logger.warning("Agent %s is not active (status: %s)", principal.spiffe_id, agent_model.status) + return None + + # Update last seen + agent_model.update_last_seen() + self.db.commit() + + # Merge database capabilities with SVID capabilities + db_capabilities = [ + AgentCapability(cap) for cap in agent_model.capabilities if cap in AgentCapability.__members__ + ] + principal.capabilities = list(set(principal.capabilities + db_capabilities)) + + return principal + + def suspend_agent(self, agent_id: UUID4, reason: str | None = None) -> AgentOutput: + """Suspend an agent. + + Args: + agent_id: UUID of the agent + reason: Optional reason for suspension + + Returns: + Updated agent data + """ + return self.update_agent_status( + agent_id, + AgentStatusUpdate(status=AgentStatus.SUSPENDED, reason=reason), + ) + + def activate_agent(self, agent_id: UUID4, reason: str | None = None) -> AgentOutput: + """Activate an agent. + + Args: + agent_id: UUID of the agent + reason: Optional reason for activation + + Returns: + Updated agent data + """ + return self.update_agent_status( + agent_id, + AgentStatusUpdate(status=AgentStatus.ACTIVE, reason=reason), + ) + + def revoke_agent(self, agent_id: UUID4, reason: str | None = None) -> AgentOutput: + """Revoke an agent's credentials. + + Args: + agent_id: UUID of the agent + reason: Optional reason for revocation + + Returns: + Updated agent data + """ + return self.update_agent_status( + agent_id, + AgentStatusUpdate(status=AgentStatus.REVOKED, reason=reason), + ) + + def get_agent_count_for_user(self, owner_user_id: UUID4) -> int: + """Get the number of agents owned by a user. + + Args: + owner_user_id: UUID of the owner + + Returns: + Agent count + """ + return self.repository.count_by_owner(owner_user_id) + + def get_active_agent_count(self) -> int: + """Get the total number of active agents. + + Returns: + Active agent count + """ + return self.repository.count_active() diff --git a/deployment/spire/README.md b/deployment/spire/README.md new file mode 100644 index 00000000..40f1077d --- /dev/null +++ b/deployment/spire/README.md @@ -0,0 +1,179 @@ +# SPIRE Configuration for RAG Modulo + +This directory contains configuration files for deploying SPIRE (SPIFFE Runtime Environment) +to manage workload identities for AI agents in RAG Modulo. + +## Overview + +SPIRE provides cryptographic identity to workloads (including AI agents) without requiring +secrets to be distributed or embedded in applications. Each agent receives a SPIFFE ID +and can obtain short-lived JWT-SVIDs (JSON Web Token SPIFFE Verifiable Identity Documents) +for authentication. + +## Architecture Reference + +See the full architecture documentation at: +`docs/architecture/spire-integration-architecture.md` + +## Files + +- `server.conf` - SPIRE Server configuration +- `agent.conf` - SPIRE Agent configuration +- `docker-compose.spire.yml` - Docker Compose for local development + +## Quick Start (Development) + +### Prerequisites + +- Docker and Docker Compose +- Network access to pull SPIRE images from `ghcr.io/spiffe` + +### Start SPIRE + +```bash +# Start SPIRE services +docker compose -f docker-compose.spire.yml up -d + +# Check server health +curl http://localhost:8080/live + +# Check agent health +curl http://localhost:8089/live + +# View server logs +docker logs rag-modulo-spire-server +``` + +### Create Registration Entries + +Registration entries tell SPIRE which workloads can obtain which identities. + +```bash +# Connect to SPIRE server container +docker exec -it rag-modulo-spire-server /bin/sh + +# Create an entry for a search-enricher agent +spire-server entry create \ + -socketPath /tmp/spire-server/private/api.sock \ + -spiffeID spiffe://rag-modulo.example.com/agent/search-enricher/agent-001 \ + -parentID spiffe://rag-modulo.example.com/spire/agent/unix \ + -selector docker:label:agent-type:search-enricher \ + -selector docker:label:agent-id:agent-001 + +# List all entries +spire-server entry show -socketPath /tmp/spire-server/private/api.sock +``` + +### Fetch SVIDs from Workloads + +Once registration entries are created, workloads can fetch SVIDs: + +```python +from spiffe import JwtSource + +# Create JWT source connected to SPIRE Agent +with JwtSource() as source: + # Fetch JWT-SVID for authentication to backend-api + svid = source.fetch_svid(audience={"backend-api"}) + + # Use the token in Authorization header + headers = {"Authorization": f"Bearer {svid.token}"} + # Make authenticated request... +``` + +## Kubernetes Deployment + +For Kubernetes, use the SPIRE Helm charts or Kubernetes manifests: + +### Using Helm + +```bash +helm repo add spiffe https://spiffe.github.io/helm-charts-hardened/ +helm install spire spiffe/spire \ + --namespace spire \ + --create-namespace \ + --values values-kubernetes.yaml +``` + +### Kubernetes Registration Entries + +```bash +# Create entry for backend-api +kubectl exec -n spire spire-server-0 -- \ + spire-server entry create \ + -spiffeID spiffe://rag-modulo.example.com/workload/backend-api \ + -parentID spiffe://rag-modulo.example.com/spire/agent/k8s/node \ + -selector k8s:ns:rag-modulo \ + -selector k8s:sa:backend-api + +# Create entry for search-enricher agents +kubectl exec -n spire spire-server-0 -- \ + spire-server entry create \ + -spiffeID spiffe://rag-modulo.example.com/agent/search-enricher/default \ + -parentID spiffe://rag-modulo.example.com/spire/agent/k8s/node \ + -selector k8s:ns:rag-modulo \ + -selector k8s:sa:search-enricher-agent \ + -selector k8s:pod-label:agent-type:search-enricher +``` + +## Trust Domain + +The default trust domain is `rag-modulo.example.com`. For production deployments, +change this to match your organization's domain (e.g., `rag-modulo.yourcompany.com`). + +Update the following files: + +- `server.conf` - `trust_domain` setting +- `agent.conf` - `trust_domain` setting +- Registration entries - SPIFFE ID prefixes + +## Monitoring + +SPIRE exposes Prometheus metrics: + +- Server: +- Agent: + +Key metrics to monitor: + +- `spire_server_ca_manager_x509_ca_rotate_total` - CA rotations +- `spire_server_svid_issued_total` - SVIDs issued +- `spire_agent_svid_rotations_total` - Agent SVID rotations +- `workload_api_connection_total` - Workload API connections + +## Security Considerations + +1. **Production Trust Bundle**: Use proper CA certificates in production +2. **Database Security**: Secure the SPIRE database with proper credentials +3. **Network Security**: Limit access to SPIRE Server API +4. **Selector Security**: Use specific selectors to prevent workload impersonation +5. **SVID TTL**: Configure appropriate TTLs for different workload types + +## Troubleshooting + +### Server won't start + +Check database connectivity: + +```bash +docker logs rag-modulo-spire-db +docker exec rag-modulo-spire-db pg_isready -U spire +``` + +### Agent won't connect + +Check server address and port: + +```bash +docker exec rag-modulo-spire-agent cat /etc/spire/agent/agent.conf | grep server_ +``` + +### Workload can't fetch SVID + +1. Verify registration entry exists +2. Check workload selectors match +3. Check agent logs for attestation errors: + +```bash +docker logs rag-modulo-spire-agent 2>&1 | grep -i attest +``` diff --git a/deployment/spire/agent.conf b/deployment/spire/agent.conf new file mode 100644 index 00000000..370a16a0 --- /dev/null +++ b/deployment/spire/agent.conf @@ -0,0 +1,92 @@ +# SPIRE Agent Configuration for RAG Modulo +# Reference: docs/architecture/spire-integration-architecture.md +# +# This configuration sets up a SPIRE agent for RAG Modulo workloads +# to obtain SVIDs for AI agent authentication. + +agent { + # Data directory for SPIRE Agent data + data_dir = "/var/lib/spire/agent/data" + + # Log configuration + log_level = "INFO" + log_format = "json" + + # Server address for connecting to SPIRE Server + server_address = "spire-server" + server_port = "8081" + + # Socket path for workload API + socket_path = "/run/spire/sockets/agent.sock" + + # Trust domain must match the SPIRE Server + trust_domain = "rag-modulo.example.com" + + # Trust bundle path (populated by K8s ConfigMap) + trust_bundle_path = "/run/spire/bundle/bundle.crt" + + # Insecure bootstrap (for initial deployment, disable in production) + insecure_bootstrap = false + + # Enable workload API authorization + authorized_delegates = [] +} + +plugins { + # NodeAttestor plugin for Kubernetes + NodeAttestor "k8s_psat" { + plugin_data { + cluster = "rag-modulo-cluster" + } + } + + # KeyManager plugin + KeyManager "memory" { + plugin_data {} + } + + # WorkloadAttestor plugin for Kubernetes + WorkloadAttestor "k8s" { + plugin_data { + # Skip kubelet verification for development + # In production, configure kubelet certificate validation + skip_kubelet_verification = true + + # Node name from environment variable + node_name_env = "MY_NODE_NAME" + } + } + + # WorkloadAttestor plugin for Unix processes (Docker/local dev) + WorkloadAttestor "unix" { + plugin_data { + # Enable discovery of all Unix workloads + discover_workload_path = true + } + } + + # WorkloadAttestor plugin for Docker (optional) + WorkloadAttestor "docker" { + plugin_data { + # Docker socket path + docker_socket_path = "unix:///var/run/docker.sock" + } + } +} + +# Health check configuration +health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" +} + +# Telemetry configuration +telemetry { + Prometheus { + host = "0.0.0.0" + port = 9989 + } +} diff --git a/deployment/spire/docker-compose.spire.yml b/deployment/spire/docker-compose.spire.yml new file mode 100644 index 00000000..c57347fd --- /dev/null +++ b/deployment/spire/docker-compose.spire.yml @@ -0,0 +1,169 @@ +# Docker Compose for SPIRE Development Environment +# Reference: docs/architecture/spire-integration-architecture.md +# +# This docker-compose file sets up SPIRE Server and Agent for local development +# of RAG Modulo's agent authentication system. + +version: "3.8" + +services: + # SPIRE Server - manages trust domain and issues SVIDs + spire-server: + image: ghcr.io/spiffe/spire-server:1.11.1 + container_name: rag-modulo-spire-server + hostname: spire-server + volumes: + - ./server.conf:/etc/spire/server/server.conf:ro + - spire-server-data:/var/lib/spire/server + - spire-server-socket:/tmp/spire-server/private + ports: + - "8081:8081" # SPIRE Server API + - "8080:8080" # Health check + - "9988:9988" # Prometheus metrics + command: + - -config + - /etc/spire/server/server.conf + environment: + - SPIRE_DB_CONNECTION_STRING=${SPIRE_DB_CONNECTION_STRING:-postgres://spire:spire_password@spire-db:5432/spire?sslmode=disable} + depends_on: + spire-db: + condition: service_healthy + healthcheck: + test: + - "CMD" + - "/opt/spire/bin/spire-server" + - "healthcheck" + - "-shallow" + - "-socketPath" + - "/tmp/spire-server/private/api.sock" + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + networks: + - rag-modulo-spire + + # SPIRE Agent - runs on workload nodes and provides Workload API + spire-agent: + image: ghcr.io/spiffe/spire-agent:1.11.1 + container_name: rag-modulo-spire-agent + hostname: spire-agent + volumes: + - ./agent.conf:/etc/spire/agent/agent.conf:ro + - spire-agent-data:/var/lib/spire/agent + - spire-agent-socket:/run/spire/sockets + - spire-bundle:/run/spire/bundle:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + ports: + - "8089:8080" # Health check (different port to avoid conflict) + - "9989:9989" # Prometheus metrics + command: + - -config + - /etc/spire/agent/agent.conf + depends_on: + spire-server: + condition: service_healthy + healthcheck: + test: + - "CMD" + - "/opt/spire/bin/spire-agent" + - "healthcheck" + - "-shallow" + - "-socketPath" + - "/run/spire/sockets/agent.sock" + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + networks: + - rag-modulo-spire + + # PostgreSQL database for SPIRE Server + spire-db: + image: postgres:15-alpine + container_name: rag-modulo-spire-db + hostname: spire-db + environment: + POSTGRES_USER: spire + POSTGRES_PASSWORD: ${SPIRE_DB_PASSWORD:-spire_password} + POSTGRES_DB: spire + volumes: + - spire-db-data:/var/lib/postgresql/data + ports: + - "5433:5432" # Different port to avoid conflict with main PostgreSQL + healthcheck: + test: ["CMD-SHELL", "pg_isready -U spire -d spire"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + networks: + - rag-modulo-spire + + # SPIRE Server bootstrap job - creates initial registration entries + spire-bootstrap: + image: ghcr.io/spiffe/spire-server:1.11.1 + container_name: rag-modulo-spire-bootstrap + volumes: + - spire-server-socket:/tmp/spire-server/private:ro + - ./registration-entries:/etc/spire/entries:ro + entrypoint: ["/bin/sh", "-c"] + command: + - | + echo "Waiting for SPIRE Server to be ready..." + sleep 10 + + echo "Creating agent node registration entry..." + /opt/spire/bin/spire-server entry create \ + -socketPath /tmp/spire-server/private/api.sock \ + -spiffeID spiffe://rag-modulo.example.com/spire/agent/unix \ + -parentID spiffe://rag-modulo.example.com/spire/server \ + -selector unix:uid:0 \ + -node || echo "Agent entry may already exist" + + echo "Creating backend-api workload entry..." + /opt/spire/bin/spire-server entry create \ + -socketPath /tmp/spire-server/private/api.sock \ + -spiffeID spiffe://rag-modulo.example.com/workload/backend-api \ + -parentID spiffe://rag-modulo.example.com/spire/agent/unix \ + -selector docker:label:app:rag-modulo-backend || echo "Backend entry may already exist" + + echo "Creating search-enricher agent entry..." + /opt/spire/bin/spire-server entry create \ + -socketPath /tmp/spire-server/private/api.sock \ + -spiffeID spiffe://rag-modulo.example.com/agent/search-enricher/default \ + -parentID spiffe://rag-modulo.example.com/spire/agent/unix \ + -selector docker:label:agent-type:search-enricher || echo "Search enricher entry may already exist" + + echo "Creating cot-reasoning agent entry..." + /opt/spire/bin/spire-server entry create \ + -socketPath /tmp/spire-server/private/api.sock \ + -spiffeID spiffe://rag-modulo.example.com/agent/cot-reasoning/default \ + -parentID spiffe://rag-modulo.example.com/spire/agent/unix \ + -selector docker:label:agent-type:cot-reasoning || echo "CoT reasoning entry may already exist" + + echo "Bootstrap complete!" + depends_on: + spire-server: + condition: service_healthy + networks: + - rag-modulo-spire + +volumes: + spire-server-data: + name: rag-modulo-spire-server-data + spire-server-socket: + name: rag-modulo-spire-server-socket + spire-agent-data: + name: rag-modulo-spire-agent-data + spire-agent-socket: + name: rag-modulo-spire-agent-socket + spire-bundle: + name: rag-modulo-spire-bundle + spire-db-data: + name: rag-modulo-spire-db-data + +networks: + rag-modulo-spire: + name: rag-modulo-spire + driver: bridge diff --git a/deployment/spire/server.conf b/deployment/spire/server.conf new file mode 100644 index 00000000..f0b4a545 --- /dev/null +++ b/deployment/spire/server.conf @@ -0,0 +1,102 @@ +# SPIRE Server Configuration for RAG Modulo +# Reference: docs/architecture/spire-integration-architecture.md +# +# This configuration sets up a SPIRE server for the RAG Modulo platform +# to manage workload identities for AI agents. + +server { + # Bind address for the SPIRE Server API + bind_address = "0.0.0.0" + bind_port = "8081" + + # Socket path for local communication + socket_path = "/tmp/spire-server/private/api.sock" + + # Trust domain for the SPIRE deployment + trust_domain = "rag-modulo.example.com" + + # Data directory for SPIRE Server data + data_dir = "/var/lib/spire/server/data" + + # Log configuration + log_level = "INFO" + log_format = "json" + + # CA configuration + ca_subject { + country = ["US"] + organization = ["RAG Modulo"] + common_name = "RAG Modulo SPIRE CA" + } + + # CA key type and TTL + ca_key_type = "rsa-2048" + ca_ttl = "168h" # 7 days + + # JWT-SVID configuration + jwt_issuer = "https://spire.rag-modulo.example.com" + + # Default SVID TTL (1 hour for agents) + default_svid_ttl = "1h" + + # Federation configuration (optional) + # federation { + # bundle_endpoint { + # address = "0.0.0.0" + # port = 8443 + # } + # } +} + +plugins { + # DataStore plugin - stores registration entries and node information + DataStore "sql" { + plugin_data { + database_type = "postgres" + connection_string = "${SPIRE_DB_CONNECTION_STRING}" + } + } + + # NodeAttestor plugin for Kubernetes - attests Kubernetes nodes + NodeAttestor "k8s_psat" { + plugin_data { + clusters = { + "rag-modulo-cluster" = { + service_account_allow_list = ["spire:spire-agent"] + } + } + } + } + + # KeyManager plugin - manages signing keys + KeyManager "disk" { + plugin_data { + keys_path = "/var/lib/spire/server/keys" + } + } + + # Notifier plugin for K8s bundle management + Notifier "k8sbundle" { + plugin_data { + namespace = "spire" + config_map = "spire-bundle" + } + } +} + +# Health check configuration +health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" +} + +# Telemetry configuration +telemetry { + Prometheus { + host = "0.0.0.0" + port = 9988 + } +} diff --git a/docs/architecture/agentic-ui-architecture.md b/docs/architecture/agentic-ui-architecture.md new file mode 100644 index 00000000..ceabf6ec --- /dev/null +++ b/docs/architecture/agentic-ui-architecture.md @@ -0,0 +1,1470 @@ +# Agentic UI Architecture + +**Date**: November 2025 +**Status**: Architecture Design +**Version**: 1.0 +**Related Documents**: + +- [MCP Integration Architecture](./mcp-integration-architecture.md) +- [SearchService Agent Hooks Architecture](./search-agent-hooks-architecture.md) +- [RAG Modulo MCP Server Architecture](./rag-modulo-mcp-server-architecture.md) + +## Overview + +This document describes the frontend architecture for transforming RAG Modulo into a fully +agentic RAG solution. It covers the React component hierarchy, state management, user +interactions, and integration patterns needed to support: + +1. **Agent Configuration** - Per-collection agent assignment and configuration +2. **Artifact Display** - Rendering and downloading agent-generated artifacts +3. **Execution Visibility** - Real-time pipeline stage and agent status indicators +4. **Agent Management** - Dashboard for managing user's agents and viewing analytics + +## Current Frontend Architecture + +### Existing Components (Reference) + +``` +frontend/src/components/ +ā”œā”€ā”€ agents/ +│ └── LightweightAgentOrchestration.tsx # Existing workflow-focused agent UI +ā”œā”€ā”€ search/ +│ ā”œā”€ā”€ LightweightSearchInterface.tsx # Main search chat interface +│ ā”œā”€ā”€ ChainOfThoughtAccordion.tsx # CoT reasoning display +│ ā”œā”€ā”€ SourcesAccordion.tsx # Document sources +│ ā”œā”€ā”€ CitationsAccordion.tsx # Citation display +│ └── TokenAnalysisAccordion.tsx # Token usage metrics +ā”œā”€ā”€ collections/ +│ ā”œā”€ā”€ LightweightCollections.tsx # Collection list +│ └── LightweightCollectionDetail.tsx # Collection settings +└── ui/ + ā”œā”€ā”€ Card.tsx, Button.tsx, Modal.tsx # Reusable UI components + └── ... +``` + +### Design System + +- **Framework**: React 18 with TypeScript +- **Styling**: Tailwind CSS with Carbon Design System colors +- **Icons**: Heroicons (@heroicons/react) +- **State**: React hooks + Context (NotificationContext) +- **Routing**: React Router DOM + +## New Component Architecture + +### Component Hierarchy + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ App Layout │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ LightweightLayout (existing) │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ Routes │ │ │ +│ │ │ │ │ │ +│ │ │ /search │ │ │ +│ │ │ └── LightweightSearchInterface (ENHANCED) │ │ │ +│ │ │ ā”œā”€ā”€ SearchInput │ │ │ +│ │ │ ā”œā”€ā”€ MessageList │ │ │ +│ │ │ │ └── MessageCard │ │ │ +│ │ │ │ ā”œā”€ā”€ ChainOfThoughtAccordion │ │ │ +│ │ │ │ ā”œā”€ā”€ SourcesAccordion │ │ │ +│ │ │ │ ā”œā”€ā”€ AgentArtifactsPanel (NEW) │ │ │ +│ │ │ │ │ └── ArtifactCard (NEW) │ │ │ +│ │ │ │ └── AgentExecutionIndicator (NEW) │ │ │ +│ │ │ └── AgentPipelineStatus (NEW) │ │ │ +│ │ │ │ │ │ +│ │ │ /collections/:id/settings │ │ │ +│ │ │ └── LightweightCollectionDetail (ENHANCED) │ │ │ +│ │ │ └── CollectionAgentsTab (NEW) │ │ │ +│ │ │ ā”œā”€ā”€ AgentList (NEW) │ │ │ +│ │ │ ā”œā”€ā”€ AgentConfigModal (NEW) │ │ │ +│ │ │ └── AgentMarketplace (NEW) │ │ │ +│ │ │ │ │ │ +│ │ │ /agents │ │ │ +│ │ │ └── AgentDashboard (NEW) │ │ │ +│ │ │ ā”œā”€ā”€ MyAgentsPanel (NEW) │ │ │ +│ │ │ ā”œā”€ā”€ AgentAnalytics (NEW) │ │ │ +│ │ │ └── AgentAuditLog (NEW) │ │ │ +│ │ │ │ │ │ +│ │ │ /agents/marketplace │ │ │ +│ │ │ └── AgentMarketplacePage (NEW) │ │ │ +│ │ │ ā”œā”€ā”€ AgentCatalog (NEW) │ │ │ +│ │ │ └── AgentDetailModal (NEW) │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### File Structure + +``` +frontend/src/ +ā”œā”€ā”€ components/ +│ ā”œā”€ā”€ agents/ +│ │ ā”œā”€ā”€ LightweightAgentOrchestration.tsx # Existing (keep for workflows) +│ │ ā”œā”€ā”€ AgentDashboard.tsx # NEW: Main agent management page +│ │ ā”œā”€ā”€ MyAgentsPanel.tsx # NEW: User's configured agents +│ │ ā”œā”€ā”€ AgentAnalytics.tsx # NEW: Agent usage stats +│ │ ā”œā”€ā”€ AgentAuditLog.tsx # NEW: Execution history +│ │ ā”œā”€ā”€ AgentMarketplacePage.tsx # NEW: Browse available agents +│ │ ā”œā”€ā”€ AgentCatalog.tsx # NEW: Grid of available agents +│ │ ā”œā”€ā”€ AgentDetailModal.tsx # NEW: Agent info and add button +│ │ ā”œā”€ā”€ CollectionAgentsTab.tsx # NEW: Collection settings tab +│ │ ā”œā”€ā”€ AgentList.tsx # NEW: Agents for a collection +│ │ ā”œā”€ā”€ AgentConfigModal.tsx # NEW: Configure agent settings +│ │ └── AgentPriorityDragDrop.tsx # NEW: Drag to reorder priority +│ │ +│ ā”œā”€ā”€ search/ +│ │ ā”œā”€ā”€ LightweightSearchInterface.tsx # ENHANCED: Add artifact support +│ │ ā”œā”€ā”€ AgentArtifactsPanel.tsx # NEW: Container for artifacts +│ │ ā”œā”€ā”€ ArtifactCard.tsx # NEW: Single artifact display +│ │ ā”œā”€ā”€ ArtifactPreviewModal.tsx # NEW: Preview images/PDFs +│ │ ā”œā”€ā”€ AgentExecutionIndicator.tsx # NEW: Per-message agent badges +│ │ └── AgentPipelineStatus.tsx # NEW: Real-time pipeline stages +│ │ +│ └── ui/ +│ ā”œā”€ā”€ ProgressSteps.tsx # NEW: Pipeline stage indicator +│ └── FileDownloadButton.tsx # NEW: Base64 download handler +│ +ā”œā”€ā”€ services/ +│ ā”œā”€ā”€ apiClient.ts # ENHANCED: Add agent API methods +│ └── agentApiClient.ts # NEW: Agent-specific API calls +│ +ā”œā”€ā”€ types/ +│ └── agent.ts # NEW: Agent TypeScript interfaces +│ +└── contexts/ + └── AgentContext.tsx # NEW: Agent state management +``` + +## New Components Specification + +### 1. Search Interface Enhancements + +#### AgentArtifactsPanel + +Container for displaying agent-generated artifacts within search results. + +```typescript +// frontend/src/components/search/AgentArtifactsPanel.tsx + +interface AgentArtifact { + agent_id: string; + type: 'pptx' | 'pdf' | 'png' | 'mp3' | 'html' | 'txt'; + data: string; // base64 encoded + filename: string; + metadata: Record; +} + +interface AgentArtifactsPanelProps { + artifacts: AgentArtifact[]; + isLoading?: boolean; +} + +const AgentArtifactsPanel: React.FC = ({ + artifacts, + isLoading +}) => { + if (!artifacts?.length && !isLoading) return null; + + return ( +
+
+ +

+ Generated Artifacts ({artifacts.length}) +

+
+ + {isLoading ? ( +
+ {[1, 2].map(i => ( + + ))} +
+ ) : ( +
+ {artifacts.map((artifact, index) => ( + + ))} +
+ )} +
+ ); +}; +``` + +#### ArtifactCard + +Individual artifact display with preview and download actions. + +```typescript +// frontend/src/components/search/ArtifactCard.tsx + +interface ArtifactCardProps { + artifact: AgentArtifact; +} + +const ArtifactCard: React.FC = ({ artifact }) => { + const [previewOpen, setPreviewOpen] = useState(false); + + const getIcon = () => { + switch (artifact.type) { + case 'pptx': return ; + case 'pdf': return ; + case 'png': return ; + case 'mp3': return ; + case 'html': return ; + default: return ; + } + }; + + const getLabel = () => { + switch (artifact.type) { + case 'pptx': return 'PowerPoint'; + case 'pdf': return 'PDF Report'; + case 'png': return 'Chart'; + case 'mp3': return 'Audio'; + case 'html': return 'HTML'; + default: return 'File'; + } + }; + + const canPreview = ['png', 'pdf'].includes(artifact.type); + + const handleDownload = () => { + const mimeTypes: Record = { + pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + pdf: 'application/pdf', + png: 'image/png', + mp3: 'audio/mpeg', + html: 'text/html', + txt: 'text/plain' + }; + + const blob = base64ToBlob(artifact.data, mimeTypes[artifact.type]); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = artifact.filename; + a.click(); + URL.revokeObjectURL(url); + }; + + return ( + <> +
+
+
+ {getIcon()} +
+
+

+ {getLabel()} +

+

+ {artifact.filename} +

+
+
+ +
+ {canPreview && ( + + )} + +
+ + {artifact.metadata && ( +

+ {artifact.metadata.slides && `${artifact.metadata.slides} slides`} + {artifact.metadata.width && `${artifact.metadata.width}x${artifact.metadata.height}`} +

+ )} +
+ + {previewOpen && ( + setPreviewOpen(false)} + /> + )} + + ); +}; +``` + +#### AgentPipelineStatus + +Real-time pipeline stage indicator shown during search. + +```typescript +// frontend/src/components/search/AgentPipelineStatus.tsx + +type PipelineStage = 'pre_search' | 'search' | 'post_search' | 'generation' | 'response_agents' | 'complete'; + +interface AgentPipelineStatusProps { + currentStage: PipelineStage; + stages: { + id: PipelineStage; + label: string; + agentCount: number; + status: 'pending' | 'running' | 'completed' | 'error'; + duration?: number; + }[]; + isVisible: boolean; +} + +const AgentPipelineStatus: React.FC = ({ + currentStage, + stages, + isVisible +}) => { + if (!isVisible) return null; + + return ( +
+
+ + + Agent Pipeline Processing + +
+ +
+ {stages.map((stage, index) => ( + +
+
+ {stage.status === 'completed' ? ( + + ) : stage.status === 'running' ? ( + + ) : ( + stage.agentCount + )} +
+ + {stage.label} + + {stage.duration && ( + + {stage.duration}ms + + )} +
+ + {index < stages.length - 1 && ( +
+ )} + + ))} +
+
+ ); +}; +``` + +#### AgentExecutionIndicator + +Badge showing which agents processed a response. + +```typescript +// frontend/src/components/search/AgentExecutionIndicator.tsx + +interface AgentExecution { + agent_id: string; + agent_name: string; + stage: 'pre_search' | 'post_search' | 'response'; + duration_ms: number; + success: boolean; +} + +interface AgentExecutionIndicatorProps { + executions: AgentExecution[]; +} + +const AgentExecutionIndicator: React.FC = ({ + executions +}) => { + if (!executions?.length) return null; + + const [expanded, setExpanded] = useState(false); + + const successCount = executions.filter(e => e.success).length; + const totalDuration = executions.reduce((sum, e) => sum + e.duration_ms, 0); + + return ( +
+ + + {expanded && ( +
+ {executions.map((exec, index) => ( +
+ + {exec.agent_name} + ({exec.stage}) + {exec.duration_ms}ms +
+ ))} +
+ )} +
+ ); +}; +``` + +### 2. Collection Agent Configuration + +#### CollectionAgentsTab + +Tab component for collection settings page to configure agents. + +```typescript +// frontend/src/components/agents/CollectionAgentsTab.tsx + +interface CollectionAgentsTabProps { + collectionId: string; +} + +const CollectionAgentsTab: React.FC = ({ + collectionId +}) => { + const [agents, setAgents] = useState([]); + const [availableAgents, setAvailableAgents] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [showAddModal, setShowAddModal] = useState(false); + const [editingAgent, setEditingAgent] = useState(null); + const { addNotification } = useNotification(); + + useEffect(() => { + loadAgents(); + }, [collectionId]); + + const loadAgents = async () => { + setIsLoading(true); + try { + const [collectionAgents, allAgents] = await Promise.all([ + agentApiClient.getCollectionAgents(collectionId), + agentApiClient.getAvailableAgents() + ]); + setAgents(collectionAgents); + setAvailableAgents(allAgents); + } catch (error) { + addNotification('error', 'Error', 'Failed to load agents'); + } finally { + setIsLoading(false); + } + }; + + const handleToggleAgent = async (agentConfigId: string, enabled: boolean) => { + try { + await agentApiClient.updateAgentConfig(agentConfigId, { enabled }); + setAgents(prev => prev.map(a => + a.id === agentConfigId ? { ...a, enabled } : a + )); + } catch (error) { + addNotification('error', 'Error', 'Failed to update agent'); + } + }; + + const handleReorderAgents = async (reorderedAgents: CollectionAgent[]) => { + try { + // Update priorities based on new order + const updates = reorderedAgents.map((agent, index) => ({ + id: agent.id, + priority: index + })); + await agentApiClient.batchUpdatePriorities(updates); + setAgents(reorderedAgents); + } catch (error) { + addNotification('error', 'Error', 'Failed to reorder agents'); + } + }; + + return ( +
+ {/* Header */} +
+
+

Collection Agents

+

+ Configure AI agents that enhance search and generate artifacts +

+
+ +
+ + {/* Agent List by Stage */} + {isLoading ? ( +
+ {[1, 2, 3].map(i => )} +
+ ) : ( + <> + {/* Pre-Search Agents */} + a.trigger_stage === 'pre_search')} + onToggle={handleToggleAgent} + onEdit={setEditingAgent} + onReorder={handleReorderAgents} + /> + + {/* Post-Search Agents */} + a.trigger_stage === 'post_search')} + onToggle={handleToggleAgent} + onEdit={setEditingAgent} + onReorder={handleReorderAgents} + /> + + {/* Response Agents */} + a.trigger_stage === 'response')} + onToggle={handleToggleAgent} + onEdit={setEditingAgent} + onReorder={handleReorderAgents} + /> + + )} + + {/* Add Agent Modal */} + {showAddModal && ( + { + loadAgents(); + setShowAddModal(false); + }} + onClose={() => setShowAddModal(false)} + /> + )} + + {/* Edit Agent Modal */} + {editingAgent && ( + { + loadAgents(); + setEditingAgent(null); + }} + onClose={() => setEditingAgent(null)} + /> + )} +
+ ); +}; +``` + +#### AgentStageSection + +Section component for agents at a specific pipeline stage. + +```typescript +// frontend/src/components/agents/AgentStageSection.tsx + +interface AgentStageSectionProps { + title: string; + description: string; + stage: 'pre_search' | 'post_search' | 'response'; + agents: CollectionAgent[]; + onToggle: (id: string, enabled: boolean) => void; + onEdit: (agent: CollectionAgent) => void; + onReorder: (agents: CollectionAgent[]) => void; +} + +const AgentStageSection: React.FC = ({ + title, + description, + stage, + agents, + onToggle, + onEdit, + onReorder +}) => { + const stageIcons = { + pre_search: , + post_search: , + response: + }; + + const stageColors = { + pre_search: 'bg-yellow-10 text-yellow-60', + post_search: 'bg-blue-10 text-blue-60', + response: 'bg-purple-10 text-purple-60' + }; + + return ( +
+
+
+ {stageIcons[stage]} +
+
+

{title}

+

{description}

+
+
+ + {agents.length === 0 ? ( +
+ +

No agents configured for this stage

+
+ ) : ( + { + if (!result.destination) return; + const items = Array.from(agents); + const [reordered] = items.splice(result.source.index, 1); + items.splice(result.destination.index, 0, reordered); + onReorder(items); + }}> + + {(provided) => ( +
+ {agents.map((agent, index) => ( + + {(provided, snapshot) => ( +
+
+ +
+ +
+

{agent.name}

+

{agent.description}

+
+ +
+ + Priority: {agent.priority} + + + onToggle(agent.id, enabled)} + className={` + ${agent.enabled ? 'bg-green-50' : 'bg-gray-30'} + relative inline-flex h-5 w-9 items-center rounded-full + `} + > + + + + +
+
+ )} +
+ ))} + {provided.placeholder} +
+ )} +
+
+ )} +
+ ); +}; +``` + +#### AgentConfigModal + +Modal for configuring agent-specific settings. + +```typescript +// frontend/src/components/agents/AgentConfigModal.tsx + +interface AgentConfigModalProps { + agent: CollectionAgent; + onSave: () => void; + onClose: () => void; +} + +const AgentConfigModal: React.FC = ({ + agent, + onSave, + onClose +}) => { + const [config, setConfig] = useState(agent.config); + const [isSaving, setIsSaving] = useState(false); + const { addNotification } = useNotification(); + + // Generate form fields from agent's config schema + const renderConfigField = (key: string, schema: any) => { + const value = config.settings?.[key] ?? schema.default; + + switch (schema.type) { + case 'integer': + return ( +
+ + setConfig({ + ...config, + settings: { ...config.settings, [key]: parseInt(e.target.value) } + })} + className="input-field w-full" + /> + {schema.description && ( +

{schema.description}

+ )} +
+ ); + + case 'boolean': + return ( +
+
+ + {schema.description && ( +

{schema.description}

+ )} +
+ setConfig({ + ...config, + settings: { ...config.settings, [key]: checked } + })} + /> +
+ ); + + case 'string': + if (schema.enum) { + return ( +
+ + +
+ ); + } + return ( +
+ + setConfig({ + ...config, + settings: { ...config.settings, [key]: e.target.value } + })} + className="input-field w-full" + /> +
+ ); + + default: + return null; + } + }; + + const handleSave = async () => { + setIsSaving(true); + try { + await agentApiClient.updateAgentConfig(agent.id, { config }); + addNotification('success', 'Saved', 'Agent configuration updated'); + onSave(); + } catch (error) { + addNotification('error', 'Error', 'Failed to save configuration'); + } finally { + setIsSaving(false); + } + }; + + return ( + +
+

+ Configure {agent.name} +

+ +
+ {/* Agent info */} +
+

{agent.description}

+
+ Stage: {agent.trigger_stage} + Type: {agent.config.type} +
+
+ + {/* Dynamic config fields */} + {agent.config_schema?.properties && ( +
+ {Object.entries(agent.config_schema.properties).map(([key, schema]) => + renderConfigField(key, schema) + )} +
+ )} +
+ +
+ + +
+
+
+ ); +}; +``` + +### 3. Agent Management Dashboard + +#### AgentDashboard + +Main page for managing user's agents across all collections. + +```typescript +// frontend/src/components/agents/AgentDashboard.tsx + +const AgentDashboard: React.FC = () => { + const [activeTab, setActiveTab] = useState<'my-agents' | 'analytics' | 'audit'>('my-agents'); + + return ( +
+
+ {/* Header */} +
+

Agent Management

+

+ Configure and monitor AI agents for your document collections +

+
+ + {/* Tabs */} +
+ +
+ + {/* Tab Content */} + {activeTab === 'my-agents' && } + {activeTab === 'analytics' && } + {activeTab === 'audit' && } +
+
+ ); +}; +``` + +### 4. Agent Marketplace + +#### AgentMarketplacePage + +Browse and discover available agents. + +```typescript +// frontend/src/components/agents/AgentMarketplacePage.tsx + +interface AgentManifest { + agent_id: string; + name: string; + version: string; + description: string; + capabilities: string[]; + config_schema: Record; + input_schema: Record; + output_schema: Record; + category: 'pre_search' | 'post_search' | 'response'; + icon?: string; + author?: string; + downloads?: number; +} + +const AgentMarketplacePage: React.FC = () => { + const [agents, setAgents] = useState([]); + const [filter, setFilter] = useState('all'); + const [search, setSearch] = useState(''); + const [selectedAgent, setSelectedAgent] = useState(null); + + useEffect(() => { + loadAgents(); + }, []); + + const loadAgents = async () => { + const data = await agentApiClient.getAvailableAgents(); + setAgents(data); + }; + + const filteredAgents = agents.filter(agent => { + const matchesFilter = filter === 'all' || agent.category === filter; + const matchesSearch = !search || + agent.name.toLowerCase().includes(search.toLowerCase()) || + agent.description.toLowerCase().includes(search.toLowerCase()); + return matchesFilter && matchesSearch; + }); + + const categories = [ + { id: 'all', label: 'All Agents' }, + { id: 'pre_search', label: 'Pre-Search' }, + { id: 'post_search', label: 'Post-Search' }, + { id: 'response', label: 'Response' }, + ]; + + return ( +
+
+ {/* Header */} +
+

Agent Marketplace

+

+ Discover and add AI agents to enhance your RAG workflows +

+
+ + {/* Filters */} +
+
+ + setSearch(e.target.value)} + className="input-field w-full pl-10" + /> +
+ +
+ {categories.map(cat => ( + + ))} +
+
+ + {/* Agent Grid */} +
+ {filteredAgents.map(agent => ( +
setSelectedAgent(agent)} + > +
+
+ +
+
+

{agent.name}

+

v{agent.version}

+
+
+ +

+ {agent.description} +

+ +
+ + {agent.category.replace('_', '-')} + + + +
+
+ ))} +
+ + {/* Agent Detail Modal */} + {selectedAgent && ( + setSelectedAgent(null)} + /> + )} +
+
+ ); +}; +``` + +## API Integration + +### Agent API Client + +```typescript +// frontend/src/services/agentApiClient.ts + +import apiClient from './apiClient'; + +export interface AgentManifest { + agent_id: string; + name: string; + version: string; + description: string; + capabilities: string[]; + category: 'pre_search' | 'post_search' | 'response'; + config_schema: Record; +} + +export interface CollectionAgent { + id: string; + agent_id: string; + name: string; + description: string; + config: { + type: 'mcp' | 'builtin'; + context_forge_tool_id?: string; + settings: Record; + }; + config_schema?: Record; + enabled: boolean; + trigger_stage: 'pre_search' | 'post_search' | 'response'; + priority: number; +} + +export interface AgentExecution { + id: string; + agent_id: string; + agent_name: string; + collection_id: string; + trigger_stage: string; + success: boolean; + duration_ms: number; + error?: string; + created_at: string; +} + +const agentApiClient = { + // Available agents + getAvailableAgents: async (): Promise => { + const response = await apiClient.get('/api/v1/agents/'); + return response.data; + }, + + getAgentsByCapability: async (capability: string): Promise => { + const response = await apiClient.get(`/api/v1/agents/capabilities/${capability}`); + return response.data; + }, + + // User's agent configurations + getUserAgentConfigs: async (): Promise => { + const response = await apiClient.get('/api/v1/agents/configs'); + return response.data; + }, + + createAgentConfig: async (config: Partial): Promise => { + const response = await apiClient.post('/api/v1/agents/configs', config); + return response.data; + }, + + updateAgentConfig: async ( + configId: string, + updates: Partial + ): Promise => { + const response = await apiClient.patch(`/api/v1/agents/configs/${configId}`, updates); + return response.data; + }, + + deleteAgentConfig: async (configId: string): Promise => { + await apiClient.delete(`/api/v1/agents/configs/${configId}`); + }, + + // Collection agents + getCollectionAgents: async (collectionId: string): Promise => { + const response = await apiClient.get(`/api/v1/agents/collections/${collectionId}/agents`); + return response.data; + }, + + addAgentToCollection: async ( + collectionId: string, + agentConfigId: string + ): Promise => { + await apiClient.post(`/api/v1/agents/collections/${collectionId}/agents`, { + agent_config_id: agentConfigId + }); + }, + + removeAgentFromCollection: async ( + collectionId: string, + agentConfigId: string + ): Promise => { + await apiClient.delete( + `/api/v1/agents/collections/${collectionId}/agents/${agentConfigId}` + ); + }, + + batchUpdatePriorities: async ( + updates: { id: string; priority: number }[] + ): Promise => { + await apiClient.patch('/api/v1/agents/configs/priorities', { updates }); + }, + + // Analytics + getAgentAnalytics: async ( + agentConfigId?: string, + dateRange?: { start: string; end: string } + ): Promise => { + const params = new URLSearchParams(); + if (agentConfigId) params.append('agent_config_id', agentConfigId); + if (dateRange) { + params.append('start', dateRange.start); + params.append('end', dateRange.end); + } + const response = await apiClient.get(`/api/v1/agents/analytics?${params}`); + return response.data; + }, + + // Audit log + getAgentExecutions: async ( + options?: { + agentConfigId?: string; + collectionId?: string; + limit?: number; + offset?: number; + } + ): Promise => { + const params = new URLSearchParams(); + if (options?.agentConfigId) params.append('agent_config_id', options.agentConfigId); + if (options?.collectionId) params.append('collection_id', options.collectionId); + if (options?.limit) params.append('limit', options.limit.toString()); + if (options?.offset) params.append('offset', options.offset.toString()); + const response = await apiClient.get(`/api/v1/agents/executions?${params}`); + return response.data; + }, +}; + +export default agentApiClient; +``` + +### Enhanced Search Response Schema + +```typescript +// frontend/src/types/search.ts + +export interface SearchResponse { + answer: string; + sources: Source[]; + cot_steps?: CotStep[]; + + // NEW: Agent-related fields + agent_artifacts?: AgentArtifact[]; + agent_executions?: AgentExecution[]; + pipeline_metadata?: { + pre_search_agents: number; + post_search_agents: number; + response_agents: number; + total_agent_time_ms: number; + }; +} + +export interface AgentArtifact { + agent_id: string; + type: 'pptx' | 'pdf' | 'png' | 'mp3' | 'html' | 'txt'; + data: string; + filename: string; + metadata: Record; +} + +export interface AgentExecution { + agent_id: string; + agent_name: string; + stage: 'pre_search' | 'post_search' | 'response'; + duration_ms: number; + success: boolean; + error?: string; +} +``` + +## State Management + +### AgentContext + +Context for managing agent-related state across the application. + +```typescript +// frontend/src/contexts/AgentContext.tsx + +interface AgentState { + availableAgents: AgentManifest[]; + userConfigs: CollectionAgent[]; + isLoading: boolean; + error: string | null; +} + +interface AgentContextType extends AgentState { + loadAvailableAgents: () => Promise; + loadUserConfigs: () => Promise; + createConfig: (config: Partial) => Promise; + updateConfig: (id: string, updates: Partial) => Promise; + deleteConfig: (id: string) => Promise; +} + +const AgentContext = createContext(null); + +export const AgentProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const [state, setState] = useState({ + availableAgents: [], + userConfigs: [], + isLoading: false, + error: null + }); + + const loadAvailableAgents = async () => { + setState(s => ({ ...s, isLoading: true })); + try { + const agents = await agentApiClient.getAvailableAgents(); + setState(s => ({ ...s, availableAgents: agents, isLoading: false })); + } catch (error) { + setState(s => ({ ...s, error: 'Failed to load agents', isLoading: false })); + } + }; + + const loadUserConfigs = async () => { + setState(s => ({ ...s, isLoading: true })); + try { + const configs = await agentApiClient.getUserAgentConfigs(); + setState(s => ({ ...s, userConfigs: configs, isLoading: false })); + } catch (error) { + setState(s => ({ ...s, error: 'Failed to load configs', isLoading: false })); + } + }; + + // ... other methods + + return ( + + {children} + + ); +}; + +export const useAgents = () => { + const context = useContext(AgentContext); + if (!context) { + throw new Error('useAgents must be used within AgentProvider'); + } + return context; +}; +``` + +## Accessibility + +### Keyboard Navigation + +- All agent cards and buttons are focusable +- Drag-and-drop has keyboard alternatives (up/down arrow keys) +- Modal focus trapping implemented +- Screen reader announcements for status changes + +### ARIA Labels + +```tsx +// Example: Artifact card +
+ +
+ +// Example: Pipeline status +
+ ... +
+``` + +## Responsive Design + +### Breakpoints + +| Breakpoint | Width | Layout Changes | +|------------|-------|----------------| +| Mobile | < 640px | Single column, stacked artifacts | +| Tablet | 640-1024px | 2-column grid, collapsible panels | +| Desktop | > 1024px | 3-column grid, full sidebar | + +### Mobile Considerations + +- Artifact preview uses full-screen modal on mobile +- Drag-and-drop replaced with move up/down buttons on touch +- Pipeline status collapses to minimal indicator +- Agent config modal is full-screen on mobile + +## Performance + +### Lazy Loading + +- Agent marketplace loads agents in pages of 20 +- Artifact preview images loaded on-demand +- Audit log uses virtual scrolling for large lists + +### Caching + +- Available agents cached for 5 minutes +- User configs cached with SWR for real-time updates +- Artifact data not cached (too large) + +### Bundle Optimization + +- Agent components code-split by route +- react-beautiful-dnd loaded only when drag-drop needed +- Large icons tree-shaken + +## Related Documents + +- [MCP Integration Architecture](./mcp-integration-architecture.md) +- [SearchService Agent Hooks Architecture](./search-agent-hooks-architecture.md) +- [RAG Modulo MCP Server Architecture](./rag-modulo-mcp-server-architecture.md) diff --git a/docs/architecture/backend-architecture-diagram.md b/docs/architecture/backend-architecture-diagram.md new file mode 100644 index 00000000..0cd94bb3 --- /dev/null +++ b/docs/architecture/backend-architecture-diagram.md @@ -0,0 +1,517 @@ +# RAG Modulo Backend Architecture + +This document provides a comprehensive architecture diagram and description of the RAG Modulo +backend system. + +## Architecture Overview + +The RAG Modulo backend is a FastAPI-based application that implements a Retrieval-Augmented +Generation (RAG) system with a modular, stage-based pipeline architecture. The system supports +multiple LLM providers, vector databases, and document processing strategies. + +## Component Architecture Diagram + +```mermaid +graph TB + subgraph "Client Layer" + WEB[Web Frontend] + CLI[CLI Client] + API_CLIENT[API Clients] + end + + subgraph "API Gateway Layer" + FASTAPI[FastAPI Application
main.py] + + subgraph "Middleware Stack" + CORS[LoggingCORSMiddleware] + SESSION[SessionMiddleware] + AUTH[AuthenticationMiddleware
SPIFFE/OIDC Support] + end + end + + subgraph "Router Layer" + AUTH_R[Auth Router] + SEARCH_R[Search Router] + COLLECTION_R[Collection Router] + CHAT_R[Chat Router] + CONV_R[Conversation Router] + PODCAST_R[Podcast Router] + VOICE_R[Voice Router] + AGENT_R[Agent Router] + USER_R[User Router] + TEAM_R[Team Router] + DASH_R[Dashboard Router] + HEALTH_R[Health Router] + WS_R[WebSocket Router] + end + + subgraph "Service Layer" + SEARCH_SVC[SearchService] + CONV_SVC[ConversationService] + MSG_ORCH[MessageProcessingOrchestrator] + COLLECTION_SVC[CollectionService] + FILE_SVC[FileManagementService] + PODCAST_SVC[PodcastService] + VOICE_SVC[VoiceService] + AGENT_SVC[AgentService] + USER_SVC[UserService] + TEAM_SVC[TeamService] + DASH_SVC[DashboardService] + PIPELINE_SVC[PipelineService] + COT_SVC[ChainOfThoughtService] + ANSWER_SYNTH[AnswerSynthesizer] + CITATION_SVC[CitationAttributionService] + end + + subgraph "Pipeline Architecture" + PIPELINE_EXEC[PipelineExecutor] + + subgraph "Pipeline Stages" + STAGE1[PipelineResolutionStage] + STAGE2[QueryEnhancementStage] + STAGE3[RetrievalStage] + STAGE4[RerankingStage] + STAGE5[ReasoningStage] + STAGE6[GenerationStage] + end + + SEARCH_CTX[SearchContext] + end + + subgraph "Data Ingestion Pipeline" + DOC_STORE[DocumentStore] + DOC_PROC[DocumentProcessor] + + subgraph "Document Processors" + PDF_PROC[PdfProcessor] + DOCLING_PROC[DoclingProcessor] + WORD_PROC[WordProcessor] + EXCEL_PROC[ExcelProcessor] + TXT_PROC[TxtProcessor] + end + + CHUNKING[Chunking Strategies
Sentence/Semantic/Hierarchical] + end + + subgraph "Retrieval Layer" + RETRIEVER[Retriever] + RERANKER[Reranker] + QUERY_REWRITER[QueryRewriter] + end + + subgraph "Generation Layer" + LLM_FACTORY[LLMProviderFactory] + + subgraph "LLM Providers" + WATSONX[WatsonX Provider] + OPENAI[OpenAI Provider] + ANTHROPIC[Anthropic Provider] + end + + AUDIO_FACTORY[AudioFactory] + + subgraph "Audio Providers" + ELEVENLABS[ElevenLabs Audio] + OPENAI_AUDIO[OpenAI Audio] + OLLAMA_AUDIO[Ollama Audio] + end + end + + subgraph "Repository Layer" + USER_REPO[UserRepository] + COLLECTION_REPO[CollectionRepository] + FILE_REPO[FileRepository] + CONV_REPO[ConversationRepository] + AGENT_REPO[AgentRepository] + PODCAST_REPO[PodcastRepository] + VOICE_REPO[VoiceRepository] + TEAM_REPO[TeamRepository] + PIPELINE_REPO[PipelineRepository] + LLM_REPO[LLMProviderRepository] + end + + subgraph "Data Persistence" + POSTGRES[(PostgreSQL
Metadata & Config)] + VECTOR_DB[(Vector Database)] + + subgraph "Vector DB Implementations" + MILVUS[Milvus] + PINECONE[Pinecone] + WEAVIATE[Weaviate] + ELASTICSEARCH[Elasticsearch] + CHROMA[Chroma] + end + end + + subgraph "External Services" + SPIRE[SPIRE Server
SPIFFE Identity] + OIDC[OIDC Provider
IBM AppID] + MINIO[MinIO
Object Storage] + end + + subgraph "Core Infrastructure" + CONFIG[Settings/Config] + LOGGING[Logging Utils] + IDENTITY[Identity Service] + EXCEPTIONS[Custom Exceptions] + end + + %% Client to API Gateway + WEB --> FASTAPI + CLI --> FASTAPI + API_CLIENT --> FASTAPI + + %% Middleware Flow + FASTAPI --> CORS + CORS --> SESSION + SESSION --> AUTH + + %% Router Registration + AUTH --> AUTH_R + AUTH --> SEARCH_R + AUTH --> COLLECTION_R + AUTH --> CHAT_R + AUTH --> CONV_R + AUTH --> PODCAST_R + AUTH --> VOICE_R + AUTH --> AGENT_R + AUTH --> USER_R + AUTH --> TEAM_R + AUTH --> DASH_R + AUTH --> HEALTH_R + AUTH --> WS_R + + %% Router to Service + SEARCH_R --> SEARCH_SVC + CHAT_R --> CONV_SVC + CONV_R --> CONV_SVC + CONV_SVC --> MSG_ORCH + MSG_ORCH --> SEARCH_SVC + COLLECTION_R --> COLLECTION_SVC + COLLECTION_SVC --> FILE_SVC + PODCAST_R --> PODCAST_SVC + VOICE_R --> VOICE_SVC + AGENT_R --> AGENT_SVC + USER_R --> USER_SVC + TEAM_R --> TEAM_SVC + DASH_R --> DASH_SVC + + %% Search Service to Pipeline + SEARCH_SVC --> PIPELINE_EXEC + PIPELINE_EXEC --> STAGE1 + STAGE1 --> STAGE2 + STAGE2 --> STAGE3 + STAGE3 --> STAGE4 + STAGE4 --> STAGE5 + STAGE5 --> STAGE6 + PIPELINE_EXEC --> SEARCH_CTX + + %% Pipeline Stages to Services + STAGE1 --> PIPELINE_SVC + STAGE2 --> PIPELINE_SVC + STAGE3 --> PIPELINE_SVC + STAGE4 --> PIPELINE_SVC + STAGE5 --> COT_SVC + STAGE6 --> ANSWER_SYNTH + + %% Pipeline Service to Retrieval + PIPELINE_SVC --> RETRIEVER + PIPELINE_SVC --> RERANKER + PIPELINE_SVC --> QUERY_REWRITER + + %% Retrieval to Vector DB + RETRIEVER --> VECTOR_DB + VECTOR_DB --> MILVUS + VECTOR_DB --> PINECONE + VECTOR_DB --> WEAVIATE + VECTOR_DB --> ELASTICSEARCH + VECTOR_DB --> CHROMA + + %% Generation Layer + ANSWER_SYNTH --> LLM_FACTORY + LLM_FACTORY --> WATSONX + LLM_FACTORY --> OPENAI + LLM_FACTORY --> ANTHROPIC + PODCAST_SVC --> LLM_FACTORY + VOICE_SVC --> AUDIO_FACTORY + AUDIO_FACTORY --> ELEVENLABS + AUDIO_FACTORY --> OPENAI_AUDIO + AUDIO_FACTORY --> OLLAMA_AUDIO + + %% Data Ingestion + FILE_SVC --> DOC_STORE + DOC_STORE --> DOC_PROC + DOC_PROC --> PDF_PROC + DOC_PROC --> DOCLING_PROC + DOC_PROC --> WORD_PROC + DOC_PROC --> EXCEL_PROC + DOC_PROC --> TXT_PROC + DOC_PROC --> CHUNKING + DOC_STORE --> VECTOR_DB + + %% Service to Repository + USER_SVC --> USER_REPO + COLLECTION_SVC --> COLLECTION_REPO + FILE_SVC --> FILE_REPO + CONV_SVC --> CONV_REPO + AGENT_SVC --> AGENT_REPO + PODCAST_SVC --> PODCAST_REPO + VOICE_SVC --> VOICE_REPO + TEAM_SVC --> TEAM_REPO + PIPELINE_SVC --> PIPELINE_REPO + PIPELINE_SVC --> LLM_REPO + + %% Repository to Database + USER_REPO --> POSTGRES + COLLECTION_REPO --> POSTGRES + FILE_REPO --> POSTGRES + CONV_REPO --> POSTGRES + AGENT_REPO --> POSTGRES + PODCAST_REPO --> POSTGRES + VOICE_REPO --> POSTGRES + TEAM_REPO --> POSTGRES + PIPELINE_REPO --> POSTGRES + LLM_REPO --> POSTGRES + + %% Authentication + AUTH --> SPIRE + AUTH --> OIDC + AGENT_SVC --> SPIRE + + %% Storage + FILE_SVC --> MINIO + PODCAST_SVC --> MINIO + VOICE_SVC --> MINIO + + %% Core Infrastructure + FASTAPI --> CONFIG + FASTAPI --> LOGGING + AUTH --> IDENTITY + SEARCH_SVC --> EXCEPTIONS + CONV_SVC --> EXCEPTIONS + + style FASTAPI fill:#4A90E2 + style PIPELINE_EXEC fill:#50C878 + style VECTOR_DB fill:#FF6B6B + style POSTGRES fill:#4ECDC4 + style LLM_FACTORY fill:#FFD93D + style DOC_STORE fill:#9B59B6 +``` + +## Architecture Layers + +### 1. API Gateway Layer + +**FastAPI Application (`main.py`)** + +- Entry point for all HTTP requests +- Manages application lifespan (startup/shutdown) +- Configures middleware stack +- Registers all routers +- Initializes database and LLM providers + +**Middleware Stack:** + +- **LoggingCORSMiddleware**: Handles CORS and request/response logging +- **SessionMiddleware**: Manages user sessions +- **AuthenticationMiddleware**: Validates user authentication via SPIFFE/OIDC + +### 2. Router Layer + +The router layer provides RESTful API endpoints organized by domain: + +- **Auth Router**: User authentication and authorization +- **Search Router**: RAG search operations +- **Collection Router**: Document collection management +- **Chat Router**: Conversational interface +- **Conversation Router**: Conversation history and context +- **Podcast Router**: AI-powered podcast generation +- **Voice Router**: Voice synthesis operations +- **Agent Router**: SPIFFE-based agent management +- **User Router**: User profile management +- **Team Router**: Team collaboration features +- **Dashboard Router**: Analytics and metrics +- **Health Router**: System health checks +- **WebSocket Router**: Real-time updates + +### 3. Service Layer + +Business logic services that orchestrate operations: + +- **SearchService**: Coordinates RAG search operations +- **ConversationService**: Manages conversation sessions and messages +- **MessageProcessingOrchestrator**: Orchestrates message processing with context +- **CollectionService**: Manages document collections +- **FileManagementService**: Handles file uploads and processing +- **PodcastService**: Generates podcasts from documents +- **VoiceService**: Manages voice synthesis +- **AgentService**: Manages AI agents with SPIFFE identity +- **PipelineService**: Executes RAG pipeline stages +- **ChainOfThoughtService**: Implements reasoning capabilities +- **AnswerSynthesizer**: Generates final answers from retrieved context +- **CitationAttributionService**: Attributes sources to answers + +### 4. Pipeline Architecture + +**Stage-Based RAG Pipeline:** + +The system uses a modular, stage-based pipeline architecture: + +1. **PipelineResolutionStage**: Resolves user's default pipeline configuration +2. **QueryEnhancementStage**: Rewrites/enhances queries for better retrieval +3. **RetrievalStage**: Retrieves documents from vector database +4. **RerankingStage**: Reranks results for relevance +5. **ReasoningStage**: Applies Chain of Thought reasoning if needed +6. **GenerationStage**: Generates final answer using LLM + +**PipelineExecutor**: Orchestrates stage execution with context passing + +**SearchContext**: Maintains state across pipeline stages + +### 5. Data Ingestion Pipeline + +**DocumentStore**: Manages document ingestion workflow + +**DocumentProcessor**: Routes documents to appropriate processors: + +- **PdfProcessor**: PDF extraction with OCR support +- **DoclingProcessor**: Advanced document processing (tables, images) +- **WordProcessor**: Microsoft Word documents +- **ExcelProcessor**: Spreadsheet processing +- **TxtProcessor**: Plain text files + +**Chunking Strategies**: + +- Sentence-based (recommended) +- Semantic chunking +- Hierarchical chunking +- Token-based chunking +- Fixed-size chunking + +### 6. Retrieval Layer + +- **Retriever**: Performs vector similarity search +- **Reranker**: Reranks results for better relevance +- **QueryRewriter**: Enhances queries for better retrieval + +### 7. Generation Layer + +**LLMProviderFactory**: Factory for creating LLM provider instances + +- **WatsonX Provider**: IBM WatsonX integration +- **OpenAI Provider**: OpenAI API integration +- **Anthropic Provider**: Claude API integration + +**AudioFactory**: Factory for audio generation + +- **ElevenLabs Audio**: Voice synthesis +- **OpenAI Audio**: TTS integration +- **Ollama Audio**: Local TTS + +### 8. Repository Layer + +Data access layer using Repository pattern: + +- **UserRepository**: User data operations +- **CollectionRepository**: Collection management +- **FileRepository**: File metadata operations +- **ConversationRepository**: Conversation data (unified, optimized) +- **AgentRepository**: Agent management +- **PodcastRepository**: Podcast metadata +- **VoiceRepository**: Voice configuration +- **TeamRepository**: Team operations +- **PipelineRepository**: Pipeline configuration +- **LLMProviderRepository**: LLM provider settings + +### 9. Data Persistence + +**PostgreSQL**: + +- Stores metadata (users, collections, files, conversations) +- Manages configuration (pipelines, LLM settings) +- Handles relationships and transactions + +**Vector Database** (Abstracted via VectorStore interface): + +- **Milvus**: Primary vector database +- **Pinecone**: Cloud vector database +- **Weaviate**: GraphQL vector database +- **Elasticsearch**: Search engine with vector support +- **Chroma**: Lightweight vector database + +### 10. External Services + +- **SPIRE Server**: SPIFFE workload identity for agent authentication +- **OIDC Provider**: IBM AppID for user authentication +- **MinIO**: Object storage for files and audio + +### 11. Core Infrastructure + +- **Settings/Config**: Centralized configuration management +- **Logging Utils**: Structured logging with context +- **Identity Service**: User/agent identity management +- **Custom Exceptions**: Domain-specific error handling + +## Data Flow + +### Search Request Flow + +1. **Client** → FastAPI → **Search Router** +2. **Search Router** → **SearchService** +3. **SearchService** → **PipelineExecutor** +4. **PipelineExecutor** executes stages: + - Pipeline Resolution → Query Enhancement → Retrieval → Reranking → Reasoning → Generation +5. **RetrievalStage** → **Retriever** → **Vector Database** +6. **GenerationStage** → **AnswerSynthesizer** → **LLM Provider** +7. Response flows back through layers to client + +### Document Ingestion Flow + +1. **Client** → **Collection Router** → **CollectionService** → **FileManagementService** +2. **FileManagementService** → **DocumentStore** +3. **DocumentStore** → **DocumentProcessor** → **Specific Processor** (PDF/Word/etc.) +4. **Processor** → **Chunking Strategy** → **Document Chunks** +5. **DocumentStore** → **Vector Database** (embeddings + metadata) +6. **FileManagementService** → **FileRepository** → **PostgreSQL** (metadata) + +### Conversation Flow + +1. **Client** → **Conversation Router** → **ConversationService** +2. **ConversationService** → **MessageProcessingOrchestrator** +3. **MessageProcessingOrchestrator** → **SearchService** (with context) +4. **SearchService** executes pipeline with conversation context +5. Response saved via **ConversationRepository** → **PostgreSQL** + +## Key Design Patterns + +1. **Repository Pattern**: Data access abstraction +2. **Factory Pattern**: LLM and Vector DB instantiation +3. **Strategy Pattern**: Chunking strategies, LLM providers +4. **Pipeline Pattern**: Stage-based RAG processing +5. **Dependency Injection**: Services and repositories +6. **Middleware Pattern**: Cross-cutting concerns (auth, logging, CORS) + +## Scalability Considerations + +- **Stateless Services**: Services are stateless for horizontal scaling +- **Database Connection Pooling**: SQLAlchemy connection management +- **Async/Await**: Asynchronous operations for I/O-bound tasks +- **Vector DB Abstraction**: Easy switching between vector databases +- **LLM Provider Abstraction**: Support for multiple LLM providers +- **Modular Pipeline**: Stages can be optimized independently + +## Security Features + +- **SPIFFE/SPIRE**: Machine-to-machine authentication for agents +- **OIDC**: User authentication via IBM AppID +- **Session Management**: Secure session handling +- **CORS**: Controlled cross-origin access +- **Input Validation**: Pydantic schemas for request validation +- **Error Handling**: Secure error messages without information leakage + +## Configuration Management + +- **Environment Variables**: `.env` file support +- **Pydantic Settings**: Type-safe configuration +- **Runtime Configuration**: Dynamic configuration updates +- **User-Specific Settings**: Per-user LLM and pipeline configuration diff --git a/docs/architecture/mcp-integration-architecture.md b/docs/architecture/mcp-integration-architecture.md new file mode 100644 index 00000000..e4be1eb8 --- /dev/null +++ b/docs/architecture/mcp-integration-architecture.md @@ -0,0 +1,200 @@ +# MCP Integration Architecture + +**Date**: November 2025 +**Status**: Architecture Design +**Version**: 1.0 +**Related PRs**: #671, #684, #695 + +## Overview + +This document describes the architecture for integrating Model Context Protocol (MCP) into +RAG Modulo. The integration enables bidirectional MCP communication: + +1. **RAG Modulo as MCP Client**: Consuming external MCP tools (PowerPoint generation, charts, translation) +2. **RAG Modulo as MCP Server**: Exposing RAG capabilities to external AI tools (Claude Desktop, workflow systems) + +## PR Comparison and Decision + +### PR #671 vs #684 Analysis + +| Aspect | PR #671 | PR #684 | Decision | +|--------|---------|---------|----------| +| **File Organization** | `mcp/` dedicated directory | `services/` directory | #684 naming preferred | +| **Lines Changed** | 2,502 | 2,846 | Similar | +| **Test Functions** | 63 | 50 | #671 has more tests | +| **Mergeable** | Yes | Unknown | #671 confirmed | + +### Decision: Adopt #684 File Naming with #671 Test Coverage + +We will use #684's file naming convention (`mcp_gateway_client.py`, `search_result_enricher.py`) +placed in the `services/` directory, as this follows the existing service-based architecture +pattern. However, we should incorporate the additional test coverage from #671. + +## High-Level Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ MCP Context Forge │ +│ (Central Gateway/Registry) │ +│ │ +│ Registered Servers: │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ Internal (RAG Modulo consumes): │ │ +│ │ • ppt-generator-mcp (PowerPoint) │ │ +│ │ • chart-generator-mcp (Visualizations) │ │ +│ │ • translator-mcp (Language translation) │ │ +│ │ • web-enricher-mcp (Real-time data) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ External (RAG Modulo exposes): │ │ +│ │ • rag-modulo-mcp (search, ingest, podcast, collections) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + ā–² ā–² + │ │ + │ RAG Modulo calls External tools call + │ external MCP tools RAG Modulo MCP server + │ │ + ā–¼ ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RAG Modulo Backend │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ MCP Client │ MCP Server │ │ +│ │ services/mcp_gateway_client.py │ mcp_server/server.py │ │ +│ │ services/search_result_enricher.py│ mcp_server/tools.py │ │ +│ │ │ │ │ +│ │ Consumes: ppt-generator, │ Exposes: rag_search, │ │ +│ │ chart-generator, etc. │ rag_ingest, rag_podcast │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Core Services ││ +│ │ SearchService, DocumentService, PodcastService, CollectionService ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + ā–² + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RAG Modulo Frontend │ +│ │ +│ • Triggers searches → gets artifacts back │ +│ • Configures which agents run per collection │ +│ • Downloads/previews generated artifacts │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## File Structure + +``` +backend/rag_solution/ +ā”œā”€ā”€ services/ +│ ā”œā”€ā”€ mcp_gateway_client.py # Client to call external MCP tools +│ ā”œā”€ā”€ search_result_enricher.py # Post-search enrichment agent +│ └── ... (existing services) +│ +ā”œā”€ā”€ mcp_server/ # RAG Modulo as MCP server +│ ā”œā”€ā”€ __init__.py +│ ā”œā”€ā”€ server.py # MCP server setup, transport handling +│ ā”œā”€ā”€ tools.py # Tool definitions (rag_search, rag_ingest, etc.) +│ ā”œā”€ā”€ resources.py # MCP resources (collection metadata, etc.) +│ └── auth.py # SPIFFE/Bearer token validation +│ +ā”œā”€ā”€ schemas/ +│ ā”œā”€ā”€ mcp_schema.py # Schemas for MCP requests/responses +│ └── ... +│ +└── router/ + ā”œā”€ā”€ mcp_router.py # REST endpoints for MCP management + └── ... + +tests/unit/ +ā”œā”€ā”€ services/ +│ ā”œā”€ā”€ test_mcp_gateway_client.py +│ └── test_search_result_enricher.py +ā”œā”€ā”€ router/ +│ └── test_mcp_router.py +└── mcp_server/ + ā”œā”€ā”€ test_server.py + └── test_tools.py +``` + +## MCP Client Components + +### MCPGatewayClient + +Thin wrapper with circuit breaker pattern for calling external MCP tools via Context Forge. + +**Key Features**: + +- Circuit breaker: 5 failure threshold, 60s recovery timeout +- Health checks: 5-second timeout +- Default timeout: 30 seconds on all calls +- Graceful degradation on failures + +### SearchResultEnricher + +Content Enricher pattern implementation for augmenting search results with external data. + +**Capabilities**: + +- Real-time data enrichment (stock prices, weather, etc.) +- External knowledge base queries +- Document metadata enhancement + +## MCP Server Components + +RAG Modulo exposes its capabilities as MCP tools for external consumption. + +### Exposed Tools + +| Tool | Description | Parameters | +|------|-------------|------------| +| `rag_search` | Search documents in a collection | `collection_id`, `query`, `top_k`, `use_cot` | +| `rag_ingest` | Add documents to a collection | `collection_id`, `documents` | +| `rag_list_collections` | List accessible collections | `include_stats` | +| `rag_generate_podcast` | Generate podcast from collection | `collection_id`, `topic`, `duration_minutes` | +| `rag_smart_questions` | Get suggested follow-up questions | `collection_id`, `context` | + +### Exposed Resources + +| Resource URI | Description | +|--------------|-------------| +| `rag://collection/{id}/documents` | Document metadata for a collection | +| `rag://collection/{id}/stats` | Collection statistics | +| `rag://search/{query}/results` | Cached search results | + +### Authentication + +- **SPIFFE JWT-SVID** (PR #695): For agent-to-agent calls +- **Bearer token**: For user-delegated access from Claude Desktop, etc. + +## Integration with Context Forge + +IBM's MCP Context Forge serves as the central gateway providing: + +- Protocol translation (stdio, SSE, WebSocket, HTTP) +- Tool registry and discovery +- Bearer token auth with JWT + RBAC +- Rate limiting with Redis backing +- OpenTelemetry integration +- Admin UI for management +- Redis-backed federation for distributed deployment + +## Security Considerations + +1. **Network Isolation**: Context Forge runs in same VPC as RAG Modulo backend +2. **JWT Authentication**: Secure token-based auth for all API calls +3. **RBAC**: Team-based access control for sensitive tools +4. **Secrets Management**: MCP server credentials managed by Context Forge +5. **Audit Logging**: All tool invocations logged via OpenTelemetry +6. **Capability Validation**: SPIFFE capabilities mapped to MCP tool permissions + +## Related Documents + +- [SearchService Agent Hooks Architecture](./search-agent-hooks-architecture.md) +- [RAG Modulo MCP Server Architecture](./rag-modulo-mcp-server-architecture.md) +- [SPIRE Integration Architecture](./spire-integration-architecture.md) +- [Agent MCP Architecture Design](../design/agent-mcp-architecture.md) +- [MCP Context Forge Integration Design](../design/mcp-context-forge-integration.md) diff --git a/docs/architecture/rag-modulo-mcp-server-architecture.md b/docs/architecture/rag-modulo-mcp-server-architecture.md new file mode 100644 index 00000000..4bbff346 --- /dev/null +++ b/docs/architecture/rag-modulo-mcp-server-architecture.md @@ -0,0 +1,689 @@ +# RAG Modulo MCP Server Architecture + +**Date**: November 2025 +**Status**: Architecture Design +**Version**: 1.0 +**Related Documents**: [MCP Integration Architecture](./mcp-integration-architecture.md), [SPIRE Integration Architecture](./spire-integration-architecture.md) + +## Overview + +This document describes the architecture for exposing RAG Modulo's capabilities as an MCP +(Model Context Protocol) server. This enables external AI tools like Claude Desktop, workflow +automation systems, and other MCP clients to interact with RAG Modulo's search, ingestion, +and content generation features. + +## Use Cases + +### External MCP Clients + +| Client | Use Case | +|--------|----------| +| **Claude Desktop** | User asks Claude to search their company documents | +| **n8n/Zapier** | Workflow automation: ingest email attachments, search on triggers | +| **Custom AI Bots** | Slack/Teams bots that query document collections | +| **Agent Frameworks** | LangChain, AutoGPT agents using RAG Modulo as knowledge source | + +### Example Scenarios + +**Scenario 1: Claude Desktop** + +``` +User in Claude Desktop: +"Search my company's financial documents for Q4 projections" + +Claude Desktop: +1. Discovers rag_search tool via MCP +2. Calls rag_search(collection_id="...", query="Q4 projections") +3. Receives answer + sources from RAG Modulo +4. Presents to user with citations +``` + +**Scenario 2: Workflow Automation** + +``` +Trigger: New email received with attachment +Action 1: Extract attachment, upload to temp storage +Action 2: Call rag_ingest to add document to collection +Action 3: Call rag_search to check for related content +Action 4: Send Slack notification with summary +``` + +**Scenario 3: Multi-Agent System** + +``` +Orchestrator Agent: +1. Calls rag_list_collections to find relevant collection +2. Calls rag_search to gather information +3. Calls rag_generate_podcast to create audio summary +4. Combines results for final user response +``` + +## Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ EXTERNAL MCP CLIENTS │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ Claude Desktop │ │ Custom AI Bot │ │ Workflow Tool │ │ +│ │ │ │ │ │ (n8n, Zapier) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā–¼ MCP Protocol (stdio/SSE/HTTP) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RAG Modulo Native MCP Server │ +│ backend/rag_solution/mcp_server/ │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Tools ││ +│ │ ││ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ││ +│ │ │ rag_search │ │ rag_ingest │ │ rag_list_colls │ ││ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ││ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ││ +│ │ │ rag_gen_podcast │ │ rag_smart_q's │ │ rag_get_doc │ ││ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Resources ││ +│ │ ││ +│ │ rag://collection/{id}/documents - Document metadata ││ +│ │ rag://collection/{id}/stats - Collection statistics ││ +│ │ rag://search/{query}/results - Cached search results ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Authentication ││ +│ │ ││ +│ │ • SPIFFE JWT-SVID (agent-to-agent) ◀── PR #695 ││ +│ │ • Bearer token (user-delegated access) ││ +│ │ • API key (service accounts) ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RAG Modulo Backend Services │ +│ (SearchService, DocumentService, PodcastService, etc.) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Exposed Tools + +### rag_search + +Search documents in a RAG Modulo collection. + +```yaml +name: rag_search +description: Search documents in a RAG Modulo collection using semantic search with optional Chain-of-Thought reasoning + +parameters: + collection_id: + type: string + description: UUID of the collection to search + required: true + query: + type: string + description: Natural language search query + required: true + top_k: + type: integer + description: Number of results to return + required: false + default: 5 + use_cot: + type: boolean + description: Enable Chain-of-Thought reasoning for complex queries + required: false + default: false + +returns: + answer: + type: string + description: Synthesized answer from retrieved documents + sources: + type: array + description: List of source documents with titles and relevance scores + cot_steps: + type: array + description: Reasoning steps (if use_cot=true) +``` + +### rag_ingest + +Add documents to a collection. + +```yaml +name: rag_ingest +description: Add one or more documents to a RAG Modulo collection + +parameters: + collection_id: + type: string + description: UUID of the target collection + required: true + documents: + type: array + description: List of documents to ingest + required: true + items: + type: object + properties: + title: + type: string + description: Document title + content: + type: string + description: Document content (text) + metadata: + type: object + description: Optional metadata (author, date, tags, etc.) + +returns: + ingested_count: + type: integer + description: Number of documents successfully ingested + document_ids: + type: array + description: UUIDs of ingested documents + errors: + type: array + description: Any errors encountered during ingestion +``` + +### rag_list_collections + +List collections accessible to the authenticated agent/user. + +```yaml +name: rag_list_collections +description: List document collections the authenticated agent can access + +parameters: + include_stats: + type: boolean + description: Include document counts and last updated timestamps + required: false + default: false + +returns: + collections: + type: array + items: + type: object + properties: + id: + type: string + description: Collection UUID + name: + type: string + description: Collection name + description: + type: string + description: Collection description + document_count: + type: integer + description: Number of documents (if include_stats=true) + last_updated: + type: string + description: ISO timestamp of last update (if include_stats=true) +``` + +### rag_generate_podcast + +Generate an audio podcast from collection content. + +```yaml +name: rag_generate_podcast +description: Generate an AI-powered audio podcast from collection documents + +parameters: + collection_id: + type: string + description: UUID of the source collection + required: true + topic: + type: string + description: Focus topic for the podcast (optional - uses all content if not specified) + required: false + duration_minutes: + type: integer + description: Target podcast duration in minutes + required: false + default: 5 + minimum: 1 + maximum: 30 + +returns: + audio_url: + type: string + description: URL to download the generated audio file + transcript: + type: string + description: Full text transcript of the podcast + duration: + type: number + description: Actual duration in seconds +``` + +### rag_smart_questions + +Get AI-suggested follow-up questions based on context. + +```yaml +name: rag_smart_questions +description: Generate intelligent follow-up questions based on collection content and conversation context + +parameters: + collection_id: + type: string + description: UUID of the collection + required: true + context: + type: string + description: Current conversation context or recent query + required: false + count: + type: integer + description: Number of questions to generate + required: false + default: 3 + minimum: 1 + maximum: 10 + +returns: + questions: + type: array + items: + type: string + description: List of suggested follow-up questions +``` + +### rag_get_document + +Retrieve a specific document's content and metadata. + +```yaml +name: rag_get_document +description: Retrieve full content and metadata for a specific document + +parameters: + document_id: + type: string + description: UUID of the document + required: true + +returns: + id: + type: string + description: Document UUID + title: + type: string + description: Document title + content: + type: string + description: Full document text content + metadata: + type: object + description: Document metadata + collection_id: + type: string + description: Parent collection UUID + created_at: + type: string + description: ISO timestamp of document creation +``` + +## Exposed Resources + +MCP resources provide read-only access to RAG Modulo data. + +### rag://collection/{id}/documents + +Document metadata for a collection. + +```json +{ + "uri": "rag://collection/abc123/documents", + "name": "Collection Documents", + "description": "List of documents in the collection", + "mimeType": "application/json" +} +``` + +**Content**: + +```json +{ + "collection_id": "abc123", + "documents": [ + { + "id": "doc1", + "title": "Q4 Financial Report", + "created_at": "2024-10-15T10:00:00Z", + "word_count": 5000, + "metadata": { "author": "Finance Team" } + } + ], + "total_count": 150 +} +``` + +### rag://collection/{id}/stats + +Collection statistics. + +```json +{ + "uri": "rag://collection/abc123/stats", + "name": "Collection Statistics", + "description": "Usage statistics for the collection", + "mimeType": "application/json" +} +``` + +**Content**: + +```json +{ + "collection_id": "abc123", + "document_count": 150, + "total_words": 500000, + "total_chunks": 2500, + "last_ingestion": "2024-11-20T14:30:00Z", + "query_count_30d": 1250, + "avg_query_time_ms": 450 +} +``` + +### rag://search/{query}/results + +Cached search results (for efficiency when same query is repeated). + +```json +{ + "uri": "rag://search/q4+projections/results", + "name": "Cached Search Results", + "description": "Cached results for recent search query", + "mimeType": "application/json" +} +``` + +## Authentication + +### SPIFFE JWT-SVID (Agent-to-Agent) + +For AI agents authenticated via SPIFFE/SPIRE (PR #695): + +``` +Authorization: Bearer + +JWT Claims: +{ + "sub": "spiffe://rag-modulo.example.com/agent/search-enricher/abc123", + "aud": ["rag-modulo-mcp"], + "exp": 1732800000 +} +``` + +The MCP server validates the JWT-SVID and extracts: + +- Agent SPIFFE ID +- Capabilities (from agents table) +- Owner user ID (for collection access) + +### Bearer Token (User-Delegated) + +For external clients acting on behalf of users: + +``` +Authorization: Bearer +``` + +User tokens are issued via existing OAuth flow and include: + +- User ID +- Scopes (read, write, admin) +- Expiration + +### API Key (Service Accounts) + +For service-to-service integration: + +``` +X-API-Key: +``` + +API keys are associated with: + +- Service account user +- Allowed collections +- Rate limits + +## Authorization + +### Capability-Based Access Control + +SPIFFE agents have capabilities that map to MCP tool permissions: + +| Capability | Allowed Tools | +|------------|---------------| +| `search:read` | `rag_search`, `rag_list_collections`, `rag_get_document` | +| `search:write` | `rag_ingest` | +| `llm:invoke` | `rag_generate_podcast`, `rag_smart_questions` | +| `collection:read` | All read operations on owned collections | +| `collection:write` | Create/modify collections | + +### Collection Access + +Agents can only access collections where: + +1. They are owned by the agent's owner_user_id +2. They are shared with the agent's team_id +3. The collection is marked as public + +## File Structure + +``` +backend/rag_solution/mcp_server/ +ā”œā”€ā”€ __init__.py +ā”œā”€ā”€ server.py # MCP server setup, transport handling +ā”œā”€ā”€ tools.py # Tool definitions and implementations +ā”œā”€ā”€ resources.py # Resource definitions +ā”œā”€ā”€ auth.py # SPIFFE/Bearer/API key validation +└── schemas.py # Request/response schemas + +tests/unit/mcp_server/ +ā”œā”€ā”€ __init__.py +ā”œā”€ā”€ test_server.py +ā”œā”€ā”€ test_tools.py +ā”œā”€ā”€ test_resources.py +└── test_auth.py +``` + +## Server Implementation + +### Transport Options + +| Transport | Use Case | Port | +|-----------|----------|------| +| **stdio** | Claude Desktop, local CLI | N/A | +| **SSE** | Web clients, real-time updates | 8010 | +| **HTTP** | REST-like integration | 8010 | + +### Example Server Setup + +```python +# backend/rag_solution/mcp_server/server.py + +from mcp import Server, Tool, Resource +from mcp.transports import StdioTransport, SSETransport + +from .tools import ( + rag_search, + rag_ingest, + rag_list_collections, + rag_generate_podcast, + rag_smart_questions, + rag_get_document, +) +from .resources import collection_documents, collection_stats, search_results +from .auth import validate_auth + +server = Server("rag-modulo") + +# Register tools +server.register_tool(rag_search) +server.register_tool(rag_ingest) +server.register_tool(rag_list_collections) +server.register_tool(rag_generate_podcast) +server.register_tool(rag_smart_questions) +server.register_tool(rag_get_document) + +# Register resources +server.register_resource(collection_documents) +server.register_resource(collection_stats) +server.register_resource(search_results) + +# Auth middleware +server.use(validate_auth) + +# Run server +if __name__ == "__main__": + transport = StdioTransport() # Or SSETransport(port=8010) + server.run(transport) +``` + +## Integration with Context Forge + +Register RAG Modulo MCP server with Context Forge for federation: + +```bash +curl -X POST http://localhost:8001/api/v1/servers \ + -H "Authorization: Bearer $CONTEXT_FORGE_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "RAG Modulo", + "type": "mcp", + "endpoint": "http://rag-modulo-backend:8010", + "config": { + "protocol": "sse", + "auth_required": true + } + }' +``` + +## SPIFFE + MCP Coexistence + +### Identity Flow + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Identity Architecture │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Human Users ││ +│ │ - Authenticate via OIDC/OAuth (existing auth) ││ +│ │ - JWT with user claims ││ +│ │ - Access collections they own ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ ā–² │ +│ │ Creates & owns │ +│ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ AI Agents (PR #695 SPIFFE) ││ +│ │ ││ +│ │ SPIFFE ID: spiffe://rag-modulo.example.com/agent/{type}/{id} ││ +│ │ ││ +│ │ Agent Record: ││ +│ │ - id: UUID ││ +│ │ - spiffe_id: Full SPIFFE ID ││ +│ │ - agent_type: search-enricher, cot-reasoning, etc. ││ +│ │ - owner_user_id: UUID (who created/owns this agent) ││ +│ │ - capabilities: [search:read, llm:invoke, etc.] ││ +│ │ - status: active, suspended, revoked, pending ││ +│ │ ││ +│ │ Auth Flow: ││ +│ │ 1. Agent presents JWT-SVID from SPIRE ││ +│ │ 2. MCP Server validates via SpiffeAuthenticator ││ +│ │ 3. Creates AgentPrincipal with capabilities ││ +│ │ 4. CBAC (Capability-Based Access Control) ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ ā–² │ +│ │ Invokes via MCP │ +│ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ MCP Tools ││ +│ │ ││ +│ │ MCP Server handles: ││ +│ │ - Protocol translation (stdio, SSE, HTTP) ││ +│ │ - Tool discovery and invocation ││ +│ │ - Rate limiting and circuit breakers ││ +│ │ ││ +│ │ Identity Propagation: ││ +│ │ - Agent's SPIFFE ID passed in X-Spiffe-Id header ││ +│ │ - MCP tools validate agent capabilities ││ +│ │ - Audit log includes agent identity ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### Example Flow + +```python +# Agent executes MCP tool with SPIFFE identity + +# 1. Agent authenticates with SPIFFE JWT-SVID +agent_principal = await spiffe_authenticator.validate_svid(jwt_token) +# AgentPrincipal(spiffe_id="spiffe://rag-modulo/agent/search-enricher/abc123", +# capabilities=["search:read", "llm:invoke"]) + +# 2. Agent calls MCP tool +response = await mcp_server.invoke_tool( + tool_name="rag_search", + arguments={"collection_id": "...", "query": "Q4 projections"}, + auth_context=agent_principal +) + +# 3. MCP tool validates capability +if "search:read" not in agent_principal.capabilities: + raise PermissionDenied("Agent lacks search:read capability") + +# 4. Audit log captures full chain +logger.info( + "MCP tool invoked", + agent_spiffe_id=agent_principal.spiffe_id, + tool="rag_search", + owner_user_id=str(agent.owner_user_id) +) +``` + +## Security Considerations + +1. **Authentication Required**: All MCP endpoints require authentication +2. **Capability Validation**: Every tool invocation checks agent capabilities +3. **Collection Scoping**: Agents can only access authorized collections +4. **Rate Limiting**: Per-agent rate limits prevent abuse +5. **Audit Logging**: All tool invocations logged with identity context +6. **Token Expiration**: JWT-SVIDs have short lifetimes (15 minutes) +7. **Revocation**: Agents can be suspended/revoked immediately + +## Observability + +- OpenTelemetry spans for all MCP operations +- Metrics: tool invocation counts, latency, error rates +- Structured logging with agent identity context +- Integration with Context Forge admin UI + +## Related Documents + +- [MCP Integration Architecture](./mcp-integration-architecture.md) +- [SearchService Agent Hooks Architecture](./search-agent-hooks-architecture.md) +- [SPIRE Integration Architecture](./spire-integration-architecture.md) diff --git a/docs/architecture/search-agent-hooks-architecture.md b/docs/architecture/search-agent-hooks-architecture.md new file mode 100644 index 00000000..8ee862a9 --- /dev/null +++ b/docs/architecture/search-agent-hooks-architecture.md @@ -0,0 +1,416 @@ +# SearchService Agent Hooks Architecture + +**Date**: November 2025 +**Status**: Architecture Design +**Version**: 1.0 +**Related Documents**: [MCP Integration Architecture](./mcp-integration-architecture.md) + +## Overview + +This document describes the three-stage agent execution hook system integrated into +SearchService. Agents can be injected at strategic points in the search pipeline to enhance, +transform, or augment the search process. + +## Pipeline Flow + +``` +User Query: "What are the revenue projections for Q4?" + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STAGE 1: PRE-SEARCH AGENTS │ +│ │ +│ Purpose: Enhance/transform the query BEFORE vector search │ +│ │ +│ Example agents: │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ • Query Expander: "revenue projections Q4" → │ │ +│ │ "revenue projections Q4 2024 2025 forecast financial outlook" │ │ +│ │ │ │ +│ │ • Language Detector/Translator: Detect non-English, translate to EN │ │ +│ │ │ │ +│ │ • Acronym Resolver: "Q4" → "fourth quarter, Q4, Oct-Dec" │ │ +│ │ │ │ +│ │ • Intent Classifier: Tag as "financial_analysis" for routing │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ Input: { query: "What are the revenue projections for Q4?" } │ +│ Output: { query: "revenue projections Q4 2024 forecast...", metadata: {} } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ CORE RAG SEARCH (existing logic - unchanged) │ +│ │ +│ • Vector embedding of (enhanced) query │ +│ • Milvus similarity search │ +│ • Document retrieval │ +│ • Optional: Chain-of-Thought reasoning │ +│ │ +│ Output: 10 ranked documents with scores │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STAGE 2: POST-SEARCH AGENTS │ +│ │ +│ Purpose: Process/filter/augment retrieved documents BEFORE answer gen │ +│ │ +│ Example agents: │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ • Re-ranker: Use cross-encoder to re-score documents for relevance │ │ +│ │ │ │ +│ │ • Deduplicator: Remove near-duplicate content across documents │ │ +│ │ │ │ +│ │ • Fact Checker: Validate claims against trusted sources │ │ +│ │ │ │ +│ │ • PII Redactor: Remove sensitive info before showing to user │ │ +│ │ │ │ +│ │ • External Enricher: Add real-time stock prices, weather, etc. │ │ +│ │ (This is what SearchResultEnricher does) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ Input: { documents: [...10 docs...], query: "..." } │ +│ Output: { documents: [...8 docs, reordered, enriched...] } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ ANSWER GENERATION (existing logic - unchanged) │ +│ │ +│ • LLM synthesizes answer from documents │ +│ • Source attribution │ +│ • CoT reasoning steps (if enabled) │ +│ │ +│ Output: { answer: "Based on the documents...", sources: [...] } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STAGE 3: RESPONSE AGENTS │ +│ │ +│ Purpose: Generate artifacts/transformations from the final answer │ +│ │ +│ Example agents: │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ • PowerPoint Generator: Create slides from answer + sources │ │ +│ │ Output: { type: "pptx", data: "base64...", filename: "Q4.pptx" } │ │ +│ │ │ │ +│ │ • PDF Report Generator: Formatted document with citations │ │ +│ │ Output: { type: "pdf", data: "base64...", filename: "report.pdf" } │ │ +│ │ │ │ +│ │ • Chart Generator: Visualize numerical data from answer │ │ +│ │ Output: { type: "png", data: "base64...", filename: "chart.png" } │ │ +│ │ │ │ +│ │ • Audio Summary: Text-to-speech of key findings │ │ +│ │ Output: { type: "mp3", data: "base64...", filename: "summary.mp3" } │ │ +│ │ │ │ +│ │ • Email Draft: Format answer for email sharing │ │ +│ │ Output: { type: "html", data: "...", subject: "Q4 Summary" } │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ These run in PARALLEL since they're independent transformations │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ FINAL RESPONSE │ +│ │ +│ { │ +│ "answer": "Based on the financial documents, Q4 revenue is...", │ +│ "sources": [ │ +│ { "document_id": "...", "title": "Q4 Forecast", "score": 0.92 } │ +│ ], │ +│ "cot_steps": [...], // If CoT enabled │ +│ "agent_artifacts": [ // NEW - from response agents │ +│ { │ +│ "agent_id": "ppt_generator", │ +│ "type": "pptx", │ +│ "data": "UEsDBBQAAAAIAH...", // base64 │ +│ "filename": "Q4_Revenue_Projections.pptx", │ +│ "metadata": { "slides": 5 } │ +│ }, │ +│ { │ +│ "agent_id": "chart_generator", │ +│ "type": "png", │ +│ "data": "iVBORw0KGgo...", // base64 │ +│ "filename": "revenue_chart.png", │ +│ "metadata": { "width": 800, "height": 600 } │ +│ } │ +│ ] │ +│ } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Agent Stages + +### Stage 1: Pre-Search Agents + +**Purpose**: Transform or enhance the query before vector search. + +**Execution**: Sequential by priority (results chain to next agent). + +| Agent Type | Description | Use Case | +|------------|-------------|----------| +| Query Expander | Adds synonyms and related terms | Improve recall | +| Language Detector | Identifies query language | Multi-language support | +| Translator | Translates non-English queries | Internationalization | +| Acronym Resolver | Expands abbreviations | Domain-specific search | +| Intent Classifier | Tags query intent | Routing and filtering | +| Spell Checker | Corrects typos | User experience | + +**Input Schema**: + +```python +class PreSearchInput: + query: str + collection_id: UUID + user_id: UUID + metadata: dict[str, Any] +``` + +**Output Schema**: + +```python +class PreSearchOutput: + query: str # Modified query + metadata: dict[str, Any] # Additional context + skip_search: bool = False # If True, skip core search +``` + +### Stage 2: Post-Search Agents + +**Purpose**: Process, filter, or augment retrieved documents before answer generation. + +**Execution**: Sequential by priority (documents flow through each agent). + +| Agent Type | Description | Use Case | +|------------|-------------|----------| +| Re-ranker | Cross-encoder re-scoring | Improve precision | +| Deduplicator | Remove near-duplicates | Cleaner results | +| Fact Checker | Validate against trusted sources | Accuracy | +| PII Redactor | Remove sensitive information | Compliance | +| External Enricher | Add real-time data | Currency | +| Relevance Filter | Remove low-quality results | Quality | + +**Input Schema**: + +```python +class PostSearchInput: + documents: list[Document] + query: str + collection_id: UUID + user_id: UUID + metadata: dict[str, Any] +``` + +**Output Schema**: + +```python +class PostSearchOutput: + documents: list[Document] # Modified/filtered documents + metadata: dict[str, Any] # Enrichment data +``` + +### Stage 3: Response Agents + +**Purpose**: Generate artifacts or transformations from the final answer. + +**Execution**: Parallel (independent transformations). + +| Agent Type | Description | Output Format | +|------------|-------------|---------------| +| PowerPoint Generator | Create presentation slides | `.pptx` | +| PDF Report Generator | Formatted document with citations | `.pdf` | +| Chart Generator | Visualize numerical data | `.png`, `.svg` | +| Audio Summary | Text-to-speech narration | `.mp3` | +| Email Draft | Format for email sharing | `.html` | +| Executive Summary | Condensed key findings | `.txt` | + +**Input Schema**: + +```python +class ResponseAgentInput: + answer: str + sources: list[Source] + query: str + documents: list[Document] + collection_id: UUID + user_id: UUID + cot_steps: list[CotStep] | None +``` + +**Output Schema**: + +```python +class AgentArtifact: + agent_id: str + type: str # "pptx", "pdf", "png", "mp3", "html" + data: str # base64 encoded + filename: str + metadata: dict[str, Any] +``` + +## Agent Priority and Chaining + +Agents at each stage execute in priority order (lower number = higher priority): + +``` +Pre-search stage (priority order): + 1. Language Detector (priority: 0) → detects "es" (Spanish) + 2. Translator (priority: 10) → uses detection, translates to EN + 3. Query Expander (priority: 20) → expands the translated query + +Each agent receives: + - AgentContext with query, collection_id, user_id + - previous_agent_results: List of results from earlier agents in this stage +``` + +## AgentContext + +Context object passed to all agents: + +```python +@dataclass +class AgentContext: + # Collection context + collection_id: UUID + user_id: UUID + + # Conversation context + conversation_id: UUID | None = None + conversation_history: list[dict[str, str]] | None = None + + # Search context (populated as pipeline progresses) + query: str | None = None + retrieved_documents: list[dict[str, Any]] | None = None + search_metadata: dict[str, Any] | None = None + + # Pipeline context + pipeline_stage: str # 'pre_search', 'post_search', 'response' + + # Agent chaining + previous_agent_results: list[AgentResult] | None = None +``` + +## AgentResult + +Result object returned by all agents: + +```python +@dataclass +class AgentResult: + agent_id: str + success: bool + data: dict[str, Any] + metadata: dict[str, Any] + errors: list[str] | None = None + + # For chaining agents + next_agent_id: str | None = None +``` + +## Collection-Agent Association + +Agents are configured per collection: + +``` +Collection Settings → Agents & Tools +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ ā˜‘ PowerPoint Generator Stage: Response Priority: 1 │ +│ Creates slides from search results [Configure] │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ ā˜‘ Query Expander Stage: Pre-search Priority: 0 │ +│ Adds synonyms and related terms [Configure] │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ ☐ External Knowledge Enricher Stage: Post-search Priority: 5 │ +│ Augments with real-time market data [Configure] │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Database Schema + +### AgentConfig Table + +```sql +CREATE TABLE agent_configs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + agent_id VARCHAR(100) NOT NULL, -- From agent registry + name VARCHAR(255) NOT NULL, + description TEXT, + config JSONB NOT NULL DEFAULT '{}', -- Agent-specific settings + enabled BOOLEAN NOT NULL DEFAULT true, + trigger_stage VARCHAR(50) NOT NULL, -- 'pre_search', 'post_search', 'response' + priority INTEGER NOT NULL DEFAULT 0, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +-- Many-to-many: Collections ↔ AgentConfigs +CREATE TABLE collection_agents ( + collection_id UUID NOT NULL REFERENCES collections(id) ON DELETE CASCADE, + agent_config_id UUID NOT NULL REFERENCES agent_configs(id) ON DELETE CASCADE, + PRIMARY KEY (collection_id, agent_config_id) +); + +-- Indexes +CREATE INDEX idx_agent_configs_user_id ON agent_configs(user_id); +CREATE INDEX idx_agent_configs_trigger_stage ON agent_configs(trigger_stage); +CREATE INDEX idx_agent_configs_enabled ON agent_configs(enabled); +``` + +### Example AgentConfig + +```json +{ + "id": "abc123...", + "user_id": "user456...", + "agent_id": "ppt_generator", + "name": "PowerPoint Generator", + "config": { + "type": "mcp", + "context_forge_tool_id": "generate_powerpoint", + "argument_mapping": { + "title": "query", + "documents": "documents", + "max_slides": "config.max_slides" + }, + "settings": { + "max_slides": 15, + "template": "corporate" + } + }, + "enabled": true, + "trigger_stage": "response", + "priority": 10 +} +``` + +## Error Handling + +- **Agent Timeout**: Each agent has configurable timeout (default 30s) +- **Agent Failure**: Logged, skipped, pipeline continues +- **Circuit Breaker**: Failing agents disabled after threshold +- **Fallback**: Optional fallback agents for critical stages + +## Performance Considerations + +1. **Pre-search agents**: Run sequentially (query transformation order matters) +2. **Post-search agents**: Run sequentially (document filtering order matters) +3. **Response agents**: Run in parallel (independent artifact generation) +4. **Caching**: Agent results cached by (query_hash, agent_id, config_hash) +5. **Timeouts**: Per-agent and per-stage timeouts prevent runaway execution + +## Observability + +- All agent executions logged with structured context +- OpenTelemetry spans for each agent invocation +- Metrics: execution time, success rate, artifact sizes +- Traces flow through Context Forge for end-to-end visibility + +## Related Documents + +- [MCP Integration Architecture](./mcp-integration-architecture.md) +- [RAG Modulo MCP Server Architecture](./rag-modulo-mcp-server-architecture.md) +- [Agent MCP Architecture Design](../design/agent-mcp-architecture.md) diff --git a/docs/architecture/spire-integration-architecture.md b/docs/architecture/spire-integration-architecture.md new file mode 100644 index 00000000..264636fc --- /dev/null +++ b/docs/architecture/spire-integration-architecture.md @@ -0,0 +1,900 @@ +# SPIRE Integration Architecture for Agent Identity in RAG Modulo + +## Executive Summary + +This document outlines the architecture for integrating SPIFFE/SPIRE into RAG Modulo to provide +cryptographic workload identities for AI agents. By combining SPIRE's production-ready SPIFFE +implementation with IBM MCP Context Forge, RAG Modulo will gain zero-trust agent authentication, +enabling secure agent-to-agent (A2A) communication and verifiable machine identities. + +## Table of Contents + +1. [Background and Motivation](#1-background-and-motivation) +2. [Current State Analysis](#2-current-state-analysis) +3. [SPIFFE/SPIRE Fundamentals](#3-spiffespire-fundamentals) +4. [Proposed Architecture](#4-proposed-architecture) +5. [Integration Points](#5-integration-points) +6. [Agent Identity Model](#6-agent-identity-model) +7. [Deployment Topology](#7-deployment-topology) +8. [Security Considerations](#8-security-considerations) +9. [Implementation Phases](#9-implementation-phases) +10. [References](#10-references) + +--- + +## 1. Background and Motivation + +### 1.1 Why Agent Identity Matters + +As RAG Modulo evolves to support AI agents through IBM MCP Context Forge integration (PR #684), +a critical need emerges: **verifiable machine/agent identities**. Unlike human users authenticated +via OAuth/OIDC, AI agents require: + +- **Cryptographic identity** that cannot be forged or impersonated +- **Zero-trust verification** at every interaction point +- **Automatic credential rotation** without service disruption +- **Audit trails** for agent actions tied to immutable identities +- **Cross-service trust** in distributed agent ecosystems + +### 1.2 Limitations of Current Approach + +The current authentication system in RAG Modulo (`backend/core/authentication_middleware.py`) relies on: + +- JWT tokens with shared secrets +- Human-centric OAuth/OIDC flows +- Mock token patterns for development +- No native support for workload/machine identities + +These approaches don't scale for multi-agent systems where: + +- Agents spawn dynamically +- Credentials must rotate automatically +- Trust must be cryptographically verifiable +- Agent-to-agent calls require mutual authentication + +### 1.3 Strategic Value + +SPIRE integration enables RAG Modulo to: + +1. **Establish trust domains** for agent ecosystems +2. **Issue verifiable identities** (SVIDs) to each agent workload +3. **Enable mTLS** for secure agent-to-agent communication +4. **Support federation** across organizational boundaries +5. **Align with industry standards** (SPIFFE is a CNCF graduated project) + +--- + +## 2. Current State Analysis + +### 2.1 RAG Modulo Authentication Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Current Authentication Flow │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ Client │───▶│ AuthenticationMiddleware │───▶│ User Model │ │ +│ │ (Human) │ │ (JWT / OAuth / Mock) │ │ (PostgreSQL) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ Authentication Methods: │ +│ • JWT tokens (custom claims: sub, email, name, uuid, role) │ +│ • OIDC/OAuth via backend/auth/oidc.py │ +│ • Mock tokens for dev/test (SKIP_AUTH, DEVELOPMENT_MODE) │ +│ │ +│ User Model (backend/rag_solution/models/user.py): │ +│ • id (UUID), ibm_id, email, name, role │ +│ • Relationships: collections, teams, files, pipelines │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 2.2 IBM MCP Context Forge Integration (PR #684) + +PR #684 introduces the MCP Gateway integration with: + +- **ResilientMCPGatewayClient**: Circuit breaker pattern for gateway calls +- **SearchResultEnricher**: Tool-based search enrichment +- **MCP Router**: `/api/v1/mcp/*` endpoints for tool discovery/invocation +- **JWT Support**: `mcp_jwt_token` configuration (identified security gap) + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ MCP Context Forge Architecture │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ RAG Modulo │───▶│ MCP Context │───▶│ External MCP │ │ +│ │ Backend │ │ Forge Gateway │ │ Tool Servers │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ │ Virtual Servers │ │ +│ │ │ Tool Registry │ │ +│ │ │ Federation │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā–¼ │ +│ Current Auth: JWT Bearer tokens │ +│ Gap: No cryptographic workload identity │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +--- + +## 3. SPIFFE/SPIRE Fundamentals + +### 3.1 Core Concepts + +| Component | Description | +|-----------|-------------| +| **SPIFFE** | Secure Production Identity Framework For Everyone - the specification | +| **SPIRE** | SPIFFE Runtime Environment - production-ready implementation | +| **SVID** | SPIFFE Verifiable Identity Document - the credential | +| **Trust Domain** | Namespace for identities (e.g., `spiffe://rag-modulo.example.com`) | +| **Workload** | Any running process that needs identity (agent, service, etc.) | + +### 3.2 SPIFFE ID Structure + +``` +spiffe://trust-domain/path/to/workload + +Examples: +spiffe://rag-modulo.example.com/agent/search-enricher +spiffe://rag-modulo.example.com/agent/cot-reasoning/instance-1 +spiffe://rag-modulo.example.com/service/backend-api +spiffe://rag-modulo.example.com/mcp/tool-server/watson-nlp +``` + +### 3.3 SVID Types + +| Type | Format | Use Case | +|------|--------|----------| +| **X.509-SVID** | X.509 certificate | mTLS, long-lived connections, service mesh | +| **JWT-SVID** | JWT token | REST APIs, short-lived authentication, federation | + +For RAG Modulo's agent architecture, **JWT-SVIDs** are recommended because: + +- Native integration with existing JWT middleware +- Audience-scoped access control +- Lightweight verification without certificate chains +- Better fit for MCP's HTTP-based protocols + +### 3.4 Attestation Flow + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ SPIRE Attestation Flow │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ 1. Node Attestation (Server ←→ Agent) │ +│ Agent proves node identity using platform evidence: │ +│ • Kubernetes ServiceAccount tokens │ +│ • AWS Instance Identity Documents │ +│ • Azure Managed Identity │ +│ • Docker container selectors │ +│ │ +│ 2. Workload Attestation (Agent ←→ Workload) │ +│ Agent verifies workload properties: │ +│ • Kubernetes: namespace, service account, pod labels │ +│ • Unix: uid, gid, binary path, sha256 hash │ +│ • Docker: image ID, container labels, environment │ +│ │ +│ 3. SVID Issuance │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │Workload │───▶│ SPIRE │───▶│ SPIRE │ │ +│ │(Agent) │ │ Agent │ │ Server │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ │ │ +│ │ Request │ Fetch │ │ +│ │ SVID │ from cache │ │ +│ │ │ or server │ │ +│ ā–¼ ā–¼ │ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ JWT-SVID Token ā”‚ā—€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ (Signed, scoped) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +--- + +## 4. Proposed Architecture + +### 4.1 High-Level Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RAG Modulo + SPIRE + MCP Architecture │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ Trust Domain: spiffe://rag-modulo.example.com │ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ SPIRE Server Cluster │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ SPIRE │ │ Registration │ │ Trust Bundle Store │ │ │ +│ │ │ Server │ │ Entries │ │ (PostgreSQL / Datastore) │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ │ │ │ +│ ā–¼ ā–¼ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ SPIRE Agent │ │ SPIRE Agent │ │ SPIRE Agent │ │ +│ │ (Backend Pod)│ │ (MCP Gateway)│ │ (Tool Servers) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ │ │ +│ ā–¼ ā–¼ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ RAG Modulo │ │ MCP Context │ │ MCP Tool │ │ +│ │ Backend │◀▶│ Forge Gateway │◀▶│ Servers │ │ +│ │ │ │ │ │ (WatsonX, etc.) │ │ +│ │ SPIFFE ID: │ │ SPIFFE ID: │ │ SPIFFE ID: │ │ +│ │ /service/ │ │ /gateway/ │ │ /mcp/tool/ │ │ +│ │ backend │ │ mcp-forge │ │ watson-nlp │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ │ │ +│ │ │ │ │ +│ ā–¼ ā–¼ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ Agent Workloads (JWT-SVID) │ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ Search │ │ Chain of │ │ Question │ │ Source │ │ │ +│ │ │ Enricher │ │ Thought │ │ Decomposer │ │ Attribution │ │ │ +│ │ │ Agent │ │ Agent │ │ Agent │ │ Agent │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ SPIFFE ID: │ │ SPIFFE ID: │ │ SPIFFE ID: │ │ SPIFFE ID: │ │ │ +│ │ │/agent/ │ │/agent/ │ │/agent/ │ │/agent/ │ │ │ +│ │ │ search- │ │ cot- │ │ question- │ │ source- │ │ │ +│ │ │ enricher │ │ reasoning │ │ decomposer │ │ attribution │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 4.2 Component Descriptions + +| Component | Purpose | SPIFFE ID Pattern | +|-----------|---------|-------------------| +| **SPIRE Server** | Central authority for identity management | N/A (infrastructure) | +| **SPIRE Agent** | Per-node daemon, exposes Workload API | N/A (infrastructure) | +| **RAG Modulo Backend** | Main API service | `spiffe://rag-modulo.example.com/service/backend` | +| **MCP Context Forge Gateway** | Tool registry and routing | `spiffe://rag-modulo.example.com/gateway/mcp-forge` | +| **MCP Tool Servers** | External tool providers | `spiffe://rag-modulo.example.com/mcp/tool/{tool-name}` | +| **Agent Workloads** | AI agents performing tasks | `spiffe://rag-modulo.example.com/agent/{agent-type}` | + +--- + +## 5. Integration Points + +### 5.1 Python SPIFFE Client Integration + +The `py-spiffe` library provides native Python support for SPIFFE: + +```python +# Installation: pip install spiffe + +from spiffe import WorkloadApiClient, JwtSource + +# Fetch JWT-SVID for agent authentication +with WorkloadApiClient() as client: + jwt_svid = client.fetch_jwt_svid(audience={"mcp-gateway", "backend-api"}) + spiffe_id = jwt_svid.spiffe_id # spiffe://rag-modulo.example.com/agent/search-enricher + token = jwt_svid.token # JWT to use in Authorization header + +# Auto-refreshing JWT source for long-running agents +with JwtSource() as source: + svid = source.fetch_svid(audience={'mcp-gateway'}) + # Token automatically rotates before expiration +``` + +### 5.2 Integration with AuthenticationMiddleware + +Extend `backend/core/authentication_middleware.py` to support SPIFFE JWT-SVIDs: + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Enhanced Authentication Flow │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ Request │ │ +│ ā””ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā–¼ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ AuthenticationMiddleware │ │ +│ │ │ │ +│ │ 1. Check bypass mode (dev/test) │ │ +│ │ 2. Check open paths │ │ +│ │ 3. Extract Authorization header │ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ Token Type Detection │ │ │ +│ │ │ │ │ │ +│ │ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ │ +│ │ │ │ User JWT │ │ SPIFFE │ │ Mock │ │ │ │ +│ │ │ │ (iss=self) │ │ JWT-SVID │ │ Token │ │ │ │ +│ │ │ │ │ │ (iss=SPIRE) │ │ │ │ │ │ +│ │ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ │ +│ │ │ │ │ │ │ │ │ +│ │ │ ā–¼ ā–¼ ā–¼ │ │ │ +│ │ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ │ +│ │ │ │ Unified Principal Object │ │ │ │ +│ │ │ │ • identity_type: "user" | "agent" │ │ │ │ +│ │ │ │ • spiffe_id: (for agents) │ │ │ │ +│ │ │ │ • user_id: (for users) │ │ │ │ +│ │ │ │ • capabilities: [list of allowed actions] │ │ │ │ +│ │ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 5.3 Integration with MCP Context Forge + +Enhance the MCP Gateway client to use SPIFFE authentication: + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ MCP Gateway SPIFFE Integration │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ ResilientMCPGatewayClient │ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ SPIFFEAuthenticator │ │ │ +│ │ │ │ │ │ +│ │ │ • JwtSource for auto-refreshing tokens │ │ │ +│ │ │ • Audience scoping per endpoint │ │ │ +│ │ │ • Fallback to legacy JWT if SPIRE unavailable │ │ │ +│ │ │ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ │ def _get_auth_headers(self, audience: str) -> dict: │ │ +│ │ svid = self.jwt_source.fetch_svid(audience={aud}) │ │ +│ │ return {"Authorization": f"Bearer {svid.token}"} │ │ +│ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ Benefits: │ +│ • Mutual authentication (both sides verify SPIFFE IDs) │ +│ • No shared secrets to manage or rotate │ +│ • Audience validation prevents token reuse attacks │ +│ • Cryptographic proof of workload identity │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +--- + +## 6. Agent Identity Model + +### 6.1 Agent Schema Extension + +Extend the existing data model to support agent identities: + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Agent Identity Model │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ Table: agents │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ Column │ Type │ Description ││ +│ │──────────────────┼─────────────┼────────────────────────────││ +│ │ id │ UUID │ Primary key ││ +│ │ spiffe_id │ VARCHAR │ SPIFFE identity (unique) ││ +│ │ agent_type │ VARCHAR │ Type classification ││ +│ │ name │ VARCHAR │ Human-readable name ││ +│ │ description │ TEXT │ Purpose/capabilities ││ +│ │ owner_user_id │ UUID (FK) │ User who owns this agent ││ +│ │ team_id │ UUID (FK) │ Team association (optional)││ +│ │ capabilities │ JSONB │ Allowed actions/scopes ││ +│ │ metadata │ JSONB │ Additional properties ││ +│ │ status │ VARCHAR │ active/suspended/revoked ││ +│ │ created_at │ TIMESTAMP │ Registration time ││ +│ │ last_seen_at │ TIMESTAMP │ Last authentication ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ │ +│ SPIFFE ID Pattern: │ +│ spiffe://rag-modulo.example.com/agent/{agent_type}/{agent_id} │ +│ │ +│ Example: │ +│ spiffe://rag-modulo.example.com/agent/search-enricher/abc123 │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 6.2 Agent Types and Capabilities + +| Agent Type | SPIFFE Path | Capabilities | +|------------|-------------|--------------| +| `search-enricher` | `/agent/search-enricher/{id}` | `mcp:tool:invoke`, `search:read` | +| `cot-reasoning` | `/agent/cot-reasoning/{id}` | `search:read`, `llm:invoke`, `pipeline:execute` | +| `question-decomposer` | `/agent/question-decomposer/{id}` | `search:read`, `llm:invoke` | +| `source-attribution` | `/agent/source-attribution/{id}` | `document:read`, `search:read` | +| `entity-extraction` | `/agent/entity-extraction/{id}` | `document:read`, `llm:invoke` | +| `answer-synthesis` | `/agent/answer-synthesis/{id}` | `search:read`, `llm:invoke`, `cot:invoke` | + +### 6.3 Registration Entry Templates + +SPIRE registration entries map SPIFFE IDs to selectors: + +```yaml +# Kubernetes deployment selector example +entries: + - spiffe_id: spiffe://rag-modulo.example.com/agent/search-enricher + parent_id: spiffe://rag-modulo.example.com/spire/agent/k8s/node + selectors: + - k8s:ns:rag-modulo + - k8s:sa:search-enricher-agent + - k8s:pod-label:app:search-enricher + ttl: 3600 # 1 hour token lifetime + + - spiffe_id: spiffe://rag-modulo.example.com/agent/cot-reasoning + parent_id: spiffe://rag-modulo.example.com/spire/agent/k8s/node + selectors: + - k8s:ns:rag-modulo + - k8s:sa:cot-reasoning-agent + - k8s:pod-label:app:cot-reasoning + ttl: 3600 + + - spiffe_id: spiffe://rag-modulo.example.com/service/backend + parent_id: spiffe://rag-modulo.example.com/spire/agent/k8s/node + selectors: + - k8s:ns:rag-modulo + - k8s:sa:backend-api + - k8s:pod-label:app:rag-modulo-backend + ttl: 86400 # 24 hours for services +``` + +--- + +## 7. Deployment Topology + +### 7.1 Kubernetes Deployment + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kubernetes Deployment Topology │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ Namespace: spire-system │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ StatefulSet: spire-server (replicas: 3 for HA) │ │ │ +│ │ │ │ │ │ +│ │ │ • Shared PostgreSQL datastore for consistency │ │ │ +│ │ │ • Trust bundle distribution via ConfigMap │ │ │ +│ │ │ • K8s Workload Registrar sidecar for auto-registration │ │ │ +│ │ │ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ DaemonSet: spire-agent │ │ │ +│ │ │ │ │ │ +│ │ │ • One agent per node │ │ │ +│ │ │ • CSI driver for workload API exposure (recommended) │ │ │ +│ │ │ • Rolling update strategy (maxUnavailable: 5) │ │ │ +│ │ │ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +│ Namespace: rag-modulo │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ Deployment: │ │ Deployment: │ │ Deployment: │ │ │ +│ │ │ backend-api │ │ mcp-gateway │ │ agent-workers │ │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ │ • SPIRE CSI │ │ • SPIRE CSI │ │ • SPIRE CSI │ │ │ +│ │ │ volume mount │ │ volume mount │ │ volume mount │ │ │ +│ │ │ • py-spiffe │ │ • SPIFFE auth │ │ • JwtSource │ │ │ +│ │ │ WorkloadAPI │ │ middleware │ │ per agent type │ │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ │ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ │ │ Infrastructure Services │ │ │ +│ │ │ │ │ │ +│ │ │ PostgreSQL │ Milvus │ MinIO │ MLFlow │ Redis │ │ │ +│ │ │ (existing infrastructure - no SPIFFE changes required) │ │ │ +│ │ │ │ │ │ +│ │ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ +│ │ │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 7.2 Docker Compose Development Setup + +For local development, a simplified SPIRE deployment: + +```yaml +# docker-compose.spire.yml (new file) +services: + spire-server: + image: ghcr.io/spiffe/spire-server:1.9.0 + volumes: + - ./spire/server.conf:/etc/spire/server.conf + - ./spire/data:/var/lib/spire/server + ports: + - "8081:8081" # Server API + command: ["-config", "/etc/spire/server.conf"] + + spire-agent: + image: ghcr.io/spiffe/spire-agent:1.9.0 + volumes: + - ./spire/agent.conf:/etc/spire/agent.conf + - /var/run/spire:/var/run/spire # Workload API socket + depends_on: + - spire-server + command: ["-config", "/etc/spire/agent.conf"] + pid: "host" # Required for Unix workload attestation + + # Extend existing backend service + backend: + volumes: + - /var/run/spire:/var/run/spire:ro + environment: + - SPIFFE_ENDPOINT_SOCKET=unix:///var/run/spire/agent.sock + - SPIFFE_ENABLED=true +``` + +--- + +## 8. Security Considerations + +### 8.1 Threat Model + +| Threat | Mitigation | +|--------|------------| +| **Agent impersonation** | SPIRE attestation verifies workload properties before issuing SVIDs | +| **Token theft** | Short TTLs (1 hour), audience scoping, automatic rotation | +| **Replay attacks** | JWT `exp` and `aud` claims prevent cross-service reuse | +| **Compromised node** | SPIRE agent revocation, registration entry removal | +| **Trust domain compromise** | Federation allows cross-org trust without shared root | +| **Insider threat** | Audit logging via SPIRE, capability-based access control | + +### 8.2 Trust Hierarchy + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Trust Hierarchy │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ │ +│ Level 0: Trust Domain Root │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”ā”‚ +│ │ spiffe://rag-modulo.example.com ││ +│ │ (SPIRE Server - root of trust) ││ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ā”‚ +│ │ │ +│ ā–¼ │ +│ Level 1: Infrastructure Services │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ /service/ │ │ /gateway/ │ │ /spire/ │ │ +│ │ backend │ │ mcp-forge │ │ agent/* │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā–¼ │ +│ Level 2: Agent Workloads │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ /agent/search-enricher/* │ /agent/cot-reasoning/* │ ... │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ │ +│ ā–¼ │ +│ Level 3: External Tool Servers (Federated) │ +│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ +│ │ spiffe://external-tool.example.com/tool/* │ │ +│ │ (Federated trust via bundle exchange) │ │ +│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ +│ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### 8.3 Capability-Based Access Control + +```python +# Example capability definitions for agents +AGENT_CAPABILITIES = { + "search-enricher": { + "allowed_audiences": ["mcp-gateway", "backend-api"], + "allowed_actions": [ + "mcp:tool:invoke", + "search:read", + ], + "rate_limit": 100, # requests per minute + }, + "cot-reasoning": { + "allowed_audiences": ["backend-api", "llm-provider"], + "allowed_actions": [ + "search:read", + "llm:invoke", + "pipeline:execute", + ], + "rate_limit": 50, + }, +} +``` + +--- + +## 9. Implementation Phases + +### Phase 1: Foundation (Weeks 1-2) + +**Objective**: Deploy SPIRE infrastructure and establish trust domain + +1. **SPIRE Server Deployment** + - Deploy SPIRE Server (StatefulSet in Kubernetes or Docker container) + - Configure PostgreSQL as datastore + - Set up trust domain: `spiffe://rag-modulo.example.com` + +2. **SPIRE Agent Deployment** + - Deploy SPIRE Agent (DaemonSet in K8s or sidecar in Docker) + - Configure node attestation (K8s ServiceAccount tokens) + - Expose Workload API via CSI driver or hostPath + +3. **Registration Entries** + - Create initial entries for backend service + - Create entries for MCP Gateway + - Validate SVID issuance + +**Deliverables**: + +- Working SPIRE infrastructure +- Trust bundle generation +- Basic SVID fetch validation + +### Phase 2: Backend Integration (Weeks 3-4) + +**Objective**: Integrate SPIFFE authentication into RAG Modulo backend + +1. **Add py-spiffe Dependency** + + ```toml + # pyproject.toml + [tool.poetry.dependencies] + spiffe = "^0.2.2" + ``` + +2. **Create SPIFFE Authentication Module** + - New file: `backend/core/spiffe_auth.py` + - `SPIFFEAuthenticator` class with JwtSource + - Token validation and SPIFFE ID extraction + +3. **Extend AuthenticationMiddleware** + - Detect SPIFFE JWT-SVIDs (issuer check) + - Create unified Principal object + - Support both user JWTs and agent SVIDs + +4. **Add Agent Data Model** + - Create `Agent` SQLAlchemy model + - Create agent repository and service + - Add agent management endpoints + +**Deliverables**: + +- SPIFFE-aware authentication middleware +- Agent data model and APIs +- Integration tests + +### Phase 3: MCP Gateway Integration (Weeks 5-6) + +**Objective**: Enable SPIFFE authentication for MCP tool invocations + +1. **Update ResilientMCPGatewayClient** + - Add `SPIFFEAuthenticator` integration + - Implement audience-scoped token fetching + - Graceful fallback to legacy JWT + +2. **Mutual Authentication** + - MCP Gateway validates agent SVIDs + - Agent validates gateway SVID + - Log SPIFFE IDs for audit + +3. **Tool Server Registration** + - Create SPIRE entries for tool servers + - Configure federation if cross-domain + +**Deliverables**: + +- SPIFFE-authenticated MCP calls +- Mutual TLS option for sensitive tools +- Audit logging integration + +### Phase 4: Agent Workloads (Weeks 7-8) + +**Objective**: Enable individual AI agents to obtain and use SVIDs + +1. **Agent Worker Framework** + - Base class for SPIFFE-enabled agents + - Automatic SVID lifecycle management + - Capability validation + +2. **Implement Agent Types** + - SearchEnricherAgent + - ChainOfThoughtAgent + - QuestionDecomposerAgent + - SourceAttributionAgent + +3. **Agent Orchestration** + - Agent spawning with SPIFFE registration + - Dynamic capability assignment + - Agent monitoring and health checks + +**Deliverables**: + +- Working agent workloads with SVIDs +- Agent orchestration framework +- End-to-end integration tests + +### Phase 5: Production Hardening (Weeks 9-10) + +**Objective**: Production-ready deployment with HA and monitoring + +1. **High Availability** + - SPIRE Server clustering (3 replicas) + - Shared PostgreSQL datastore + - Agent failover testing + +2. **Observability** + - OpenTelemetry integration for SPIRE + - SPIFFE ID correlation in logs + - Grafana dashboards for SVID metrics + +3. **Security Audit** + - Penetration testing + - Token TTL tuning + - Revocation procedures + +**Deliverables**: + +- Production-ready deployment +- Monitoring and alerting +- Security documentation + +--- + +## 10. References + +### SPIFFE/SPIRE Documentation + +- [SPIFFE Concepts](https://spiffe.io/docs/latest/spiffe-about/spiffe-concepts/) +- [SPIRE Concepts](https://spiffe.io/docs/latest/spire-about/spire-concepts/) +- [Quickstart for Kubernetes](https://spiffe.io/docs/latest/try/getting-started-k8s/) +- [JWT-SVID Specification](https://github.com/spiffe/spiffe/blob/main/standards/JWT-SVID.md) + +### Python Libraries + +- [py-spiffe (GitHub)](https://github.com/HewlettPackard/py-spiffe) +- [spiffe on PyPI](https://pypi.org/project/spiffe/) + +### IBM MCP Context Forge + +- [IBM MCP Context Forge (GitHub)](https://github.com/IBM/mcp-context-forge) +- [PR #684: MCP Gateway Integration](https://github.com/manavgup/rag_modulo/pull/684) + +### Industry Resources + +- [SPIFFE/SPIRE CSI Driver](https://www.kusari.dev/blog/spiffe-spire-csi-driver) +- [Indeed Engineering: Workload Identity with SPIRE](https://engineering.indeedblog.com/blog/2024/07/workload-identity-with-spire-oidc-for-k8s-istio/) +- [Understanding SPIRE Kubernetes Workload Registrar](https://medium.com/@nathalia.gomazako/understanding-spire-kubernetes-workload-registrar-5dd153ce68fc) + +--- + +## Appendix A: Glossary + +| Term | Definition | +|------|------------| +| **SPIFFE** | Secure Production Identity Framework For Everyone | +| **SPIRE** | SPIFFE Runtime Environment | +| **SVID** | SPIFFE Verifiable Identity Document | +| **Trust Domain** | Administrative namespace for SPIFFE identities | +| **Workload** | Any process requiring identity (service, agent, job) | +| **Attestation** | Process of verifying workload identity | +| **Registration Entry** | Mapping of SPIFFE ID to workload selectors | +| **JWT-SVID** | SVID in JWT format for stateless verification | +| **X.509-SVID** | SVID in X.509 certificate format for mTLS | +| **Agent (SPIRE)** | Per-node daemon exposing Workload API | +| **Agent (AI)** | AI workload performing RAG tasks | + +## Appendix B: Configuration Templates + +### SPIRE Server Configuration + +```hcl +# spire/server.conf +server { + bind_address = "0.0.0.0" + bind_port = "8081" + trust_domain = "rag-modulo.example.com" + data_dir = "/var/lib/spire/server" + log_level = "INFO" + + jwt_issuer = "spire://rag-modulo.example.com" + + ca_ttl = "168h" # 7 days + default_x509_svid_ttl = "24h" + default_jwt_svid_ttl = "1h" +} + +plugins { + DataStore "sql" { + plugin_data { + database_type = "postgres" + connection_string = "${SPIRE_DB_CONNECTION_STRING}" # Set via environment variable + } + } + + KeyManager "disk" { + plugin_data { + keys_path = "/var/lib/spire/server/keys" + } + } + + NodeAttestor "k8s_sat" { + plugin_data { + clusters = { + "rag-modulo-cluster" = { + service_account_allow_list = ["spire-system:spire-agent"] + } + } + } + } +} +``` + +### SPIRE Agent Configuration + +```hcl +# spire/agent.conf +agent { + data_dir = "/var/lib/spire/agent" + log_level = "INFO" + server_address = "spire-server" + server_port = "8081" + socket_path = "/var/run/spire/agent.sock" + trust_domain = "rag-modulo.example.com" +} + +plugins { + NodeAttestor "k8s_sat" { + plugin_data { + cluster = "rag-modulo-cluster" + } + } + + KeyManager "memory" { + plugin_data {} + } + + WorkloadAttestor "k8s" { + plugin_data { + skip_kubelet_verification = true + } + } +} +``` + +--- + +*Document Version: 1.0* +*Last Updated: 2025-01-26* +*Author: Claude Code (AI-Assisted Architecture Design)* +*Status: Architecture Proposal - Pending Review* diff --git a/docs/architecture/system-architecture.md b/docs/architecture/system-architecture.md new file mode 100644 index 00000000..ddff44b6 --- /dev/null +++ b/docs/architecture/system-architecture.md @@ -0,0 +1,425 @@ +# RAG Modulo System Architecture + +## Repository Overview + +**RAG Modulo** is a production-ready Retrieval-Augmented Generation (RAG) platform that enables +intelligent document processing, semantic search, and AI-powered question answering. The system +combines enterprise-grade document processing with advanced AI reasoning capabilities to provide +accurate, context-aware answers from large document collections. + +### Key Capabilities + +1. **Document Processing**: Supports multiple formats (PDF, DOCX, XLSX, TXT) with advanced + processing via IBM Docling for tables, images, and complex layouts +2. **Intelligent Search**: Vector similarity search with hybrid strategies, reranking, and source attribution +3. **Chain of Thought Reasoning**: Automatic question decomposition with step-by-step reasoning for complex queries +4. **Multi-LLM Support**: Seamless integration with WatsonX, OpenAI, and Anthropic +5. **Multi-Vector Database**: Pluggable support for Milvus, Elasticsearch, Pinecone, Weaviate, and ChromaDB +6. **Conversational Interface**: Multi-turn conversations with context preservation +7. **Podcast Generation**: AI-powered podcast creation from document collections +8. **Voice Synthesis**: Text-to-speech capabilities with multiple providers + +## System Architecture Diagram + +```mermaid +graph TB + subgraph "Client Layer" + WEB[React Web Frontend
TypeScript + Tailwind CSS
Carbon Design System] + CLI[CLI Client
rag-cli commands] + API_CLIENT[External API Clients
REST/WebSocket] + end + + subgraph "API Gateway Layer" + FASTAPI[FastAPI Application
main.py
Port 8000] + + subgraph "Middleware Stack" + CORS[LoggingCORSMiddleware
CORS + Request Logging] + SESSION[SessionMiddleware
Session Management] + AUTH_MW[AuthenticationMiddleware
SPIFFE/OIDC Validation] + end + end + + subgraph "Router Layer - REST Endpoints" + AUTH_R["/auth
Authentication"] + SEARCH_R["/api/search
RAG Search"] + COLLECTION_R["/api/collections
Document Management"] + CHAT_R["/api/chat
Conversational Interface"] + CONV_R["/api/conversations
Session Management"] + PODCAST_R["/api/podcast
Podcast Generation"] + VOICE_R["/api/voice
Voice Synthesis"] + AGENT_R["/api/agents
SPIFFE Agent Management"] + USER_R["/api/users
User Management"] + TEAM_R["/api/teams
Team Collaboration"] + DASH_R["/api/dashboard
Analytics"] + HEALTH_R["/api/health
Health Checks"] + WS_R["/ws
WebSocket"] + end + + subgraph "Service Layer - Business Logic" + SEARCH_SVC[SearchService
RAG Orchestration] + CONV_SVC[ConversationService
Multi-turn Context] + MSG_ORCH[MessageProcessingOrchestrator
Message Flow] + COLLECTION_SVC[CollectionService
Collection Management] + FILE_SVC[FileManagementService
File Operations] + PODCAST_SVC[PodcastService
Content Generation] + VOICE_SVC[VoiceService
Audio Synthesis] + AGENT_SVC[AgentService
SPIFFE Identity] + USER_SVC[UserService
User Operations] + TEAM_SVC[TeamService
Team Operations] + DASH_SVC[DashboardService
Analytics] + PIPELINE_SVC[PipelineService
Pipeline Execution] + COT_SVC[ChainOfThoughtService
Reasoning Engine] + ANSWER_SYNTH[AnswerSynthesizer
Answer Generation] + CITATION_SVC[CitationAttributionService
Source Attribution] + end + + subgraph "RAG Pipeline Architecture - 6 Stages" + PIPELINE_EXEC[PipelineExecutor
Orchestrates Stages] + SEARCH_CTX[SearchContext
State Management] + + STAGE1[Stage 1: Pipeline Resolution
Resolve User Pipeline Config] + STAGE2[Stage 2: Query Enhancement
Rewrite/Enhance Query] + STAGE3[Stage 3: Retrieval
Vector Similarity Search] + STAGE4[Stage 4: Reranking
Relevance Scoring] + STAGE5[Stage 5: Reasoning
Chain of Thought] + STAGE6[Stage 6: Generation
LLM Answer Synthesis] + end + + subgraph "Document Ingestion Pipeline" + DOC_STORE[DocumentStore
Ingestion Orchestration] + DOC_PROC[DocumentProcessor
Format Router] + + PDF_PROC[PdfProcessor
PyMuPDF + OCR] + DOCLING_PROC[DoclingProcessor
IBM Docling
Tables/Images] + WORD_PROC[WordProcessor
DOCX Support] + EXCEL_PROC[ExcelProcessor
XLSX Support] + TXT_PROC[TxtProcessor
Plain Text] + + CHUNKING[Chunking Strategies
Sentence/Semantic/Hierarchical] + EMBEDDING[Embedding Generation
Vector Creation] + end + + subgraph "Retrieval Layer" + RETRIEVER[Retriever
Vector Search] + RERANKER[Reranker
Relevance Scoring] + QUERY_REWRITER[QueryRewriter
Query Optimization] + end + + subgraph "Generation Layer" + LLM_FACTORY[LLMProviderFactory
Provider Management] + + WATSONX[WatsonX Provider
IBM WatsonX AI] + OPENAI[OpenAI Provider
GPT Models] + ANTHROPIC[Anthropic Provider
Claude Models] + + AUDIO_FACTORY[AudioFactory
Audio Provider Management] + ELEVENLABS[ElevenLabs Audio
Voice Synthesis] + OPENAI_AUDIO[OpenAI Audio
TTS] + OLLAMA_AUDIO[Ollama Audio
Local TTS] + end + + subgraph "Repository Layer - Data Access" + USER_REPO[UserRepository] + COLLECTION_REPO[CollectionRepository] + FILE_REPO[FileRepository] + CONV_REPO[ConversationRepository] + AGENT_REPO[AgentRepository] + PODCAST_REPO[PodcastRepository] + VOICE_REPO[VoiceRepository] + TEAM_REPO[TeamRepository] + PIPELINE_REPO[PipelineRepository] + LLM_REPO[LLMProviderRepository] + end + + subgraph "Data Persistence Layer" + POSTGRES[(PostgreSQL
Port 5432
Metadata & Config)] + + VECTOR_DB[(Vector Database
Abstracted Interface)] + MILVUS[Milvus
Primary Vector DB
Port 19530] + PINECONE[Pinecone
Cloud Vector DB] + WEAVIATE[Weaviate
GraphQL Vector DB] + ELASTICSEARCH[Elasticsearch
Search Engine] + CHROMA[ChromaDB
Lightweight Vector DB] + end + + subgraph "Object Storage" + MINIO[(MinIO
Port 9000
Object Storage
Files & Audio)] + end + + subgraph "External Services" + SPIRE[SPIRE Server
SPIFFE Workload Identity
Agent Authentication] + OIDC[OIDC Provider
IBM AppID
User Authentication] + MLFLOW[MLFlow
Port 5001
Model Tracking] + end + + subgraph "Core Infrastructure" + CONFIG[Settings/Config
Pydantic Settings
Environment Variables] + LOGGING[Logging Utils
Structured Logging
Context Tracking] + IDENTITY[Identity Service
User/Agent Identity] + EXCEPTIONS[Custom Exceptions
Domain Errors] + end + + %% Client to API Gateway + WEB -->|HTTP/WebSocket| FASTAPI + CLI -->|HTTP| FASTAPI + API_CLIENT -->|REST API| FASTAPI + + %% Middleware Flow + FASTAPI --> CORS + CORS --> SESSION + SESSION --> AUTH_MW + + %% Router Registration + AUTH_MW --> AUTH_R + AUTH_MW --> SEARCH_R + AUTH_MW --> COLLECTION_R + AUTH_MW --> CHAT_R + AUTH_MW --> CONV_R + AUTH_MW --> PODCAST_R + AUTH_MW --> VOICE_R + AUTH_MW --> AGENT_R + AUTH_MW --> USER_R + AUTH_MW --> TEAM_R + AUTH_MW --> DASH_R + AUTH_MW --> HEALTH_R + AUTH_MW --> WS_R + + %% Router to Service + SEARCH_R --> SEARCH_SVC + CHAT_R --> CONV_SVC + CONV_R --> CONV_SVC + CONV_SVC --> MSG_ORCH + MSG_ORCH --> SEARCH_SVC + COLLECTION_R --> COLLECTION_SVC + COLLECTION_SVC --> FILE_SVC + PODCAST_R --> PODCAST_SVC + VOICE_R --> VOICE_SVC + AGENT_R --> AGENT_SVC + USER_R --> USER_SVC + TEAM_R --> TEAM_SVC + DASH_R --> DASH_SVC + + %% Search Service to Pipeline + SEARCH_SVC --> PIPELINE_EXEC + PIPELINE_EXEC --> STAGE1 + STAGE1 --> STAGE2 + STAGE2 --> STAGE3 + STAGE3 --> STAGE4 + STAGE4 --> STAGE5 + STAGE5 --> STAGE6 + PIPELINE_EXEC --> SEARCH_CTX + + %% Pipeline Stages to Services + STAGE1 --> PIPELINE_SVC + STAGE2 --> QUERY_REWRITER + STAGE3 --> RETRIEVER + STAGE4 --> RERANKER + STAGE5 --> COT_SVC + STAGE6 --> ANSWER_SYNTH + + %% Retrieval to Vector DB + RETRIEVER --> VECTOR_DB + VECTOR_DB --> MILVUS + VECTOR_DB --> PINECONE + VECTOR_DB --> WEAVIATE + VECTOR_DB --> ELASTICSEARCH + VECTOR_DB --> CHROMA + + %% Generation Layer + ANSWER_SYNTH --> LLM_FACTORY + LLM_FACTORY --> WATSONX + LLM_FACTORY --> OPENAI + LLM_FACTORY --> ANTHROPIC + PODCAST_SVC --> LLM_FACTORY + VOICE_SVC --> AUDIO_FACTORY + AUDIO_FACTORY --> ELEVENLABS + AUDIO_FACTORY --> OPENAI_AUDIO + AUDIO_FACTORY --> OLLAMA_AUDIO + + %% Data Ingestion Flow + FILE_SVC --> DOC_STORE + DOC_STORE --> DOC_PROC + DOC_PROC --> PDF_PROC + DOC_PROC --> DOCLING_PROC + DOC_PROC --> WORD_PROC + DOC_PROC --> EXCEL_PROC + DOC_PROC --> TXT_PROC + DOC_PROC --> CHUNKING + CHUNKING --> EMBEDDING + DOC_STORE --> VECTOR_DB + DOC_STORE --> MINIO + + %% Service to Repository + USER_SVC --> USER_REPO + COLLECTION_SVC --> COLLECTION_REPO + FILE_SVC --> FILE_REPO + CONV_SVC --> CONV_REPO + AGENT_SVC --> AGENT_REPO + PODCAST_SVC --> PODCAST_REPO + VOICE_SVC --> VOICE_REPO + TEAM_SVC --> TEAM_REPO + PIPELINE_SVC --> PIPELINE_REPO + PIPELINE_SVC --> LLM_REPO + + %% Repository to Database + USER_REPO --> POSTGRES + COLLECTION_REPO --> POSTGRES + FILE_REPO --> POSTGRES + CONV_REPO --> POSTGRES + AGENT_REPO --> POSTGRES + PODCAST_REPO --> POSTGRES + VOICE_REPO --> POSTGRES + TEAM_REPO --> POSTGRES + PIPELINE_REPO --> POSTGRES + LLM_REPO --> POSTGRES + + %% Authentication + AUTH_MW --> SPIRE + AUTH_MW --> OIDC + AGENT_SVC --> SPIRE + + %% Storage + FILE_SVC --> MINIO + PODCAST_SVC --> MINIO + VOICE_SVC --> MINIO + + %% Core Infrastructure + FASTAPI --> CONFIG + FASTAPI --> LOGGING + AUTH_MW --> IDENTITY + SEARCH_SVC --> EXCEPTIONS + CONV_SVC --> EXCEPTIONS + + %% Styling + style FASTAPI fill:#4A90E2,stroke:#2E5C8A,stroke-width:3px + style PIPELINE_EXEC fill:#50C878,stroke:#2D8659,stroke-width:2px + style VECTOR_DB fill:#FF6B6B,stroke:#C92A2A,stroke-width:2px + style POSTGRES fill:#4ECDC4,stroke:#2D7D7D,stroke-width:2px + style LLM_FACTORY fill:#FFD93D,stroke:#CC9900,stroke-width:2px + style DOC_STORE fill:#9B59B6,stroke:#6C3483,stroke-width:2px + style WEB fill:#61DAFB,stroke:#20232A,stroke-width:2px + style MINIO fill:#FFA500,stroke:#CC7700,stroke-width:2px +``` + +## Architecture Layers Explained + +### 1. Client Layer + +- **React Web Frontend**: Modern TypeScript/React application with Carbon Design System +- **CLI Client**: Command-line interface for automation and scripting +- **API Clients**: External integrations via REST/WebSocket + +### 2. API Gateway Layer + +- **FastAPI Application**: Main entry point handling HTTP requests +- **Middleware Stack**: CORS, session management, and authentication + +### 3. Router Layer + +RESTful endpoints organized by domain (auth, search, collections, chat, etc.) + +### 4. Service Layer + +Business logic services that orchestrate operations across repositories and external services + +### 5. RAG Pipeline (6 Stages) + +1. **Pipeline Resolution**: Determines user's default pipeline configuration +2. **Query Enhancement**: Rewrites/enhances queries for better retrieval +3. **Retrieval**: Performs vector similarity search +4. **Reranking**: Scores and reranks results for relevance +5. **Reasoning**: Applies Chain of Thought for complex questions +6. **Generation**: Synthesizes final answer using LLM + +### 6. Document Ingestion Pipeline + +- Processes multiple document formats +- Applies chunking strategies +- Generates embeddings +- Stores in vector database and object storage + +### 7. Data Persistence + +- **PostgreSQL**: Metadata, configuration, user data +- **Vector Databases**: Pluggable support for multiple vector DBs +- **MinIO**: Object storage for files and generated content + +### 8. External Services + +- **SPIRE**: SPIFFE workload identity for agent authentication +- **OIDC**: User authentication via IBM AppID +- **MLFlow**: Model tracking and experimentation + +## Key Data Flows + +### Search Request Flow + +1. Client → FastAPI → Search Router +2. Search Router → SearchService +3. SearchService → PipelineExecutor +4. Pipeline executes 6 stages sequentially +5. RetrievalStage queries Vector Database +6. GenerationStage calls LLM Provider +7. Response flows back through layers + +### Document Ingestion Flow + +1. Client → Collection Router → CollectionService → FileManagementService +2. FileManagementService → DocumentStore +3. DocumentStore → DocumentProcessor → Format-specific Processor +4. Processor → Chunking Strategy → Embeddings +5. Embeddings → Vector Database +6. Original files → MinIO Object Storage + +### Conversation Flow + +1. Client → Conversation Router → ConversationService +2. ConversationService → MessageProcessingOrchestrator +3. Orchestrator → SearchService (with conversation context) +4. SearchService executes pipeline with context +5. Response saved via ConversationRepository → PostgreSQL + +## Design Patterns + +- **Repository Pattern**: Data access abstraction +- **Factory Pattern**: LLM and Vector DB instantiation +- **Strategy Pattern**: Chunking strategies, LLM providers +- **Pipeline Pattern**: Stage-based RAG processing +- **Dependency Injection**: Services and repositories +- **Middleware Pattern**: Cross-cutting concerns + +## Technology Stack + +### Backend + +- **Framework**: FastAPI (Python 3.12+) +- **Database**: PostgreSQL (SQLAlchemy ORM) +- **Vector DB**: Milvus (primary), Pinecone, Weaviate, Elasticsearch, ChromaDB +- **Object Storage**: MinIO +- **Document Processing**: IBM Docling, PyMuPDF, python-docx, openpyxl + +### Frontend + +- **Framework**: React 18 with TypeScript +- **Styling**: Tailwind CSS + Carbon Design System +- **HTTP Client**: Axios +- **State Management**: React Context API + +### Infrastructure + +- **Containerization**: Docker + Docker Compose +- **CI/CD**: GitHub Actions +- **Container Registry**: GitHub Container Registry (GHCR) +- **Authentication**: SPIFFE/SPIRE (agents), OIDC (users) + +### LLM Providers + +- IBM WatsonX +- OpenAI (GPT models) +- Anthropic (Claude) + +### Audio Providers + +- ElevenLabs +- OpenAI TTS +- Ollama (local) diff --git a/migrations/add_agents_table.sql b/migrations/add_agents_table.sql new file mode 100644 index 00000000..ce0a4e95 --- /dev/null +++ b/migrations/add_agents_table.sql @@ -0,0 +1,62 @@ +-- Migration: Add agents table for SPIFFE/SPIRE workload identity +-- Reference: docs/architecture/spire-integration-architecture.md +-- +-- This migration creates the agents table to store AI agent identities +-- with SPIFFE-based authentication support. + +-- Create agents table +CREATE TABLE IF NOT EXISTS agents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + spiffe_id VARCHAR(512) UNIQUE NOT NULL, + agent_type VARCHAR(100) NOT NULL, + name VARCHAR(255) NOT NULL, + description TEXT, + owner_user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + team_id UUID REFERENCES teams(id) ON DELETE SET NULL, + capabilities JSONB NOT NULL DEFAULT '[]'::jsonb, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + status VARCHAR(50) NOT NULL DEFAULT 'pending', + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + last_seen_at TIMESTAMP WITH TIME ZONE +); + +-- Add comments +COMMENT ON TABLE agents IS 'AI agents with SPIFFE-based workload identity for RAG Modulo'; +COMMENT ON COLUMN agents.spiffe_id IS 'Full SPIFFE ID (e.g., spiffe://rag-modulo.example.com/agent/search-enricher/abc123)'; +COMMENT ON COLUMN agents.agent_type IS 'Type of agent (search-enricher, cot-reasoning, etc.)'; +COMMENT ON COLUMN agents.capabilities IS 'JSON array of granted capabilities (search:read, llm:invoke, etc.)'; +COMMENT ON COLUMN agents.metadata IS 'Additional agent metadata as JSON'; +COMMENT ON COLUMN agents.status IS 'Agent status: active, suspended, revoked, pending'; +COMMENT ON COLUMN agents.last_seen_at IS 'Last successful authentication timestamp'; + +-- Create indexes for efficient lookups +CREATE INDEX IF NOT EXISTS idx_agents_spiffe_id ON agents(spiffe_id); +CREATE INDEX IF NOT EXISTS idx_agents_agent_type ON agents(agent_type); +CREATE INDEX IF NOT EXISTS idx_agents_owner_user_id ON agents(owner_user_id); +CREATE INDEX IF NOT EXISTS idx_agents_team_id ON agents(team_id); +CREATE INDEX IF NOT EXISTS idx_agents_status ON agents(status); +CREATE INDEX IF NOT EXISTS idx_agents_created_at ON agents(created_at DESC); + +-- Composite indexes for common query patterns (owner+status, type+status, team+status) +CREATE INDEX IF NOT EXISTS ix_agents_owner_status ON agents(owner_user_id, status); +CREATE INDEX IF NOT EXISTS ix_agents_type_status ON agents(agent_type, status); +CREATE INDEX IF NOT EXISTS ix_agents_team_status ON agents(team_id, status); + +-- Create GIN index for capabilities JSONB for efficient containment queries +CREATE INDEX IF NOT EXISTS idx_agents_capabilities ON agents USING GIN (capabilities); + +-- Add trigger to auto-update updated_at timestamp +CREATE OR REPLACE FUNCTION update_agents_updated_at() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS trigger_agents_updated_at ON agents; +CREATE TRIGGER trigger_agents_updated_at + BEFORE UPDATE ON agents + FOR EACH ROW + EXECUTE FUNCTION update_agents_updated_at(); diff --git a/migrations/apply_agents_migration.py b/migrations/apply_agents_migration.py new file mode 100644 index 00000000..a8aa09e2 --- /dev/null +++ b/migrations/apply_agents_migration.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +"""Apply migration to add agents table for SPIFFE/SPIRE workload identity. + +Reference: docs/architecture/spire-integration-architecture.md + +Usage: + python migrations/apply_agents_migration.py +""" + +import os +import sys +from pathlib import Path + +import psycopg2 +from dotenv import load_dotenv + +# Add backend to path +backend_path = Path(__file__).parent.parent / "backend" +sys.path.insert(0, str(backend_path)) + +# Load environment variables +load_dotenv() + +# Database connection parameters +DB_HOST = os.getenv("COLLECTIONDB_HOST", "localhost") +DB_PORT = os.getenv("COLLECTIONDB_PORT", "5432") +DB_USER = os.getenv("COLLECTIONDB_USER", "rag_modulo_user") +DB_PASSWORD = os.getenv("COLLECTIONDB_PASSWORD") +DB_NAME = os.getenv("COLLECTIONDB_NAME", "rag_modulo") + + +def apply_migration(): + """Apply the agents table migration.""" + print(f"Connecting to database: {DB_NAME} at {DB_HOST}:{DB_PORT}") + + conn = None + cursor = None + + try: + # Connect to database + conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER, password=DB_PASSWORD, database=DB_NAME) + cursor = conn.cursor() + + print("Connected successfully!") + + # Check if table already exists + cursor.execute( + """ + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'agents' + ); + """ + ) + + if cursor.fetchone()[0]: + print("āœ… Table 'agents' already exists.") + + # Verify structure + cursor.execute( + """ + SELECT column_name, data_type, is_nullable + FROM information_schema.columns + WHERE table_name = 'agents' + ORDER BY ordinal_position; + """ + ) + + print("\nExisting columns:") + for row in cursor.fetchall(): + print(f" - {row[0]}: {row[1]} (nullable: {row[2]})") + + return True + + print("Creating 'agents' table...") + + # Read and execute migration SQL + migration_file = Path(__file__).parent / "add_agents_table.sql" + with open(migration_file) as f: + migration_sql = f.read() + + cursor.execute(migration_sql) + print("āœ… Successfully created 'agents' table!") + + # Verify the table was created + cursor.execute( + """ + SELECT column_name, data_type, is_nullable, column_default + FROM information_schema.columns + WHERE table_name = 'agents' + ORDER BY ordinal_position; + """ + ) + + results = cursor.fetchall() + if results: + print(f"\nTable created with {len(results)} columns:") + for row in results: + print(f" - {row[0]}: {row[1]} (nullable: {row[2]})") + else: + print("āŒ ERROR: Table 'agents' not found after migration!") + if conn: + conn.rollback() + return False + + # Verify indexes + cursor.execute( + """ + SELECT indexname + FROM pg_indexes + WHERE tablename = 'agents'; + """ + ) + + indexes = cursor.fetchall() + print(f"\nCreated {len(indexes)} indexes:") + for idx in indexes: + print(f" - {idx[0]}") + + # Commit transaction if all successful + conn.commit() + + print("\nšŸŽ‰ Migration completed successfully!") + return True + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + if conn: + conn.rollback() + print(" Transaction rolled back.") + return False + except Exception as e: + print(f"āŒ Error: {e}") + if conn: + conn.rollback() + print(" Transaction rolled back.") + return False + finally: + if cursor: + cursor.close() + if conn: + conn.close() + + +if __name__ == "__main__": + success = apply_migration() + sys.exit(0 if success else 1) diff --git a/poetry.lock b/poetry.lock index 21ca2d22..393c3183 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4374,6 +4374,24 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pem" +version = "23.1.0" +description = "PEM file parsing in Python." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pem-23.1.0-py3-none-any.whl", hash = "sha256:78bbb1e75b737891350cb9499cbba31da5d59545f360f44163c0bc751cad55d3"}, + {file = "pem-23.1.0.tar.gz", hash = "sha256:06503ff2441a111f853ce4e8b9eb9d5fedb488ebdbf560115d3dd53a1b4afc73"}, +] + +[package.extras] +dev = ["pem[tests,types]", "twisted[tls]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "twisted[tls]"] +tests = ["certifi", "coverage[toml] (>=5.0.2)", "pretend", "pyopenssl", "pytest"] +types = ["mypy", "twisted", "types-pyopenssl"] + [[package]] name = "pillow" version = "11.0.0" @@ -5356,6 +5374,9 @@ files = [ {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, ] +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + [package.extras] crypto = ["cryptography (>=3.4.0)"] dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"] @@ -7115,6 +7136,27 @@ files = [ {file = "spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645"}, ] +[[package]] +name = "spiffe" +version = "0.2.2" +description = "Python library for SPIFFE support" +optional = false +python-versions = "<4.0,>=3.10" +groups = ["main"] +files = [ + {file = "spiffe-0.2.2-py3-none-any.whl", hash = "sha256:a53fb39ab59408b15dd2f969989045d68bc6b3ebfd283bf2f77e9ff9a66b047b"}, + {file = "spiffe-0.2.2.tar.gz", hash = "sha256:e4ca1247b1a08631a3f822eec7db70447b6d99734ff50670f2c9020dfb006231"}, +] + +[package.dependencies] +cryptography = ">=45.0,<46.0" +grpcio = ">=1.62,<2.0" +pem = ">=23.0,<24.0" +protobuf = ">=5,<7" +pyasn1 = ">=0.6.0,<0.7.0" +pyasn1-modules = ">=0.4.0,<0.5.0" +pyjwt = {version = ">=2.0,<3.0", extras = ["crypto"]} + [[package]] name = "sqlalchemy" version = "2.0.36" @@ -8438,4 +8480,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.12,<3.13" -content-hash = "291564a1add6ef1ca7652fc7faddae62ab9b022ebd17a68a0cc1ad1b034c609d" +content-hash = "d0e386cbf12853c2373cc897481e1cc464869f6698382cadaf6bb5a700cfa312" diff --git a/pyproject.toml b/pyproject.toml index bccbb662..57efe894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dependencies = [ "sentence-transformers (>=5.1.2,<6.0.0)", "tiktoken (>=0.12.0,<0.13.0)", "html2text (>=2025.4.15,<2026.0.0)", + "spiffe (>=0.2.2,<0.3.0)", # SPIFFE/SPIRE workload identity for agent authentication ] [tool.poetry] diff --git a/tests/unit/core/test_spiffe_auth.py b/tests/unit/core/test_spiffe_auth.py new file mode 100644 index 00000000..1be6bf9d --- /dev/null +++ b/tests/unit/core/test_spiffe_auth.py @@ -0,0 +1,772 @@ +"""Unit tests for SPIFFE authentication module. + +This module tests the SPIFFE authentication functionality including: +- SPIFFE ID parsing and building +- JWT-SVID detection and validation +- Agent principal creation +- Configuration management +- Capability enforcement decorator + +Reference: docs/architecture/spire-integration-architecture.md +""" + +import os +from datetime import UTC, datetime, timedelta +from unittest.mock import MagicMock, patch + +import pytest + +from core.spiffe_auth import ( + AGENT_TYPE_CAPABILITIES, + AgentCapability, + AgentPrincipal, + AgentType, + SPIFFEAuthenticator, + SPIFFEConfig, + build_spiffe_id, + get_agent_principal_from_request, + is_spiffe_jwt_svid, + parse_spiffe_id, + require_capabilities, +) + + +class TestSPIFFEConfig: + """Tests for SPIFFEConfig class.""" + + def test_from_env_defaults(self) -> None: + """Test configuration with default values.""" + with patch.dict(os.environ, {}, clear=True): + config = SPIFFEConfig.from_env() + + assert config.enabled is False # Default is disabled + assert config.trust_domain == "rag-modulo.example.com" + assert config.endpoint_socket == "unix:///var/run/spire/agent.sock" + assert config.default_audiences == ["rag-modulo", "mcp-gateway"] + assert config.svid_ttl_seconds == 3600 + assert config.fallback_to_jwt is True + + def test_from_env_custom_values(self) -> None: + """Test configuration with custom environment values.""" + custom_env = { + "SPIFFE_ENABLED": "true", + "SPIFFE_TRUST_DOMAIN": "custom.domain.com", + "SPIFFE_ENDPOINT_SOCKET": "unix:///custom/socket.sock", + "SPIFFE_JWT_AUDIENCES": "api1,api2,api3", + "SPIFFE_SVID_TTL_SECONDS": "7200", + "SPIFFE_FALLBACK_TO_JWT": "false", + } + with patch.dict(os.environ, custom_env, clear=True): + config = SPIFFEConfig.from_env() + + assert config.enabled is True + assert config.trust_domain == "custom.domain.com" + assert config.endpoint_socket == "unix:///custom/socket.sock" + assert config.default_audiences == ["api1", "api2", "api3"] + assert config.svid_ttl_seconds == 7200 + assert config.fallback_to_jwt is False + + def test_from_env_enabled_variations(self) -> None: + """Test different values for SPIFFE_ENABLED.""" + # True values + for value in ["true", "True", "TRUE"]: + with patch.dict(os.environ, {"SPIFFE_ENABLED": value}, clear=True): + config = SPIFFEConfig.from_env() + assert config.enabled is True, f"Expected True for '{value}'" + + # False values (including default) + for value in ["false", "False", "FALSE", "0", "no", ""]: + with patch.dict(os.environ, {"SPIFFE_ENABLED": value}, clear=True): + config = SPIFFEConfig.from_env() + assert config.enabled is False, f"Expected False for '{value}'" + + +class TestAgentType: + """Tests for AgentType enum.""" + + def test_agent_types(self) -> None: + """Test all defined agent types.""" + assert AgentType.SEARCH_ENRICHER.value == "search-enricher" + assert AgentType.COT_REASONING.value == "cot-reasoning" + assert AgentType.QUESTION_DECOMPOSER.value == "question-decomposer" + assert AgentType.SOURCE_ATTRIBUTION.value == "source-attribution" + assert AgentType.ENTITY_EXTRACTION.value == "entity-extraction" + assert AgentType.ANSWER_SYNTHESIS.value == "answer-synthesis" + assert AgentType.CUSTOM.value == "custom" + + def test_agent_type_from_string(self) -> None: + """Test creating agent type from string value.""" + assert AgentType("search-enricher") == AgentType.SEARCH_ENRICHER + assert AgentType("cot-reasoning") == AgentType.COT_REASONING + + +class TestAgentCapability: + """Tests for AgentCapability enum.""" + + def test_capabilities(self) -> None: + """Test all defined capabilities.""" + assert AgentCapability.MCP_TOOL_INVOKE.value == "mcp:tool:invoke" + assert AgentCapability.SEARCH_READ.value == "search:read" + assert AgentCapability.SEARCH_WRITE.value == "search:write" + assert AgentCapability.LLM_INVOKE.value == "llm:invoke" + assert AgentCapability.PIPELINE_EXECUTE.value == "pipeline:execute" + assert AgentCapability.DOCUMENT_READ.value == "document:read" + assert AgentCapability.DOCUMENT_WRITE.value == "document:write" + assert AgentCapability.COT_INVOKE.value == "cot:invoke" + assert AgentCapability.AGENT_SPAWN.value == "agent:spawn" + assert AgentCapability.ADMIN.value == "admin" + + +class TestAgentPrincipal: + """Tests for AgentPrincipal class.""" + + def test_create_agent_principal(self) -> None: + """Test creating an agent principal directly.""" + principal = AgentPrincipal( + spiffe_id="spiffe://rag-modulo.example.com/agent/search-enricher/agent-001", + trust_domain="rag-modulo.example.com", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="agent-001", + capabilities=[AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE], + audiences=["backend-api"], + expires_at=datetime.now(UTC) + timedelta(hours=1), + ) + + assert principal.spiffe_id == "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001" + assert principal.trust_domain == "rag-modulo.example.com" + assert principal.agent_type == AgentType.SEARCH_ENRICHER + assert principal.agent_id == "agent-001" + assert len(principal.capabilities) == 2 + assert AgentCapability.SEARCH_READ in principal.capabilities + + def test_from_spiffe_id(self) -> None: + """Test creating an agent principal from SPIFFE ID.""" + spiffe_id = "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001" + principal = AgentPrincipal.from_spiffe_id(spiffe_id) + + assert principal.spiffe_id == spiffe_id + assert principal.trust_domain == "rag-modulo.example.com" + assert principal.agent_type == AgentType.SEARCH_ENRICHER + assert principal.agent_id == "agent-001" + # Default capabilities for search-enricher + assert AgentCapability.MCP_TOOL_INVOKE in principal.capabilities + assert AgentCapability.SEARCH_READ in principal.capabilities + + def test_from_spiffe_id_custom_capabilities(self) -> None: + """Test creating an agent principal with custom capabilities.""" + spiffe_id = "spiffe://rag-modulo.example.com/agent/cot-reasoning/cot-001" + custom_caps = [AgentCapability.ADMIN, AgentCapability.SEARCH_WRITE] + + principal = AgentPrincipal.from_spiffe_id(spiffe_id, capabilities=custom_caps) + + assert principal.agent_type == AgentType.COT_REASONING + assert principal.capabilities == custom_caps + + def test_from_spiffe_id_invalid(self) -> None: + """Test creating an agent principal from invalid SPIFFE ID.""" + invalid_ids = [ + "not-a-spiffe-id", + "http://example.com/agent/type/id", + "spiffe://domain/not-agent/type", # Missing 'agent' prefix + ] + + for invalid_id in invalid_ids: + with pytest.raises(ValueError): + AgentPrincipal.from_spiffe_id(invalid_id) + + def test_is_expired_false(self) -> None: + """Test is_expired returns False for valid principal.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/custom/id", + trust_domain="domain", + agent_type=AgentType.CUSTOM, + agent_id="id", + capabilities=[], + audiences=[], + expires_at=datetime.now(UTC) + timedelta(hours=1), + ) + + assert principal.is_expired() is False + + def test_is_expired_true(self) -> None: + """Test is_expired returns True for expired principal.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/custom/id", + trust_domain="domain", + agent_type=AgentType.CUSTOM, + agent_id="id", + capabilities=[], + audiences=[], + expires_at=datetime.now(UTC) - timedelta(hours=1), + ) + + assert principal.is_expired() is True + + def test_is_expired_none(self) -> None: + """Test is_expired returns False when no expiration set.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/custom/id", + trust_domain="domain", + agent_type=AgentType.CUSTOM, + agent_id="id", + capabilities=[], + audiences=[], + expires_at=None, + ) + + assert principal.is_expired() is False + + def test_has_capability(self) -> None: + """Test has_capability method.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE], + audiences=[], + expires_at=None, + ) + + assert principal.has_capability(AgentCapability.SEARCH_READ) is True + assert principal.has_capability(AgentCapability.LLM_INVOKE) is True + assert principal.has_capability(AgentCapability.ADMIN) is False + + def test_has_any_capability(self) -> None: + """Test has_any_capability method.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ], + audiences=[], + expires_at=None, + ) + + assert principal.has_any_capability([AgentCapability.SEARCH_READ, AgentCapability.ADMIN]) is True + assert principal.has_any_capability([AgentCapability.ADMIN, AgentCapability.COT_INVOKE]) is False + + def test_has_all_capabilities(self) -> None: + """Test has_all_capabilities method.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE], + audiences=[], + expires_at=None, + ) + + assert principal.has_all_capabilities([AgentCapability.SEARCH_READ]) is True + assert principal.has_all_capabilities([AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE]) is True + assert principal.has_all_capabilities([AgentCapability.SEARCH_READ, AgentCapability.ADMIN]) is False + + def test_is_valid_for_audience(self) -> None: + """Test is_valid_for_audience method.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[], + audiences=["backend-api", "mcp-gateway"], + expires_at=None, + ) + + assert principal.is_valid_for_audience("backend-api") is True + assert principal.is_valid_for_audience("mcp-gateway") is True + assert principal.is_valid_for_audience("unknown-api") is False + + +class TestSPIFFEIDParsing: + """Tests for SPIFFE ID parsing functions.""" + + def test_parse_spiffe_id_agent(self) -> None: + """Test parsing a valid agent SPIFFE ID.""" + spiffe_id = "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001" + result = parse_spiffe_id(spiffe_id) + + assert result is not None + trust_domain, path = result + assert trust_domain == "rag-modulo.example.com" + assert path == "agent/search-enricher/agent-001" + + def test_parse_spiffe_id_workload(self) -> None: + """Test parsing a workload SPIFFE ID.""" + spiffe_id = "spiffe://rag-modulo.example.com/workload/backend-api" + result = parse_spiffe_id(spiffe_id) + + assert result is not None + trust_domain, path = result + assert trust_domain == "rag-modulo.example.com" + assert path == "workload/backend-api" + + def test_parse_spiffe_id_invalid(self) -> None: + """Test parsing invalid SPIFFE IDs.""" + invalid_ids = [ + "not-a-spiffe-id", + "http://example.com/agent/type/id", + "spiffe://", + "", + ] + + for invalid_id in invalid_ids: + result = parse_spiffe_id(invalid_id) + assert result is None, f"Expected None for {invalid_id}" + + def test_build_spiffe_id(self) -> None: + """Test building a SPIFFE ID.""" + spiffe_id = build_spiffe_id( + trust_domain="rag-modulo.example.com", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="agent-001", + ) + + assert spiffe_id == "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001" + + def test_build_spiffe_id_without_agent_id(self) -> None: + """Test building a SPIFFE ID without agent ID.""" + spiffe_id = build_spiffe_id( + trust_domain="rag-modulo.example.com", + agent_type=AgentType.SEARCH_ENRICHER, + ) + + assert spiffe_id == "spiffe://rag-modulo.example.com/agent/search-enricher" + + def test_build_spiffe_id_custom_type(self) -> None: + """Test building a SPIFFE ID with custom agent type.""" + spiffe_id = build_spiffe_id( + trust_domain="custom.domain", + agent_type=AgentType.COT_REASONING, + agent_id="cot-agent-123", + ) + + assert spiffe_id == "spiffe://custom.domain/agent/cot-reasoning/cot-agent-123" + + +class TestIsSPIFFEJWTSVID: + """Tests for is_spiffe_jwt_svid function.""" + + def test_valid_spiffe_jwt(self) -> None: + """Test detecting a valid SPIFFE JWT-SVID.""" + # Create a mock JWT with SPIFFE claims + # In reality, this would be a properly signed JWT + # For testing, we just check the structure detection + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001", + "aud": ["backend-api"], + "exp": 1234567890, + } + + result = is_spiffe_jwt_svid("mock.jwt.token") + assert result is True + + def test_non_spiffe_jwt(self) -> None: + """Test detecting a non-SPIFFE JWT.""" + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "user@example.com", + "aud": ["backend-api"], + "exp": 1234567890, + } + + result = is_spiffe_jwt_svid("mock.jwt.token") + assert result is False + + def test_invalid_jwt(self) -> None: + """Test handling invalid JWT.""" + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.side_effect = Exception("Invalid token") + + result = is_spiffe_jwt_svid("invalid.token") + assert result is False + + def test_empty_token(self) -> None: + """Test handling empty token.""" + result = is_spiffe_jwt_svid("") + assert result is False + + def test_mock_token_detection(self) -> None: + """Test that mock tokens are not detected as SPIFFE.""" + result = is_spiffe_jwt_svid("mock-jwt-token") + assert result is False + + +class TestSPIFFEAuthenticator: + """Tests for SPIFFEAuthenticator class.""" + + def test_authenticator_disabled(self) -> None: + """Test authenticator when SPIFFE is disabled.""" + config = SPIFFEConfig( + enabled=False, + trust_domain="test.domain", + endpoint_socket="/tmp/socket", + default_audiences=["api"], + ) + + authenticator = SPIFFEAuthenticator(config) + # When disabled, validation should return None for non-SPIFFE tokens + result = authenticator.validate_jwt_svid("any.token.here") + + assert result is None + + def test_authenticator_validates_trust_domain(self) -> None: + """Test that authenticator validates trust domain.""" + config = SPIFFEConfig( + enabled=True, + trust_domain="expected.domain", + endpoint_socket="/tmp/socket", + default_audiences=["api"], + fallback_to_jwt=True, # Allow fallback for testing without SPIRE + ) + + authenticator = SPIFFEAuthenticator(config) + + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "spiffe://wrong.domain/agent/type/id", + "aud": ["api"], + "exp": (datetime.now(UTC) + timedelta(hours=1)).timestamp(), + } + + result = authenticator.validate_jwt_svid("mock.token") + # Should reject due to trust domain mismatch + assert result is None + + def test_authenticator_validates_audience(self) -> None: + """Test that authenticator validates audience.""" + config = SPIFFEConfig( + enabled=True, + trust_domain="test.domain", + endpoint_socket="/tmp/socket", + default_audiences=["expected-audience"], + fallback_to_jwt=True, + ) + + authenticator = SPIFFEAuthenticator(config) + + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "spiffe://test.domain/agent/search-enricher/id", + "aud": ["wrong-audience"], + "exp": (datetime.now(UTC) + timedelta(hours=1)).timestamp(), + } + + result = authenticator.validate_jwt_svid("mock.token", required_audience="expected-audience") + # Should reject due to audience mismatch + assert result is None + + def test_authenticator_creates_principal_with_fallback(self) -> None: + """Test that authenticator creates valid principal from JWT when using fallback. + + When SPIFFE is enabled, py-spiffe library must be available. This test + mocks the spiffe import to simulate a working environment and tests + the JWT fallback path when SPIRE agent is not reachable. + """ + # Mock the spiffe import to prevent ImportError + mock_jwt_source = MagicMock() + mock_workload_client = MagicMock() + + with patch.dict("sys.modules", {"spiffe": MagicMock(JwtSource=mock_jwt_source, WorkloadApiClient=mock_workload_client)}): + config = SPIFFEConfig( + enabled=True, + trust_domain="rag-modulo.example.com", + endpoint_socket="/tmp/socket", + default_audiences=["backend-api"], + fallback_to_jwt=True, # Allow fallback when SPIRE unavailable + ) + + authenticator = SPIFFEAuthenticator(config) + # Make SPIRE unavailable to trigger fallback + authenticator._spire_available = False + authenticator._initialized = True + + exp_time = datetime.now(UTC) + timedelta(hours=1) + iat_time = datetime.now(UTC) + + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001", + "aud": ["backend-api"], + "iat": iat_time.timestamp(), + "exp": exp_time.timestamp(), + } + + result = authenticator.validate_jwt_svid("mock.token") + + assert result is not None + assert result.spiffe_id == "spiffe://rag-modulo.example.com/agent/search-enricher/agent-001" + assert result.trust_domain == "rag-modulo.example.com" + assert result.agent_type == AgentType.SEARCH_ENRICHER + assert result.agent_id == "agent-001" + # Should have default capabilities for search-enricher + assert AgentCapability.MCP_TOOL_INVOKE in result.capabilities + assert AgentCapability.SEARCH_READ in result.capabilities + + def test_authenticator_handles_expired_token(self) -> None: + """Test that authenticator rejects expired tokens.""" + config = SPIFFEConfig( + enabled=True, + trust_domain="test.domain", + endpoint_socket="/tmp/socket", + default_audiences=["api"], + fallback_to_jwt=True, + ) + + authenticator = SPIFFEAuthenticator(config) + + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + mock_decode.return_value = { + "sub": "spiffe://test.domain/agent/search-enricher/id", + "aud": ["api"], + "exp": (datetime.now(UTC) - timedelta(hours=1)).timestamp(), + } + + result = authenticator.validate_jwt_svid("mock.token") + assert result is None + + def test_authenticator_rejects_non_spiffe_token(self) -> None: + """Test that authenticator rejects non-SPIFFE tokens.""" + config = SPIFFEConfig( + enabled=True, + trust_domain="test.domain", + endpoint_socket="/tmp/socket", + default_audiences=["api"], + fallback_to_jwt=True, + ) + + authenticator = SPIFFEAuthenticator(config) + + with patch("core.spiffe_auth.jwt.decode") as mock_decode: + # Token without spiffe:// prefix in sub + mock_decode.return_value = { + "sub": "user@example.com", + "aud": ["api"], + "exp": (datetime.now(UTC) + timedelta(hours=1)).timestamp(), + } + + result = authenticator.validate_jwt_svid("mock.token") + assert result is None + + def test_authenticator_is_available_false_when_disabled(self) -> None: + """Test is_available returns False when SPIFFE is disabled.""" + config = SPIFFEConfig(enabled=False) + authenticator = SPIFFEAuthenticator(config) + + assert authenticator.is_available is False + + def test_authenticator_get_auth_headers_empty_when_unavailable(self) -> None: + """Test get_auth_headers returns empty dict when SPIFFE unavailable.""" + config = SPIFFEConfig(enabled=False) + authenticator = SPIFFEAuthenticator(config) + + headers = authenticator.get_auth_headers() + assert headers == {} + + +class TestRequireCapabilitiesDecorator: + """Tests for require_capabilities decorator.""" + + @pytest.mark.asyncio + async def test_require_capabilities_allows_user_requests(self) -> None: + """Test that user requests are allowed regardless of capabilities.""" + from fastapi import Request + + # Create a mock request with user state + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = None + mock_request.state.user = {"uuid": "user-123", "identity_type": "user"} + + @require_capabilities(AgentCapability.ADMIN) + async def protected_endpoint(request: Request) -> str: + return "success" + + result = await protected_endpoint(request=mock_request) + assert result == "success" + + @pytest.mark.asyncio + async def test_require_capabilities_allows_agent_with_capability(self) -> None: + """Test that agents with required capability are allowed.""" + from fastapi import Request + + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ], + audiences=[], + ) + + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = principal + mock_request.state.user = None + + @require_capabilities(AgentCapability.SEARCH_READ) + async def protected_endpoint(request: Request) -> str: + return "success" + + result = await protected_endpoint(request=mock_request) + assert result == "success" + + @pytest.mark.asyncio + async def test_require_capabilities_denies_agent_without_capability(self) -> None: + """Test that agents without required capability are denied.""" + from fastapi import HTTPException, Request + + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ], # Only has SEARCH_READ + audiences=[], + ) + + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = principal + mock_request.state.user = None + + @require_capabilities(AgentCapability.ADMIN) # Requires ADMIN + async def protected_endpoint(request: Request) -> str: + return "success" + + with pytest.raises(HTTPException) as exc_info: + await protected_endpoint(request=mock_request) + + assert exc_info.value.status_code == 403 + + @pytest.mark.asyncio + async def test_require_capabilities_require_all(self) -> None: + """Test require_all parameter.""" + from fastapi import HTTPException, Request + + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE], + audiences=[], + ) + + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = principal + mock_request.state.user = None + + # Test with require_all=True (default) - should pass when agent has all + @require_capabilities(AgentCapability.SEARCH_READ, AgentCapability.LLM_INVOKE, require_all=True) + async def endpoint_all(request: Request) -> str: + return "success" + + result = await endpoint_all(request=mock_request) + assert result == "success" + + # Test with require_all=True but agent missing one capability + @require_capabilities(AgentCapability.SEARCH_READ, AgentCapability.ADMIN, require_all=True) + async def endpoint_missing(request: Request) -> str: + return "success" + + with pytest.raises(HTTPException) as exc_info: + await endpoint_missing(request=mock_request) + assert exc_info.value.status_code == 403 + + @pytest.mark.asyncio + async def test_require_capabilities_require_any(self) -> None: + """Test require_all=False (require any).""" + from fastapi import Request + + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[AgentCapability.SEARCH_READ], # Only has one + audiences=[], + ) + + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = principal + mock_request.state.user = None + + # Should pass when agent has any of the required capabilities + @require_capabilities(AgentCapability.SEARCH_READ, AgentCapability.ADMIN, require_all=False) + async def endpoint_any(request: Request) -> str: + return "success" + + result = await endpoint_any(request=mock_request) + assert result == "success" + + @pytest.mark.asyncio + async def test_require_capabilities_unauthenticated(self) -> None: + """Test that unauthenticated requests are denied.""" + from fastapi import HTTPException, Request + + mock_request = MagicMock(spec=Request) + mock_request.state.agent_principal = None + mock_request.state.user = None + + @require_capabilities(AgentCapability.SEARCH_READ) + async def protected_endpoint(request: Request) -> str: + return "success" + + with pytest.raises(HTTPException) as exc_info: + await protected_endpoint(request=mock_request) + + assert exc_info.value.status_code == 401 + + +class TestGetAgentPrincipalFromRequest: + """Tests for get_agent_principal_from_request utility.""" + + def test_returns_principal_when_present(self) -> None: + """Test returns agent principal when present in request.""" + principal = AgentPrincipal( + spiffe_id="spiffe://domain/agent/search-enricher/id", + trust_domain="domain", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="id", + capabilities=[], + audiences=[], + ) + + mock_request = MagicMock() + mock_request.state.agent_principal = principal + + result = get_agent_principal_from_request(mock_request) + assert result == principal + + def test_returns_none_when_not_present(self) -> None: + """Test returns None when no agent principal in request.""" + mock_request = MagicMock() + mock_request.state = MagicMock(spec=[]) # Empty spec, no agent_principal + + result = get_agent_principal_from_request(mock_request) + assert result is None + + +class TestAgentTypeCapabilities: + """Tests for default agent type capabilities mapping.""" + + def test_search_enricher_default_capabilities(self) -> None: + """Test default capabilities for search-enricher agent.""" + caps = AGENT_TYPE_CAPABILITIES[AgentType.SEARCH_ENRICHER] + assert AgentCapability.MCP_TOOL_INVOKE in caps + assert AgentCapability.SEARCH_READ in caps + assert AgentCapability.ADMIN not in caps + + def test_cot_reasoning_default_capabilities(self) -> None: + """Test default capabilities for cot-reasoning agent.""" + caps = AGENT_TYPE_CAPABILITIES[AgentType.COT_REASONING] + assert AgentCapability.SEARCH_READ in caps + assert AgentCapability.LLM_INVOKE in caps + assert AgentCapability.PIPELINE_EXECUTE in caps + assert AgentCapability.COT_INVOKE in caps + + def test_custom_agent_no_default_capabilities(self) -> None: + """Test custom agents have no default capabilities.""" + caps = AGENT_TYPE_CAPABILITIES[AgentType.CUSTOM] + assert caps == [] diff --git a/tests/unit/services/test_agent_service.py b/tests/unit/services/test_agent_service.py new file mode 100644 index 00000000..53628d29 --- /dev/null +++ b/tests/unit/services/test_agent_service.py @@ -0,0 +1,470 @@ +"""Unit tests for Agent service. + +This module tests the AgentService functionality including: +- Agent registration with SPIFFE ID generation +- Agent CRUD operations +- Status management +- Capability management +- JWT-SVID validation + +Reference: docs/architecture/spire-integration-architecture.md +""" + +import uuid +from datetime import UTC, datetime +from unittest.mock import MagicMock, patch + +import pytest + +from core.spiffe_auth import AgentCapability, AgentPrincipal, AgentType +from rag_solution.schemas.agent_schema import AgentCapability as SchemaAgentCapability +from rag_solution.schemas.agent_schema import ( + AgentCapabilityUpdate, + AgentInput, + AgentOutput, + AgentRegistrationRequest, + AgentStatusUpdate, + AgentUpdate, + SPIFFEValidationRequest, +) +from rag_solution.schemas.agent_schema import AgentStatus as SchemaAgentStatus +from rag_solution.schemas.agent_schema import AgentType as SchemaAgentType +from rag_solution.services.agent_service import AgentService + + +@pytest.fixture +def mock_db() -> MagicMock: + """Create a mock database session.""" + return MagicMock() + + +@pytest.fixture +def mock_repository() -> MagicMock: + """Create a mock agent repository.""" + return MagicMock() + + +@pytest.fixture +def sample_agent_output() -> AgentOutput: + """Create a sample agent output for testing.""" + return AgentOutput( + id=uuid.uuid4(), + spiffe_id="spiffe://rag-modulo.example.com/agent/search-enricher/agent-001", + agent_type="search-enricher", + name="Test Agent", + description="A test agent", + owner_user_id=uuid.uuid4(), + team_id=None, + capabilities=["search:read", "llm:invoke"], + metadata={}, + status="active", + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC), + last_seen_at=None, + ) + + +class TestAgentServiceRegistration: + """Tests for agent registration functionality.""" + + def test_register_agent_generates_spiffe_id(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test that registration generates a valid SPIFFE ID.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.create.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + request = AgentRegistrationRequest( + agent_type=SchemaAgentType.SEARCH_ENRICHER, + name="Test Agent", + description="Test description", + capabilities=[SchemaAgentCapability.SEARCH_READ], + ) + + result = service.register_agent(request, uuid.uuid4()) + + assert result is not None + assert result.spiffe_id.startswith("spiffe://") + assert "agent/search-enricher/" in result.spiffe_id + assert result.registration_instructions is not None + + def test_register_agent_with_custom_trust_domain_rejected( + self, mock_db: MagicMock, sample_agent_output: AgentOutput + ) -> None: + """Test that registration with non-matching trust domain is rejected.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.create.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + request = AgentRegistrationRequest( + agent_type=SchemaAgentType.COT_REASONING, + name="Custom Domain Agent", + trust_domain="custom.domain.com", # Different from configured domain + ) + + # Custom trust domains should be rejected for security + with pytest.raises(ValueError) as exc_info: + service.register_agent(request, uuid.uuid4()) + + assert "Trust domain must be" in str(exc_info.value) + assert "Custom trust domains are not allowed" in str(exc_info.value) + # Repository should not be called + mock_repo_instance.create.assert_not_called() + + def test_register_agent_with_custom_path(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test registration with custom SPIFFE path.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.create.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + request = AgentRegistrationRequest( + agent_type=SchemaAgentType.CUSTOM, + name="Custom Path Agent", + custom_path="custom/path/agent-xyz", + ) + + result = service.register_agent(request, uuid.uuid4()) + + assert result is not None + mock_repo_instance.create.assert_called_once() + call_args = mock_repo_instance.create.call_args + assert "/agent/custom/path/agent-xyz" in call_args.kwargs["spiffe_id"] + + +class TestAgentServiceCRUD: + """Tests for agent CRUD operations.""" + + def test_create_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test creating an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.create.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + agent_input = AgentInput( + agent_type=SchemaAgentType.SEARCH_ENRICHER, + name="New Agent", + capabilities=[SchemaAgentCapability.SEARCH_READ], + ) + + result = service.create_agent(agent_input, uuid.uuid4()) + + assert result is not None + assert result.name == sample_agent_output.name + mock_repo_instance.create.assert_called_once() + + def test_get_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test getting an agent by ID.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.get_by_id.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.get_agent(sample_agent_output.id) + + assert result is not None + assert result.id == sample_agent_output.id + mock_repo_instance.get_by_id.assert_called_once_with(sample_agent_output.id) + + def test_get_agent_by_spiffe_id(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test getting an agent by SPIFFE ID.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.get_by_spiffe_id.return_value = sample_agent_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.get_agent_by_spiffe_id(sample_agent_output.spiffe_id) + + assert result is not None + assert result.spiffe_id == sample_agent_output.spiffe_id + mock_repo_instance.get_by_spiffe_id.assert_called_once_with(sample_agent_output.spiffe_id) + + def test_update_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test updating an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + updated_output = sample_agent_output.model_copy() + updated_output.name = "Updated Name" + mock_repo_instance.update.return_value = updated_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + update = AgentUpdate(name="Updated Name") + + result = service.update_agent(sample_agent_output.id, update) + + assert result is not None + assert result.name == "Updated Name" + mock_repo_instance.update.assert_called_once() + + def test_delete_agent(self, mock_db: MagicMock) -> None: + """Test deleting an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.delete.return_value = True + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + agent_id = uuid.uuid4() + + result = service.delete_agent(agent_id) + + assert result is True + mock_repo_instance.delete.assert_called_once_with(agent_id) + + +class TestAgentServiceStatusManagement: + """Tests for agent status management.""" + + def test_update_agent_status(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test updating agent status.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + suspended_output = sample_agent_output.model_copy() + suspended_output.status = "suspended" + mock_repo_instance.update_status.return_value = suspended_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + status_update = AgentStatusUpdate( + status=SchemaAgentStatus.SUSPENDED, + reason="Maintenance", + ) + + result = service.update_agent_status(sample_agent_output.id, status_update) + + assert result is not None + assert result.status == "suspended" + mock_repo_instance.update_status.assert_called_once() + + def test_suspend_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test suspending an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + suspended_output = sample_agent_output.model_copy() + suspended_output.status = "suspended" + mock_repo_instance.update_status.return_value = suspended_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.suspend_agent(sample_agent_output.id, "Security review") + + assert result is not None + assert result.status == "suspended" + + def test_activate_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test activating an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + active_output = sample_agent_output.model_copy() + active_output.status = "active" + mock_repo_instance.update_status.return_value = active_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.activate_agent(sample_agent_output.id, "Review complete") + + assert result is not None + assert result.status == "active" + + def test_revoke_agent(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test revoking an agent.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + revoked_output = sample_agent_output.model_copy() + revoked_output.status = "revoked" + mock_repo_instance.update_status.return_value = revoked_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.revoke_agent(sample_agent_output.id, "Policy violation") + + assert result is not None + assert result.status == "revoked" + + +class TestAgentServiceCapabilities: + """Tests for agent capability management.""" + + def test_update_agent_capabilities(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test updating agent capabilities.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + updated_output = sample_agent_output.model_copy() + updated_output.capabilities = ["search:read", "llm:invoke", "cot:invoke"] + mock_repo_instance.update_capabilities.return_value = updated_output + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + capability_update = AgentCapabilityUpdate( + add_capabilities=[SchemaAgentCapability.COT_INVOKE], + remove_capabilities=[], + ) + + result = service.update_agent_capabilities(sample_agent_output.id, capability_update) + + assert result is not None + assert "cot:invoke" in result.capabilities + + +class TestAgentServiceListing: + """Tests for agent listing functionality.""" + + def test_list_agents(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test listing agents with pagination.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.list_agents.return_value = ([sample_agent_output], 1) + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.list_agents(skip=0, limit=10) + + assert result is not None + assert result.total == 1 + assert len(result.agents) == 1 + assert result.skip == 0 + assert result.limit == 10 + + def test_list_agents_with_filters(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test listing agents with filters.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.list_agents.return_value = ([sample_agent_output], 1) + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + owner_id = uuid.uuid4() + team_id = uuid.uuid4() + + result = service.list_agents( + skip=0, + limit=10, + owner_user_id=owner_id, + team_id=team_id, + agent_type="search-enricher", + status="active", + ) + + assert result is not None + mock_repo_instance.list_agents.assert_called_once_with( + skip=0, + limit=10, + owner_user_id=owner_id, + team_id=team_id, + agent_type="search-enricher", + status="active", + ) + + def test_list_user_agents(self, mock_db: MagicMock, sample_agent_output: AgentOutput) -> None: + """Test listing agents for a specific user.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.list_by_owner.return_value = [sample_agent_output] + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + owner_id = uuid.uuid4() + + result = service.list_user_agents(owner_id) + + assert result is not None + assert len(result) == 1 + mock_repo_instance.list_by_owner.assert_called_once() + + +class TestAgentServiceValidation: + """Tests for JWT-SVID validation.""" + + def test_validate_jwt_svid_valid(self, mock_db: MagicMock) -> None: + """Test validating a valid JWT-SVID.""" + with ( + patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class, + patch("rag_solution.services.agent_service.get_spiffe_authenticator") as mock_get_auth, + ): + mock_repo_instance = MagicMock() + mock_repo_class.return_value = mock_repo_instance + + mock_authenticator = MagicMock() + mock_principal = AgentPrincipal( + spiffe_id="spiffe://rag-modulo.example.com/agent/search-enricher/agent-001", + trust_domain="rag-modulo.example.com", + agent_type=AgentType.SEARCH_ENRICHER, + agent_id="agent-001", + capabilities=[AgentCapability.SEARCH_READ], + audiences=["backend-api"], + expires_at=datetime.now(UTC), + ) + mock_authenticator.validate_jwt_svid.return_value = mock_principal + mock_get_auth.return_value = mock_authenticator + + service = AgentService(mock_db) + request = SPIFFEValidationRequest(token="valid.jwt.token") + + result = service.validate_jwt_svid(request) + + assert result is not None + assert result.valid is True + assert result.spiffe_id == mock_principal.spiffe_id + + def test_validate_jwt_svid_invalid(self, mock_db: MagicMock) -> None: + """Test validating an invalid JWT-SVID.""" + with ( + patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class, + patch("rag_solution.services.agent_service.get_spiffe_authenticator") as mock_get_auth, + ): + mock_repo_instance = MagicMock() + mock_repo_class.return_value = mock_repo_instance + + mock_authenticator = MagicMock() + mock_authenticator.validate_jwt_svid.return_value = None + mock_get_auth.return_value = mock_authenticator + + service = AgentService(mock_db) + request = SPIFFEValidationRequest(token="invalid.jwt.token") + + result = service.validate_jwt_svid(request) + + assert result is not None + assert result.valid is False + assert result.error is not None + + +class TestAgentServiceMetrics: + """Tests for agent metrics and counting.""" + + def test_get_agent_count_for_user(self, mock_db: MagicMock) -> None: + """Test getting agent count for a user.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.count_by_owner.return_value = 5 + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + owner_id = uuid.uuid4() + + result = service.get_agent_count_for_user(owner_id) + + assert result == 5 + mock_repo_instance.count_by_owner.assert_called_once_with(owner_id) + + def test_get_active_agent_count(self, mock_db: MagicMock) -> None: + """Test getting total active agent count.""" + with patch("rag_solution.services.agent_service.AgentRepository") as mock_repo_class: + mock_repo_instance = MagicMock() + mock_repo_instance.count_active.return_value = 10 + mock_repo_class.return_value = mock_repo_instance + + service = AgentService(mock_db) + result = service.get_active_agent_count() + + assert result == 10 + mock_repo_instance.count_active.assert_called_once()