From b3ba61200e4cbe17838cc6fb7e2ed02bda8b816f Mon Sep 17 00:00:00 2001 From: manavgup Date: Fri, 5 Dec 2025 10:09:49 -0500 Subject: [PATCH] fix(user): Add email-based user lookup to prevent duplicates (#719) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using trusted proxy authentication with X-Authenticated-User header, users were being looked up by ibm_id instead of email. This caused duplicate users when the existing user had a different ibm_id than their email. Changes: - Add get_by_email() method to UserRepository for email-based lookup - Add get_user_by_email() and get_or_create_by_email() to UserService - Update MCP auth to use service layer instead of direct DB queries - Catch specific exceptions (DomainError, RepositoryError) instead of general Exception The get_or_create_by_email() method first checks if a user exists by email, and only creates a new user if not found. This prevents duplicates when authenticating via trusted proxy. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/mcp_server/auth.py | 54 ++++++------- .../repository/user_repository.py | 24 ++++++ backend/rag_solution/services/user_service.py | 75 +++++++++++++++++++ 3 files changed, 120 insertions(+), 33 deletions(-) diff --git a/backend/mcp_server/auth.py b/backend/mcp_server/auth.py index 1bb4139a..a91bde6c 100644 --- a/backend/mcp_server/auth.py +++ b/backend/mcp_server/auth.py @@ -23,11 +23,11 @@ import jwt from core.config import get_settings +from core.custom_exceptions import RepositoryError from core.enhanced_logging import get_logger from mcp_server.permissions import DefaultPermissionSets, MCPPermissions +from rag_solution.core.exceptions import DomainError from rag_solution.file_management.database import get_db -from rag_solution.models.user import User -from rag_solution.schemas.user_schema import UserOutput from rag_solution.services.user_service import UserService # SPIFFE imports are optional - used only when SPIFFE auth is configured @@ -332,6 +332,10 @@ async def _authenticate_trusted_user(self, user_email: str) -> MCPAuthContext: This is used when the MCP server is behind a trusted proxy that has already authenticated the user. + Uses UserService.get_or_create_by_email() to properly look up users + by email address first, avoiding duplicate user creation when the + existing user has a different ibm_id. + Args: user_email: The authenticated user's email from trusted proxy @@ -339,43 +343,27 @@ async def _authenticate_trusted_user(self, user_email: str) -> MCPAuthContext: MCPAuthContext with user identity """ db_gen = None - db_session = None try: db_gen = get_db() db_session = next(db_gen) settings = get_settings() - # First, try to find existing user by email (not ibm_id) - # This handles the case where user was created with a different ibm_id - existing_user = db_session.query(User).filter(User.email == user_email).first() - - # Type annotation for user - can be User model or UserOutput schema - user: User | UserOutput | None = None - - if existing_user: - user = existing_user - logger.debug("Found existing user by email: %s (id=%s)", user_email, user.id) - else: - # Create new user if not found by email - user_service = UserService(db_session, settings) - user = user_service.get_or_create_user_by_fields( - ibm_id=user_email, # Use email as ibm_id for new trusted proxy users - email=user_email, - name=user_email.split("@")[0], # Extract name from email - role="user", - ) - logger.debug("Created new user for trusted proxy: %s (id=%s)", user_email, user.id) + # Use service layer for proper email-based lookup + # This handles existing users AND creates new ones if needed + user_service = UserService(db_session, settings) + user = user_service.get_or_create_by_email(email=user_email) - if user: - return MCPAuthContext( - user_id=user.id, - username=user.email, - is_authenticated=True, - auth_method="trusted_proxy", - permissions=list(DefaultPermissionSets.TRUSTED_PROXY), - metadata={"source": "trusted_proxy"}, - ) - except Exception as e: + logger.debug("Authenticated user via trusted proxy: %s (id=%s)", user_email, user.id) + + return MCPAuthContext( + user_id=user.id, + username=user.email, + is_authenticated=True, + auth_method="trusted_proxy", + permissions=list(DefaultPermissionSets.TRUSTED_PROXY), + metadata={"source": "trusted_proxy"}, + ) + except (DomainError, RepositoryError) as e: logger.warning("Trusted user lookup failed: %s", e) finally: # Ensure proper cleanup of database session diff --git a/backend/rag_solution/repository/user_repository.py b/backend/rag_solution/repository/user_repository.py index 6c5b971b..d828d545 100644 --- a/backend/rag_solution/repository/user_repository.py +++ b/backend/rag_solution/repository/user_repository.py @@ -81,6 +81,30 @@ def get_by_ibm_id(self, ibm_id: str) -> UserOutput: logger.error(f"Error getting user by IBM ID {ibm_id}: {e!s}") raise RepositoryError(f"Failed to get user by IBM ID: {e!s}") from e + def get_by_email(self, email: str) -> UserOutput: + """Fetches user by email address. + + Args: + email: The email address to look up + + Returns: + UserOutput: The user with the given email + + Raises: + NotFoundError: If user not found + RepositoryError: For database errors + """ + try: + user = self.db.query(User).filter(User.email == email).options(joinedload(User.teams)).first() + if not user: + raise NotFoundError("User", identifier=f"email={email}") + return UserOutput.model_validate(user) + except NotFoundError: + raise + except Exception as e: + logger.error(f"Error getting user by email {email}: {e!s}") + raise RepositoryError(f"Failed to get user by email: {e!s}") from e + def update(self, user_id: UUID4, user_update: UserInput) -> UserOutput: """Updates user data with validation. diff --git a/backend/rag_solution/services/user_service.py b/backend/rag_solution/services/user_service.py index 0f06de5a..b4f2c207 100644 --- a/backend/rag_solution/services/user_service.py +++ b/backend/rag_solution/services/user_service.py @@ -56,6 +56,81 @@ def get_or_create_user_by_fields(self, ibm_id: str, email: EmailStr, name: str, UserInput(ibm_id=ibm_id, email=email, name=name, role=role, preferred_provider_id=None) ) + def get_user_by_email(self, email: str) -> UserOutput: + """Gets user by email address. + + Args: + email: The email address to look up + + Returns: + UserOutput: The user with the given email + + Raises: + NotFoundError: If user not found + """ + logger.info("Fetching user with email: %s", email) + return self.user_repository.get_by_email(email) + + def get_or_create_by_email(self, email: EmailStr, name: str | None = None, role: str = "user") -> UserOutput: + """Gets existing user by email or creates new one. + + This method first looks up the user by email address. If found, returns + the existing user. If not found, creates a new user using the email + as both the email and ibm_id (for trusted proxy scenarios). + + This is the preferred method for trusted proxy authentication where + user identity comes from an email header. + + Args: + email: The email address (used for lookup and as ibm_id for new users) + name: Optional display name (defaults to email prefix if not provided) + role: User role (defaults to "user") + + Returns: + UserOutput: The existing or newly created user with all defaults initialized + + Note: + For existing users, ensures required defaults (templates, parameters) + are present, reinitializing them if necessary. + """ + try: + existing_user = self.user_repository.get_by_email(email) + + # Defensive check: Ensure user has required defaults + templates = self.prompt_template_service.get_user_templates(existing_user.id) + + if not templates or len(templates) < MIN_REQUIRED_TEMPLATES: + logger.warning( + "User %s (email=%s) exists but missing defaults - attempting recovery...", + existing_user.id, + email, + ) + try: + _, reinit_templates, parameters = self.user_provider_service.initialize_user_defaults( + existing_user.id + ) + logger.info( + "✅ Successfully recovered user %s: %d templates, %s parameters", + existing_user.id, + len(reinit_templates), + "created" if parameters else "failed", + ) + except Exception as e: + logger.error("❌ Failed to recover user %s: %s", existing_user.id, str(e)) + raise ValidationError( + f"User {existing_user.id} missing required defaults and recovery failed: {e}", + field="user_initialization", + ) from e + + return existing_user + except NotFoundError: + # User doesn't exist by email, create with email as ibm_id + display_name = name if name else email.split("@")[0] + logger.info("Creating new user for email: %s", email) + return self.create_user( + UserInput(ibm_id=email, email=email, name=display_name, role=role, preferred_provider_id=None) + ) + def get_or_create_user(self, user_input: UserInput) -> UserOutput: """Gets existing user or creates new one, ensuring all required defaults exist.