diff --git a/docker/.env.example b/docker/.env.example index 766f92dcc..8297aae44 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -203,10 +203,11 @@ STT_SERVICE=local/base # AIRTABLE_CLIENT_SECRET= # AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callback -# -- Microsoft Teams -- -# TEAMS_CLIENT_ID= -# TEAMS_CLIENT_SECRET= +# -- Microsoft OAuth (Teams & OneDrive) -- +# MICROSOFT_CLIENT_ID= +# MICROSOFT_CLIENT_SECRET= # TEAMS_REDIRECT_URI=http://localhost:8000/api/v1/auth/teams/connector/callback +# ONEDRIVE_REDIRECT_URI=http://localhost:8000/api/v1/auth/onedrive/connector/callback # -- Composio -- # COMPOSIO_API_KEY= diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index 13b579889..0cdb581c4 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -74,7 +74,7 @@ DISCORD_CLIENT_SECRET=your_discord_client_secret_here DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal -# Atlassian OAuth Configuration +# Atlassian OAuth Configuration (Jira & Confluence) ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here ATLASSIAN_CLIENT_SECRET=your_atlassian_client_secret_here JIRA_REDIRECT_URI=http://localhost:8000/api/v1/auth/jira/connector/callback @@ -95,10 +95,11 @@ SLACK_CLIENT_ID=your_slack_client_id_here SLACK_CLIENT_SECRET=your_slack_client_secret_here SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback -# Teams OAuth Configuration -TEAMS_CLIENT_ID=your_teams_client_id_here -TEAMS_CLIENT_SECRET=your_teams_client_secret_here +# Microsoft OAuth (Teams & OneDrive) +MICROSOFT_CLIENT_ID=your_microsoft_client_id_here +MICROSOFT_CLIENT_SECRET=your_microsoft_client_secret_here TEAMS_REDIRECT_URI=http://localhost:8000/api/v1/auth/teams/connector/callback +ONEDRIVE_REDIRECT_URI=http://localhost:8000/api/v1/auth/onedrive/connector/callback # Composio Connector # NOTE: Disable "Mask 
def upgrade() -> None:
    """Add the OneDrive labels to the connector-type and document-type enums.

    For each (enum type, label) pair a guarded ``DO`` block is executed: it
    looks the label up in ``pg_enum`` first and only issues
    ``ALTER TYPE ... ADD VALUE`` when the label is absent, so running the
    migration against a database that already has the label changes nothing.
    """
    enum_additions = (
        ("searchsourceconnectortype", "ONEDRIVE_CONNECTOR"),
        ("documenttype", "ONEDRIVE_FILE"),
    )
    for type_name, label in enum_additions:
        op.execute(
            f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type t
                JOIN pg_enum e ON t.oid = e.enumtypid
                WHERE t.typname = '{type_name}' AND e.enumlabel = '{label}'
            ) THEN
                ALTER TYPE {type_name} ADD VALUE '{label}';
            END IF;
        END
        $$;
        """
        )


def downgrade() -> None:
    """Do nothing: the enum labels added by ``upgrade`` are left in place."""
    return None
connectors (unified to native document types). # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db. "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", @@ -316,6 +317,12 @@ async def create_surfsense_deep_agent( ] modified_disabled_tools.extend(google_drive_tools) + has_onedrive_connector = ( + available_connectors is not None and "ONEDRIVE_FILE" in available_connectors + ) + if not has_onedrive_connector: + modified_disabled_tools.extend(["create_onedrive_file", "delete_onedrive_file"]) + # Disable Google Calendar action tools if no Google Calendar connector is configured has_google_calendar_connector = ( available_connectors is not None diff --git a/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py b/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py index 5f1f864a0..f5e8f1235 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py +++ b/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py @@ -26,6 +26,7 @@ "trash_gmail_email": "email_subject_or_id", "update_gmail_draft": "draft_subject_or_id", "delete_google_drive_file": "file_name", + "delete_onedrive_file": "file_name", "delete_notion_page": "page_title", "update_notion_page": "page_title", "delete_linear_issue": "issue_ref", diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index 4553f617d..cd4914f15 100644 --- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -201,6 +201,7 @@ async def _browse_recent_documents( "CRAWLED_URL", "CIRCLEBACK", "OBSIDIAN_CONNECTOR", + "ONEDRIVE_FILE", ] # Human-readable descriptions for each connector type @@ -230,6 +231,7 @@ async def _browse_recent_documents( "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)", "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items", "OBSIDIAN_CONNECTOR": "Obsidian 
vault notes and markdown files (personal notes)", + "ONEDRIVE_FILE": "Microsoft OneDrive files and documents (personal cloud storage)", } @@ -357,6 +359,7 @@ def _compute_tool_output_budget(max_input_tokens: int | None) -> int: "event_id", "calendar_id", "google_drive_file_id", + "onedrive_file_id", "page_id", "issue_id", "connector_id", diff --git a/surfsense_backend/app/agents/new_chat/tools/onedrive/__init__.py b/surfsense_backend/app/agents/new_chat/tools/onedrive/__init__.py new file mode 100644 index 000000000..8edb4857e --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/onedrive/__init__.py @@ -0,0 +1,11 @@ +from app.agents.new_chat.tools.onedrive.create_file import ( + create_create_onedrive_file_tool, +) +from app.agents.new_chat.tools.onedrive.trash_file import ( + create_delete_onedrive_file_tool, +) + +__all__ = [ + "create_create_onedrive_file_tool", + "create_delete_onedrive_file_tool", +] diff --git a/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py b/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py new file mode 100644 index 000000000..a712c9a45 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py @@ -0,0 +1,259 @@ +import logging +import os +import tempfile +from pathlib import Path +from typing import Any + +from langchain_core.tools import tool +from langgraph.types import interrupt +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.connectors.onedrive.client import OneDriveClient +from app.db import SearchSourceConnector, SearchSourceConnectorType + +logger = logging.getLogger(__name__) + +DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + + +def _ensure_docx_extension(name: str) -> str: + """Strip any existing extension and append .docx.""" + stem = Path(name).stem + return f"{stem}.docx" + + +def _markdown_to_docx(markdown_text: str) -> bytes: + """Convert a markdown string 
def create_create_onedrive_file_tool(
    db_session: AsyncSession | None = None,
    search_space_id: int | None = None,
    user_id: str | None = None,
):
    """Build the ``create_onedrive_file`` tool bound to one user and search space.

    Args:
        db_session: Async session used for connector lookups and handed to
            ``OneDriveClient``.
        search_space_id: Search space whose OneDrive connectors may be used.
        user_id: Owner of the connectors.

    Returns:
        The decorated ``create_onedrive_file`` tool.  If any argument is
        ``None`` the tool answers every call with a configuration error.
    """

    @tool
    async def create_onedrive_file(
        name: str,
        content: str | None = None,
    ) -> dict[str, Any]:
        """Create a new Word document (.docx) in Microsoft OneDrive.

        Use this tool when the user explicitly asks to create a new document
        in OneDrive. The user MUST specify a topic before you call this tool.

        The file is always saved as a .docx Word document. Provide content as
        markdown and it will be automatically converted to a formatted Word file.

        Args:
            name: The document title (without extension). Extension will be set to .docx automatically.
            content: Optional initial content as markdown. Will be converted to a formatted Word document.

        Returns:
            Dictionary with status, file_id, name, web_url, and message.
        """
        logger.info(f"create_onedrive_file called: name='{name}'")

        if db_session is None or search_space_id is None or user_id is None:
            return {
                "status": "error",
                "message": "OneDrive tool not properly configured.",
            }

        try:
            result = await db_session.execute(
                select(SearchSourceConnector).filter(
                    SearchSourceConnector.search_space_id == search_space_id,
                    SearchSourceConnector.user_id == user_id,
                    SearchSourceConnector.connector_type
                    == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
                )
            )
            connectors = result.scalars().all()

            if not connectors:
                return {
                    "status": "error",
                    "message": "No OneDrive connector found. Please connect OneDrive in your workspace settings.",
                }

            # Snapshot the connector rows as plain dicts right away: the
            # OneDrive client may commit the session while refreshing tokens,
            # and only these plain values are used after that point.
            accounts = []
            for c in connectors:
                cfg = c.config or {}
                accounts.append(
                    {
                        "id": c.id,
                        "name": c.name,
                        "user_email": cfg.get("user_email"),
                        "auth_expired": cfg.get("auth_expired", False),
                    }
                )

            if all(a.get("auth_expired") for a in accounts):
                return {
                    "status": "auth_error",
                    "message": "All connected OneDrive accounts need re-authentication.",
                    "connector_type": "onedrive",
                }

            # Top-level folders per account, offered as destinations in the
            # approval prompt.  Listing is best-effort: a failure yields an
            # empty list for that account instead of aborting the tool.
            parent_folders: dict[int, list[dict[str, str]]] = {}
            for acc in accounts:
                cid = acc["id"]
                if acc.get("auth_expired"):
                    parent_folders[cid] = []
                    continue
                try:
                    client = OneDriveClient(session=db_session, connector_id=cid)
                    items, err = await client.list_children("root")
                    if err:
                        logger.warning(
                            "Failed to list folders for connector %s: %s", cid, err
                        )
                        parent_folders[cid] = []
                    else:
                        parent_folders[cid] = [
                            {"folder_id": item["id"], "name": item["name"]}
                            for item in items
                            if item.get("folder") is not None
                            and item.get("id")
                            and item.get("name")
                        ]
                except Exception:
                    logger.warning(
                        "Error fetching folders for connector %s", cid, exc_info=True
                    )
                    parent_folders[cid] = []

            context: dict[str, Any] = {
                "accounts": accounts,
                "parent_folders": parent_folders,
            }

            # Pause the graph until the user approves, edits or rejects.
            approval = interrupt(
                {
                    "type": "onedrive_file_creation",
                    "action": {
                        "tool": "create_onedrive_file",
                        "params": {
                            "name": name,
                            "content": content,
                            "connector_id": None,
                            "parent_folder_id": None,
                        },
                    },
                    "context": context,
                }
            )

            # Normalise the resume payload to a list of dict decisions.
            decisions_raw = (
                approval.get("decisions", []) if isinstance(approval, dict) else []
            )
            decisions = (
                decisions_raw if isinstance(decisions_raw, list) else [decisions_raw]
            )
            decisions = [d for d in decisions if isinstance(d, dict)]
            if not decisions:
                return {"status": "error", "message": "No approval decision received"}

            decision = decisions[0]
            decision_type = decision.get("type") or decision.get("decision_type")

            if decision_type == "reject":
                return {
                    "status": "rejected",
                    "message": "User declined. The file was not created.",
                }

            # Edited arguments may arrive under ``edited_action.args`` or
            # directly under ``args``.
            final_params: dict[str, Any] = {}
            edited_action = decision.get("edited_action")
            if isinstance(edited_action, dict):
                edited_args = edited_action.get("args")
                if isinstance(edited_args, dict):
                    final_params = edited_args
            elif isinstance(decision.get("args"), dict):
                final_params = decision["args"]

            final_name = final_params.get("name", name)
            final_content = final_params.get("content", content)
            final_connector_id = final_params.get("connector_id")
            final_parent_folder_id = final_params.get("parent_folder_id")

            if not final_name or not final_name.strip():
                return {"status": "error", "message": "File name cannot be empty."}

            # Strip before normalising so padding never ends up in the
            # uploaded file name.
            final_name = _ensure_docx_extension(final_name.strip())

            if final_connector_id is not None:
                # Re-validate a user-supplied id against this user and space.
                result = await db_session.execute(
                    select(SearchSourceConnector).filter(
                        SearchSourceConnector.id == final_connector_id,
                        SearchSourceConnector.search_space_id == search_space_id,
                        SearchSourceConnector.user_id == user_id,
                        SearchSourceConnector.connector_type
                        == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
                    )
                )
                connector = result.scalars().first()
                if not connector:
                    return {
                        "status": "error",
                        "message": "Selected OneDrive connector is invalid.",
                    }
                if (connector.config or {}).get("auth_expired"):
                    return {
                        "status": "auth_error",
                        "message": "OneDrive account needs re-authentication. Please re-authenticate in your connector settings.",
                        "connector_type": "onedrive",
                    }
                connector_id = connector.id
            else:
                # Default to the first account that is not flagged as expired
                # rather than blindly to the first row.  At least one exists,
                # because the all-expired case returned above.
                connector_id = next(
                    a["id"] for a in accounts if not a.get("auth_expired")
                )

            # The pandoc conversion is blocking, so run it in a worker thread
            # to keep the event loop responsive.
            import asyncio

            docx_bytes = await asyncio.to_thread(
                _markdown_to_docx, final_content or ""
            )

            client = OneDriveClient(session=db_session, connector_id=connector_id)
            created = await client.create_file(
                name=final_name,
                parent_id=final_parent_folder_id,
                content=docx_bytes,
                mime_type=DOCX_MIME,
            )

            logger.info(
                f"OneDrive file created: id={created.get('id')}, name={created.get('name')}"
            )

            # Knowledge-base sync is best-effort; the upload already
            # succeeded, so a failure here only changes the message suffix.
            kb_message_suffix = ""
            try:
                from app.services.onedrive import OneDriveKBSyncService

                kb_service = OneDriveKBSyncService(db_session)
                kb_result = await kb_service.sync_after_create(
                    file_id=created.get("id"),
                    file_name=created.get("name", final_name),
                    mime_type=DOCX_MIME,
                    web_url=created.get("webUrl"),
                    content=final_content,
                    connector_id=connector_id,
                    search_space_id=search_space_id,
                    user_id=user_id,
                )
                if kb_result["status"] == "success":
                    kb_message_suffix = " Your knowledge base has also been updated."
                else:
                    kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
            except Exception as kb_err:
                logger.warning(f"KB sync after create failed: {kb_err}")
                kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."

            return {
                "status": "success",
                "file_id": created.get("id"),
                "name": created.get("name"),
                "web_url": created.get("webUrl"),
                "message": f"Successfully created '{created.get('name')}' in OneDrive.{kb_message_suffix}",
            }

        except Exception as e:
            from langgraph.errors import GraphInterrupt

            # The interrupt is delivered as an exception and must propagate.
            if isinstance(e, GraphInterrupt):
                raise
            logger.error(f"Error creating OneDrive file: {e}", exc_info=True)
            return {
                "status": "error",
                "message": "Something went wrong while creating the file. Please try again.",
            }

    return create_onedrive_file
logger = logging.getLogger(__name__)


def create_delete_onedrive_file_tool(
    db_session: AsyncSession | None = None,
    search_space_id: int | None = None,
    user_id: str | None = None,
):
    """Build the ``delete_onedrive_file`` tool bound to one user and search space.

    The returned tool looks the file up among the indexed OneDrive documents,
    asks the user for approval through ``interrupt``, deletes the item through
    ``OneDriveClient.trash_file`` and optionally removes the matching document
    row.  If any factory argument is ``None`` the tool answers with a
    configuration error.
    """

    @tool
    async def delete_onedrive_file(
        file_name: str,
        delete_from_kb: bool = False,
    ) -> dict[str, Any]:
        """Move a OneDrive file to the recycle bin.

        Use this tool when the user explicitly asks to delete, remove, or trash
        a file in OneDrive.

        Args:
            file_name: The exact name of the file to trash.
            delete_from_kb: Whether to also remove the file from the knowledge base.
                Default is False.
                Set to True to remove from both OneDrive and knowledge base.

        Returns:
            Dictionary with:
            - status: "success", "rejected", "not_found", or "error"
            - file_id: OneDrive file ID (if success)
            - deleted_from_kb: whether the document was removed from the knowledge base
            - message: Result message

        IMPORTANT:
        - If status is "rejected", the user explicitly declined. Respond with a brief
          acknowledgment and do NOT retry or suggest alternatives.
        - If status is "not_found", relay the exact message to the user and ask them
          to verify the file name or check if it has been indexed.
        """
        logger.info(f"delete_onedrive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}")

        if db_session is None or search_space_id is None or user_id is None:
            return {"status": "error", "message": "OneDrive tool not properly configured."}

        try:
            # First lookup: case-insensitive match on the document title,
            # restricted to OneDrive documents whose connector belongs to
            # this user; the most recently updated match wins.
            doc_result = await db_session.execute(
                select(Document)
                .join(
                    SearchSourceConnector,
                    Document.connector_id == SearchSourceConnector.id,
                )
                .filter(
                    and_(
                        Document.search_space_id == search_space_id,
                        Document.document_type == DocumentType.ONEDRIVE_FILE,
                        func.lower(Document.title) == func.lower(file_name),
                        SearchSourceConnector.user_id == user_id,
                    )
                )
                .order_by(Document.updated_at.desc().nullslast())
                .limit(1)
            )
            document = doc_result.scalars().first()

            if not document:
                # Fallback lookup: match on the file name stored in the
                # document metadata.
                # NOTE(review): depending on the column's JSON type,
                # cast(<json element>, String) may compare against the
                # JSON-quoted text rather than the bare name - confirm this
                # branch can actually match.
                doc_result = await db_session.execute(
                    select(Document)
                    .join(
                        SearchSourceConnector,
                        Document.connector_id == SearchSourceConnector.id,
                    )
                    .filter(
                        and_(
                            Document.search_space_id == search_space_id,
                            Document.document_type == DocumentType.ONEDRIVE_FILE,
                            func.lower(
                                cast(Document.document_metadata["onedrive_file_name"], String)
                            ) == func.lower(file_name),
                            SearchSourceConnector.user_id == user_id,
                        )
                    )
                    .order_by(Document.updated_at.desc().nullslast())
                    .limit(1)
                )
                document = doc_result.scalars().first()

            if not document:
                return {
                    "status": "not_found",
                    "message": (
                        f"File '{file_name}' not found in your indexed OneDrive files. "
                        "This could mean: (1) the file doesn't exist, (2) it hasn't been indexed yet, "
                        "or (3) the file name is different."
                    ),
                }

            if not document.connector_id:
                return {"status": "error", "message": "Document has no associated connector."}

            # Copy what is needed off the ORM row now; these plain values are
            # what the rest of the function works with.
            meta = document.document_metadata or {}
            file_id = meta.get("onedrive_file_id")
            document_id = document.id

            if not file_id:
                return {"status": "error", "message": "File ID is missing. Please re-index the file."}

            # Re-check that the document's connector is a OneDrive connector
            # owned by this user in this search space.
            conn_result = await db_session.execute(
                select(SearchSourceConnector).filter(
                    and_(
                        SearchSourceConnector.id == document.connector_id,
                        SearchSourceConnector.search_space_id == search_space_id,
                        SearchSourceConnector.user_id == user_id,
                        SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
                    )
                )
            )
            connector = conn_result.scalars().first()
            if not connector:
                return {"status": "error", "message": "OneDrive connector not found or access denied."}

            cfg = connector.config or {}
            if cfg.get("auth_expired"):
                return {
                    "status": "auth_error",
                    "message": "OneDrive account needs re-authentication. Please re-authenticate in your connector settings.",
                    "connector_type": "onedrive",
                }

            # Details shown to the user in the approval prompt.  "name" is the
            # name the caller passed in, not the stored document title.
            context = {
                "file": {
                    "file_id": file_id,
                    "name": file_name,
                    "document_id": document_id,
                    "web_url": meta.get("web_url"),
                },
                "account": {
                    "id": connector.id,
                    "name": connector.name,
                    "user_email": cfg.get("user_email"),
                },
            }

            # Pause the graph until the user approves, edits or rejects.
            approval = interrupt(
                {
                    "type": "onedrive_file_trash",
                    "action": {
                        "tool": "delete_onedrive_file",
                        "params": {
                            "file_id": file_id,
                            "connector_id": connector.id,
                            "delete_from_kb": delete_from_kb,
                        },
                    },
                    "context": context,
                }
            )

            # Normalise the resume payload to a list of dict decisions.
            decisions_raw = approval.get("decisions", []) if isinstance(approval, dict) else []
            decisions = decisions_raw if isinstance(decisions_raw, list) else [decisions_raw]
            decisions = [d for d in decisions if isinstance(d, dict)]
            if not decisions:
                return {"status": "error", "message": "No approval decision received"}

            decision = decisions[0]
            decision_type = decision.get("type") or decision.get("decision_type")
            logger.info(f"User decision: {decision_type}")

            if decision_type == "reject":
                return {
                    "status": "rejected",
                    "message": "User declined. The file was not trashed. Do not ask again or suggest alternatives.",
                }

            # Edited arguments may arrive under ``edited_action.args`` or
            # directly under ``args``; missing keys fall back to the values
            # resolved before the prompt.
            final_params: dict[str, Any] = {}
            edited_action = decision.get("edited_action")
            if isinstance(edited_action, dict):
                edited_args = edited_action.get("args")
                if isinstance(edited_args, dict):
                    final_params = edited_args
            elif isinstance(decision.get("args"), dict):
                final_params = decision["args"]

            final_file_id = final_params.get("file_id", file_id)
            final_connector_id = final_params.get("connector_id", connector.id)
            final_delete_from_kb = final_params.get("delete_from_kb", delete_from_kb)

            if final_connector_id != connector.id:
                # The user switched accounts in the prompt: validate the new
                # id against the same ownership constraints.
                result = await db_session.execute(
                    select(SearchSourceConnector).filter(
                        and_(
                            SearchSourceConnector.id == final_connector_id,
                            SearchSourceConnector.search_space_id == search_space_id,
                            SearchSourceConnector.user_id == user_id,
                            SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
                        )
                    )
                )
                validated_connector = result.scalars().first()
                if not validated_connector:
                    return {
                        "status": "error",
                        "message": "Selected OneDrive connector is invalid or has been disconnected.",
                    }
                actual_connector_id = validated_connector.id
            else:
                actual_connector_id = connector.id

            logger.info(
                f"Deleting OneDrive file: file_id='{final_file_id}', connector={actual_connector_id}"
            )

            client = OneDriveClient(session=db_session, connector_id=actual_connector_id)
            await client.trash_file(final_file_id)

            logger.info(f"OneDrive file deleted (moved to recycle bin): file_id={final_file_id}")

            trash_result: dict[str, Any] = {
                "status": "success",
                "file_id": final_file_id,
                "message": f"Successfully moved '{file_name}' to the recycle bin.",
            }

            # Optional knowledge-base cleanup.  A failure here is reported as
            # a "warning" on an otherwise successful result, because the
            # OneDrive deletion has already happened.
            deleted_from_kb = False
            if final_delete_from_kb and document_id:
                try:
                    doc_result = await db_session.execute(
                        select(Document).filter(Document.id == document_id)
                    )
                    doc = doc_result.scalars().first()
                    if doc:
                        await db_session.delete(doc)
                        await db_session.commit()
                        deleted_from_kb = True
                        logger.info(
                            f"Deleted document {document_id} from knowledge base"
                        )
                    else:
                        logger.warning(f"Document {document_id} not found in KB")
                except Exception as e:
                    logger.error(f"Failed to delete document from KB: {e}")
                    await db_session.rollback()
                    trash_result["warning"] = (
                        f"File moved to recycle bin, but failed to remove from knowledge base: {e!s}"
                    )

            trash_result["deleted_from_kb"] = deleted_from_kb
            if deleted_from_kb:
                trash_result["message"] = (
                    f"{trash_result.get('message', '')} (also removed from knowledge base)"
                )

            return trash_result

        except Exception as e:
            from langgraph.errors import GraphInterrupt

            # The interrupt is delivered as an exception and must propagate.
            if isinstance(e, GraphInterrupt):
                raise
            logger.error(f"Error deleting OneDrive file: {e}", exc_info=True)
            return {"status": "error", "message": "Something went wrong while trashing the file. Please try again."}

    return delete_onedrive_file
name="create_onedrive_file", + description="Create a new file in Microsoft OneDrive", + factory=lambda deps: create_create_onedrive_file_tool( + db_session=deps["db_session"], + search_space_id=deps["search_space_id"], + user_id=deps["user_id"], + ), + requires=["db_session", "search_space_id", "user_id"], + ), + ToolDefinition( + name="delete_onedrive_file", + description="Move a OneDrive file to the recycle bin", + factory=lambda deps: create_delete_onedrive_file_tool( + db_session=deps["db_session"], + search_space_id=deps["search_space_id"], + user_id=deps["user_id"], + ), + requires=["db_session", "search_space_id", "user_id"], + ), + # ========================================================================= # GOOGLE CALENDAR TOOLS - create, update, delete events # Auto-disabled when no Google Calendar connector is configured # ========================================================================= diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 186936325..b38d7fd1d 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -281,10 +281,11 @@ def is_cloud(cls) -> bool: DISCORD_REDIRECT_URI = os.getenv("DISCORD_REDIRECT_URI") DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN") - # Microsoft Teams OAuth - TEAMS_CLIENT_ID = os.getenv("TEAMS_CLIENT_ID") - TEAMS_CLIENT_SECRET = os.getenv("TEAMS_CLIENT_SECRET") + # Microsoft OAuth (shared for Teams and OneDrive) + MICROSOFT_CLIENT_ID = os.getenv("MICROSOFT_CLIENT_ID") + MICROSOFT_CLIENT_SECRET = os.getenv("MICROSOFT_CLIENT_SECRET") TEAMS_REDIRECT_URI = os.getenv("TEAMS_REDIRECT_URI") + ONEDRIVE_REDIRECT_URI = os.getenv("ONEDRIVE_REDIRECT_URI") # ClickUp OAuth CLICKUP_CLIENT_ID = os.getenv("CLICKUP_CLIENT_ID") diff --git a/surfsense_backend/app/connectors/onedrive/__init__.py b/surfsense_backend/app/connectors/onedrive/__init__.py new file mode 100644 index 000000000..91b28bd37 --- /dev/null +++ 
logger = logging.getLogger(__name__)

GRAPH_API_BASE = "https://graph.microsoft.com/v1.0"
TOKEN_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/token"


class OneDriveClient:
    """Client for Microsoft OneDrive via the Graph API."""

    def __init__(self, session: AsyncSession, connector_id: int):
        """Bind the client to a DB session and one connector row.

        Args:
            session: Async session used to load and update the connector's
                stored OAuth tokens.
            connector_id: Primary key of the ``SearchSourceConnector`` whose
                credentials are used.
        """
        self._session = session
        self._connector_id = connector_id

    async def _get_valid_token(self) -> str:
        """Get a valid access token, refreshing if needed.

        Loads the connector config, decrypts the stored tokens when they are
        flagged as encrypted, and returns the stored access token if it has
        not expired.  Otherwise the refresh token is exchanged for a new
        access token, which is written back to the connector config and
        committed.

        Returns:
            A bearer token for the Graph API.

        Raises:
            ValueError: If the connector does not exist, if the tokens are
                stored encrypted but no ``SECRET_KEY`` is configured, if the
                token is expired/missing and no refresh token is stored (the
                connector is then flagged ``auth_expired``), or if the refresh
                request fails.
        """
        result = await self._session.execute(
            select(SearchSourceConnector).filter(
                SearchSourceConnector.id == self._connector_id
            )
        )
        connector = result.scalars().first()
        if not connector:
            raise ValueError(f"Connector {self._connector_id} not found")

        cfg = connector.config or {}
        is_encrypted = cfg.get("_token_encrypted", False)
        token_encryption = (
            TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None
        )

        access_token = cfg.get("access_token", "")
        refresh_token = cfg.get("refresh_token")

        if is_encrypted:
            if token_encryption is None:
                # Without the key the stored values are ciphertext; sending
                # them to Microsoft would only produce a misleading 401 or
                # refresh failure, so fail with the real cause instead.
                raise ValueError(
                    "OneDrive tokens are stored encrypted but SECRET_KEY is not configured"
                )
            if access_token:
                access_token = token_encryption.decrypt_token(access_token)
            if refresh_token:
                refresh_token = token_encryption.decrypt_token(refresh_token)

        expires_at_str = cfg.get("expires_at")
        is_expired = False
        if expires_at_str:
            expires_at = datetime.fromisoformat(expires_at_str)
            # Timestamps stored without an offset are interpreted as UTC.
            if expires_at.tzinfo is None:
                expires_at = expires_at.replace(tzinfo=UTC)
            is_expired = expires_at <= datetime.now(UTC)

        if not is_expired and access_token:
            return access_token

        if not refresh_token:
            # Nothing to refresh with: persist the flag so callers can ask
            # the user to re-authenticate.
            cfg["auth_expired"] = True
            connector.config = cfg
            flag_modified(connector, "config")
            await self._session.commit()
            raise ValueError("OneDrive token expired and no refresh token available")

        token_data = await self._refresh_token(refresh_token)

        new_access = token_data["access_token"]
        # Keep the current refresh token when the response omits a new one.
        new_refresh = token_data.get("refresh_token", refresh_token)
        expires_in = token_data.get("expires_in")

        new_expires_at = None
        if expires_in:
            new_expires_at = datetime.now(UTC) + timedelta(seconds=int(expires_in))

        if token_encryption:
            cfg["access_token"] = token_encryption.encrypt_token(new_access)
            cfg["refresh_token"] = token_encryption.encrypt_token(new_refresh)
        else:
            cfg["access_token"] = new_access
            cfg["refresh_token"] = new_refresh

        cfg["expires_at"] = new_expires_at.isoformat() if new_expires_at else None
        cfg["expires_in"] = expires_in
        cfg["_token_encrypted"] = bool(token_encryption)
        cfg.pop("auth_expired", None)

        # The config dict is mutated in place, so mark the attribute dirty
        # explicitly before committing.
        connector.config = cfg
        flag_modified(connector, "config")
        await self._session.commit()

        return new_access

    async def _refresh_token(self, refresh_token: str) -> dict:
        """Exchange a refresh token for new tokens at the Microsoft endpoint.

        Args:
            refresh_token: Plain-text refresh token.

        Returns:
            The decoded JSON body of the token response.

        Raises:
            ValueError: If the endpoint answers with a non-200 status; the
                message carries ``error_description`` when the body is JSON,
                otherwise the raw response text.
        """
        data = {
            "client_id": config.MICROSOFT_CLIENT_ID,
            "client_secret": config.MICROSOFT_CLIENT_SECRET,
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
            "scope": "offline_access User.Read Files.Read.All Files.ReadWrite.All",
        }
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                TOKEN_URL,
                data=data,
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                timeout=30.0,
            )
        if resp.status_code != 200:
            error_detail = resp.text
            try:
                error_json = resp.json()
                error_detail = error_json.get("error_description", error_detail)
            except Exception:
                # Body was not a JSON object; keep the raw text.
                pass
            raise ValueError(f"OneDrive token refresh failed: {error_detail}")
        return resp.json()
"refresh_token": refresh_token, + "scope": "offline_access User.Read Files.Read.All Files.ReadWrite.All", + } + async with httpx.AsyncClient() as client: + resp = await client.post( + TOKEN_URL, + data=data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=30.0, + ) + if resp.status_code != 200: + error_detail = resp.text + try: + error_json = resp.json() + error_detail = error_json.get("error_description", error_detail) + except Exception: + pass + raise ValueError(f"OneDrive token refresh failed: {error_detail}") + return resp.json() + + async def _request(self, method: str, path: str, **kwargs) -> httpx.Response: + """Make an authenticated request to the Graph API.""" + token = await self._get_valid_token() + headers = {"Authorization": f"Bearer {token}"} + if "headers" in kwargs: + headers.update(kwargs.pop("headers")) + + async with httpx.AsyncClient() as client: + resp = await client.request( + method, + f"{GRAPH_API_BASE}{path}", + headers=headers, + timeout=60.0, + **kwargs, + ) + + if resp.status_code == 401: + result = await self._session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == self._connector_id + ) + ) + connector = result.scalars().first() + if connector: + cfg = connector.config or {} + cfg["auth_expired"] = True + connector.config = cfg + flag_modified(connector, "config") + await self._session.commit() + raise ValueError("OneDrive authentication expired (401)") + + return resp + + async def list_children( + self, item_id: str = "root" + ) -> tuple[list[dict[str, Any]], str | None]: + all_items: list[dict[str, Any]] = [] + url = f"/me/drive/items/{item_id}/children" + params: dict[str, Any] = { + "$top": 200, + "$select": "id,name,size,file,folder,parentReference,lastModifiedDateTime,createdDateTime,webUrl,remoteItem,package", + } + while url: + resp = await self._request("GET", url, params=params) + if resp.status_code != 200: + return [], f"Failed to list children: {resp.status_code} 
- {resp.text}" + data = resp.json() + all_items.extend(data.get("value", [])) + next_link = data.get("@odata.nextLink") + if next_link: + url = next_link.replace(GRAPH_API_BASE, "") + params = {} + else: + url = "" + return all_items, None + + async def get_item_metadata( + self, item_id: str + ) -> tuple[dict[str, Any] | None, str | None]: + resp = await self._request( + "GET", + f"/me/drive/items/{item_id}", + params={ + "$select": "id,name,size,file,folder,parentReference,lastModifiedDateTime,createdDateTime,webUrl" + }, + ) + if resp.status_code != 200: + return None, f"Failed to get item: {resp.status_code} - {resp.text}" + return resp.json(), None + + async def download_file(self, item_id: str) -> tuple[bytes | None, str | None]: + token = await self._get_valid_token() + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.get( + f"{GRAPH_API_BASE}/me/drive/items/{item_id}/content", + headers={"Authorization": f"Bearer {token}"}, + timeout=120.0, + ) + if resp.status_code != 200: + return None, f"Download failed: {resp.status_code}" + return resp.content, None + + async def download_file_to_disk(self, item_id: str, dest_path: str) -> str | None: + """Stream file content to disk. 
Returns error message on failure.""" + token = await self._get_valid_token() + async with httpx.AsyncClient(follow_redirects=True) as client: + async with client.stream( + "GET", + f"{GRAPH_API_BASE}/me/drive/items/{item_id}/content", + headers={"Authorization": f"Bearer {token}"}, + timeout=120.0, + ) as resp: + if resp.status_code != 200: + return f"Download failed: {resp.status_code}" + with open(dest_path, "wb") as f: + async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024): + f.write(chunk) + return None + + async def create_file( + self, + name: str, + parent_id: str | None = None, + content: str | bytes | None = None, + mime_type: str | None = None, + ) -> dict[str, Any]: + """Create (upload) a file in OneDrive.""" + folder_path = f"/me/drive/items/{parent_id or 'root'}" + if isinstance(content, bytes): + body = content + else: + body = (content or "").encode("utf-8") + resp = await self._request( + "PUT", + f"{folder_path}:/{name}:/content", + content=body, + headers={"Content-Type": mime_type or "application/octet-stream"}, + ) + if resp.status_code not in (200, 201): + raise ValueError(f"File creation failed: {resp.status_code} - {resp.text}") + return resp.json() + + async def trash_file(self, item_id: str) -> bool: + """Delete (move to recycle bin) a OneDrive item.""" + resp = await self._request("DELETE", f"/me/drive/items/{item_id}") + if resp.status_code not in (200, 204): + raise ValueError(f"Trash failed: {resp.status_code} - {resp.text}") + return True + + async def get_delta( + self, folder_id: str | None = None, delta_link: str | None = None + ) -> tuple[list[dict[str, Any]], str | None, str | None]: + """Get delta changes. 
Returns (changes, new_delta_link, error).""" + all_changes: list[dict[str, Any]] = [] + if delta_link: + url = delta_link.replace(GRAPH_API_BASE, "") + elif folder_id: + url = f"/me/drive/items/{folder_id}/delta" + else: + url = "/me/drive/root/delta" + + params: dict[str, Any] = {"$top": 200} + while url: + resp = await self._request("GET", url, params=params) + if resp.status_code != 200: + return [], None, f"Delta failed: {resp.status_code} - {resp.text}" + data = resp.json() + all_changes.extend(data.get("value", [])) + next_link = data.get("@odata.nextLink") + new_delta_link = data.get("@odata.deltaLink") + if next_link: + url = next_link.replace(GRAPH_API_BASE, "") + params = {} + else: + url = "" + return all_changes, new_delta_link, None diff --git a/surfsense_backend/app/connectors/onedrive/content_extractor.py b/surfsense_backend/app/connectors/onedrive/content_extractor.py new file mode 100644 index 000000000..109a8cb15 --- /dev/null +++ b/surfsense_backend/app/connectors/onedrive/content_extractor.py @@ -0,0 +1,169 @@ +"""Content extraction for OneDrive files. + +Reuses the same ETL parsing logic as Google Drive since file parsing is +extension-based, not provider-specific. +""" + +import asyncio +import logging +import os +import tempfile +import threading +import time +from pathlib import Path +from typing import Any + +from .client import OneDriveClient +from .file_types import get_extension_from_mime, should_skip_file + +logger = logging.getLogger(__name__) + + +async def download_and_extract_content( + client: OneDriveClient, + file: dict[str, Any], +) -> tuple[str | None, dict[str, Any], str | None]: + """Download a OneDrive file and extract its content as markdown. + + Returns (markdown_content, onedrive_metadata, error_message). 
+ """ + item_id = file.get("id") + file_name = file.get("name", "Unknown") + + if should_skip_file(file): + return None, {}, "Skipping non-indexable item" + + file_info = file.get("file", {}) + mime_type = file_info.get("mimeType", "") + + logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})") + + metadata: dict[str, Any] = { + "onedrive_file_id": item_id, + "onedrive_file_name": file_name, + "onedrive_mime_type": mime_type, + "source_connector": "onedrive", + } + if "lastModifiedDateTime" in file: + metadata["modified_time"] = file["lastModifiedDateTime"] + if "createdDateTime" in file: + metadata["created_time"] = file["createdDateTime"] + if "size" in file: + metadata["file_size"] = file["size"] + if "webUrl" in file: + metadata["web_url"] = file["webUrl"] + file_hashes = file_info.get("hashes", {}) + if file_hashes.get("sha256Hash"): + metadata["sha256_hash"] = file_hashes["sha256Hash"] + elif file_hashes.get("quickXorHash"): + metadata["quick_xor_hash"] = file_hashes["quickXorHash"] + + temp_file_path = None + try: + extension = Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin" + with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp: + temp_file_path = tmp.name + + error = await client.download_file_to_disk(item_id, temp_file_path) + if error: + return None, metadata, error + + markdown = await _parse_file_to_markdown(temp_file_path, file_name) + return markdown, metadata, None + + except Exception as e: + logger.warning(f"Failed to extract content from {file_name}: {e!s}") + return None, metadata, str(e) + finally: + if temp_file_path and os.path.exists(temp_file_path): + try: + os.unlink(temp_file_path) + except Exception: + pass + + +async def _parse_file_to_markdown(file_path: str, filename: str) -> str: + """Parse a local file to markdown using the configured ETL service. + + Same logic as Google Drive -- file parsing is extension-based. 
+ """ + lower = filename.lower() + + if lower.endswith((".md", ".markdown", ".txt")): + with open(file_path, encoding="utf-8") as f: + return f.read() + + if lower.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")): + from app.config import config as app_config + from litellm import atranscription + + stt_service_type = ( + "local" + if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/") + else "external" + ) + if stt_service_type == "local": + from app.services.stt_service import stt_service + + t0 = time.monotonic() + logger.info(f"[local-stt] START file={filename} thread={threading.current_thread().name}") + result = await asyncio.to_thread(stt_service.transcribe_file, file_path) + logger.info(f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s") + text = result.get("text", "") + else: + with open(file_path, "rb") as audio_file: + kwargs: dict[str, Any] = { + "model": app_config.STT_SERVICE, + "file": audio_file, + "api_key": app_config.STT_SERVICE_API_KEY, + } + if app_config.STT_SERVICE_API_BASE: + kwargs["api_base"] = app_config.STT_SERVICE_API_BASE + resp = await atranscription(**kwargs) + text = resp.get("text", "") + + if not text: + raise ValueError("Transcription returned empty text") + return f"# Transcription of {filename}\n\n{text}" + + from app.config import config as app_config + + if app_config.ETL_SERVICE == "UNSTRUCTURED": + from langchain_unstructured import UnstructuredLoader + + from app.utils.document_converters import convert_document_to_markdown + + loader = UnstructuredLoader( + file_path, + mode="elements", + post_processors=[], + languages=["eng"], + include_orig_elements=False, + include_metadata=False, + strategy="auto", + ) + docs = await loader.aload() + return await convert_document_to_markdown(docs) + + if app_config.ETL_SERVICE == "LLAMACLOUD": + from app.tasks.document_processors.file_processors import ( + parse_with_llamacloud_retry, + ) + + result = await 
parse_with_llamacloud_retry(file_path=file_path, estimated_pages=50) + markdown_documents = await result.aget_markdown_documents(split_by_page=False) + if not markdown_documents: + raise RuntimeError(f"LlamaCloud returned no documents for {filename}") + return markdown_documents[0].text + + if app_config.ETL_SERVICE == "DOCLING": + from docling.document_converter import DocumentConverter + + converter = DocumentConverter() + t0 = time.monotonic() + logger.info(f"[docling] START file={filename} thread={threading.current_thread().name}") + result = await asyncio.to_thread(converter.convert, file_path) + logger.info(f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s") + return result.document.export_to_markdown() + + raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}") diff --git a/surfsense_backend/app/connectors/onedrive/file_types.py b/surfsense_backend/app/connectors/onedrive/file_types.py new file mode 100644 index 000000000..403fdc337 --- /dev/null +++ b/surfsense_backend/app/connectors/onedrive/file_types.py @@ -0,0 +1,50 @@ +"""File type handlers for Microsoft OneDrive.""" + +ONEDRIVE_FOLDER_FACET = "folder" +ONENOTE_MIME = "application/msonenote" + +SKIP_MIME_TYPES = frozenset( + { + ONENOTE_MIME, + "application/vnd.ms-onenotesection", + "application/vnd.ms-onenotenotebook", + } +) + +MIME_TO_EXTENSION: dict[str, str] = { + "application/pdf": ".pdf", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", + "application/vnd.ms-excel": ".xls", + "application/msword": ".doc", + "application/vnd.ms-powerpoint": ".ppt", + "text/plain": ".txt", + "text/csv": ".csv", + "text/html": ".html", + "text/markdown": ".md", + "application/json": ".json", + "application/xml": ".xml", + "image/png": ".png", + "image/jpeg": ".jpg", +} + + +def 
get_extension_from_mime(mime_type: str) -> str | None: + return MIME_TO_EXTENSION.get(mime_type) + + +def is_folder(item: dict) -> bool: + return ONEDRIVE_FOLDER_FACET in item + + +def should_skip_file(item: dict) -> bool: + """Skip folders, OneNote files, remote items (shared links), and packages.""" + if is_folder(item): + return True + if "remoteItem" in item: + return True + if "package" in item: + return True + mime = item.get("file", {}).get("mimeType", "") + return mime in SKIP_MIME_TYPES diff --git a/surfsense_backend/app/connectors/onedrive/folder_manager.py b/surfsense_backend/app/connectors/onedrive/folder_manager.py new file mode 100644 index 000000000..7f286453c --- /dev/null +++ b/surfsense_backend/app/connectors/onedrive/folder_manager.py @@ -0,0 +1,94 @@ +"""Folder management for Microsoft OneDrive.""" + +import logging +from typing import Any + +from .client import OneDriveClient +from .file_types import is_folder, should_skip_file + +logger = logging.getLogger(__name__) + + +async def list_folder_contents( + client: OneDriveClient, + parent_id: str | None = None, +) -> tuple[list[dict[str, Any]], str | None]: + """List folders and files in a OneDrive folder. + + Returns (items list with folders first, error message). 
+ """ + try: + items, error = await client.list_children(parent_id or "root") + if error: + return [], error + + for item in items: + item["isFolder"] = is_folder(item) + if item["isFolder"]: + item.setdefault("mimeType", "application/vnd.ms-folder") + else: + item.setdefault("mimeType", item.get("file", {}).get("mimeType", "application/octet-stream")) + + items.sort(key=lambda x: (not x["isFolder"], x.get("name", "").lower())) + + folder_count = sum(1 for item in items if item["isFolder"]) + file_count = len(items) - folder_count + logger.info( + f"Listed {len(items)} items ({folder_count} folders, {file_count} files) " + + (f"in folder {parent_id}" if parent_id else "in root") + ) + return items, None + + except Exception as e: + logger.error(f"Error listing folder contents: {e!s}", exc_info=True) + return [], f"Error listing folder contents: {e!s}" + + +async def get_files_in_folder( + client: OneDriveClient, + folder_id: str, + include_subfolders: bool = True, +) -> tuple[list[dict[str, Any]], str | None]: + """Get all indexable files in a folder, optionally recursing into subfolders.""" + try: + items, error = await client.list_children(folder_id) + if error: + return [], error + + files: list[dict[str, Any]] = [] + for item in items: + if is_folder(item): + if include_subfolders: + sub_files, sub_error = await get_files_in_folder( + client, item["id"], include_subfolders=True + ) + if sub_error: + logger.warning(f"Error recursing into folder {item.get('name')}: {sub_error}") + continue + files.extend(sub_files) + elif not should_skip_file(item): + files.append(item) + + return files, None + + except Exception as e: + logger.error(f"Error getting files in folder: {e!s}", exc_info=True) + return [], f"Error getting files in folder: {e!s}" + + +async def get_file_by_id( + client: OneDriveClient, + file_id: str, +) -> tuple[dict[str, Any] | None, str | None]: + """Get file metadata by ID.""" + try: + item, error = await client.get_item_metadata(file_id) + if 
error: + return None, error + if not item: + return None, f"File not found: {file_id}" + return item, None + + except Exception as e: + logger.error(f"Error getting file by ID: {e!s}", exc_info=True) + return None, f"Error getting file by ID: {e!s}" diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 9680a7bfd..a8510ebab 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -40,6 +40,7 @@ class DocumentType(StrEnum): FILE = "FILE" SLACK_CONNECTOR = "SLACK_CONNECTOR" TEAMS_CONNECTOR = "TEAMS_CONNECTOR" + ONEDRIVE_FILE = "ONEDRIVE_FILE" NOTION_CONNECTOR = "NOTION_CONNECTOR" YOUTUBE_VIDEO = "YOUTUBE_VIDEO" GITHUB_CONNECTOR = "GITHUB_CONNECTOR" @@ -81,6 +82,7 @@ class SearchSourceConnectorType(StrEnum): BAIDU_SEARCH_API = "BAIDU_SEARCH_API" # Baidu AI Search API for Chinese web search SLACK_CONNECTOR = "SLACK_CONNECTOR" TEAMS_CONNECTOR = "TEAMS_CONNECTOR" + ONEDRIVE_CONNECTOR = "ONEDRIVE_CONNECTOR" NOTION_CONNECTOR = "NOTION_CONNECTOR" GITHUB_CONNECTOR = "GITHUB_CONNECTOR" LINEAR_CONNECTOR = "LINEAR_CONNECTOR" diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 7782c064c..af26e3680 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -43,6 +43,7 @@ from .slack_add_connector_route import router as slack_add_connector_router from .surfsense_docs_routes import router as surfsense_docs_router from .teams_add_connector_route import router as teams_add_connector_router +from .onedrive_add_connector_route import router as onedrive_add_connector_router from .video_presentations_routes import router as video_presentations_router from .youtube_routes import router as youtube_router @@ -73,6 +74,7 @@ router.include_router(notion_add_connector_router) router.include_router(slack_add_connector_router) router.include_router(teams_add_connector_router) +router.include_router(onedrive_add_connector_router) 
router.include_router(discord_add_connector_router) router.include_router(jira_add_connector_router) router.include_router(confluence_add_connector_router) diff --git a/surfsense_backend/app/routes/onedrive_add_connector_route.py b/surfsense_backend/app/routes/onedrive_add_connector_route.py new file mode 100644 index 000000000..19bcbe6ff --- /dev/null +++ b/surfsense_backend/app/routes/onedrive_add_connector_route.py @@ -0,0 +1,474 @@ +""" +Microsoft OneDrive Connector OAuth Routes. + +Endpoints: +- GET /auth/onedrive/connector/add - Initiate OAuth +- GET /auth/onedrive/connector/callback - Handle OAuth callback +- GET /auth/onedrive/connector/reauth - Re-authenticate existing connector +- GET /connectors/{connector_id}/onedrive/folders - List folder contents +""" + +import logging +from datetime import UTC, datetime, timedelta +from urllib.parse import urlencode +from uuid import UUID + +import httpx +from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import RedirectResponse +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select +from sqlalchemy.orm.attributes import flag_modified + +from app.config import config +from app.connectors.onedrive import OneDriveClient, list_folder_contents +from app.db import ( + SearchSourceConnector, + SearchSourceConnectorType, + User, + get_async_session, +) +from app.users import current_active_user +from app.utils.connector_naming import ( + check_duplicate_connector, + extract_identifier_from_credentials, + generate_unique_connector_name, +) +from app.utils.oauth_security import OAuthStateManager, TokenEncryption + +logger = logging.getLogger(__name__) +router = APIRouter() + +AUTHORIZATION_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/authorize" +TOKEN_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/token" + +SCOPES = [ + "offline_access", + "User.Read", + "Files.Read.All", + "Files.ReadWrite.All", 
+] + +_state_manager = None +_token_encryption = None + + +def get_state_manager() -> OAuthStateManager: + global _state_manager + if _state_manager is None: + if not config.SECRET_KEY: + raise ValueError("SECRET_KEY must be set for OAuth security") + _state_manager = OAuthStateManager(config.SECRET_KEY) + return _state_manager + + +def get_token_encryption() -> TokenEncryption: + global _token_encryption + if _token_encryption is None: + if not config.SECRET_KEY: + raise ValueError("SECRET_KEY must be set for token encryption") + _token_encryption = TokenEncryption(config.SECRET_KEY) + return _token_encryption + + +@router.get("/auth/onedrive/connector/add") +async def connect_onedrive(space_id: int, user: User = Depends(current_active_user)): + """Initiate OneDrive OAuth flow.""" + try: + if not space_id: + raise HTTPException(status_code=400, detail="space_id is required") + if not config.MICROSOFT_CLIENT_ID: + raise HTTPException(status_code=500, detail="Microsoft OneDrive OAuth not configured.") + if not config.SECRET_KEY: + raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.") + + state_manager = get_state_manager() + state_encoded = state_manager.generate_secure_state(space_id, user.id) + + auth_params = { + "client_id": config.MICROSOFT_CLIENT_ID, + "response_type": "code", + "redirect_uri": config.ONEDRIVE_REDIRECT_URI, + "response_mode": "query", + "scope": " ".join(SCOPES), + "state": state_encoded, + } + auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}" + + logger.info("Generated OneDrive OAuth URL for user %s, space %s", user.id, space_id) + return {"auth_url": auth_url} + + except HTTPException: + raise + except Exception as e: + logger.error("Failed to initiate OneDrive OAuth: %s", str(e), exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive OAuth: {e!s}") from e + + +@router.get("/auth/onedrive/connector/reauth") +async def reauth_onedrive( + space_id: int, + 
connector_id: int, + return_url: str | None = None, + user: User = Depends(current_active_user), + session: AsyncSession = Depends(get_async_session), +): + """Re-authenticate an existing OneDrive connector.""" + try: + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.user_id == user.id, + SearchSourceConnector.search_space_id == space_id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + ) + ) + connector = result.scalars().first() + if not connector: + raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied") + + if not config.SECRET_KEY: + raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.") + + state_manager = get_state_manager() + extra: dict = {"connector_id": connector_id} + if return_url and return_url.startswith("/"): + extra["return_url"] = return_url + state_encoded = state_manager.generate_secure_state(space_id, user.id, **extra) + + auth_params = { + "client_id": config.MICROSOFT_CLIENT_ID, + "response_type": "code", + "redirect_uri": config.ONEDRIVE_REDIRECT_URI, + "response_mode": "query", + "scope": " ".join(SCOPES), + "state": state_encoded, + "prompt": "consent", + } + auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}" + + logger.info("Initiating OneDrive re-auth for user %s, connector %s", user.id, connector_id) + return {"auth_url": auth_url} + + except HTTPException: + raise + except Exception as e: + logger.error("Failed to initiate OneDrive re-auth: %s", str(e), exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive re-auth: {e!s}") from e + + +@router.get("/auth/onedrive/connector/callback") +async def onedrive_callback( + code: str | None = None, + error: str | None = None, + error_description: str | None = None, + state: str | None = None, + session: AsyncSession = Depends(get_async_session), 
+): + """Handle OneDrive OAuth callback.""" + try: + if error: + error_msg = error_description or error + logger.warning("OneDrive OAuth error: %s", error_msg) + space_id = None + if state: + try: + data = get_state_manager().validate_state(state) + space_id = data.get("space_id") + except Exception: + pass + if space_id: + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=onedrive_oauth_denied" + ) + return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_oauth_denied") + + if not code or not state: + raise HTTPException(status_code=400, detail="Missing required OAuth parameters") + + state_manager = get_state_manager() + try: + data = state_manager.validate_state(state) + space_id = data["space_id"] + user_id = UUID(data["user_id"]) + except (HTTPException, ValueError, KeyError) as e: + logger.error("Invalid OAuth state: %s", str(e)) + return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=invalid_state") + + reauth_connector_id = data.get("connector_id") + reauth_return_url = data.get("return_url") + + token_data = { + "client_id": config.MICROSOFT_CLIENT_ID, + "client_secret": config.MICROSOFT_CLIENT_SECRET, + "code": code, + "redirect_uri": config.ONEDRIVE_REDIRECT_URI, + "grant_type": "authorization_code", + } + + async with httpx.AsyncClient() as client: + token_response = await client.post( + TOKEN_URL, + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=30.0, + ) + + if token_response.status_code != 200: + error_detail = token_response.text + try: + error_json = token_response.json() + error_detail = error_json.get("error_description", error_detail) + except Exception: + pass + raise HTTPException(status_code=400, detail=f"Token exchange failed: {error_detail}") + + token_json = token_response.json() + access_token = token_json.get("access_token") + refresh_token = token_json.get("refresh_token") + + if not 
access_token: + raise HTTPException(status_code=400, detail="No access token received from Microsoft") + + token_encryption = get_token_encryption() + + expires_at = None + if token_json.get("expires_in"): + expires_at = datetime.now(UTC) + timedelta(seconds=int(token_json["expires_in"])) + + user_info: dict = {} + try: + async with httpx.AsyncClient() as client: + user_response = await client.get( + "https://graph.microsoft.com/v1.0/me", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=30.0, + ) + if user_response.status_code == 200: + user_data = user_response.json() + user_info = { + "user_email": user_data.get("mail") or user_data.get("userPrincipalName"), + "user_name": user_data.get("displayName"), + } + except Exception as e: + logger.warning("Failed to fetch user info from Graph: %s", str(e)) + + connector_config = { + "access_token": token_encryption.encrypt_token(access_token), + "refresh_token": token_encryption.encrypt_token(refresh_token) if refresh_token else None, + "token_type": token_json.get("token_type", "Bearer"), + "expires_in": token_json.get("expires_in"), + "expires_at": expires_at.isoformat() if expires_at else None, + "scope": token_json.get("scope"), + "user_email": user_info.get("user_email"), + "user_name": user_info.get("user_name"), + "_token_encrypted": True, + } + + # Handle re-authentication + if reauth_connector_id: + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == reauth_connector_id, + SearchSourceConnector.user_id == user_id, + SearchSourceConnector.search_space_id == space_id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + ) + ) + db_connector = result.scalars().first() + if not db_connector: + raise HTTPException(status_code=404, detail="Connector not found or access denied during re-auth") + + existing_delta_link = db_connector.config.get("delta_link") + db_connector.config = {**connector_config, "delta_link": 
existing_delta_link, "auth_expired": False} + flag_modified(db_connector, "config") + await session.commit() + await session.refresh(db_connector) + + logger.info("Re-authenticated OneDrive connector %s for user %s", db_connector.id, user_id) + if reauth_return_url and reauth_return_url.startswith("/"): + return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}{reauth_return_url}") + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={db_connector.id}" + ) + + # New connector -- check for duplicates + connector_identifier = extract_identifier_from_credentials( + SearchSourceConnectorType.ONEDRIVE_CONNECTOR, connector_config + ) + is_duplicate = await check_duplicate_connector( + session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier, + ) + if is_duplicate: + logger.warning("Duplicate OneDrive connector for user %s, space %s", user_id, space_id) + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=duplicate_account&connector=ONEDRIVE_CONNECTOR" + ) + + connector_name = await generate_unique_connector_name( + session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier, + ) + + new_connector = SearchSourceConnector( + name=connector_name, + connector_type=SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + is_indexable=True, + config=connector_config, + search_space_id=space_id, + user_id=user_id, + ) + + try: + session.add(new_connector) + await session.commit() + await session.refresh(new_connector) + logger.info("Successfully created OneDrive connector %s for user %s", new_connector.id, user_id) + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={new_connector.id}" + ) + except IntegrityError as e: + await session.rollback() + 
logger.error("Database integrity error creating OneDrive connector: %s", str(e)) + return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=connector_creation_failed") + + except HTTPException: + raise + except (IntegrityError, ValueError) as e: + logger.error("OneDrive OAuth callback error: %s", str(e), exc_info=True) + return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_auth_error") + + +@router.get("/connectors/{connector_id}/onedrive/folders") +async def list_onedrive_folders( + connector_id: int, + parent_id: str | None = None, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """List folders and files in user's OneDrive.""" + connector = None + try: + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.user_id == user.id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + ) + ) + connector = result.scalars().first() + if not connector: + raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied") + + onedrive_client = OneDriveClient(session, connector_id) + items, error = await list_folder_contents(onedrive_client, parent_id=parent_id) + + if error: + error_lower = error.lower() + if "401" in error or "authentication expired" in error_lower or "invalid_grant" in error_lower: + try: + if connector and not connector.config.get("auth_expired"): + connector.config = {**connector.config, "auth_expired": True} + flag_modified(connector, "config") + await session.commit() + except Exception: + logger.warning("Failed to persist auth_expired for connector %s", connector_id, exc_info=True) + raise HTTPException(status_code=400, detail="OneDrive authentication expired. 
Please re-authenticate.") + raise HTTPException(status_code=500, detail=f"Failed to list folder contents: {error}") + + return {"items": items} + + except HTTPException: + raise + except Exception as e: + logger.error("Error listing OneDrive contents: %s", str(e), exc_info=True) + error_lower = str(e).lower() + if "401" in str(e) or "authentication expired" in error_lower: + try: + if connector and not connector.config.get("auth_expired"): + connector.config = {**connector.config, "auth_expired": True} + flag_modified(connector, "config") + await session.commit() + except Exception: + pass + raise HTTPException(status_code=400, detail="OneDrive authentication expired. Please re-authenticate.") from e + raise HTTPException(status_code=500, detail=f"Failed to list OneDrive contents: {e!s}") from e + + +async def refresh_onedrive_token( + session: AsyncSession, connector: SearchSourceConnector +) -> SearchSourceConnector: + """Refresh OneDrive OAuth tokens.""" + logger.info("Refreshing OneDrive OAuth tokens for connector %s", connector.id) + + token_encryption = get_token_encryption() + is_encrypted = connector.config.get("_token_encrypted", False) + refresh_token = connector.config.get("refresh_token") + + if is_encrypted and refresh_token: + try: + refresh_token = token_encryption.decrypt_token(refresh_token) + except Exception as e: + logger.error("Failed to decrypt refresh token: %s", str(e)) + raise HTTPException(status_code=500, detail="Failed to decrypt stored refresh token") from e + + if not refresh_token: + raise HTTPException(status_code=400, detail=f"No refresh token available for connector {connector.id}") + + refresh_data = { + "client_id": config.MICROSOFT_CLIENT_ID, + "client_secret": config.MICROSOFT_CLIENT_SECRET, + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "scope": " ".join(SCOPES), + } + + async with httpx.AsyncClient() as client: + token_response = await client.post( + TOKEN_URL, data=refresh_data, + 
headers={"Content-Type": "application/x-www-form-urlencoded"}, timeout=30.0, + ) + + if token_response.status_code != 200: + error_detail = token_response.text + error_code = "" + try: + error_json = token_response.json() + error_detail = error_json.get("error_description", error_detail) + error_code = error_json.get("error", "") + except Exception: + pass + error_lower = (error_detail + error_code).lower() + if "invalid_grant" in error_lower or "expired" in error_lower or "revoked" in error_lower: + raise HTTPException(status_code=401, detail="OneDrive authentication failed. Please re-authenticate.") + raise HTTPException(status_code=400, detail=f"Token refresh failed: {error_detail}") + + token_json = token_response.json() + access_token = token_json.get("access_token") + new_refresh_token = token_json.get("refresh_token") + + if not access_token: + raise HTTPException(status_code=400, detail="No access token received from Microsoft refresh") + + expires_at = None + expires_in = token_json.get("expires_in") + if expires_in: + expires_at = datetime.now(UTC) + timedelta(seconds=int(expires_in)) + + cfg = dict(connector.config) + cfg["access_token"] = token_encryption.encrypt_token(access_token) + if new_refresh_token: + cfg["refresh_token"] = token_encryption.encrypt_token(new_refresh_token) + cfg["expires_in"] = expires_in + cfg["expires_at"] = expires_at.isoformat() if expires_at else None + cfg["scope"] = token_json.get("scope") + cfg["_token_encrypted"] = True + cfg.pop("auth_expired", None) + + connector.config = cfg + flag_modified(connector, "config") + await session.commit() + await session.refresh(connector) + + logger.info("Successfully refreshed OneDrive tokens for connector %s", connector.id) + return connector diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 00892c6eb..7e9ac1e59 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py 
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -999,6 +999,53 @@ async def index_connector_content( ) response_message = "Google Drive indexing started in the background." + elif connector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR: + from app.tasks.celery_tasks.connector_tasks import ( + index_onedrive_files_task, + ) + + if drive_items and drive_items.has_items(): + logger.info( + f"Triggering OneDrive indexing for connector {connector_id} into search space {search_space_id}, " + f"folders: {len(drive_items.folders)}, files: {len(drive_items.files)}" + ) + items_dict = drive_items.model_dump() + else: + config = connector.config or {} + selected_folders = config.get("selected_folders", []) + selected_files = config.get("selected_files", []) + if not selected_folders and not selected_files: + raise HTTPException( + status_code=400, + detail="OneDrive indexing requires folders or files to be configured. " + "Please select folders/files to index.", + ) + indexing_options = config.get( + "indexing_options", + { + "max_files_per_folder": 100, + "incremental_sync": True, + "include_subfolders": True, + }, + ) + items_dict = { + "folders": selected_folders, + "files": selected_files, + "indexing_options": indexing_options, + } + logger.info( + f"Triggering OneDrive indexing for connector {connector_id} into search space {search_space_id} " + f"using existing config" + ) + + index_onedrive_files_task.delay( + connector_id, + search_space_id, + str(user.id), + items_dict, + ) + response_message = "OneDrive indexing started in the background." 
async def run_onedrive_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    items_dict: dict,
):
    """Run OneDrive indexing for the selected folders/files and keep the user notified.

    Flow, as implemented below:
      1. Load the connector row; if it exists, create an "indexing started"
         notification and move it to the ``fetching`` stage.
      2. Delegate the actual work to ``index_onedrive_files``.
      3. On a reported error, log it and (for auth errors) persist the
         auth-expired flag; on success, advance the notification to ``storing``,
         bump the connector timestamp and commit.
      4. Always try to close the notification with the final counts.

    Args:
        session: Async SQLAlchemy session used for all reads/writes here.
        connector_id: ID of the connector row to index.
        search_space_id: Search space the documents are indexed into.
        user_id: User ID as a string; converted with ``UUID(user_id)`` for the
            notification service.
        items_dict: Dict with ``"folders"`` and ``"files"`` lists (each entry is
            read with ``.get("name", "Unknown")``) and is forwarded unchanged
            to ``index_onedrive_files``.

    Returns:
        None. Exceptions are caught, logged, and reflected in the notification.
    """
    from uuid import UUID

    # Stays None when the connector lookup fails, which disables every
    # notification update further down.
    notification = None
    try:
        from app.tasks.connector_indexers.onedrive_indexer import index_onedrive_files

        connector_result = await session.execute(
            select(SearchSourceConnector).where(
                SearchSourceConnector.id == connector_id
            )
        )
        connector = connector_result.scalar_one_or_none()

        if connector:
            # NOTE(review): this reuses the Google Drive "started" notifier for
            # OneDrive — confirm that sharing it is intentional.
            notification = await NotificationService.connector_indexing.notify_google_drive_indexing_started(
                session=session,
                user_id=UUID(user_id),
                connector_id=connector_id,
                connector_name=connector.name,
                connector_type=connector.connector_type.value,
                search_space_id=search_space_id,
                folder_count=len(items_dict.get("folders", [])),
                file_count=len(items_dict.get("files", [])),
                folder_names=[f.get("name", "Unknown") for f in items_dict.get("folders", [])],
                file_names=[f.get("name", "Unknown") for f in items_dict.get("files", [])],
            )

        if notification:
            await NotificationService.connector_indexing.notify_indexing_progress(
                session=session,
                notification=notification,
                indexed_count=0,
                stage="fetching",
            )

        # The indexer reports failure through the third tuple element rather
        # than by raising.
        total_indexed, total_skipped, error_message = await index_onedrive_files(
            session,
            connector_id,
            search_space_id,
            user_id,
            items_dict,
        )

        if error_message:
            logger.error(
                f"OneDrive indexing completed with errors for connector {connector_id}: {error_message}"
            )
            if _is_auth_error(error_message):
                await _persist_auth_expired(session, connector_id)
                # Replace the raw error with a fixed, user-facing message.
                error_message = "OneDrive authentication expired. Please re-authenticate."
        else:
            if notification:
                # Refresh before updating the notification again.
                await session.refresh(notification)
                await NotificationService.connector_indexing.notify_indexing_progress(
                    session=session,
                    notification=notification,
                    indexed_count=total_indexed,
                    stage="storing",
                )

            logger.info(
                f"OneDrive indexing successful for connector {connector_id}. Indexed {total_indexed} documents."
            )
            # The timestamp is only bumped on the error-free path.
            await _update_connector_timestamp_by_id(session, connector_id)
            await session.commit()

        if notification:
            await session.refresh(notification)
            await NotificationService.connector_indexing.notify_indexing_completed(
                session=session,
                notification=notification,
                indexed_count=total_indexed,
                error_message=error_message,
                skipped_count=total_skipped,
            )

    except Exception as e:
        logger.error(
            f"Critical error in run_onedrive_indexing for connector {connector_id}: {e}",
            exc_info=True,
        )
        if notification:
            # Best effort: a failure to update the notification is logged and
            # not re-raised.
            try:
                await session.refresh(notification)
                await NotificationService.connector_indexing.notify_indexing_completed(
                    session=session,
                    notification=notification,
                    indexed_count=0,
                    error_message=str(e),
                )
            except Exception as notif_error:
                logger.error(f"Failed to update notification: {notif_error!s}")
class OneDriveAuthCredentialsBase(BaseModel):
    """Microsoft OneDrive OAuth credentials.

    ``expires_at`` is always normalised to a timezone-aware datetime by the
    ``ensure_aware_utc`` validator, so comparisons against
    ``datetime.now(UTC)`` are safe.
    """

    access_token: str
    refresh_token: str | None = None
    token_type: str = "Bearer"
    expires_in: int | None = None
    expires_at: datetime | None = None
    scope: str | None = None
    user_email: str | None = None
    user_name: str | None = None
    tenant_id: str | None = None

    @property
    def is_expired(self) -> bool:
        """Return True when ``expires_at`` is set and is not in the future.

        Credentials without an ``expires_at`` are reported as not expired.
        """
        if self.expires_at is None:
            return False
        return self.expires_at <= datetime.now(UTC)

    @property
    def is_refreshable(self) -> bool:
        """Return True when a refresh token is present."""
        return self.refresh_token is not None

    def to_dict(self) -> dict:
        """Serialise the credentials to a plain dict.

        ``expires_at`` is emitted as an ISO-8601 string (or ``None``).
        """
        return {
            "access_token": self.access_token,
            "refresh_token": self.refresh_token,
            "token_type": self.token_type,
            "expires_in": self.expires_in,
            "expires_at": self.expires_at.isoformat() if self.expires_at else None,
            "scope": self.scope,
            "user_email": self.user_email,
            "user_name": self.user_name,
            "tenant_id": self.tenant_id,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "OneDriveAuthCredentialsBase":
        """Build credentials from a dict such as the one produced by ``to_dict``.

        ``expires_at`` may be an ISO-8601 string, a ``datetime`` or missing or
        empty. The raw value is handed to the ``ensure_aware_utc`` validator
        instead of being parsed here, so a ``datetime`` no longer raises
        ``TypeError`` and the parsing rules live in one place.

        Raises:
            pydantic.ValidationError: If ``expires_at`` is a string that is
                not a valid ISO-8601 timestamp.
        """
        return cls(
            access_token=data.get("access_token", ""),
            refresh_token=data.get("refresh_token"),
            token_type=data.get("token_type", "Bearer"),
            expires_in=data.get("expires_in"),
            # Falsy values (missing key, None, "") mean "no expiry known".
            expires_at=data.get("expires_at") or None,
            scope=data.get("scope"),
            user_email=data.get("user_email"),
            user_name=data.get("user_name"),
            tenant_id=data.get("tenant_id"),
        )

    @field_validator("expires_at", mode="before")
    @classmethod
    def ensure_aware_utc(cls, v):
        """Coerce ``expires_at`` input to a timezone-aware datetime.

        Strings are parsed as ISO-8601 (a trailing ``Z`` is treated as
        ``+00:00``); naive datetimes are tagged as UTC. Any other value is
        returned unchanged for pydantic to validate.
        """
        if isinstance(v, str):
            if v.endswith("Z"):
                return datetime.fromisoformat(v.replace("Z", "+00:00"))
            dt = datetime.fromisoformat(v)
            return dt if dt.tzinfo else dt.replace(tzinfo=UTC)
        if isinstance(v, datetime):
            return v if v.tzinfo else v.replace(tzinfo=UTC)
        return v
class OneDriveKBSyncService:
    """Creates a knowledge-base ``Document`` for a OneDrive file right after it is created."""

    def __init__(self, db_session: AsyncSession):
        # Session used for every query and write performed by this service.
        self.db_session = db_session

    async def sync_after_create(
        self,
        file_id: str,
        file_name: str,
        mime_type: str,
        web_url: str | None,
        content: str | None,
        connector_id: int,
        search_space_id: int,
        user_id: str,
    ) -> dict:
        """Index a newly created OneDrive file as a ``Document``.

        Args:
            file_id: OneDrive file ID; hashed into the document's unique identifier.
            file_name: Used as the document title and in the fallback summary.
            mime_type: Stored in metadata and used in the placeholder content.
            web_url: Stored in metadata as ``web_url``.
            content: File text; when empty or ``None`` a one-line placeholder
                describing the file is indexed instead.
            connector_id: Connector the document is attached to.
            search_space_id: Search space the document belongs to.
            user_id: Owner; used to pick the LLM and stored as ``created_by_id``.

        Returns:
            ``{"status": "success"}`` when the document was created or already
            existed, otherwise ``{"status": "error", "message": ...}``. No
            exception escapes this method.
        """
        from app.tasks.connector_indexers.base import (
            check_document_by_unique_identifier,
            check_duplicate_document_by_hash,
            get_current_timestamp,
            safe_set_chunks,
        )

        try:
            unique_hash = compute_identifier_hash(
                DocumentType.ONEDRIVE_FILE.value, file_id, search_space_id
            )

            # An existing document for this file is treated as success, not an error.
            existing = await check_document_by_unique_identifier(
                self.db_session, unique_hash
            )
            if existing:
                logger.info(
                    "Document for OneDrive file %s already exists (doc_id=%s), skipping",
                    file_id,
                    existing.id,
                )
                return {"status": "success"}

            indexable_content = (content or "").strip()
            if not indexable_content:
                # Nothing to index: fall back to a short description of the file.
                indexable_content = (
                    f"OneDrive file: {file_name} (type: {mime_type})"
                )

            content_hash = generate_content_hash(indexable_content, search_space_id)

            with self.db_session.no_autoflush:
                dup = await check_duplicate_document_by_hash(
                    self.db_session, content_hash
                )
            if dup:
                logger.info(
                    "Content-hash collision for OneDrive file %s — identical content "
                    "exists in doc %s. Using unique_identifier_hash as content_hash.",
                    file_id,
                    dup.id,
                )
                # Swap in the per-file hash so this document's content_hash
                # differs from the existing document's.
                content_hash = unique_hash

            user_llm = await get_user_long_context_llm(
                self.db_session,
                user_id,
                search_space_id,
                disable_streaming=True,
            )

            doc_metadata_for_summary = {
                "file_name": file_name,
                "mime_type": mime_type,
                "document_type": "OneDrive File",
                "connector_type": "OneDrive",
            }

            if user_llm:
                summary_content, summary_embedding = await generate_document_summary(
                    indexable_content, user_llm, doc_metadata_for_summary
                )
            else:
                # Without an LLM the "summary" is the title plus the raw content.
                logger.warning("No LLM configured — using fallback summary")
                summary_content = (
                    f"OneDrive File: {file_name}\n\n{indexable_content}"
                )
                summary_embedding = embed_text(summary_content)

            chunks = await create_document_chunks(indexable_content)
            # NOTE(review): datetime.now() is naive local time — confirm whether
            # indexed_at is expected to be UTC.
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            document = Document(
                title=file_name,
                document_type=DocumentType.ONEDRIVE_FILE,
                document_metadata={
                    "onedrive_file_id": file_id,
                    "onedrive_file_name": file_name,
                    "onedrive_mime_type": mime_type,
                    "web_url": web_url,
                    "source_connector": "onedrive",
                    "indexed_at": now_str,
                    "connector_id": connector_id,
                },
                content=summary_content,
                content_hash=content_hash,
                unique_identifier_hash=unique_hash,
                embedding=summary_embedding,
                search_space_id=search_space_id,
                connector_id=connector_id,
                # Stores the original ``content`` argument (possibly None), not
                # the placeholder text.
                source_markdown=content,
                updated_at=get_current_timestamp(),
                created_by_id=user_id,
            )

            self.db_session.add(document)
            # Flush before the chunks are attached to the document.
            await self.db_session.flush()
            await safe_set_chunks(self.db_session, document, chunks)
            await self.db_session.commit()

            logger.info(
                "KB sync after create succeeded: doc_id=%s, file=%s, chunks=%d",
                document.id,
                file_name,
                len(chunks),
            )
            return {"status": "success"}

        except Exception as e:
            error_str = str(e).lower()
            # Unique-constraint violations are detected by matching the error
            # text and are logged as a warning rather than an error.
            if (
                "duplicate key value violates unique constraint" in error_str
                or "uniqueviolationerror" in error_str
            ):
                logger.warning(
                    "Duplicate constraint hit during KB sync for file %s. "
                    "Rolling back — periodic indexer will handle it. Error: %s",
                    file_id,
                    e,
                )
                await self.db_session.rollback()
                return {"status": "error", "message": "Duplicate document detected"}

            logger.error(
                "KB sync after create failed for file %s: %s",
                file_id,
                e,
                exc_info=True,
            )
            await self.db_session.rollback()
            return {"status": "error", "message": str(e)}
a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1023,6 +1023,8 @@ def complete_current_step() -> str | None: "delete_linear_issue", "create_google_drive_file", "delete_google_drive_file", + "create_onedrive_file", + "delete_onedrive_file", "create_gmail_draft", "update_gmail_draft", "send_gmail_email", diff --git a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py new file mode 100644 index 000000000..e565f6a6a --- /dev/null +++ b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py @@ -0,0 +1,606 @@ +"""OneDrive indexer using the shared IndexingPipelineService. + +File-level pre-filter (_should_skip_file) handles hash/modifiedDateTime +checks and rename-only detection. download_and_extract_content() +returns markdown which is fed into ConnectorDocument -> pipeline. +""" + +import asyncio +import logging +import time +from collections.abc import Awaitable, Callable + +from sqlalchemy import String, cast, select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm.attributes import flag_modified + +from app.config import config +from app.connectors.onedrive import ( + OneDriveClient, + download_and_extract_content, + get_file_by_id, + get_files_in_folder, +) +from app.connectors.onedrive.file_types import should_skip_file as skip_item +from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService +from app.services.llm_service import get_user_long_context_llm +from app.services.task_logging_service import TaskLoggingService +from app.tasks.connector_indexers.base import ( + 
async def _should_skip_file(
    session: AsyncSession,
    file: dict,
    search_space_id: int,
) -> tuple[bool, str | None]:
    """Decide whether a OneDrive item can be skipped without downloading it.

    Returns ``(skip, reason)``. ``skip`` is False when the item is new or its
    content appears to have changed. It is True, with a reason, when the item
    is filtered out, unchanged, or only renamed; a rename is applied to the
    stored document and committed here.
    """
    file_id = file.get("id")
    file_name = file.get("name", "Unknown")

    if skip_item(file):
        return True, "folder/onenote/remote"
    if not file_id:
        return True, "missing file_id"

    identifier_hash = compute_identifier_hash(
        DocumentType.ONEDRIVE_FILE.value, file_id, search_space_id
    )
    doc = await check_document_by_unique_identifier(session, identifier_hash)

    if not doc:
        # Fall back to matching on the file ID stored in the metadata.
        by_metadata = select(Document).where(
            Document.search_space_id == search_space_id,
            Document.document_type == DocumentType.ONEDRIVE_FILE,
            cast(Document.document_metadata["onedrive_file_id"], String) == file_id,
        )
        doc = (await session.execute(by_metadata)).scalar_one_or_none()
        if not doc:
            return False, None
        # Found via metadata: set its identifier hash to the current one.
        doc.unique_identifier_hash = identifier_hash
        logger.debug(f"Found OneDrive doc by metadata for file_id: {file_id}")

    incoming_mtime = file.get("lastModifiedDateTime")
    meta = doc.document_metadata or {}
    stored_mtime = meta.get("modified_time")

    hashes = file.get("file", {}).get("hashes", {})
    incoming_hash = hashes.get("sha256Hash") or hashes.get("quickXorHash")
    stored_hash = meta.get("sha256_hash") or meta.get("quick_xor_hash")

    if incoming_hash:
        # A hash is available: it must match a stored hash to count as unchanged.
        if not stored_hash or incoming_hash != stored_hash:
            return False, None
    elif not (incoming_mtime and stored_mtime) or incoming_mtime != stored_mtime:
        # No hash: fall back to timestamps, and treat missing data as changed.
        return False, None

    old_name = meta.get("onedrive_file_name")
    if old_name and old_name != file_name:
        doc.title = file_name
        if not doc.document_metadata:
            doc.document_metadata = {}
        doc.document_metadata["onedrive_file_name"] = file_name
        if incoming_mtime:
            doc.document_metadata["modified_time"] = incoming_mtime
        flag_modified(doc, "document_metadata")
        await session.commit()
        logger.info(f"Rename-only update: '{old_name}' -> '{file_name}'")
        return True, f"File renamed: '{old_name}' -> '{file_name}'"

    if DocumentStatus.is_state(doc.status, DocumentStatus.READY):
        return True, "unchanged"
    return True, "skipped (previously failed)"
async def _download_and_index(
    onedrive_client: OneDriveClient,
    session: AsyncSession,
    files: list[dict],
    *,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    enable_summary: bool,
    on_heartbeat: HeartbeatCallbackType | None = None,
) -> tuple[int, int]:
    """Download ``files`` and index whatever was extracted.

    Returns ``(indexed, failed)`` where ``failed`` is the sum of download
    failures and indexing failures.
    """
    docs, failed_downloads = await _download_files_parallel(
        onedrive_client,
        files,
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat,
    )

    # Nothing was extracted, so there is nothing to hand to the pipeline.
    if not docs:
        return 0, failed_downloads

    async def _llm_for(db):
        return await get_user_long_context_llm(db, user_id, search_space_id)

    indexing = IndexingPipelineService(session)
    _, indexed, failed_indexing = await indexing.index_batch_parallel(
        docs,
        _llm_for,
        max_concurrency=3,
        on_heartbeat=on_heartbeat,
    )
    return indexed, failed_downloads + failed_indexing
async def _index_full_scan(
    onedrive_client: OneDriveClient,
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    folder_id: str,
    folder_name: str,
    task_logger: TaskLoggingService,
    log_entry: object,
    max_files: int,
    include_subfolders: bool = True,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
    enable_summary: bool = True,
) -> tuple[int, int]:
    """List a folder, pre-filter its files, then download and index the rest.

    Only the first ``max_files`` listed entries are considered. Returns
    ``(indexed, skipped)``, where ``indexed`` includes rename-only updates.

    Raises:
        Exception: If the folder listing reports an error; the message notes
            an authentication failure when the error mentions 401 or
            "authentication expired".
    """
    await task_logger.log_task_progress(
        log_entry,
        f"Starting full scan of folder: {folder_name}",
        {"stage": "full_scan", "folder_id": folder_id, "include_subfolders": include_subfolders},
    )

    all_files, error = await get_files_in_folder(
        onedrive_client,
        folder_id,
        include_subfolders=include_subfolders,
    )
    if error:
        auth_failed = "401" in error or "authentication expired" in error.lower()
        if auth_failed:
            raise Exception(f"OneDrive authentication failed. Please re-authenticate. (Error: {error})")
        raise Exception(f"Failed to list OneDrive files: {error}")

    pending: list[dict] = []
    renamed_count = 0
    skipped = 0
    for candidate in all_files[:max_files]:
        skip, reason = await _should_skip_file(session, candidate, search_space_id)
        if not skip:
            pending.append(candidate)
        elif reason and "renamed" in reason.lower():
            # Rename-only updates count as indexed.
            renamed_count += 1
        else:
            skipped += 1

    batch_indexed, failed = await _download_and_index(
        onedrive_client,
        session,
        pending,
        connector_id=connector_id,
        search_space_id=search_space_id,
        user_id=user_id,
        enable_summary=enable_summary,
        on_heartbeat=on_heartbeat_callback,
    )

    indexed = renamed_count + batch_indexed
    logger.info(f"Full scan complete: {indexed} indexed, {skipped} skipped, {failed} failed")
    return indexed, skipped
def _resolve_indexing_options(items_dict: dict) -> tuple[int, bool, bool]:
    """Return ``(max_files, include_subfolders, use_delta_sync)`` from ``items_dict``."""
    options = items_dict.get("indexing_options") or {}
    # The indexing route's default options use "max_files_per_folder" and
    # "incremental_sync"; accept those as fallbacks so they are not ignored.
    max_files = options.get("max_files", options.get("max_files_per_folder", 500))
    include_subfolders = options.get("include_subfolders", True)
    use_delta_sync = options.get("use_delta_sync", options.get("incremental_sync", True))
    return max_files, include_subfolders, use_delta_sync


async def _store_folder_delta_link(
    session: AsyncSession, connector: object, folder_id: str, delta_link: str
) -> None:
    """Record ``delta_link`` for ``folder_id`` in the connector config (not committed here)."""
    await session.refresh(connector)
    if "folder_delta_links" not in connector.config:
        connector.config["folder_delta_links"] = {}
    connector.config["folder_delta_links"][folder_id] = delta_link
    # In-place changes to the config dict need an explicit dirty flag.
    flag_modified(connector, "config")


async def index_onedrive_files(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    items_dict: dict,
) -> tuple[int, int, str | None]:
    """Index OneDrive files for a specific connector.

    items_dict format:
        {
            "folders": [{"id": "...", "name": "..."}, ...],
            "files": [{"id": "...", "name": "..."}, ...],
            "indexing_options": {"max_files": 500, "include_subfolders": true, "use_delta_sync": true}
        }

    ``indexing_options`` also accepts ``max_files_per_folder`` and
    ``incremental_sync`` as alternative spellings of ``max_files`` and
    ``use_delta_sync``.

    Returns:
        ``(total_indexed, total_skipped, error_message)``. ``error_message`` is
        ``None`` on success. On any failure the session is rolled back and
        ``(0, 0, message)`` is returned instead of raising.
    """
    task_logger = TaskLoggingService(session, search_space_id)
    log_entry = await task_logger.log_task_start(
        task_name="onedrive_files_indexing",
        source="connector_indexing_task",
        message=f"Starting OneDrive indexing for connector {connector_id}",
        metadata={"connector_id": connector_id, "user_id": str(user_id)},
    )

    try:
        connector = await get_connector_by_id(
            session, connector_id, SearchSourceConnectorType.ONEDRIVE_CONNECTOR
        )
        if not connector:
            error_msg = f"OneDrive connector with ID {connector_id} not found"
            await task_logger.log_task_failure(
                log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
            )
            return 0, 0, error_msg

        # Encrypted tokens cannot be used without the application secret.
        token_encrypted = connector.config.get("_token_encrypted", False)
        if token_encrypted and not config.SECRET_KEY:
            error_msg = "SECRET_KEY not configured but credentials are encrypted"
            await task_logger.log_task_failure(
                log_entry, error_msg, "Missing SECRET_KEY", {"error_type": "MissingSecretKey"}
            )
            return 0, 0, error_msg

        connector_enable_summary = getattr(connector, "enable_summary", True)
        onedrive_client = OneDriveClient(session, connector_id)

        max_files, include_subfolders, use_delta_sync = _resolve_indexing_options(items_dict)

        total_indexed = 0
        total_skipped = 0

        # Index selected individual files
        selected_files = items_dict.get("files", [])
        if selected_files:
            file_tuples = [(f["id"], f.get("name")) for f in selected_files]
            indexed, skipped, errors = await _index_selected_files(
                onedrive_client, session, file_tuples,
                connector_id=connector_id, search_space_id=search_space_id,
                user_id=user_id, enable_summary=connector_enable_summary,
            )
            total_indexed += indexed
            total_skipped += skipped
            # Per-file problems do not fail the run, but they must not be lost.
            for file_error in errors:
                logger.warning(f"OneDrive selected-file indexing issue: {file_error}")

        # Index selected folders
        folders = items_dict.get("folders", [])
        for folder in folders:
            folder_id = folder.get("id", "root")
            folder_name = folder.get("name", "Root")

            folder_delta_links = connector.config.get("folder_delta_links", {})
            delta_link = folder_delta_links.get(folder_id)
            can_use_delta = use_delta_sync and delta_link and connector.last_indexed_at

            if can_use_delta:
                logger.info(f"Using delta sync for folder {folder_name}")
                indexed, skipped, new_delta_link = await _index_with_delta_sync(
                    onedrive_client, session, connector_id, search_space_id, user_id,
                    folder_id, delta_link, task_logger, log_entry, max_files,
                    enable_summary=connector_enable_summary,
                )
                total_indexed += indexed
                total_skipped += skipped

                if new_delta_link:
                    await _store_folder_delta_link(session, connector, folder_id, new_delta_link)

                # Reconciliation full scan
                ri, rs = await _index_full_scan(
                    onedrive_client, session, connector_id, search_space_id, user_id,
                    folder_id, folder_name, task_logger, log_entry, max_files,
                    include_subfolders, enable_summary=connector_enable_summary,
                )
                total_indexed += ri
                total_skipped += rs
            else:
                logger.info(f"Using full scan for folder {folder_name}")
                indexed, skipped = await _index_full_scan(
                    onedrive_client, session, connector_id, search_space_id, user_id,
                    folder_id, folder_name, task_logger, log_entry, max_files,
                    include_subfolders, enable_summary=connector_enable_summary,
                )
                total_indexed += indexed
                total_skipped += skipped

                # Store new delta link for this folder
                _, new_delta_link, _ = await onedrive_client.get_delta(folder_id=folder_id)
                if new_delta_link:
                    await _store_folder_delta_link(session, connector, folder_id, new_delta_link)

        if total_indexed > 0 or folders:
            await update_connector_last_indexed(session, connector, True)

        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed OneDrive indexing for connector {connector_id}",
            {"files_processed": total_indexed, "files_skipped": total_skipped},
        )
        logger.info(f"OneDrive indexing completed: {total_indexed} indexed, {total_skipped} skipped")
        return total_indexed, total_skipped, None

    except SQLAlchemyError as db_error:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry, f"Database error during OneDrive indexing for connector {connector_id}",
            str(db_error), {"error_type": "SQLAlchemyError"},
        )
        logger.error(f"Database error: {db_error!s}", exc_info=True)
        return 0, 0, f"Database error: {db_error!s}"
    except Exception as e:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry, f"Failed to index OneDrive files for connector {connector_id}",
            str(e), {"error_type": type(e).__name__},
        )
        logger.error(f"Failed to index OneDrive files: {e!s}", exc_info=True)
        return 0, 0, f"Failed to index OneDrive files: {e!s}"
SearchSourceConnectorType.JIRA_CONNECTOR: "Jira", @@ -61,6 +62,9 @@ def extract_identifier_from_credentials( if connector_type == SearchSourceConnectorType.TEAMS_CONNECTOR: return credentials.get("tenant_name") + if connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR: + return credentials.get("user_email") + if connector_type == SearchSourceConnectorType.NOTION_CONNECTOR: return credentials.get("workspace_name") diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_onedrive_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_onedrive_pipeline.py new file mode 100644 index 000000000..ee83795a5 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_onedrive_pipeline.py @@ -0,0 +1,100 @@ +"""Integration tests: OneDrive ConnectorDocuments flow through the pipeline.""" + +import pytest +from sqlalchemy import select + +from app.config import config as app_config +from app.db import Document, DocumentStatus, DocumentType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +_EMBEDDING_DIM = app_config.embedding_model_instance.dimension + +pytestmark = pytest.mark.integration + + +def _onedrive_doc(*, unique_id: str, search_space_id: int, connector_id: int, user_id: str) -> ConnectorDocument: + return ConnectorDocument( + title=f"File {unique_id}.docx", + source_markdown=f"## Document\n\nContent from {unique_id}", + unique_id=unique_id, + document_type=DocumentType.ONEDRIVE_FILE, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=True, + fallback_summary=f"File: {unique_id}.docx", + metadata={ + "onedrive_file_id": unique_id, + "onedrive_file_name": f"{unique_id}.docx", + "document_type": "OneDrive File", + }, + ) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def 
test_onedrive_pipeline_creates_ready_document( + db_session, db_search_space, db_connector, db_user, mocker +): + """A OneDrive ConnectorDocument flows through prepare + index to a READY document.""" + space_id = db_search_space.id + doc = _onedrive_doc( + unique_id="od-file-abc", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=str(db_user.id), + ) + + service = IndexingPipelineService(session=db_session) + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + + await service.index(prepared[0], doc, llm=mocker.Mock()) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + row = result.scalars().first() + + assert row is not None + assert row.document_type == DocumentType.ONEDRIVE_FILE + assert DocumentStatus.is_state(row.status, DocumentStatus.READY) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_onedrive_duplicate_content_skipped( + db_session, db_search_space, db_connector, db_user, mocker +): + """Re-indexing a OneDrive doc with the same content is skipped (content hash match).""" + space_id = db_search_space.id + user_id = str(db_user.id) + + doc = _onedrive_doc( + unique_id="od-dup-file", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=user_id, + ) + + service = IndexingPipelineService(session=db_session) + + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + await service.index(prepared[0], doc, llm=mocker.Mock()) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + first_doc = result.scalars().first() + assert first_doc is not None + first_id = first_doc.id + + doc2 = _onedrive_doc( + unique_id="od-dup-file", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=user_id, + ) + + prepared2 = await service.prepare_for_indexing([doc2]) + assert len(prepared2) == 0 or 
(len(prepared2) == 1 and prepared2[0].existing_document is not None) diff --git a/surfsense_backend/tests/unit/connector_indexers/test_onedrive_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_onedrive_parallel.py new file mode 100644 index 000000000..b5c774c6f --- /dev/null +++ b/surfsense_backend/tests/unit/connector_indexers/test_onedrive_parallel.py @@ -0,0 +1,227 @@ +"""Tests for parallel download + indexing in the OneDrive indexer.""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from app.db import DocumentType +from app.tasks.connector_indexers.onedrive_indexer import ( + _download_files_parallel, +) + +pytestmark = pytest.mark.unit + +_USER_ID = "00000000-0000-0000-0000-000000000001" +_CONNECTOR_ID = 42 +_SEARCH_SPACE_ID = 1 + + +def _make_file_dict(file_id: str, name: str, mime: str = "text/plain") -> dict: + return { + "id": file_id, + "name": name, + "file": {"mimeType": mime}, + "lastModifiedDateTime": "2026-01-01T00:00:00Z", + } + + +def _mock_extract_ok(file_id: str, file_name: str): + return ( + f"# Content of {file_name}", + {"onedrive_file_id": file_id, "onedrive_file_name": file_name}, + None, + ) + + +@pytest.fixture +def mock_onedrive_client(): + return MagicMock() + + +@pytest.fixture +def patch_extract(monkeypatch): + def _patch(side_effect=None, return_value=None): + mock = AsyncMock(side_effect=side_effect, return_value=return_value) + monkeypatch.setattr( + "app.tasks.connector_indexers.onedrive_indexer.download_and_extract_content", + mock, + ) + return mock + return _patch + + +# Slice 1: Tracer bullet +async def test_single_file_returns_one_connector_document( + mock_onedrive_client, patch_extract, +): + patch_extract(return_value=_mock_extract_ok("f1", "test.txt")) + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + [_make_file_dict("f1", "test.txt")], + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + 
enable_summary=True, + ) + + assert len(docs) == 1 + assert failed == 0 + assert docs[0].title == "test.txt" + assert docs[0].unique_id == "f1" + assert docs[0].document_type == DocumentType.ONEDRIVE_FILE + + +# Slice 2: Multiple files all produce documents +async def test_multiple_files_all_produce_documents( + mock_onedrive_client, patch_extract, +): + files = [_make_file_dict(f"f{i}", f"file{i}.txt") for i in range(3)] + patch_extract( + side_effect=[_mock_extract_ok(f"f{i}", f"file{i}.txt") for i in range(3)] + ) + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + files, + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + enable_summary=True, + ) + + assert len(docs) == 3 + assert failed == 0 + assert {d.unique_id for d in docs} == {"f0", "f1", "f2"} + + +# Slice 3: Error isolation +async def test_one_download_exception_does_not_block_others( + mock_onedrive_client, patch_extract, +): + files = [_make_file_dict(f"f{i}", f"file{i}.txt") for i in range(3)] + patch_extract( + side_effect=[ + _mock_extract_ok("f0", "file0.txt"), + RuntimeError("network timeout"), + _mock_extract_ok("f2", "file2.txt"), + ] + ) + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + files, + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + enable_summary=True, + ) + + assert len(docs) == 2 + assert failed == 1 + assert {d.unique_id for d in docs} == {"f0", "f2"} + + +# Slice 4: ETL error counts as download failure +async def test_etl_error_counts_as_download_failure( + mock_onedrive_client, patch_extract, +): + files = [_make_file_dict("f0", "good.txt"), _make_file_dict("f1", "bad.txt")] + patch_extract( + side_effect=[ + _mock_extract_ok("f0", "good.txt"), + (None, {}, "ETL failed"), + ] + ) + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + files, + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + 
enable_summary=True, + ) + + assert len(docs) == 1 + assert failed == 1 + + +# Slice 5: Semaphore bound +async def test_concurrency_bounded_by_semaphore( + mock_onedrive_client, monkeypatch, +): + lock = asyncio.Lock() + active = 0 + peak = 0 + + async def _slow_extract(client, file): + nonlocal active, peak + async with lock: + active += 1 + peak = max(peak, active) + await asyncio.sleep(0.05) + async with lock: + active -= 1 + return _mock_extract_ok(file["id"], file["name"]) + + monkeypatch.setattr( + "app.tasks.connector_indexers.onedrive_indexer.download_and_extract_content", + _slow_extract, + ) + + files = [_make_file_dict(f"f{i}", f"file{i}.txt") for i in range(6)] + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + files, + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + enable_summary=True, + max_concurrency=2, + ) + + assert len(docs) == 6 + assert failed == 0 + assert peak <= 2, f"Peak concurrency was {peak}, expected <= 2" + + +# Slice 6: Heartbeat fires +async def test_heartbeat_fires_during_parallel_downloads( + mock_onedrive_client, monkeypatch, +): + import app.tasks.connector_indexers.onedrive_indexer as _mod + + monkeypatch.setattr(_mod, "HEARTBEAT_INTERVAL_SECONDS", 0) + + async def _slow_extract(client, file): + await asyncio.sleep(0.05) + return _mock_extract_ok(file["id"], file["name"]) + + monkeypatch.setattr( + "app.tasks.connector_indexers.onedrive_indexer.download_and_extract_content", + _slow_extract, + ) + + heartbeat_calls: list[int] = [] + + async def _on_heartbeat(count: int): + heartbeat_calls.append(count) + + files = [_make_file_dict(f"f{i}", f"file{i}.txt") for i in range(3)] + + docs, failed = await _download_files_parallel( + mock_onedrive_client, + files, + connector_id=_CONNECTOR_ID, + search_space_id=_SEARCH_SPACE_ID, + user_id=_USER_ID, + enable_summary=True, + on_heartbeat=_on_heartbeat, + ) + + assert len(docs) == 3 + assert failed == 0 + assert 
len(heartbeat_calls) >= 1, "Heartbeat should have fired at least once" diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx index 25eeb4cab..3cd1fffe6 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx @@ -16,6 +16,7 @@ export function getDocumentTypeLabel(type: string): string { FILE: "File", SLACK_CONNECTOR: "Slack", TEAMS_CONNECTOR: "Microsoft Teams", + ONEDRIVE_FILE: "OneDrive", NOTION_CONNECTOR: "Notion", YOUTUBE_VIDEO: "YouTube Video", GITHUB_CONNECTOR: "GitHub", diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx index b32ad0ddf..68d971fc4 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx @@ -38,7 +38,6 @@ import { import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; -import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"; import { Drawer, DrawerContent, @@ -234,6 +233,7 @@ export function DocumentsTableShell({ mentionedDocIds, onToggleChatMention, isSearchMode = false, + onOpenInTab, }: { documents: Document[]; loading: boolean; @@ -253,6 +253,8 @@ export function DocumentsTableShell({ onToggleChatMention?: (doc: Document, mentioned: boolean) => void; /** Whether results are filtered by a search query or type filters */ isSearchMode?: boolean; + /** When 
provided, desktop "Preview" opens a document tab instead of the popup dialog */ + onOpenInTab?: (doc: Document) => void; }) { const t = useTranslations("documents"); const { openDialog } = useDocumentUploadDialog(); @@ -742,9 +744,9 @@ export function DocumentsTableShell({ - handleViewDocument(doc)}> + onOpenInTab ? onOpenInTab(doc) : handleViewDocument(doc)}> - Preview + Open {isEditable && ( )} - {/* Document Content Viewer */} - !open && handleCloseViewer()}> - - - + {/* Document Content Viewer (mobile drawer) */} + !open && handleCloseViewer()}> + + + + {viewingDoc?.title} - - + +
)}
-
-
+ + {/* Document Metadata Viewer (Ctrl+Click) */} - Preview + Open {mobileActionDoc && EDITABLE_DOCUMENT_TYPES.includes( diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index ddbdc9dcc..29bbc0c5c 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -40,7 +40,6 @@ import { Thread } from "@/components/assistant-ui/thread"; import { MobileEditorPanel } from "@/components/editor-panel/editor-panel"; import { MobileHitlEditPanel } from "@/components/hitl-edit-panel/hitl-edit-panel"; import { MobileReportPanel } from "@/components/report-panel/report-panel"; -import { Skeleton } from "@/components/ui/skeleton"; import { useChatSessionStateSync } from "@/hooks/use-chat-session-state"; import { useMessagesSync } from "@/hooks/use-messages-sync"; import { documentsApiService } from "@/lib/apis/documents-api.service"; @@ -144,6 +143,8 @@ const TOOLS_WITH_UI = new Set([ "delete_linear_issue", "create_google_drive_file", "delete_google_drive_file", + "create_onedrive_file", + "delete_onedrive_file", "create_calendar_event", "update_calendar_event", "delete_calendar_event", @@ -902,6 +903,7 @@ export default function NewChatPage() { currentThread, currentUser, disabledTools, + updateChatTabTitle, ] ); diff --git a/surfsense_web/app/dashboard/[search_space_id]/team/team-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/team/team-content.tsx index d46594861..b6f008887 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/team/team-content.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/team/team-content.tsx @@ -763,7 +763,7 @@ function CreateInviteDialog({ - + diff --git a/surfsense_web/app/sitemap.ts b/surfsense_web/app/sitemap.ts index f1f0bad72..e7c0d576e 100644 --- a/surfsense_web/app/sitemap.ts +++ 
b/surfsense_web/app/sitemap.ts @@ -181,6 +181,12 @@ export default function sitemap(): MetadataRoute.Sitemap { changeFrequency: "daily", priority: 0.8, }, + { + url: "https://www.surfsense.com/docs/connectors/microsoft-onedrive", + lastModified, + changeFrequency: "daily", + priority: 0.8, + }, { url: "https://www.surfsense.com/docs/connectors/microsoft-teams", lastModified, diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 9fefecb1c..7be3932af 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -39,6 +39,10 @@ import { CreateGoogleDriveFileToolUI, DeleteGoogleDriveFileToolUI, } from "@/components/tool-ui/google-drive"; +import { + CreateOneDriveFileToolUI, + DeleteOneDriveFileToolUI, +} from "@/components/tool-ui/onedrive"; import { CreateJiraIssueToolUI, DeleteJiraIssueToolUI, @@ -96,6 +100,8 @@ const AssistantMessageInner: FC = () => { delete_linear_issue: DeleteLinearIssueToolUI, create_google_drive_file: CreateGoogleDriveFileToolUI, delete_google_drive_file: DeleteGoogleDriveFileToolUI, + create_onedrive_file: CreateOneDriveFileToolUI, + delete_onedrive_file: DeleteOneDriveFileToolUI, create_calendar_event: CreateCalendarEventToolUI, update_calendar_event: UpdateCalendarEventToolUI, delete_calendar_event: DeleteCalendarEventToolUI, diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx index d4960bd29..f1cf5ee4d 100644 --- a/surfsense_web/components/assistant-ui/connector-popup.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup.tsx @@ -340,10 +340,11 @@ export const ConnectorIndicator = forwardRef { const cfg = connectorConfig || editingConnector.config; - const isDrive = - editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" || - editingConnector.connector_type === 
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR"; - const hasDriveItems = isDrive + const isDriveOrOneDrive = + editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" || + editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + editingConnector.connector_type === "ONEDRIVE_CONNECTOR"; + const hasDriveItems = isDriveOrOneDrive ? ((cfg?.selected_folders as unknown[]) ?? []).length > 0 || ((cfg?.selected_files as unknown[]) ?? []).length > 0 : true; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx index f7f490774..0f6044050 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/composio-drive-config.tsx @@ -13,7 +13,7 @@ import { } from "lucide-react"; import type { FC } from "react"; import { useCallback, useEffect, useState } from "react"; -import { ComposioDriveFolderTree } from "@/components/connectors/composio-drive-folder-tree"; +import { DriveFolderTree, type SelectedFolder } from "@/components/connectors/drive-folder-tree"; import { Label } from "@/components/ui/label"; import { Select, @@ -23,13 +23,9 @@ import { SelectValue, } from "@/components/ui/select"; import { Switch } from "@/components/ui/switch"; +import { connectorsApiService } from "@/lib/apis/connectors-api.service"; import type { ConnectorConfigProps } from "../index"; -interface SelectedFolder { - id: string; - name: string; -} - interface IndexingOptions { max_files_per_folder: number; incremental_sync: boolean; @@ -102,6 +98,16 @@ export const ComposioDriveConfig: FC = ({ connector, onCon setAuthError(true); }, []); + const fetchItems = useCallback( + async (parentId?: string) => { + return connectorsApiService.listComposioDriveFolders({ + 
connector_id: connector.id, + parent_id: parentId, + }); + }, + [connector.id] + ); + const [isEditMode] = useState(() => existingFolders.length > 0 || existingFiles.length > 0); const [isFolderTreeOpen, setIsFolderTreeOpen] = useState(!isEditMode); @@ -255,24 +261,28 @@ export const ComposioDriveConfig: FC = ({ connector, onCon )} {isFolderTreeOpen && ( - )} ) : ( - )} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx index 6b01df9f8..bab993b5d 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/google-drive-config.tsx @@ -242,8 +242,6 @@ export const GoogleDriveConfig: FC = ({ connector, onConfi {totalSelected > 0 ? "Change Selection" : "Select from Google Drive"} - {pickerError && !isAuthExpired &&

{pickerError}

} - {isAuthExpired && (

Your Google Drive authentication has expired. Please re-authenticate using the button diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/onedrive-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/onedrive-config.tsx new file mode 100644 index 000000000..250a353cd --- /dev/null +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/onedrive-config.tsx @@ -0,0 +1,350 @@ +"use client"; + +import { + ChevronDown, + ChevronRight, + File, + FileSpreadsheet, + FileText, + FolderClosed, + Image, + Presentation, + X, +} from "lucide-react"; +import type { FC } from "react"; +import { useCallback, useEffect, useState } from "react"; +import { DriveFolderTree, type SelectedFolder } from "@/components/connectors/drive-folder-tree"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Switch } from "@/components/ui/switch"; +import { connectorsApiService } from "@/lib/apis/connectors-api.service"; +import type { ConnectorConfigProps } from "../index"; + +interface IndexingOptions { + max_files_per_folder: number; + incremental_sync: boolean; + include_subfolders: boolean; +} + +const DEFAULT_INDEXING_OPTIONS: IndexingOptions = { + max_files_per_folder: 100, + incremental_sync: true, + include_subfolders: true, +}; + +function getFileIconFromName(fileName: string, className: string = "size-3.5 shrink-0") { + const lowerName = fileName.toLowerCase(); + if (lowerName.endsWith(".xlsx") || lowerName.endsWith(".xls") || lowerName.endsWith(".csv")) { + return ; + } + if (lowerName.endsWith(".pptx") || lowerName.endsWith(".ppt")) { + return ; + } + if (lowerName.endsWith(".docx") || lowerName.endsWith(".doc") || lowerName.endsWith(".txt")) { + return ; + } + if (/\.(png|jpe?g|gif|webp|svg)$/.test(lowerName)) { + return ; + } + return ; +} + 
+export const OneDriveConfig: FC = ({ connector, onConfigChange }) => { + const existingFolders = + (connector.config?.selected_folders as SelectedFolder[] | undefined) || []; + const existingFiles = (connector.config?.selected_files as SelectedFolder[] | undefined) || []; + const existingIndexingOptions = + (connector.config?.indexing_options as IndexingOptions | undefined) || DEFAULT_INDEXING_OPTIONS; + + const [selectedFolders, setSelectedFolders] = useState(existingFolders); + const [selectedFiles, setSelectedFiles] = useState(existingFiles); + const [indexingOptions, setIndexingOptions] = useState(existingIndexingOptions); + const [authError, setAuthError] = useState(false); + + const isAuthExpired = connector.config?.auth_expired === true || authError; + + const handleAuthError = useCallback(() => { + setAuthError(true); + }, []); + + const fetchItems = useCallback( + async (parentId?: string) => { + return connectorsApiService.listOneDriveFolders({ + connector_id: connector.id, + parent_id: parentId, + }); + }, + [connector.id] + ); + + const [isEditMode] = useState(() => existingFolders.length > 0 || existingFiles.length > 0); + const [isFolderTreeOpen, setIsFolderTreeOpen] = useState(!isEditMode); + + useEffect(() => { + const folders = (connector.config?.selected_folders as SelectedFolder[] | undefined) || []; + const files = (connector.config?.selected_files as SelectedFolder[] | undefined) || []; + const options = + (connector.config?.indexing_options as IndexingOptions | undefined) || + DEFAULT_INDEXING_OPTIONS; + setSelectedFolders(folders); + setSelectedFiles(files); + setIndexingOptions(options); + }, [connector.config]); + + const updateConfig = ( + folders: SelectedFolder[], + files: SelectedFolder[], + options: IndexingOptions + ) => { + if (onConfigChange) { + onConfigChange({ + ...connector.config, + selected_folders: folders, + selected_files: files, + indexing_options: options, + }); + } + }; + + const handleSelectFolders = (folders: 
SelectedFolder[]) => { + setSelectedFolders(folders); + updateConfig(folders, selectedFiles, indexingOptions); + }; + + const handleSelectFiles = (files: SelectedFolder[]) => { + setSelectedFiles(files); + updateConfig(selectedFolders, files, indexingOptions); + }; + + const handleIndexingOptionChange = (key: keyof IndexingOptions, value: number | boolean) => { + const newOptions = { ...indexingOptions, [key]: value }; + setIndexingOptions(newOptions); + updateConfig(selectedFolders, selectedFiles, newOptions); + }; + + const handleRemoveFolder = (folderId: string) => { + const newFolders = selectedFolders.filter((folder) => folder.id !== folderId); + setSelectedFolders(newFolders); + updateConfig(newFolders, selectedFiles, indexingOptions); + }; + + const handleRemoveFile = (fileId: string) => { + const newFiles = selectedFiles.filter((file) => file.id !== fileId); + setSelectedFiles(newFiles); + updateConfig(selectedFolders, newFiles, indexingOptions); + }; + + const totalSelected = selectedFolders.length + selectedFiles.length; + + return ( +

+ {/* Folder & File Selection */} +
+
+

Folder & File Selection

+

+ Select specific folders and/or individual files to index from your OneDrive. +

+
+ + {totalSelected > 0 && ( +
+

+ Selected {totalSelected} item{totalSelected > 1 ? "s" : ""}: {(() => { + const parts: string[] = []; + if (selectedFolders.length > 0) { + parts.push( + `${selectedFolders.length} folder${selectedFolders.length > 1 ? "s" : ""}` + ); + } + if (selectedFiles.length > 0) { + parts.push(`${selectedFiles.length} file${selectedFiles.length > 1 ? "s" : ""}`); + } + return parts.length > 0 ? `(${parts.join(", ")})` : ""; + })()} +

+
+ {selectedFolders.map((folder) => ( +
+ + {folder.name} + +
+ ))} + {selectedFiles.map((file) => ( +
+ {getFileIconFromName(file.name)} + {file.name} + +
+ ))} +
+
+ )} + + {isAuthExpired && ( +

+ Your OneDrive authentication has expired. Please re-authenticate using the button + below. +

+ )} + + {isEditMode ? ( +
+ + {isFolderTreeOpen && ( + + )} +
+ ) : ( + + )} +
+ + {/* Indexing Options */} +
+
+

Indexing Options

+

+ Configure how files are indexed from your OneDrive. +

+
+ + {/* Max files per folder */} +
+
+
+ +

+ Maximum number of files to index from each folder +

+
+ +
+
+ + {/* Incremental sync toggle */} +
+
+ +

+ Only sync changes since last index (faster). Disable for a full re-index. +

+
+ handleIndexingOptionChange("incremental_sync", checked)} + /> +
+ + {/* Include subfolders toggle */} +
+
+ +

+ Recursively index files in subfolders of selected folders +

+
+ handleIndexingOptionChange("include_subfolders", checked)} + /> +
+
+
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx index cef0c99ac..ba43ce823 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx @@ -21,6 +21,7 @@ import { MCPConfig } from "./components/mcp-config"; import { ObsidianConfig } from "./components/obsidian-config"; import { SlackConfig } from "./components/slack-config"; import { TavilyApiConfig } from "./components/tavily-api-config"; +import { OneDriveConfig } from "./components/onedrive-config"; import { TeamsConfig } from "./components/teams-config"; import { WebcrawlerConfig } from "./components/webcrawler-config"; @@ -58,6 +59,8 @@ export function getConnectorConfigComponent( return DiscordConfig; case "TEAMS_CONNECTOR": return TeamsConfig; + case "ONEDRIVE_CONNECTOR": + return OneDriveConfig; case "CONFLUENCE_CONNECTOR": return ConfluenceConfig; case "BOOKSTACK_CONNECTOR": diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index 93d280a15..e50f61692 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -27,6 +27,7 @@ const REAUTH_ENDPOINTS: Partial> = { [EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR]: "/api/v1/auth/composio/connector/reauth", [EnumConnectorName.COMPOSIO_GMAIL_CONNECTOR]: "/api/v1/auth/composio/connector/reauth", [EnumConnectorName.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR]: "/api/v1/auth/composio/connector/reauth", + [EnumConnectorName.ONEDRIVE_CONNECTOR]: "/api/v1/auth/onedrive/connector/reauth", }; 
interface ConnectorEditViewProps { diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts index ab69d4ca2..969ae1897 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts @@ -61,6 +61,13 @@ export const OAUTH_CONNECTORS = [ connectorType: EnumConnectorName.TEAMS_CONNECTOR, authEndpoint: "/api/v1/auth/teams/connector/add/", }, + { + id: "onedrive-connector", + title: "OneDrive", + description: "Search your OneDrive files", + connectorType: EnumConnectorName.ONEDRIVE_CONNECTOR, + authEndpoint: "/api/v1/auth/onedrive/connector/add/", + }, { id: "discord-connector", title: "Discord", diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index 03d8a8fb4..0ee34d7c2 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -729,10 +729,11 @@ export const useConnectorDialog = () => { async (refreshConnectors: () => void) => { if (!indexingConfig || !searchSpaceId) return; - // Validate date range (skip for Google Drive, Composio Drive, and Webcrawler) + // Validate date range (skip for Google Drive, Composio Drive, OneDrive, and Webcrawler) if ( indexingConfig.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && indexingConfig.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && + indexingConfig.connectorType !== "ONEDRIVE_CONNECTOR" && indexingConfig.connectorType !== "WEBCRAWLER_CONNECTOR" ) { const dateRangeValidation = dateRangeSchema.safeParse({ startDate, endDate }); @@ -778,10 +779,11 @@ export const useConnectorDialog = () => { }); } - // Handle 
Google Drive folder selection (regular and Composio) - if ( - (indexingConfig.connectorType === "GOOGLE_DRIVE_CONNECTOR" || - indexingConfig.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR") && + // Handle Google Drive / OneDrive folder selection (regular and Composio) + if ( + (indexingConfig.connectorType === "GOOGLE_DRIVE_CONNECTOR" || + indexingConfig.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + indexingConfig.connectorType === "ONEDRIVE_CONNECTOR") && indexingConnectorConfig ) { const selectedFolders = indexingConnectorConfig.selected_folders as @@ -967,10 +969,11 @@ export const useConnectorDialog = () => { async (refreshConnectors: () => void) => { if (!editingConnector || !searchSpaceId || isSaving) return; - // Validate date range (skip for Google Drive which uses folder selection, Webcrawler which uses config, and non-indexable connectors) + // Validate date range (skip for Google Drive/OneDrive which uses folder selection, Webcrawler which uses config, and non-indexable connectors) if ( editingConnector.is_indexable && editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" && + editingConnector.connector_type !== "ONEDRIVE_CONNECTOR" && editingConnector.connector_type !== "WEBCRAWLER_CONNECTOR" ) { const dateRangeValidation = dateRangeSchema.safeParse({ startDate, endDate }); @@ -986,11 +989,12 @@ export const useConnectorDialog = () => { return; } - // Prevent periodic indexing for Google Drive (regular or Composio) without folders/files selected + // Prevent periodic indexing for Google Drive / OneDrive (regular or Composio) without folders/files selected if ( periodicEnabled && (editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" || - editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR") + editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + editingConnector.connector_type === "ONEDRIVE_CONNECTOR") ) { const selectedFolders = (connectorConfig || 
editingConnector.config)?.selected_folders as | Array<{ id: string; name: string }> @@ -1043,7 +1047,8 @@ export const useConnectorDialog = () => { indexingDescription = "Settings saved."; } else if ( editingConnector.connector_type === "GOOGLE_DRIVE_CONNECTOR" || - editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" + editingConnector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" || + editingConnector.connector_type === "ONEDRIVE_CONNECTOR" ) { // Google Drive (both regular and Composio) uses folder selection from config, not date ranges const selectedFolders = (connectorConfig || editingConnector.config)?.selected_folders as diff --git a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts index 9bf3b61e4..aaa479fce 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts @@ -12,6 +12,7 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record = { // Direct mappings (connector type matches document type) SLACK_CONNECTOR: "SLACK_CONNECTOR", TEAMS_CONNECTOR: "TEAMS_CONNECTOR", + ONEDRIVE_CONNECTOR: "ONEDRIVE_FILE", NOTION_CONNECTOR: "NOTION_CONNECTOR", GITHUB_CONNECTOR: "GITHUB_CONNECTOR", LINEAR_CONNECTOR: "LINEAR_CONNECTOR", diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index da6ad8540..8a1a78807 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -25,6 +25,7 @@ const REAUTH_ENDPOINTS: Partial> = { [EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR]: 
"/api/v1/auth/composio/connector/reauth", [EnumConnectorName.COMPOSIO_GMAIL_CONNECTOR]: "/api/v1/auth/composio/connector/reauth", [EnumConnectorName.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR]: "/api/v1/auth/composio/connector/reauth", + [EnumConnectorName.ONEDRIVE_CONNECTOR]: "/api/v1/auth/onedrive/connector/reauth", [EnumConnectorName.JIRA_CONNECTOR]: "/api/v1/auth/jira/connector/reauth", [EnumConnectorName.CONFLUENCE_CONNECTOR]: "/api/v1/auth/confluence/connector/reauth", }; diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 161400d6d..815c95b68 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -394,7 +394,7 @@ const defaultComponents = memoizeMarkdownComponents({ if (!isCodeBlock) { return ( {children} diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 7817b54c8..68a31d98f 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -1092,6 +1092,12 @@ const TOOL_GROUPS: ToolGroup[] = [ connectorIcon: "google_drive", tooltip: "Create and delete files in Google Drive.", }, + { + label: "OneDrive", + tools: ["create_onedrive_file", "delete_onedrive_file"], + connectorIcon: "onedrive", + tooltip: "Create and delete files in OneDrive.", + }, { label: "Notion", tools: ["create_notion_page", "update_notion_page", "delete_notion_page"], diff --git a/surfsense_web/components/connectors/composio-drive-folder-tree.tsx b/surfsense_web/components/connectors/drive-folder-tree.tsx similarity index 66% rename from surfsense_web/components/connectors/composio-drive-folder-tree.tsx rename to surfsense_web/components/connectors/drive-folder-tree.tsx index 05fbf801b..905d2b7ca 100644 --- a/surfsense_web/components/connectors/composio-drive-folder-tree.tsx +++ 
b/surfsense_web/components/connectors/drive-folder-tree.tsx @@ -12,15 +12,13 @@ import { Image, Presentation, } from "lucide-react"; -import { useEffect, useState } from "react"; +import { useCallback, useEffect, useState } from "react"; import { Checkbox } from "@/components/ui/checkbox"; import { ScrollArea } from "@/components/ui/scroll-area"; import { Spinner } from "@/components/ui/spinner"; -import { useComposioDriveFolders } from "@/hooks/use-composio-drive-folders"; -import { connectorsApiService } from "@/lib/apis/connectors-api.service"; import { cn } from "@/lib/utils"; -interface DriveItem { +export interface DriveItem { id: string; name: string; mimeType: string; @@ -32,73 +30,92 @@ interface DriveItem { interface ItemTreeNode { item: DriveItem; - children: DriveItem[] | null; // null = not loaded, [] = loaded but empty + children: DriveItem[] | null; isExpanded: boolean; isLoading: boolean; } -interface SelectedFolder { +export interface SelectedFolder { id: string; name: string; } -interface ComposioDriveFolderTreeProps { - connectorId: number; +interface DriveFolderTreeProps { + fetchItems: (parentId?: string) => Promise<{ items: DriveItem[] }>; selectedFolders: SelectedFolder[]; onSelectFolders: (folders: SelectedFolder[]) => void; selectedFiles?: SelectedFolder[]; onSelectFiles?: (files: SelectedFolder[]) => void; onAuthError?: (message: string) => void; + rootLabel?: string; + providerName?: string; } -// Helper to get appropriate icon for file type -function getFileIcon(mimeType: string, className: string = "h-4 w-4") { - if (mimeType.includes("spreadsheet") || mimeType.includes("excel")) { +function getFileIcon(mimeType?: string, className: string = "h-4 w-4") { + const type = mimeType ?? 
""; + if (type.includes("spreadsheet") || type.includes("excel")) { return ; } - if (mimeType.includes("presentation") || mimeType.includes("powerpoint")) { + if (type.includes("presentation") || type.includes("powerpoint")) { return ; } - if (mimeType.includes("document") || mimeType.includes("word") || mimeType.includes("text")) { + if (type.includes("document") || type.includes("word") || type.includes("text")) { return ; } - if (mimeType.includes("image")) { + if (type.includes("image")) { return ; } return ; } -export function ComposioDriveFolderTree({ - connectorId, +export function DriveFolderTree({ + fetchItems, selectedFolders, onSelectFolders, selectedFiles = [], onSelectFiles = () => {}, onAuthError, -}: ComposioDriveFolderTreeProps) { + rootLabel = "My Drive", + providerName = "Drive", +}: DriveFolderTreeProps) { const [itemStates, setItemStates] = useState>(new Map()); - - const { - data: rootData, - isLoading: isLoadingRoot, - error: rootError, - } = useComposioDriveFolders({ - connectorId, - }); + const [rootItems, setRootItems] = useState([]); + const [isLoadingRoot, setIsLoadingRoot] = useState(true); + const [rootError, setRootError] = useState(null); useEffect(() => { - if (rootError && onAuthError) { - const msg = rootError instanceof Error ? rootError.message : String(rootError); - if ( - msg.toLowerCase().includes("authentication expired") || - msg.toLowerCase().includes("re-authenticate") - ) { - onAuthError(msg); - } - } - }, [rootError, onAuthError]); + let cancelled = false; + setIsLoadingRoot(true); + setRootError(null); + + fetchItems() + .then((data) => { + if (!cancelled) { + setRootItems(data.items || []); + setIsLoadingRoot(false); + } + }) + .catch((err) => { + if (!cancelled) { + const error = err instanceof Error ? 
err : new Error(String(err)); + setRootError(error); + setIsLoadingRoot(false); + if (onAuthError) { + const msg = error.message; + if ( + msg.toLowerCase().includes("authentication expired") || + msg.toLowerCase().includes("re-authenticate") + ) { + onAuthError(msg); + } + } + } + }); - const rootItems = rootData?.items || []; + return () => { + cancelled = true; + }; + }, [fetchItems, onAuthError]); const isFolderSelected = (folderId: string): boolean => { return selectedFolders.some((f) => f.id === folderId); @@ -124,89 +141,81 @@ export function ComposioDriveFolderTree({ } }; - /** - * Find an item by ID across all loaded items (root and nested). - */ - const findItem = (itemId: string): DriveItem | undefined => { - const state = itemStates.get(itemId); - if (state?.item) return state.item; + const findItem = useCallback( + (itemId: string): DriveItem | undefined => { + const state = itemStates.get(itemId); + if (state?.item) return state.item; - const rootItem = rootItems.find((item) => item.id === itemId); - if (rootItem) return rootItem; + const rootItem = rootItems.find((item) => item.id === itemId); + if (rootItem) return rootItem; - for (const [, nodeState] of itemStates) { - if (nodeState.children) { - const found = nodeState.children.find((child) => child.id === itemId); - if (found) return found; + for (const [, nodeState] of itemStates) { + if (nodeState.children) { + const found = nodeState.children.find((child) => child.id === itemId); + if (found) return found; + } } - } - return undefined; - }; + return undefined; + }, + [itemStates, rootItems] + ); + + const loadFolderContents = useCallback( + async (folderId: string) => { + try { + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + if (existing) { + newMap.set(folderId, { ...existing, isLoading: true }); + } else { + const item = findItem(folderId); + if (item) { + newMap.set(folderId, { + item, + children: null, + isExpanded: false, + isLoading: 
true, + }); + } + } + return newMap; + }); + + const data = await fetchItems(folderId); + const items = data.items || []; + + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + const item = existing?.item || findItem(folderId); - /** - * Load and display contents of a specific folder. - */ - const loadFolderContents = async (folderId: string) => { - try { - setItemStates((prev) => { - const newMap = new Map(prev); - const existing = newMap.get(folderId); - if (existing) { - newMap.set(folderId, { ...existing, isLoading: true }); - } else { - const item = findItem(folderId); if (item) { newMap.set(folderId, { item, - children: null, - isExpanded: false, - isLoading: true, + children: items, + isExpanded: true, + isLoading: false, }); } - } - return newMap; - }); - - const data = await connectorsApiService.listComposioDriveFolders({ - connector_id: connectorId, - parent_id: folderId, - }); - const items = data.items || []; - - setItemStates((prev) => { - const newMap = new Map(prev); - const existing = newMap.get(folderId); - const item = existing?.item || findItem(folderId); - - if (item) { - newMap.set(folderId, { - item, - children: items, - isExpanded: true, - isLoading: false, - }); - } else { - console.error(`Could not find item for folderId: ${folderId}`); - } - return newMap; - }); - } catch (error) { - console.error("Error loading folder contents:", error); - setItemStates((prev) => { - const newMap = new Map(prev); - const existing = newMap.get(folderId); - if (existing) { - newMap.set(folderId, { ...existing, isLoading: false }); - } - return newMap; - }); - } - }; + return newMap; + }); + } catch (error) { + console.error("Error loading folder contents:", error); + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + if (existing) { + newMap.set(folderId, { ...existing, isLoading: false }); + } + return newMap; + }); + } + }, + [fetchItems, findItem] + ); - 
/** - * Toggle folder expand/collapse state. - */ const toggleFolder = async (item: DriveItem) => { if (!item.isFolder) return; @@ -226,9 +235,6 @@ export function ComposioDriveFolderTree({ } }; - /** - * Render a single item (folder or file) with its children. - */ const renderItem = (item: DriveItem, level: number = 0) => { const state = itemStates.get(item.id); const isExpanded = state?.isExpanded || false; @@ -240,7 +246,7 @@ export function ComposioDriveFolderTree({ const childFolders = children?.filter((c) => c.isFolder) || []; const childFiles = children?.filter((c) => !c.isFolder) || []; - const indentSize = 0.75; // Smaller indent for mobile + const indentSize = 0.75; return (
toggleFolderSelection("root", "My Drive")} + onCheckedChange={() => toggleFolderSelection("root", rootLabel)} className="shrink-0 h-3.5 w-3.5 sm:h-4 sm:w-4 border-slate-400/20 dark:border-white/20" />
@@ -372,17 +378,15 @@ export function ComposioDriveFolderTree({ {!isLoadingRoot && rootError && (
- {(rootError instanceof Error ? rootError.message : String(rootError)).includes( - "authentication expired" - ) - ? "Google Drive authentication has expired. Please re-authenticate above." - : "Failed to load Google Drive contents."} + {rootError.message.includes("authentication expired") + ? `${providerName} authentication has expired. Please re-authenticate above.` + : `Failed to load ${providerName} contents.`}
)} {!isLoadingRoot && !rootError && rootItems.length === 0 && (
- No files or folders found in your Google Drive + No files or folders found in your {providerName}
)} diff --git a/surfsense_web/components/documents/CreateFolderDialog.tsx b/surfsense_web/components/documents/CreateFolderDialog.tsx index f37c0263c..55548146f 100644 --- a/surfsense_web/components/documents/CreateFolderDialog.tsx +++ b/surfsense_web/components/documents/CreateFolderDialog.tsx @@ -84,7 +84,7 @@ export function CreateFolderDialog({ /> - + - + e.stopPropagation()}> onPreview(doc)}> Open @@ -254,7 +254,7 @@ export const DocumentNode = React.memo(function DocumentNode({ {contextMenuOpen && ( - + e.stopPropagation()}> onPreview(doc)}> Open diff --git a/surfsense_web/components/documents/FolderPickerDialog.tsx b/surfsense_web/components/documents/FolderPickerDialog.tsx index 3c866e04a..59e02f726 100644 --- a/surfsense_web/components/documents/FolderPickerDialog.tsx +++ b/surfsense_web/components/documents/FolderPickerDialog.tsx @@ -155,7 +155,7 @@ export function FolderPickerDialog({ {renderPickerLevel(null, 1)} - + + )} + + + {/* Context section — pickers in pending */} + {phase === "pending" && interruptData.context && ( + <> +
+
+ {interruptData.context.error ? ( +

{interruptData.context.error}

+ ) : ( + <> + {accounts.length > 0 && ( +
+

+ OneDrive Account * +

+ +
+ )} + +
+

+ File Type +

+ +
+ + {selectedAccountId && ( +
+

Parent Folder

+ + {availableParentFolders.length === 0 && ( +

+ No folders found. File will be created at OneDrive root. +

+ )} +
+ )} + + )} +
+ + )} + +
+
+ {(pendingEdits?.name ?? args.name) != null && ( +

{String(pendingEdits?.name ?? args.name)}

+ )} + {(pendingEdits?.content ?? args.content) != null && ( +
+ +
+ )} +
+ + {phase === "pending" && ( + <> +
+
+ {allowedDecisions.includes("approve") && ( + + )} + {allowedDecisions.includes("reject") && ( + + )} +
+ + )} +
+ ); +} + +function ErrorCard({ result }: { result: ErrorResult }) { + return ( +
+
+

Failed to create OneDrive file

+
+
+

{result.message}

+
+ ); +} + +function AuthErrorCard({ result }: { result: AuthErrorResult }) { + return ( +
+
+

OneDrive authentication expired

+
+
+

{result.message}

+
+ ); +} + +function SuccessCard({ result }: { result: SuccessResult }) { + return ( +
+
+

{result.message || "OneDrive file created successfully"}

+
+
+
+
+ + {result.name} +
+ {result.web_url && ( + + )} +
+
+ ); +} + +export const CreateOneDriveFileToolUI = ({ args, result }: ToolCallMessagePartProps<{ name: string; content?: string }, CreateOneDriveFileResult>) => { + if (!result) return null; + if (isInterruptResult(result)) { + return { window.dispatchEvent(new CustomEvent("hitl-decision", { detail: { decisions: [decision] } })); }} />; + } + if (typeof result === "object" && result !== null && "status" in result && (result as { status: string }).status === "rejected") return null; + if (isAuthErrorResult(result)) return ; + if (isErrorResult(result)) return ; + return ; +}; diff --git a/surfsense_web/components/tool-ui/onedrive/index.ts b/surfsense_web/components/tool-ui/onedrive/index.ts new file mode 100644 index 000000000..4872112ba --- /dev/null +++ b/surfsense_web/components/tool-ui/onedrive/index.ts @@ -0,0 +1,2 @@ +export { CreateOneDriveFileToolUI } from "./create-file"; +export { DeleteOneDriveFileToolUI } from "./trash-file"; diff --git a/surfsense_web/components/tool-ui/onedrive/trash-file.tsx b/surfsense_web/components/tool-ui/onedrive/trash-file.tsx new file mode 100644 index 000000000..b5efd4fab --- /dev/null +++ b/surfsense_web/components/tool-ui/onedrive/trash-file.tsx @@ -0,0 +1,219 @@ +"use client"; + +import type { ToolCallMessagePartProps } from "@assistant-ui/react"; +import { CornerDownLeftIcon, InfoIcon } from "lucide-react"; +import { useCallback, useEffect, useState } from "react"; +import { TextShimmerLoader } from "@/components/prompt-kit/loader"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { useHitlPhase } from "@/hooks/use-hitl-phase"; + +interface OneDriveAccount { + id: number; + name: string; + user_email?: string; + auth_expired?: boolean; +} + +interface OneDriveFile { + file_id: string; + name: string; + document_id?: number; + web_url?: string; +} + +interface InterruptResult { + __interrupt__: true; + __decided__?: "approve" | "reject"; + __completed__?: 
boolean; + action_requests: Array<{ name: string; args: Record }>; + review_configs: Array<{ action_name: string; allowed_decisions: Array<"approve" | "reject"> }>; + context?: { account?: OneDriveAccount; file?: OneDriveFile; error?: string }; +} + +interface SuccessResult { status: "success"; file_id: string; message?: string; deleted_from_kb?: boolean } +interface ErrorResult { status: "error"; message: string } +interface NotFoundResult { status: "not_found"; message: string } +interface AuthErrorResult { status: "auth_error"; message: string; connector_type?: string } + +type DeleteOneDriveFileResult = InterruptResult | SuccessResult | ErrorResult | NotFoundResult | AuthErrorResult; + +function isInterruptResult(result: unknown): result is InterruptResult { + return typeof result === "object" && result !== null && "__interrupt__" in result && (result as InterruptResult).__interrupt__ === true; +} +function isErrorResult(result: unknown): result is ErrorResult { + return typeof result === "object" && result !== null && "status" in result && (result as ErrorResult).status === "error"; +} +function isNotFoundResult(result: unknown): result is NotFoundResult { + return typeof result === "object" && result !== null && "status" in result && (result as NotFoundResult).status === "not_found"; +} +function isAuthErrorResult(result: unknown): result is AuthErrorResult { + return typeof result === "object" && result !== null && "status" in result && (result as AuthErrorResult).status === "auth_error"; +} + +function ApprovalCard({ interruptData, onDecision }: { + interruptData: InterruptResult; + onDecision: (decision: { type: "approve" | "reject"; message?: string; edited_action?: { name: string; args: Record } }) => void; +}) { + const { phase, setProcessing, setRejected } = useHitlPhase(interruptData); + const [deleteFromKb, setDeleteFromKb] = useState(false); + + const context = interruptData.context; + const account = context?.account; + const file = context?.file; 
+ + const handleApprove = useCallback(() => { + if (phase !== "pending") return; + setProcessing(); + onDecision({ + type: "approve", + edited_action: { + name: interruptData.action_requests[0].name, + args: { file_id: file?.file_id, connector_id: account?.id, delete_from_kb: deleteFromKb }, + }, + }); + }, [phase, setProcessing, onDecision, interruptData, file?.file_id, account?.id, deleteFromKb]); + + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey && !e.ctrlKey && !e.metaKey) handleApprove(); + }; + window.addEventListener("keydown", handler); + return () => window.removeEventListener("keydown", handler); + }, [handleApprove]); + + return ( +
+
+
+

+ {phase === "rejected" ? "OneDrive File Deletion Rejected" : phase === "processing" || phase === "complete" ? "OneDrive File Deletion Approved" : "Delete OneDrive File"} +

+ {phase === "processing" ? ( + + ) : phase === "complete" ? ( +

File trashed

+ ) : phase === "rejected" ? ( +

File deletion was cancelled

+ ) : ( +

Requires your approval to proceed

+ )} +
+
+ + {phase !== "rejected" && context && ( + <> +
+
+ {context.error ? ( +

{context.error}

+ ) : ( + <> + {account && ( +
+

OneDrive Account

+
{account.name}
+
+ )} + {file && ( +
+

File to Delete

+
+
{file.name}
+ {file.web_url && ( + Open in OneDrive + )} +
+
+ )} + + )} +
+ + )} + + {phase === "pending" && ( + <> +
+
+

The file will be moved to the OneDrive recycle bin. You can restore it from there for a limited time (30 days for personal accounts, 93 days by default for work or school accounts).

+
+ setDeleteFromKb(v === true)} className="shrink-0" /> + +
+
+ + )} + + {phase === "pending" && ( + <> +
+
+ + +
+ + )} +
+ ); +} + +function ErrorCard({ result }: { result: ErrorResult }) { + return ( +
+

Failed to delete file

+
+

{result.message}

+
+ ); +} + +function NotFoundCard({ result }: { result: NotFoundResult }) { + return ( +
+
+ +

{result.message}

+
+
+ ); +} + +function AuthErrorCard({ result }: { result: AuthErrorResult }) { + return ( +
+

OneDrive authentication expired

+
+

{result.message}

+
+ ); +} + +function SuccessCard({ result }: { result: SuccessResult }) { + return ( +
+

{result.message || "File moved to recycle bin"}

+ {result.deleted_from_kb && ( + <> +
+
Also removed from knowledge base
+ + )} +
+ ); +} + +export const DeleteOneDriveFileToolUI = ({ result }: ToolCallMessagePartProps<{ file_name: string; delete_from_kb?: boolean }, DeleteOneDriveFileResult>) => { + if (!result) return null; + if (isInterruptResult(result)) { + return { window.dispatchEvent(new CustomEvent("hitl-decision", { detail: { decisions: [decision] } })); }} />; + } + if (typeof result === "object" && result !== null && "status" in result && (result as { status: string }).status === "rejected") return null; + if (isAuthErrorResult(result)) return ; + if (isNotFoundResult(result)) return ; + if (isErrorResult(result)) return ; + return ; +}; diff --git a/surfsense_web/components/ui/alert-dialog.tsx b/surfsense_web/components/ui/alert-dialog.tsx index b76cb8ed7..ec0d3cbd5 100644 --- a/surfsense_web/components/ui/alert-dialog.tsx +++ b/surfsense_web/components/ui/alert-dialog.tsx @@ -68,7 +68,7 @@ function AlertDialogFooter({ className, ...props }: React.ComponentProps<"div">) return (
); diff --git a/surfsense_web/components/ui/dialog.tsx b/surfsense_web/components/ui/dialog.tsx index f13da44a9..5b2ca36e1 100644 --- a/surfsense_web/components/ui/dialog.tsx +++ b/surfsense_web/components/ui/dialog.tsx @@ -60,7 +60,7 @@ DialogHeader.displayName = "DialogHeader"; const DialogFooter = ({ className, ...props }: React.HTMLAttributes) => (
); diff --git a/surfsense_web/content/docs/connectors/index.mdx b/surfsense_web/content/docs/connectors/index.mdx index 501b1fc0b..93caf807d 100644 --- a/surfsense_web/content/docs/connectors/index.mdx +++ b/surfsense_web/content/docs/connectors/index.mdx @@ -53,6 +53,11 @@ Connect SurfSense to your favorite tools and services. Browse the available inte description="Connect your Microsoft Teams to SurfSense" href="/docs/connectors/microsoft-teams" /> + + Microsoft OneDrive and [Microsoft Teams](/docs/connectors/microsoft-teams) share the same Azure App Registration. If you have already created an app for Teams, you can reuse the same Client ID and Client Secret. Just make sure both redirect URIs are added (see Step 3). + + +## Step 1: Access Azure App Registrations + +1. Navigate to [portal.azure.com](https://portal.azure.com) +2. In the search bar, type **"app reg"** +3. Select **"App registrations"** from the Services results + +## Step 2: Create New Registration + +1. On the **App registrations** page, click **"+ New registration"** + +## Step 3: Register the Application + +Fill in the application details: + +| Field | Value | +|-------|-------| +| **Name** | `SurfSense` | +| **Supported account types** | Select **"Accounts in any organizational directory (Any Microsoft Entra ID tenant - Multitenant) and personal Microsoft accounts"** | +| **Redirect URI** | Platform: `Web`, URI: `http://localhost:8000/api/v1/auth/onedrive/connector/callback` | + +Click **"Register"** + +After registration, add the Teams redirect URI as well (if you plan to use the Teams connector): + +1. Go to **Authentication** in the left sidebar +2. Under **Platform configurations** > **Web** > **Redirect URIs**, click **Add URI** +3. Add: `http://localhost:8000/api/v1/auth/teams/connector/callback` +4. Click **Save** + +## Step 4: Get Application (Client) ID + +After registration, you will be taken to the app's **Overview** page. Here you will find: + +1. 
Copy the **Application (client) ID** - this is your Client ID +2. Note the **Directory (tenant) ID** if needed + +## Step 5: Create Client Secret + +1. In the left sidebar under **Manage**, click **"Certificates & secrets"** +2. Select the **"Client secrets"** tab +3. Click **"+ New client secret"** +4. Enter a description (e.g., `SurfSense`) and select an expiration period +5. Click **"Add"** +6. **Important**: Copy the secret **Value** immediately. It will not be shown again! + + + Never share your client secret publicly or include it in code repositories. + + +## Step 6: Configure API Permissions + +1. In the left sidebar under **Manage**, click **"API permissions"** +2. Click **"+ Add a permission"** +3. Select **"Microsoft Graph"** +4. Select **"Delegated permissions"** +5. Add the following permissions: + +| Permission | Type | Description | Admin Consent | +|------------|------|-------------|---------------| +| `Files.Read.All` | Delegated | Read all files the user can access | No | +| `Files.ReadWrite.All` | Delegated | Read and write all files the user can access | No | +| `offline_access` | Delegated | Maintain access to data you have given it access to | No | +| `User.Read` | Delegated | Sign in and read user profile | No | + +6. Click **"Add permissions"** + + + All four permissions listed above are required. The connector will not authenticate successfully if any are missing. + + +--- + +## Running SurfSense with Microsoft OneDrive Connector + +Add the Microsoft OAuth credentials to your `.env` file (created during [Docker installation](/docs/docker-installation/docker-compose)): + +```bash +MICROSOFT_CLIENT_ID=your_microsoft_client_id +MICROSOFT_CLIENT_SECRET=your_microsoft_client_secret +ONEDRIVE_REDIRECT_URI=http://localhost:8000/api/v1/auth/onedrive/connector/callback +``` + + + The `MICROSOFT_CLIENT_ID` and `MICROSOFT_CLIENT_SECRET` are shared between the OneDrive and Teams connectors. You only need to set them once. 
+ + +Then restart the services: + +```bash +docker compose up -d +``` diff --git a/surfsense_web/content/docs/connectors/microsoft-teams.mdx b/surfsense_web/content/docs/connectors/microsoft-teams.mdx index aba64da20..166004c1f 100644 --- a/surfsense_web/content/docs/connectors/microsoft-teams.mdx +++ b/surfsense_web/content/docs/connectors/microsoft-teams.mdx @@ -7,6 +7,10 @@ description: Connect your Microsoft Teams to SurfSense This guide walks you through setting up a Microsoft Teams OAuth integration for SurfSense using Azure App Registration. + + Microsoft Teams and [Microsoft OneDrive](/docs/connectors/microsoft-onedrive) share the same Azure App Registration. If you have already created an app for OneDrive, you can reuse the same Client ID and Client Secret. Just make sure both redirect URIs are added (see Step 3). + + ## Step 1: Access Azure App Registrations 1. Navigate to [portal.azure.com](https://portal.azure.com) @@ -33,11 +37,18 @@ Fill in the application details: Click **"Register"** +After registration, add the OneDrive redirect URI as well: + +1. Go to **Authentication** in the left sidebar +2. Under **Platform configurations** > **Web** > **Redirect URIs**, click **Add URI** +3. Add: `http://localhost:8000/api/v1/auth/onedrive/connector/callback` +4. Click **Save** + ![Register Application Form](/docs/connectors/microsoft-teams/azure-register-app.png) ## Step 4: Get Application (Client) ID -After registration, you'll be taken to the app's **Overview** page. Here you'll find: +After registration, you will be taken to the app's **Overview** page. Here you will find: 1. Copy the **Application (client) ID** - this is your Client ID 2. Note the **Directory (tenant) ID** if needed @@ -54,7 +65,7 @@ After registration, you'll be taken to the app's **Overview** page. Here you'll ![Certificates & Secrets - Empty](/docs/connectors/microsoft-teams/azure-certificates-empty.png) -6. 
**Important**: Copy the secret **Value** immediately - it won't be shown again! +6. **Important**: Copy the secret **Value** immediately. It will not be shown again! ![Certificates & Secrets - Created](/docs/connectors/microsoft-teams/azure-certificates-created.png) @@ -90,14 +101,18 @@ After registration, you'll be taken to the app's **Overview** page. Here you'll ## Running SurfSense with Microsoft Teams Connector -Add the Microsoft Teams credentials to your `.env` file (created during [Docker installation](/docs/docker-installation/docker-compose)): +Add the Microsoft OAuth credentials to your `.env` file (created during [Docker installation](/docs/docker-installation/docker-compose)): ```bash -TEAMS_CLIENT_ID=your_microsoft_client_id -TEAMS_CLIENT_SECRET=your_microsoft_client_secret +MICROSOFT_CLIENT_ID=your_microsoft_client_id +MICROSOFT_CLIENT_SECRET=your_microsoft_client_secret TEAMS_REDIRECT_URI=http://localhost:8000/api/v1/auth/teams/connector/callback ``` + + The `MICROSOFT_CLIENT_ID` and `MICROSOFT_CLIENT_SECRET` are shared between the Teams and OneDrive connectors. You only need to set them once. + + Then restart the services: ```bash diff --git a/surfsense_web/content/docs/docker-installation/docker-compose.mdx b/surfsense_web/content/docs/docker-installation/docker-compose.mdx index 1560d3759..25ace2180 100644 --- a/surfsense_web/content/docs/docker-installation/docker-compose.mdx +++ b/surfsense_web/content/docs/docker-installation/docker-compose.mdx @@ -117,7 +117,7 @@ Uncomment the connectors you want to use. 
Redirect URIs follow the pattern `http | Linear | `LINEAR_CLIENT_ID`, `LINEAR_CLIENT_SECRET`, `LINEAR_REDIRECT_URI` | | ClickUp | `CLICKUP_CLIENT_ID`, `CLICKUP_CLIENT_SECRET`, `CLICKUP_REDIRECT_URI` | | Airtable | `AIRTABLE_CLIENT_ID`, `AIRTABLE_CLIENT_SECRET`, `AIRTABLE_REDIRECT_URI` | -| Microsoft Teams | `TEAMS_CLIENT_ID`, `TEAMS_CLIENT_SECRET`, `TEAMS_REDIRECT_URI` | +| Microsoft (Teams & OneDrive) | `MICROSOFT_CLIENT_ID`, `MICROSOFT_CLIENT_SECRET`, `TEAMS_REDIRECT_URI`, `ONEDRIVE_REDIRECT_URI` | ### Observability (optional) diff --git a/surfsense_web/content/docs/manual-installation.mdx b/surfsense_web/content/docs/manual-installation.mdx index 1577b8d8b..05e646d6d 100644 --- a/surfsense_web/content/docs/manual-installation.mdx +++ b/surfsense_web/content/docs/manual-installation.mdx @@ -127,9 +127,10 @@ Edit the `.env` file and set the following variables: | SLACK_CLIENT_ID | (Optional) Slack OAuth client ID | | SLACK_CLIENT_SECRET | (Optional) Slack OAuth client secret | | SLACK_REDIRECT_URI | (Optional) Redirect URI for Slack connector OAuth callback (e.g., `http://localhost:8000/api/v1/auth/slack/connector/callback`) | -| TEAMS_CLIENT_ID | (Optional) Microsoft Teams OAuth client ID | -| TEAMS_CLIENT_SECRET | (Optional) Microsoft Teams OAuth client secret | +| MICROSOFT_CLIENT_ID | (Optional) Microsoft OAuth client ID (shared for Teams and OneDrive) | +| MICROSOFT_CLIENT_SECRET | (Optional) Microsoft OAuth client secret (shared for Teams and OneDrive) | | TEAMS_REDIRECT_URI | (Optional) Redirect URI for Teams connector OAuth callback (e.g., `http://localhost:8000/api/v1/auth/teams/connector/callback`) | +| ONEDRIVE_REDIRECT_URI | (Optional) Redirect URI for OneDrive connector OAuth callback (e.g., `http://localhost:8000/api/v1/auth/onedrive/connector/callback`) | **(Optional) Backend LangSmith Observability:** | ENV VARIABLE | DESCRIPTION | diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts index 
45b13a20b..36d39f4fc 100644 --- a/surfsense_web/contracts/enums/connector.ts +++ b/surfsense_web/contracts/enums/connector.ts @@ -6,6 +6,7 @@ export enum EnumConnectorName { BAIDU_SEARCH_API = "BAIDU_SEARCH_API", SLACK_CONNECTOR = "SLACK_CONNECTOR", TEAMS_CONNECTOR = "TEAMS_CONNECTOR", + ONEDRIVE_CONNECTOR = "ONEDRIVE_CONNECTOR", NOTION_CONNECTOR = "NOTION_CONNECTOR", GITHUB_CONNECTOR = "GITHUB_CONNECTOR", LINEAR_CONNECTOR = "LINEAR_CONNECTOR", diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index c9375a5ca..19b24cd59 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -39,6 +39,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return Slack; case EnumConnectorName.TEAMS_CONNECTOR: return Microsoft Teams; + case EnumConnectorName.ONEDRIVE_CONNECTOR: + return OneDrive; case EnumConnectorName.NOTION_CONNECTOR: return Notion; case EnumConnectorName.DISCORD_CONNECTOR: @@ -98,6 +100,9 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case "GOOGLE_DRIVE_FILE": return Google Drive; + case "ONEDRIVE_FILE": + case "ONEDRIVE_CONNECTOR": + return OneDrive; case "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": return Google Drive; case "COMPOSIO_GMAIL_CONNECTOR": diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index 2204d4e5e..ef089f1f5 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -9,6 +9,7 @@ export const searchSourceConnectorTypeEnum = z.enum([ "BAIDU_SEARCH_API", "SLACK_CONNECTOR", "TEAMS_CONNECTOR", + "ONEDRIVE_CONNECTOR", "NOTION_CONNECTOR", "GITHUB_CONNECTOR", "LINEAR_CONNECTOR", @@ -53,7 +54,7 @@ export const searchSourceConnector = z.object({ export const googleDriveItem = z.object({ id: z.string(), name: z.string(), - mimeType: 
z.string(), + mimeType: z.string().optional().default("application/octet-stream"), isFolder: z.boolean(), parents: z.array(z.string()).optional(), size: z.coerce.number().optional(), diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 5f19915ab..19c730521 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -7,6 +7,7 @@ export const documentTypeEnum = z.enum([ "FILE", "SLACK_CONNECTOR", "TEAMS_CONNECTOR", + "ONEDRIVE_FILE", "NOTION_CONNECTOR", "YOUTUBE_VIDEO", "GITHUB_CONNECTOR", diff --git a/surfsense_web/hooks/use-composio-drive-folders.ts b/surfsense_web/hooks/use-composio-drive-folders.ts deleted file mode 100644 index 31e516286..000000000 --- a/surfsense_web/hooks/use-composio-drive-folders.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { useQuery } from "@tanstack/react-query"; -import { connectorsApiService } from "@/lib/apis/connectors-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; - -interface UseComposioDriveFoldersOptions { - connectorId: number; - parentId?: string; - enabled?: boolean; -} - -export function useComposioDriveFolders({ - connectorId, - parentId, - enabled = true, -}: UseComposioDriveFoldersOptions) { - return useQuery({ - queryKey: cacheKeys.connectors.composioDrive.folders(connectorId, parentId), - queryFn: async () => { - return connectorsApiService.listComposioDriveFolders({ - connector_id: connectorId, - parent_id: parentId, - }); - }, - enabled: enabled && !!connectorId, - staleTime: 5 * 60 * 1000, // 5 minutes - retry: 2, - }); -} diff --git a/surfsense_web/hooks/use-google-picker.ts b/surfsense_web/hooks/use-google-picker.ts index 6dd65f9e3..3a29bcd3e 100644 --- a/surfsense_web/hooks/use-google-picker.ts +++ b/surfsense_web/hooks/use-google-picker.ts @@ -1,6 +1,7 @@ "use client"; import { useCallback, useEffect, useRef, useState } from "react"; +import { toast } from "sonner"; 
import { connectorsApiService } from "@/lib/apis/connectors-api.service"; export interface PickerItem { @@ -159,7 +160,9 @@ export function useGooglePicker({ connectorId, onPicked }: UseGooglePickerOption } if (action === google.picker.Action.ERROR) { - setError("Google Drive encountered an error. Please try again."); + const msg = "Google Drive encountered an error. Please try again."; + setError(msg); + toast.error("Google Drive Picker failed", { description: msg }); } if ( @@ -180,6 +183,7 @@ export function useGooglePicker({ connectorId, onPicked }: UseGooglePickerOption openingRef.current = false; const msg = err instanceof Error ? err.message : "Failed to open Google Picker"; setError(msg); + toast.error("Google Drive Picker failed", { description: msg }); console.error("Google Picker error:", err); } finally { setLoading(false); diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index fafe1a8fa..062d3b780 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -277,6 +277,19 @@ class ConnectorsApiService { }>(`/api/v1/connectors/${connectorId}/drive-picker-token`); }; + /** + * List OneDrive folders and files + */ + listOneDriveFolders = async (request: { connector_id: number; parent_id?: string }) => { + const queryParams = request.parent_id + ? 
`?parent_id=${encodeURIComponent(request.parent_id)}` + : ""; + return baseApiService.get( + `/api/v1/connectors/${request.connector_id}/onedrive/folders${queryParams}`, + listGoogleDriveFoldersResponse + ); + }; + // ============================================================================= // MCP Connector Methods // ============================================================================= diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts index cd0a4d7f6..71965a2cb 100644 --- a/surfsense_web/lib/chat/streaming-state.ts +++ b/surfsense_web/lib/chat/streaming-state.ts @@ -132,11 +132,30 @@ export function buildContentForPersistence( return parts.length > 0 ? parts : [{ type: "text", text: "" }]; } +export type SSEEvent = + | { type: "text-delta"; delta: string } + | { type: "tool-input-start"; toolCallId: string; toolName: string } + | { + type: "tool-input-available"; + toolCallId: string; + toolName: string; + input: Record; + } + | { + type: "tool-output-available"; + toolCallId: string; + output: Record; + } + | { type: "data-thinking-step"; data: ThinkingStepData } + | { type: "data-thread-title-update"; data: { threadId: number; title: string } } + | { type: "data-interrupt-request"; data: Record } + | { type: "error"; errorText: string }; + /** * Async generator that reads an SSE stream and yields parsed JSON objects. * Handles buffering, event splitting, and skips malformed JSON / [DONE] lines. 
*/ -export async function* readSSEStream(response: Response): AsyncGenerator { +export async function* readSSEStream(response: Response): AsyncGenerator { if (!response.body) { throw new Error("No response body"); } diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts index 27da40cc3..623a7b862 100644 --- a/surfsense_web/lib/connectors/utils.ts +++ b/surfsense_web/lib/connectors/utils.ts @@ -8,6 +8,7 @@ export const getConnectorTypeDisplay = (type: string): string => { BAIDU_SEARCH_API: "Baidu Search", SLACK_CONNECTOR: "Slack", TEAMS_CONNECTOR: "Microsoft Teams", + ONEDRIVE_CONNECTOR: "OneDrive", NOTION_CONNECTOR: "Notion", GITHUB_CONNECTOR: "GitHub", LINEAR_CONNECTOR: "Linear", diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts index 883c40a77..17f0e5d1a 100644 --- a/surfsense_web/lib/query-client/cache-keys.ts +++ b/surfsense_web/lib/query-client/cache-keys.ts @@ -79,10 +79,6 @@ export const cacheKeys = { folders: (connectorId: number, parentId?: string) => ["connectors", "google-drive", connectorId, "folders", parentId] as const, }, - composioDrive: { - folders: (connectorId: number, parentId?: string) => - ["connectors", "composio-drive", connectorId, "folders", parentId] as const, - }, }, comments: { byMessage: (messageId: number) => ["comments", "message", messageId] as const, diff --git a/surfsense_web/public/connectors/microsoft-teams.svg b/surfsense_web/public/connectors/microsoft-teams.svg index caa352dff..891dccd9d 100644 --- a/surfsense_web/public/connectors/microsoft-teams.svg +++ b/surfsense_web/public/connectors/microsoft-teams.svg @@ -1,155 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/surfsense_web/public/connectors/onedrive.svg b/surfsense_web/public/connectors/onedrive.svg new file mode 100644 index 000000000..499a4ade6 --- /dev/null +++ b/surfsense_web/public/connectors/onedrive.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file