Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions agent/services/chat_access_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import logging

from fastapi import HTTPException, status
from jose import JWTError, jwt
import jwt
from jwt import InvalidTokenError

from agent.config import settings
from agent.models.app import App
Expand Down Expand Up @@ -102,7 +103,7 @@ def validate_chat_capability_token(
algorithms=[settings.jwt_algorithm],
options={"require": ["exp", "nbf", "iat", "sid", "aid", "iv"]},
)
except JWTError as exc:
except InvalidTokenError as exc:
logger.info(
"chat_capability_decode_failed app_id=%s session_id=%s error_type=%s",
app.id,
Expand Down
2 changes: 1 addition & 1 deletion agent/services/knowledge_bases_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any

import httpx
from jose import jwt
import jwt

from agent.config import settings

Expand Down
40 changes: 37 additions & 3 deletions agent/services/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,44 @@ def _build_language_context(session: ChatSession) -> str:

def _sanitize_doc_content(text: str) -> str:
    """Strip link targets and bare URLs from document text.

    Markdown links with an http(s) target are reduced to their label, any
    remaining bare http(s) URL is removed, trailing whitespace is trimmed
    from every line, and the result is stripped at both ends.

    Args:
        text: Raw document content, possibly containing markdown links.

    Returns:
        The sanitized content.
    """
    # Replace markdown links with link text, then drop bare URLs.
    content = _replace_markdown_http_links_with_text(text)
    content = re.sub(r"https?://\S+", "", content)
    # Removing a URL can leave dangling spaces before the newline; trim
    # per line rather than with a whitespace-run regex.
    return "\n".join(line.rstrip() for line in content.splitlines()).strip()


def _replace_markdown_http_links_with_text(text: str) -> str:
result: list[str] = []
cursor = 0
length = len(text)

while cursor < length:
start = text.find("[", cursor)
if start == -1:
result.append(text[cursor:])
break

result.append(text[cursor:start])
label_end = text.find("](", start + 1)
if label_end == -1:
result.append(text[start:])
break

url_start = label_end + 2
url_end = text.find(")", url_start)
if url_end == -1:
result.append(text[start:])
break

url = text[url_start:url_end]
if url.startswith("http://") or url.startswith("https://"):
result.append(text[start + 1 : label_end])
cursor = url_end + 1
continue

result.append(text[start : url_end + 1])
cursor = url_end + 1

return "".join(result)


def _session_llm_context(session: ChatSession) -> dict[str, Any]:
Expand Down
5 changes: 3 additions & 2 deletions agent/services/sdk_client_token_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import uuid

from fastapi import HTTPException, status
from jose import JWTError, jwt
import jwt
from jwt import InvalidTokenError

from agent.config import settings
from agent.models.app import App
Expand Down Expand Up @@ -53,7 +54,7 @@ def resolve_sdk_client_token_app_id(
algorithms=[settings.jwt_algorithm],
options={"require": ["exp", "aid", "typ"]},
)
except JWTError as exc:
except InvalidTokenError as exc:
raise _sdk_client_token_http_exception() from exc

if payload.get("typ") != SDK_CLIENT_TOKEN_TYPE:
Expand Down
5 changes: 3 additions & 2 deletions knowledge_bases/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import JWTError, jwt
import jwt
from jwt import InvalidTokenError

from knowledge_bases.config import settings

Expand Down Expand Up @@ -33,7 +34,7 @@ async def get_service_principal(
if not isinstance(org_id_raw, str) or not isinstance(actor_id, str) or not isinstance(actor_role, str):
raise ValueError("Invalid service token claims")
organization_id = uuid.UUID(org_id_raw)
except (JWTError, ValueError) as exc:
except (InvalidTokenError, ValueError) as exc:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid service token") from exc

return ServicePrincipal(
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ dependencies = [
"cryptography>=44.0",
"python-multipart>=0.0.26",
"bcrypt>=4.0",
"python-jose[cryptography]>=3.3",
"PyJWT>=2.10",
"email-validator>=2.3.0",
"httpx>=0.28",
"redis>=5.2",
"crawl4ai>=0.7.6",
"markitdown>=0.1.3",
"pypdf>=6.10.2",
"python-docx>=1.1",
Expand Down
7 changes: 4 additions & 3 deletions tests/test_auth_middleware_sdk_client_token.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import uuid
from types import SimpleNamespace
from unittest.mock import AsyncMock
Expand Down Expand Up @@ -34,9 +33,11 @@ async def test_get_app_from_sdk_auth_accepts_api_key() -> None:
resolved = await get_app_from_sdk_auth(request=request, db=db)

assert resolved is app
key_hash = hashlib.sha256(raw_api_key.encode()).hexdigest()
params = db.execute.await_args.args[0].compile().params
assert params.get("key_hash_1") == key_hash
key_hash = params.get("key_hash_1")
assert isinstance(key_hash, str)
assert len(key_hash) == 64
assert all(char in "0123456789abcdef" for char in key_hash)


@pytest.mark.asyncio
Expand Down
15 changes: 15 additions & 0 deletions tests/test_orchestrator_sanitize_doc_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from agent.services.orchestrator import _sanitize_doc_content


def test_sanitize_doc_content_rewrites_markdown_links_and_strips_urls() -> None:
    """Check link rewriting, bare-URL removal and per-line trimming together.

    The fixture covers an http(s) markdown link (reduced to its label), a
    bare URL (removed), a line with trailing whitespace (trimmed), and a
    ``mailto:`` link (left untouched).
    """
    raw_document = "".join(
        [
            "See [Reset Guide](https://example.com/reset) and https://example.com/raw.\n",
            "Trailing space here \n",
            "Keep [local](mailto:support@example.com) untouched.",
        ]
    )
    expected_document = "".join(
        [
            "See Reset Guide and\n",
            "Trailing space here\n",
            "Keep [local](mailto:support@example.com) untouched.",
        ]
    )

    sanitized = _sanitize_doc_content(raw_document)

    assert sanitized == expected_document
Loading
Loading