Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ AUTH_DEV_USER_ID=uuid
```
> [!NOTE]
> O modo de autenticação de desenvolvedor só funciona quando `ENVIRONMENT=development`.
>
>
> `AUTH_DEV_USER_ID` é opcional. Configure-o para simular um usuário específico durante o desenvolvimento (deve ser um UUID válido). Caso não seja fornecido, um UUID fixo será utilizado.

> [!WARNING]
Expand Down Expand Up @@ -90,9 +90,14 @@ cd backend
Configure e execute de acordo com as [instruções do repositório](https://github.com/basedosdados/backend?tab=readme-ov-file#configura%C3%A7%C3%A3o-do-ambiente-de-desenvolvimento).

### 2. Configuração da API do chatbot
Desabilite o modo de autenticação de desenvolvedor e configure as variáveis `JWT_*` no arquivo `.env` da API do chatbot:
Desabilite o modo de autenticação de desenvolvedor:
```bash
AUTH_DEV_MODE=false
```

Configure as variáveis de autenticação e a URL base da API do website no arquivo `.env` da API do chatbot:
```bash
BASEDOSDADOS_BASE_URL='http://api:8000'
JWT_ALGORITHM=jwt-algorithm
JWT_SECRET_KEY=jwt-secret-key
```
Expand Down
35 changes: 15 additions & 20 deletions app/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@
}
"""

# Shared client for making HTTP requests. It is created once at import time so
# the tool functions in this module reuse it instead of opening a new client
# per call. TIMEOUT is the default for every timeout phase; READ_TIMEOUT
# overrides the read phase only.
_http_client = httpx.Client(timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT))


class GoogleAPIError:
"""Constants for expected Google API error types."""
Expand Down Expand Up @@ -278,12 +281,10 @@ def search_datasets(query: str) -> str:
Strategy: Start with broad terms like "censo", "ibge", "inep", "rais", then get specific if needed.
Next step: Use `get_dataset_details()` with returned dataset IDs.
""" # noqa: E501
with httpx.Client() as client:
response = client.get(
url=SEARCH_URL,
params={"contains": "tables", "q": query, "page_size": PAGE_SIZE},
timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT),
)
response = _http_client.get(
url=SEARCH_URL,
params={"contains": "tables", "q": query, "page_size": PAGE_SIZE},
)

response.raise_for_status()
data: dict = response.json()
Expand Down Expand Up @@ -333,15 +334,13 @@ def get_dataset_details(dataset_id: str) -> str:

Next step: Use `execute_bigquery_sql()` to execute queries.
""" # noqa: E501
with httpx.Client() as client:
response = client.post(
url=GRAPHQL_URL,
json={
"query": DATASET_DETAILS_QUERY,
"variables": {"id": dataset_id},
},
timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT),
)
response = _http_client.post(
url=GRAPHQL_URL,
json={
"query": DATASET_DETAILS_QUERY,
"variables": {"id": dataset_id},
},
)

response.raise_for_status()
data: dict[str, dict[str, dict]] = response.json()
Expand Down Expand Up @@ -436,11 +435,7 @@ def get_dataset_details(dataset_id: str) -> str:
if gcp_dataset_id is not None:
filename = gcp_dataset_id.replace("_", "-")

with httpx.Client() as client:
response = client.get(
url=f"{BASE_USAGE_GUIDE_URL}/{filename}.md",
timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT),
)
response = _http_client.get(f"{BASE_USAGE_GUIDE_URL}/{filename}.md")

if response.status_code == httpx.codes.OK:
usage_guide = response.text.strip()
Expand Down
38 changes: 38 additions & 0 deletions app/api/dependencies/auth.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import time
from typing import Annotated

import httpx
import jwt
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
Expand All @@ -9,6 +11,36 @@

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token", auto_error=False)

# Shared async client, created once at import time and reused by
# `_verify_token` for its requests to the website API.
_http_client = httpx.AsyncClient()


async def _verify_token(token: str) -> bool:
    """Ask the website API whether the token's owner may use the chatbot.

    Sends the `verifyToken` GraphQL mutation to
    `{settings.BASEDOSDADOS_BASE_URL}/graphql` and reads the
    `has_chatbot_access` flag from the returned payload.

    Args:
        token: The raw JWT access token received from the client.

    Returns:
        The `has_chatbot_access` value found in the verification payload.

    Raises:
        HTTPException: 503 when the website API cannot be reached (connection
            failure, timeout or any other transport error), answers with an
            error status, or returns a body that does not contain the
            expected verification payload.
    """
    query = """
        mutation verifyToken($token: String!) {
            verifyToken(token: $token) {
                payload
            }
        }
    """
    unavailable_detail = "Unable to verify user access"

    start = time.perf_counter()
    try:
        response = await _http_client.post(
            f"{settings.BASEDOSDADOS_BASE_URL}/graphql",
            json={"query": query, "variables": {"token": token}},
        )
        response.raise_for_status()
    except httpx.HTTPError as exc:
        # httpx.HTTPError is the common base of HTTPStatusError and every
        # transport-level failure (connect errors, timeouts, protocol errors),
        # so none of them can leak out as an unhandled 500.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=unavailable_detail,
        ) from exc
    finally:
        # Logged on both success and failure so slow verifications are visible.
        elapsed = time.perf_counter() - start
        logger.info(f"Token verification elapsed time: {elapsed:.4f}s")

    try:
        payload = response.json()["data"]["verifyToken"]["payload"]
        return payload["has_chatbot_access"]
    except (ValueError, KeyError, TypeError) as exc:
        # A non-JSON body (ValueError) or a GraphQL error response where
        # `data`/`verifyToken`/`payload` is missing or null (KeyError /
        # TypeError) means access could not be verified.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=unavailable_detail,
        ) from exc


async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> int:
if settings.AUTH_DEV_MODE and settings.ENVIRONMENT == "development":
Expand Down Expand Up @@ -48,6 +80,12 @@ async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> i
except jwt.exceptions.InvalidTokenError:
raise credentials_exception

if not await _verify_token(token):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="User does not have chatbot access",
)

return user_id


Expand Down
9 changes: 9 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ services:
volumes:
# Mount Google Cloud credentials (read-only)
- ${HOME}/.basedosdados/credentials:/app/credentials:ro
networks:
- default
- api_network
deploy:
# Limit local resources to match our pod resources
resources:
Expand All @@ -79,3 +82,9 @@ services:

volumes:
pgdata:

# External network for website api connectivity
# Ref: https://github.com/basedosdados/backend
networks:
api_network:
external: true
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ addopts = [
source = ["app"]
branch = true
concurrency = ["thread", "greenlet"]
omit = ["app/log_config.py"]

[tool.coverage.report]
exclude_lines = [
Expand Down
138 changes: 135 additions & 3 deletions tests/app/api/dependencies/test_auth.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,87 @@
import uuid
from unittest.mock import AsyncMock, MagicMock

import httpx
import jwt
import pytest
from fastapi import HTTPException, status

from app.api.dependencies.auth import get_user_id
from app.api.dependencies.auth import _verify_token, get_user_id
from app.settings import settings


class TestVerifyToken:
    """Unit tests covering the `_verify_token` helper."""

    def _mock_graphql_response(self, has_access: bool):
        """Build a fake GraphQL response carrying the given access flag."""
        body = {
            "data": {"verifyToken": {"payload": {"has_chatbot_access": has_access}}}
        }
        return MagicMock(json=MagicMock(return_value=body))

    def _install_client(self, monkeypatch: pytest.MonkeyPatch, post: AsyncMock):
        """Swap the module-level HTTP client for a stub exposing `post`."""
        monkeypatch.setattr(
            "app.api.dependencies.auth._http_client",
            MagicMock(post=post),
        )

    async def test_returns_true_when_user_has_access(
        self, monkeypatch: pytest.MonkeyPatch
    ):
        """A payload granting chatbot access makes the function return True."""
        granted = self._mock_graphql_response(has_access=True)
        self._install_client(monkeypatch, AsyncMock(return_value=granted))

        outcome = await _verify_token("valid-token")

        assert outcome is True

    async def test_returns_false_when_user_lacks_access(
        self, monkeypatch: pytest.MonkeyPatch
    ):
        """A payload denying chatbot access makes the function return False."""
        denied = self._mock_graphql_response(has_access=False)
        self._install_client(monkeypatch, AsyncMock(return_value=denied))

        outcome = await _verify_token("valid-token")

        assert outcome is False

    async def test_raises_503_on_http_error(self, monkeypatch: pytest.MonkeyPatch):
        """An HTTP error status from the GraphQL endpoint is reported as 503."""
        failing = MagicMock()
        failing.raise_for_status.side_effect = httpx.HTTPStatusError(
            "Server Error",
            request=httpx.Request("POST", "http://test"),
            response=failing,
        )
        self._install_client(monkeypatch, AsyncMock(return_value=failing))

        with pytest.raises(HTTPException) as exc_info:
            await _verify_token("valid-token")

        assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE

    async def test_raises_503_on_connect_error(self, monkeypatch: pytest.MonkeyPatch):
        """An unreachable GraphQL endpoint is reported as 503."""
        refused = httpx.ConnectError("Connection refused")
        self._install_client(monkeypatch, AsyncMock(side_effect=refused))

        with pytest.raises(HTTPException) as exc_info:
            await _verify_token("valid-token")

        assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE


class TestGetUserId:
"""Tests for get_user_id dependency."""

Expand All @@ -19,10 +93,17 @@ def disable_auth_dev_mode(self, monkeypatch: pytest.MonkeyPatch):
settings.model_copy(update={"AUTH_DEV_MODE": False}),
)

async def test_valid_token(self):
"""Test decoding a valid JWT token."""
async def test_valid_token(self, monkeypatch: pytest.MonkeyPatch):
"""Test decoding a valid JWT token with chatbot access."""
user_id = str(uuid.uuid4())

async def mock_verify_token(token: str) -> bool:
return True

monkeypatch.setattr(
"app.api.dependencies.auth._verify_token", mock_verify_token
)

token = jwt.encode(
{"uuid": user_id},
key=settings.JWT_SECRET_KEY,
Expand All @@ -33,6 +114,57 @@ async def test_valid_token(self):

assert result == user_id

async def test_valid_token_without_chatbot_access(
    self, monkeypatch: pytest.MonkeyPatch
):
    """A well-formed JWT whose owner lacks chatbot access yields 403."""
    # Stub the remote verification so it reports "no access".
    monkeypatch.setattr(
        "app.api.dependencies.auth._verify_token",
        AsyncMock(return_value=False),
    )
    claims = {"uuid": str(uuid.uuid4())}
    token = jwt.encode(
        claims,
        key=settings.JWT_SECRET_KEY,
        algorithm=settings.JWT_ALGORITHM,
    )

    with pytest.raises(HTTPException) as exc_info:
        await get_user_id(token)

    assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN

async def test_verify_token_service_unavailable(
    self, monkeypatch: pytest.MonkeyPatch
):
    """A 503 raised by token verification propagates out of `get_user_id`."""
    service_down = HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Unable to verify user access",
    )
    # Stub the remote verification so awaiting it raises the 503.
    monkeypatch.setattr(
        "app.api.dependencies.auth._verify_token",
        AsyncMock(side_effect=service_down),
    )
    claims = {"uuid": str(uuid.uuid4())}
    token = jwt.encode(
        claims,
        key=settings.JWT_SECRET_KEY,
        algorithm=settings.JWT_ALGORITHM,
    )

    with pytest.raises(HTTPException) as exc_info:
        await get_user_id(token)

    assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE

async def test_missing_user_id_in_payload(self):
"""Test token with missing user_id raises 401."""
token = jwt.encode(
Expand Down
10 changes: 10 additions & 0 deletions tests/app/api/routers/test_chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ def disable_auth_dev_mode(monkeypatch: pytest.MonkeyPatch):
)


@pytest.fixture(autouse=True)
def mock_verify_token(monkeypatch: pytest.MonkeyPatch):
    """Replace `_verify_token` so tests never call the external API."""

    async def _always_has_access(token: str) -> bool:
        # Every token is treated as having chatbot access.
        return True

    monkeypatch.setattr(
        "app.api.dependencies.auth._verify_token",
        _always_has_access,
    )


@pytest.fixture
def access_token(user_id: str) -> str:
"""Generate a valid JWT access token for testing."""
Expand Down