diff --git a/README.md b/README.md index 0f9bedc..c9cbaaa 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ AUTH_DEV_USER_ID=uuid ``` > [!NOTE] > O modo de autenticação de desenvolvedor só funciona quando `ENVIRONMENT=development`. -> +> > `AUTH_DEV_USER_ID` é opcional. Configure-o para simular um usuário específico durante o desenvolvimento (deve ser um UUID válido). Caso não seja fornecido, um UUID fixo será utilizado. > [!WARNING] @@ -90,9 +90,14 @@ cd backend Configure e execute de acordo com as [instruções do repositório](https://github.com/basedosdados/backend?tab=readme-ov-file#configura%C3%A7%C3%A3o-do-ambiente-de-desenvolvimento). ### 2. Configuração da API do chatbot -Desabilite o modo de autenticação de desenvolvedor e configure as variáveis `JWT_*` no arquivo `.env` da API do chatbot: +Desabilite o modo de autenticação de desenvolvedor: ```bash AUTH_DEV_MODE=false +``` + +Configure as variáveis de autenticação e a URL base da API do website no arquivo `.env` da API do chatbot: +```bash +BASEDOSDADOS_BASE_URL='http://api:8000' JWT_ALGORITHM=jwt-algorithm JWT_SECRET_KEY=jwt-secret-key ``` diff --git a/app/agent/tools.py b/app/agent/tools.py index 36c6fa4..e329807 100644 --- a/app/agent/tools.py +++ b/app/agent/tools.py @@ -104,6 +104,9 @@ } """ +# Shared client for making HTTP requests. +_http_client = httpx.Client(timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT)) + class GoogleAPIError: """Constants for expected Google API error types.""" @@ -278,12 +281,10 @@ def search_datasets(query: str) -> str: Strategy: Start with broad terms like "censo", "ibge", "inep", "rais", then get specific if needed. Next step: Use `get_dataset_details()` with returned dataset IDs. """ # noqa: E501 - with httpx.Client() as client: - response = client.get( - url=SEARCH_URL, - params={"contains": "tables", "q": query, "page_size": PAGE_SIZE}, - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.get( + url=SEARCH_URL, + params={"contains": "tables", "q": query, "page_size": PAGE_SIZE}, + ) response.raise_for_status() data: dict = response.json() @@ -333,15 +334,13 @@ def get_dataset_details(dataset_id: str) -> str: Next step: Use `execute_bigquery_sql()` to execute queries. """ # noqa: E501 - with httpx.Client() as client: - response = client.post( - url=GRAPHQL_URL, - json={ - "query": DATASET_DETAILS_QUERY, - "variables": {"id": dataset_id}, - }, - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.post( + url=GRAPHQL_URL, + json={ + "query": DATASET_DETAILS_QUERY, + "variables": {"id": dataset_id}, + }, + ) response.raise_for_status() data: dict[str, dict[str, dict]] = response.json() @@ -436,11 +435,7 @@ def get_dataset_details(dataset_id: str) -> str: if gcp_dataset_id is not None: filename = gcp_dataset_id.replace("_", "-") - with httpx.Client() as client: - response = client.get( - url=f"{BASE_USAGE_GUIDE_URL}/{filename}.md", - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.get(f"{BASE_USAGE_GUIDE_URL}/{filename}.md") if response.status_code == httpx.codes.OK: usage_guide = response.text.strip() diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py index c238c3c..772f831 100644 --- a/app/api/dependencies/auth.py +++ b/app/api/dependencies/auth.py @@ -1,5 +1,7 @@ +import time from typing import Annotated +import httpx import jwt from fastapi import Depends, HTTPException, status from fastapi.security import OAuth2PasswordBearer @@ -9,6 +11,36 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token", auto_error=False) +_http_client = httpx.AsyncClient() + + +async def _verify_token(token: str) -> bool: + query = """ + mutation verifyToken($token: String!) { + verifyToken(token: $token) { + payload + } + } + """ + start = time.perf_counter() + try: + response = await _http_client.post( + f"{settings.BASEDOSDADOS_BASE_URL}/graphql", + json={"query": query, "variables": {"token": token}}, + ) + response.raise_for_status() + except (httpx.HTTPStatusError, httpx.ConnectError): + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Unable to verify user access", + ) + finally: + elapsed = time.perf_counter() - start + logger.info(f"Token verification elapsed time: {elapsed:.4f}s") + + payload = response.json()["data"]["verifyToken"]["payload"] + return payload["has_chatbot_access"] + async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> int: if settings.AUTH_DEV_MODE and settings.ENVIRONMENT == "development": @@ -48,6 +80,12 @@ async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> i except jwt.exceptions.InvalidTokenError: raise credentials_exception + if not await _verify_token(token): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="User does not have chatbot access", + ) + return user_id diff --git a/compose.yaml b/compose.yaml index 26cd9a3..f80dd0b 100644 --- a/compose.yaml +++ b/compose.yaml @@ -66,6 +66,9 @@ services: volumes: # Mount Google Cloud credentials (read-only) - ${HOME}/.basedosdados/credentials:/app/credentials:ro + networks: + - default + - api_network deploy: # Limit local resources to match our pod resources resources: @@ -79,3 +82,9 @@ services: volumes: pgdata: + +# External network for website api connectivity +# Ref: https://github.com/basedosdados/backend +networks: + api_network: + external: true diff --git a/pyproject.toml b/pyproject.toml index 5bab9aa..11ca196 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ addopts = [ source = ["app"] branch = true concurrency = ["thread", "greenlet"] +omit = ["app/log_config.py"] [tool.coverage.report] exclude_lines = [ diff --git a/tests/app/api/dependencies/test_auth.py b/tests/app/api/dependencies/test_auth.py index 044560f..42fab1f 100644 --- a/tests/app/api/dependencies/test_auth.py +++ b/tests/app/api/dependencies/test_auth.py @@ -1,13 +1,87 @@ import uuid +from unittest.mock import AsyncMock, MagicMock +import httpx import jwt import pytest from fastapi import HTTPException, status -from app.api.dependencies.auth import get_user_id +from app.api.dependencies.auth import _verify_token, get_user_id from app.settings import settings +class TestVerifyToken: + """Tests for _verify_token function.""" + + def _mock_graphql_response(self, has_access: bool): + """Create a mock response for the GraphQL endpoint.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "data": {"verifyToken": {"payload": {"has_chatbot_access": has_access}}} + } + return mock_response + + async def test_returns_true_when_user_has_access( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test returns True when user has chatbot access.""" + mock_response = self._mock_graphql_response(has_access=True) + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) + + result = await _verify_token("valid-token") + + assert result is True + + async def test_returns_false_when_user_lacks_access( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test returns False when user lacks chatbot access.""" + mock_response = self._mock_graphql_response(has_access=False) + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) + + result = await _verify_token("valid-token") + + assert result is False + + async def test_raises_503_on_http_error(self, monkeypatch: pytest.MonkeyPatch): + """Test raises 503 when GraphQL endpoint returns HTTP error.""" + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Server Error", + request=httpx.Request("POST", "http://test"), + response=mock_response, + ) + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) + + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + async def test_raises_503_on_connect_error(self, monkeypatch: pytest.MonkeyPatch): + """Test raises 503 when GraphQL endpoint is unreachable.""" + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock( + post=AsyncMock(side_effect=httpx.ConnectError("Connection refused")) + ), + ) + + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + class TestGetUserId: """Tests for get_user_id dependency.""" @@ -19,10 +93,17 @@ def disable_auth_dev_mode(self, monkeypatch: pytest.MonkeyPatch): settings.model_copy(update={"AUTH_DEV_MODE": False}), ) - async def test_valid_token(self): - """Test decoding a valid JWT token.""" + async def test_valid_token(self, monkeypatch: pytest.MonkeyPatch): + """Test decoding a valid JWT token with chatbot access.""" user_id = str(uuid.uuid4()) + async def mock_verify_token(token: str) -> bool: + return True + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + token = jwt.encode( {"uuid": user_id}, key=settings.JWT_SECRET_KEY, @@ -33,6 +114,57 @@ async def test_valid_token(self): assert result == user_id + async def test_valid_token_without_chatbot_access( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test valid JWT token but user lacks chatbot access raises 403.""" + user_id = str(uuid.uuid4()) + + async def mock_verify_token(token: str) -> bool: + return False + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + + token = jwt.encode( + {"uuid": user_id}, + key=settings.JWT_SECRET_KEY, + algorithm=settings.JWT_ALGORITHM, + ) + + with pytest.raises(HTTPException) as e: + await get_user_id(token) + + assert e.value.status_code == status.HTTP_403_FORBIDDEN + + async def test_verify_token_service_unavailable( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test that 503 is raised when token verification service is unavailable.""" + user_id = str(uuid.uuid4()) + + async def mock_verify_token(token: str) -> bool: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Unable to verify user access", + ) + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + + token = jwt.encode( + {"uuid": user_id}, + key=settings.JWT_SECRET_KEY, + algorithm=settings.JWT_ALGORITHM, + ) + + with pytest.raises(HTTPException) as e: + await get_user_id(token) + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + async def test_missing_user_id_in_payload(self): """Test token with missing user_id raises 401.""" token = jwt.encode( diff --git a/tests/app/api/routers/test_chatbot.py b/tests/app/api/routers/test_chatbot.py index 538af06..a1dc4ee 100644 --- a/tests/app/api/routers/test_chatbot.py +++ b/tests/app/api/routers/test_chatbot.py @@ -62,6 +62,16 @@ def disable_auth_dev_mode(monkeypatch: pytest.MonkeyPatch): ) +@pytest.fixture(autouse=True) +def mock_verify_token(monkeypatch: pytest.MonkeyPatch): + """Mock _verify_token to bypass the external API call.""" + + async def _verify_token(token: str) -> bool: + return True + + monkeypatch.setattr("app.api.dependencies.auth._verify_token", _verify_token) + + @pytest.fixture def access_token(user_id: str) -> str: """Generate a valid JWT access token for testing."""