From e168acff20240078466427fa7484148b0fd2d461 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 13:46:53 -0300 Subject: [PATCH 1/9] chore: omit log_config module from coverage reports --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5bab9aa..11ca196 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ addopts = [ source = ["app"] branch = true concurrency = ["thread", "greenlet"] +omit = ["app/log_config.py"] [tool.coverage.report] exclude_lines = [ From 6890b564d01d70cac780c645eb7bbb095e032e29 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 13:47:16 -0300 Subject: [PATCH 2/9] fix: verify user has access to the chatbot api --- app/api/dependencies/auth.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py index c238c3c..4aff571 100644 --- a/app/api/dependencies/auth.py +++ b/app/api/dependencies/auth.py @@ -1,5 +1,6 @@ from typing import Annotated +import httpx import jwt from fastapi import Depends, HTTPException, status from fastapi.security import OAuth2PasswordBearer @@ -10,6 +11,32 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token", auto_error=False) +async def _verify_token(token: str) -> bool: + query = """ + mutation verifyToken($token: String!) { + verifyToken(token: $token) { + payload + } + } + """ + + try: + async with httpx.AsyncClient() as client: + response = await client.post( + f"{settings.BASEDOSDADOS_BASE_URL}/graphql", + json={"query": query, "variables": {"token": token}}, + ) + response.raise_for_status() + except (httpx.HTTPStatusError, httpx.ConnectError): + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Unable to verify user access", + ) + + payload = response.json()["data"]["verifyToken"]["payload"] + return payload["has_chatbot_access"] + + async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> int: if settings.AUTH_DEV_MODE and settings.ENVIRONMENT == "development": logger.warning( @@ -48,6 +75,12 @@ async def get_user_id(token: Annotated[str | None, Depends(oauth2_scheme)]) -> i except jwt.exceptions.InvalidTokenError: raise credentials_exception + if not await _verify_token(token): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="User does not have chatbot access", + ) + return user_id From 1acff77974d9d601b40418bbca0f0fb99a3803f6 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 13:48:07 -0300 Subject: [PATCH 3/9] chore: add external network for reaching the website api --- compose.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compose.yaml b/compose.yaml index 26cd9a3..f80dd0b 100644 --- a/compose.yaml +++ b/compose.yaml @@ -66,6 +66,9 @@ services: volumes: # Mount Google Cloud credentials (read-only) - ${HOME}/.basedosdados/credentials:/app/credentials:ro + networks: + - default + - api_network deploy: # Limit local resources to match our pod resources resources: @@ -79,3 +82,9 @@ services: volumes: pgdata: + +# External network for website api connectivity +# Ref: https://github.com/basedosdados/backend +networks: + api_network: + external: true From a0dfd4dfbe252e68a1cd8cf74a398139782cc4a5 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 13:49:24 -0300 Subject: [PATCH 4/9] fix: update authentication tests and fixtures --- tests/app/api/dependencies/test_auth.py | 140 +++++++++++++++++++++++- tests/app/api/routers/test_chatbot.py | 10 ++ 2 files changed, 147 insertions(+), 3 deletions(-) diff --git a/tests/app/api/dependencies/test_auth.py b/tests/app/api/dependencies/test_auth.py index 044560f..4dae2d2 100644 --- a/tests/app/api/dependencies/test_auth.py +++ b/tests/app/api/dependencies/test_auth.py @@ -1,13 +1,89 @@ import uuid +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch +import httpx import jwt import pytest from fastapi import HTTPException, status -from app.api.dependencies.auth import get_user_id +from app.api.dependencies.auth import _verify_token, get_user_id from app.settings import settings +class TestVerifyToken: + """Tests for _verify_token function.""" + + def _mock_client(self, mock_response: Any): + """Create a mock httpx.AsyncClient context manager.""" + mock_client = AsyncMock() + mock_client.post.return_value = mock_response + return mock_client + + def _mock_graphql_response(self, has_access: bool): + """Create a mock response for the GraphQL endpoint.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "data": {"verifyToken": {"payload": {"has_chatbot_access": has_access}}} + } + return mock_response + + async def test_returns_true_when_user_has_access(self): + """Test returns True when user has chatbot access.""" + mock_response = self._mock_graphql_response(has_access=True) + mock_client = self._mock_client(mock_response) + + with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: + MockClient.return_value.__aenter__.return_value = mock_client + + result = await _verify_token("valid-token") + + assert result is True + + async def test_returns_false_when_user_lacks_access(self): + """Test returns False when user lacks chatbot access.""" + mock_response = self._mock_graphql_response(has_access=False) + mock_client = self._mock_client(mock_response) + + with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: + MockClient.return_value.__aenter__.return_value = mock_client + + result = await _verify_token("valid-token") + + assert result is False + + async def test_raises_503_on_http_error(self): + """Test raises 503 when GraphQL endpoint returns HTTP error.""" + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Server Error", + request=httpx.Request("POST", "http://test"), + response=mock_response, + ) + mock_client = self._mock_client(mock_response) + + with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: + MockClient.return_value.__aenter__.return_value = mock_client + + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + async def test_raises_503_on_connect_error(self): + """Test raises 503 when GraphQL endpoint is unreachable.""" + mock_client = AsyncMock() + mock_client.post.side_effect = httpx.ConnectError("Connection refused") + + with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: + MockClient.return_value.__aenter__.return_value = mock_client + + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + class TestGetUserId: """Tests for get_user_id dependency.""" @@ -19,10 +95,17 @@ def disable_auth_dev_mode(self, monkeypatch: pytest.MonkeyPatch): settings.model_copy(update={"AUTH_DEV_MODE": False}), ) - async def test_valid_token(self): - """Test decoding a valid JWT token.""" + async def test_valid_token(self, monkeypatch: pytest.MonkeyPatch): + """Test decoding a valid JWT token with chatbot access.""" user_id = str(uuid.uuid4()) + async def mock_verify_token(token: str) -> bool: + return True + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + token = jwt.encode( {"uuid": user_id}, key=settings.JWT_SECRET_KEY, @@ -33,6 +116,57 @@ async def test_valid_token(self): assert result == user_id + async def test_valid_token_without_chatbot_access( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test valid JWT token but user lacks chatbot access raises 403.""" + user_id = str(uuid.uuid4()) + + async def mock_verify_token(token: str) -> bool: + return False + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + + token = jwt.encode( + {"uuid": user_id}, + key=settings.JWT_SECRET_KEY, + algorithm=settings.JWT_ALGORITHM, + ) + + with pytest.raises(HTTPException) as e: + await get_user_id(token) + + assert e.value.status_code == status.HTTP_403_FORBIDDEN + + async def test_verify_token_service_unavailable( + self, monkeypatch: pytest.MonkeyPatch + ): + """Test that 503 is raised when token verification service is unavailable.""" + user_id = str(uuid.uuid4()) + + async def mock_verify_token(token: str) -> bool: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Unable to verify user access", + ) + + monkeypatch.setattr( + "app.api.dependencies.auth._verify_token", mock_verify_token + ) + + token = jwt.encode( + {"uuid": user_id}, + key=settings.JWT_SECRET_KEY, + algorithm=settings.JWT_ALGORITHM, + ) + + with pytest.raises(HTTPException) as e: + await get_user_id(token) + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + async def test_missing_user_id_in_payload(self): """Test token with missing user_id raises 401.""" token = jwt.encode( diff --git a/tests/app/api/routers/test_chatbot.py b/tests/app/api/routers/test_chatbot.py index 538af06..a1dc4ee 100644 --- a/tests/app/api/routers/test_chatbot.py +++ b/tests/app/api/routers/test_chatbot.py @@ -62,6 +62,16 @@ def disable_auth_dev_mode(monkeypatch: pytest.MonkeyPatch): ) +@pytest.fixture(autouse=True) +def mock_verify_token(monkeypatch: pytest.MonkeyPatch): + """Mock _verify_token to bypass the external API call.""" + + async def _verify_token(token: str) -> bool: + return True + + monkeypatch.setattr("app.api.dependencies.auth._verify_token", _verify_token) + + @pytest.fixture def access_token(user_id: str) -> str: """Generate a valid JWT access token for testing.""" From b16e15273b978987500ca97ad9f6efecf949e2aa Mon Sep 17 00:00:00 2001 From: Victor Tornisiello Date: Tue, 3 Mar 2026 14:02:59 -0300 Subject: [PATCH 5/9] docs: update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f9bedc..77d9f53 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,14 @@ cd backend Configure e execute de acordo com as [instruções do repositório](https://github.com/basedosdados/backend?tab=readme-ov-file#configura%C3%A7%C3%A3o-do-ambiente-de-desenvolvimento). ### 2. Configuração da API do chatbot -Desabilite o modo de autenticação de desenvolvedor e configure as variáveis `JWT_*` no arquivo `.env` da API do chatbot: +Desabilite o modo de autenticação de desenvolvedor: ```bash AUTH_DEV_MODE=false +``` + +Configure as variáveis de autenticação e a URL base da API do website no arquivo `.env` da API do chatbot: +```bash +BASEDOSDADOS_BASE_URL='http://api:8000' JWT_ALGORITHM=jwt-algorithm JWT_SECRET_KEY=jwt-secret-key ``` From c7384a8692e64ab4f1fd64ebb6363748d455824b Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 14:09:23 -0300 Subject: [PATCH 6/9] docs: update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 77d9f53..c9cbaaa 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ AUTH_DEV_USER_ID=uuid ``` > [!NOTE] > O modo de autenticação de desenvolvedor só funciona quando `ENVIRONMENT=development`. -> +> > `AUTH_DEV_USER_ID` é opcional. Configure-o para simular um usuário específico durante o desenvolvimento (deve ser um UUID válido). Caso não seja fornecido, um UUID fixo será utilizado. > [!WARNING] From ed0d8760528de0fc019a3a587aba328676c21729 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Tue, 3 Mar 2026 14:43:16 -0300 Subject: [PATCH 7/9] chore: log elapsed time for token verification requests --- app/api/dependencies/auth.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py index 4aff571..42e70a0 100644 --- a/app/api/dependencies/auth.py +++ b/app/api/dependencies/auth.py @@ -1,3 +1,4 @@ +import time from typing import Annotated import httpx @@ -19,7 +20,7 @@ async def _verify_token(token: str) -> bool: } } """ - + start = time.perf_counter() try: async with httpx.AsyncClient() as client: response = await client.post( @@ -32,6 +33,9 @@ async def _verify_token(token: str) -> bool: status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Unable to verify user access", ) + finally: + elapsed = time.perf_counter() - start + logger.info(f"Token verification elapsed time: {elapsed:.4f}s") payload = response.json()["data"]["verifyToken"]["payload"] return payload["has_chatbot_access"] From 314b2f8bfbd7e6f21769183f16c3a70ea7c39e39 Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Wed, 4 Mar 2026 10:24:48 -0300 Subject: [PATCH 8/9] perf: use single http client for making requests --- app/agent/tools.py | 35 +++++++++++++++-------------------- app/api/dependencies/auth.py | 11 ++++++----- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/app/agent/tools.py b/app/agent/tools.py index 36c6fa4..e329807 100644 --- a/app/agent/tools.py +++ b/app/agent/tools.py @@ -104,6 +104,9 @@ } """ +# Shared client for making HTTP requests. +_http_client = httpx.Client(timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT)) + class GoogleAPIError: """Constants for expected Google API error types.""" @@ -278,12 +281,10 @@ def search_datasets(query: str) -> str: Strategy: Start with broad terms like "censo", "ibge", "inep", "rais", then get specific if needed. Next step: Use `get_dataset_details()` with returned dataset IDs. """ # noqa: E501 - with httpx.Client() as client: - response = client.get( - url=SEARCH_URL, - params={"contains": "tables", "q": query, "page_size": PAGE_SIZE}, - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.get( + url=SEARCH_URL, + params={"contains": "tables", "q": query, "page_size": PAGE_SIZE}, + ) response.raise_for_status() data: dict = response.json() @@ -333,15 +334,13 @@ def get_dataset_details(dataset_id: str) -> str: Next step: Use `execute_bigquery_sql()` to execute queries. """ # noqa: E501 - with httpx.Client() as client: - response = client.post( - url=GRAPHQL_URL, - json={ - "query": DATASET_DETAILS_QUERY, - "variables": {"id": dataset_id}, - }, - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.post( + url=GRAPHQL_URL, + json={ + "query": DATASET_DETAILS_QUERY, + "variables": {"id": dataset_id}, + }, + ) response.raise_for_status() data: dict[str, dict[str, dict]] = response.json() @@ -436,11 +435,7 @@ def get_dataset_details(dataset_id: str) -> str: if gcp_dataset_id is not None: filename = gcp_dataset_id.replace("_", "-") - with httpx.Client() as client: - response = client.get( - url=f"{BASE_USAGE_GUIDE_URL}/{filename}.md", - timeout=httpx.Timeout(TIMEOUT, read=READ_TIMEOUT), - ) + response = _http_client.get(f"{BASE_USAGE_GUIDE_URL}/{filename}.md") if response.status_code == httpx.codes.OK: usage_guide = response.text.strip() diff --git a/app/api/dependencies/auth.py b/app/api/dependencies/auth.py index 42e70a0..772f831 100644 --- a/app/api/dependencies/auth.py +++ b/app/api/dependencies/auth.py @@ -11,6 +11,8 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token", auto_error=False) +_http_client = httpx.AsyncClient() + async def _verify_token(token: str) -> bool: query = """ @@ -22,11 +24,10 @@ async def _verify_token(token: str) -> bool: """ start = time.perf_counter() try: - async with httpx.AsyncClient() as client: - response = await client.post( - f"{settings.BASEDOSDADOS_BASE_URL}/graphql", - json={"query": query, "variables": {"token": token}}, - ) + response = await _http_client.post( + f"{settings.BASEDOSDADOS_BASE_URL}/graphql", + json={"query": query, "variables": {"token": token}}, + ) response.raise_for_status() except (httpx.HTTPStatusError, httpx.ConnectError): raise HTTPException( From 5bf80715e6fbe0b09f235e3d19792985aa12647f Mon Sep 17 00:00:00 2001 From: vrtornisiello Date: Wed, 4 Mar 2026 10:26:38 -0300 Subject: [PATCH 9/9] chore: update tests for token verification function --- tests/app/api/dependencies/test_auth.py | 68 ++++++++++++------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/tests/app/api/dependencies/test_auth.py b/tests/app/api/dependencies/test_auth.py index 4dae2d2..42fab1f 100644 --- a/tests/app/api/dependencies/test_auth.py +++ b/tests/app/api/dependencies/test_auth.py @@ -1,6 +1,5 @@ import uuid -from typing import Any -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock import httpx import jwt @@ -14,12 +13,6 @@ class TestVerifyToken: """Tests for _verify_token function.""" - def _mock_client(self, mock_response: Any): - """Create a mock httpx.AsyncClient context manager.""" - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - return mock_client - def _mock_graphql_response(self, has_access: bool): """Create a mock response for the GraphQL endpoint.""" mock_response = MagicMock() @@ -28,31 +21,35 @@ def _mock_graphql_response(self, has_access: bool): } return mock_response - async def test_returns_true_when_user_has_access(self): + async def test_returns_true_when_user_has_access( + self, monkeypatch: pytest.MonkeyPatch + ): """Test returns True when user has chatbot access.""" mock_response = self._mock_graphql_response(has_access=True) - mock_client = self._mock_client(mock_response) - - with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: - MockClient.return_value.__aenter__.return_value = mock_client + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) - result = await _verify_token("valid-token") + result = await _verify_token("valid-token") assert result is True - async def test_returns_false_when_user_lacks_access(self): + async def test_returns_false_when_user_lacks_access( + self, monkeypatch: pytest.MonkeyPatch + ): """Test returns False when user lacks chatbot access.""" mock_response = self._mock_graphql_response(has_access=False) - mock_client = self._mock_client(mock_response) - - with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: - MockClient.return_value.__aenter__.return_value = mock_client + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) - result = await _verify_token("valid-token") + result = await _verify_token("valid-token") assert result is False - async def test_raises_503_on_http_error(self): + async def test_raises_503_on_http_error(self, monkeypatch: pytest.MonkeyPatch): """Test raises 503 when GraphQL endpoint returns HTTP error.""" mock_response = MagicMock() mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( @@ -60,26 +57,27 @@ async def test_raises_503_on_http_error(self): request=httpx.Request("POST", "http://test"), response=mock_response, ) - mock_client = self._mock_client(mock_response) - - with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: - MockClient.return_value.__aenter__.return_value = mock_client + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock(post=AsyncMock(return_value=mock_response)), + ) - with pytest.raises(HTTPException) as e: - await _verify_token("valid-token") + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE - async def test_raises_503_on_connect_error(self): + async def test_raises_503_on_connect_error(self, monkeypatch: pytest.MonkeyPatch): """Test raises 503 when GraphQL endpoint is unreachable.""" - mock_client = AsyncMock() - mock_client.post.side_effect = httpx.ConnectError("Connection refused") - - with patch("app.api.dependencies.auth.httpx.AsyncClient") as MockClient: - MockClient.return_value.__aenter__.return_value = mock_client + monkeypatch.setattr( + "app.api.dependencies.auth._http_client", + MagicMock( + post=AsyncMock(side_effect=httpx.ConnectError("Connection refused")) + ), + ) - with pytest.raises(HTTPException) as e: - await _verify_token("valid-token") + with pytest.raises(HTTPException) as e: + await _verify_token("valid-token") assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE