From b8fb415b227104d48f67ca06e0ea9a1c7afd577d Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 25 Nov 2025 15:37:42 +0800 Subject: [PATCH 1/3] add redis deps to docs --- stac_fastapi/elasticsearch/pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stac_fastapi/elasticsearch/pyproject.toml b/stac_fastapi/elasticsearch/pyproject.toml index bd2eb340..26429e0e 100644 --- a/stac_fastapi/elasticsearch/pyproject.toml +++ b/stac_fastapi/elasticsearch/pyproject.toml @@ -54,6 +54,8 @@ docs = [ "mkdocs~=1.4.0", "mkdocs-material~=9.0.0", "pdocs~=1.2.0", + "redis~=6.4.0", + "retry~=0.9.2", ] redis = [ "stac-fastapi-core[redis]==6.7.5", From 58c83fa0d7a21790c70041c42f0af11c1c45cfd6 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 10 Dec 2025 14:35:34 +0800 Subject: [PATCH 2/3] add /children route --- README.md | 10 + .../stac_fastapi/core/extensions/catalogs.py | 155 +++++++++++++- stac_fastapi/tests/api/test_api.py | 1 + .../tests/extensions/test_catalogs.py | 201 ++++++++++++++++++ 4 files changed, 364 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7744b7f4..33341098 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,7 @@ This implementation follows the [STAC API Catalogs Extension](https://github.com - **POST `/catalogs`**: Create a new catalog (requires appropriate permissions) - **GET `/catalogs/{catalog_id}`**: Retrieve a specific catalog and its children - **DELETE `/catalogs/{catalog_id}`**: Delete a catalog (optionally cascade delete all collections) +- **GET `/catalogs/{catalog_id}/children`**: Retrieve all children (Catalogs and Collections) of this catalog with optional type filtering - **GET `/catalogs/{catalog_id}/collections`**: Retrieve collections within a specific catalog - **POST `/catalogs/{catalog_id}/collections`**: Create a new collection within a specific catalog - **GET `/catalogs/{catalog_id}/collections/{collection_id}`**: Retrieve a specific collection within a catalog @@ -267,6 +268,15 @@ curl 
"http://localhost:8081/catalogs" # Get specific catalog curl "http://localhost:8081/catalogs/earth-observation" +# Get all children (catalogs and collections) of a catalog +curl "http://localhost:8081/catalogs/earth-observation/children" + +# Get only catalog children of a catalog +curl "http://localhost:8081/catalogs/earth-observation/children?type=Catalog" + +# Get only collection children of a catalog +curl "http://localhost:8081/catalogs/earth-observation/children?type=Collection" + # Get collections in a catalog curl "http://localhost:8081/catalogs/earth-observation/collections" diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py b/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py index 96491701..1444a38f 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py @@ -1,8 +1,8 @@ """Catalogs extension.""" import logging -from typing import List, Optional, Type -from urllib.parse import urlencode +from typing import Any, Dict, List, Optional, Type +from urllib.parse import parse_qs, urlencode, urlparse import attr from fastapi import APIRouter, FastAPI, HTTPException, Query, Request @@ -42,7 +42,9 @@ class CatalogsExtension(ApiExtension): client: BaseCoreClient = attr.ib(default=None) settings: dict = attr.ib(default=attr.Factory(dict)) - conformance_classes: List[str] = attr.ib(default=attr.Factory(list)) + conformance_classes: List[str] = attr.ib( + default=attr.Factory(lambda: ["https://api.stacspec.org/v1.0.0-rc.2/children"]) + ) router: APIRouter = attr.ib(default=attr.Factory(APIRouter)) response_class: Type[Response] = attr.ib(default=JSONResponse) @@ -176,6 +178,17 @@ def register(self, app: FastAPI, settings=None) -> None: tags=["Catalogs"], ) + # Add endpoint for Children Extension + self.router.add_api_route( + path="/catalogs/{catalog_id}/children", + endpoint=self.get_catalog_children, + methods=["GET"], + response_class=self.response_class, + 
+            summary="Get Catalog Children",
+            description="Retrieve all children (Catalogs and Collections) of this catalog.",
+            tags=["Catalogs"],
+        )
+
         app.include_router(self.router, tags=["Catalogs"])
 
     async def catalogs(
@@ -852,6 +865,151 @@ async def get_catalog_collection_item(
             item_id=item_id, collection_id=collection_id, request=request
         )
 
+    async def get_catalog_children(
+        self,
+        catalog_id: str,
+        request: Request,
+        limit: int = 10,
+        token: Optional[str] = None,
+        type: Optional[str] = Query(
+            None, description="Filter by resource type (Catalog or Collection)"
+        ),
+    ) -> Dict[str, Any]:
+        """
+        Get all children (Catalogs and Collections) of a specific catalog.
+
+        This is a 'Union' endpoint that returns mixed content types.
+
+        Args:
+            catalog_id: ID of the parent catalog.
+            request: Incoming request, used to build the response links.
+            limit: Maximum number of children per page; must be at least 1.
+            token: Pagination token from a previous response's ``next`` link.
+            type: Optional resource type filter (Catalog or Collection).
+
+        Returns:
+            A dict with ``children``, ``links``, ``numberReturned`` and
+            ``numberMatched``.
+
+        Raises:
+            HTTPException: 400 if ``limit`` is less than 1.
+        """
+        # limit=0 used to satisfy the "page is full" check with no hits and then
+        # crash on hits[-1]; reject page sizes that cannot be paginated.
+        if limit < 1:
+            raise HTTPException(status_code=400, detail="limit must be at least 1")
+
+        # 1. Verify the parent catalog exists
+        # The result is unused; the lookup is expected to raise for a missing
+        # catalog (the tests added in this patch expect a 404).
+        await self.client.database.find_catalog(catalog_id)
+
+        # 2. Build the Search Query
+        # We search the COLLECTIONS_INDEX because it holds both Catalogs and Collections
+
+        # Base filter: Parent match
+        # This finds anything where 'parent_ids' contains this catalog_id
+        filter_queries = [{"term": {"parent_ids": catalog_id}}]
+
+        # Optional filter: Type
+        if type:
+            # If user asks for ?type=Catalog, we only return Catalogs
+            filter_queries.append({"term": {"type": type}})
+
+        # 3. Calculate Pagination (Search After)
+        body: Dict[str, Any] = {
+            "query": {"bool": {"filter": filter_queries}},
+            "sort": [{"id": {"order": "asc"}}],  # Stable sort for pagination
+            # Fetch one look-ahead hit so a "next" link is only emitted when
+            # another page really exists (a full last page used to get one).
+            "size": limit + 1,
+        }
+
+        # There is a single sort key ("id"), so the token is that one sort value.
+        # Pass it through unsplit: splitting on "|" silently discarded the token
+        # (replaying page one) whenever an id itself contained "|".
+        if token:
+            body["search_after"] = [token]
+
+        # 4. Execute Search
+        search_result = await self.client.database.client.search(
+            index=COLLECTIONS_INDEX, body=body
+        )
+
+        # 5. Process Results
+        fetched_hits = search_result.get("hits", {}).get("hits", [])
+        total = search_result.get("hits", {}).get("total", {}).get("value", 0)
+
+        has_next_page = len(fetched_hits) > limit
+        hits = fetched_hits[:limit]  # Drop the look-ahead hit
+
+        children = []
+        for hit in hits:
+            doc = hit["_source"]
+            # Default to Collection if missing
+            resource_type = doc.get("type", "Collection")
+
+            # Serialize based on type
+            # This ensures we hide internal fields like 'parent_ids' correctly
+            if resource_type == "Catalog":
+                child = self.client.catalog_serializer.db_to_stac(doc, request)
+            else:
+                child = self.client.collection_serializer.db_to_stac(doc, request)
+
+            children.append(child)
+
+        # 6. Format Response
+        # The Children extension uses a specific response format
+        response: Dict[str, Any] = {
+            "children": children,
+            "links": [
+                {"rel": "self", "type": "application/json", "href": str(request.url)},
+                {
+                    "rel": "root",
+                    "type": "application/json",
+                    "href": str(request.base_url),
+                },
+                {
+                    "rel": "parent",
+                    "type": "application/json",
+                    "href": f"{str(request.base_url)}catalogs/{catalog_id}",
+                },
+            ],
+            "numberReturned": len(children),
+            "numberMatched": total,
+        }
+
+        # 7. Generate Next Link
+        next_token = None
+        if has_next_page:
+            # has_next_page implies at least one hit is left after trimming
+            next_token_values = hits[-1].get("sort")
+            if next_token_values:
+                next_token = str(next_token_values[0])
+
+        if next_token:
+            # Get existing query params
+            parsed_url = urlparse(str(request.url))
+            params = parse_qs(parsed_url.query)
+
+            # Update params
+            params["token"] = [next_token]
+            params["limit"] = [str(limit)]
+            if type:
+                params["type"] = [type]
+
+            # Flatten params for urlencode (parse_qs returns lists)
+            flat_params = {k: v[0] for k, v in params.items()}
+
+            next_link = {
+                "rel": "next",
+                "type": "application/json",
+                "href": f"{request.base_url}catalogs/{catalog_id}/children?{urlencode(flat_params)}",
+            }
+            response["links"].append(next_link)
+
+        return response
+
     async def delete_catalog_collection(
         self, catalog_id: str, collection_id: str, request: Request
     ) -> None:
diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py
index fe10bfaa..587fcb85 100644
--- a/stac_fastapi/tests/api/test_api.py
+++ b/stac_fastapi/tests/api/test_api.py
@@ -54,6 +54,7 @@
     "POST /catalogs",
     "GET /catalogs/{catalog_id}",
     "DELETE /catalogs/{catalog_id}",
+    "GET /catalogs/{catalog_id}/children",
     "GET /catalogs/{catalog_id}/collections",
     "POST /catalogs/{catalog_id}/collections",
     "GET /catalogs/{catalog_id}/collections/{collection_id}",
diff --git
a/stac_fastapi/tests/extensions/test_catalogs.py b/stac_fastapi/tests/extensions/test_catalogs.py
index 9a85d03a..95071593 100644
--- a/stac_fastapi/tests/extensions/test_catalogs.py
+++ b/stac_fastapi/tests/extensions/test_catalogs.py
@@ -1147,3 +1147,206 @@ async def test_parent_ids_not_exposed_to_client(catalogs_app_client, load_test_d
         assert (
             "parent_ids" not in collection
         ), "parent_ids should not be exposed in API response"
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_children(catalogs_app_client, load_test_data):
+    """Test getting children (collections) from a catalog."""
+    # Create a catalog
+    test_catalog = load_test_data("test_catalog.json")
+    catalog_id = f"test-catalog-{uuid.uuid4()}"
+    test_catalog["id"] = catalog_id
+
+    catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
+    assert catalog_resp.status_code == 201
+
+    # Create multiple collections in the catalog
+    collection_ids = []
+    for i in range(2):
+        test_collection = load_test_data("test_collection.json")
+        collection_id = f"test-collection-{uuid.uuid4()}-{i}"
+        test_collection["id"] = collection_id
+
+        coll_resp = await catalogs_app_client.post(
+            f"/catalogs/{catalog_id}/collections", json=test_collection
+        )
+        assert coll_resp.status_code == 201
+        collection_ids.append(collection_id)
+
+    # Get children from the catalog
+    children_resp = await catalogs_app_client.get(f"/catalogs/{catalog_id}/children")
+    assert children_resp.status_code == 200
+
+    children_data = children_resp.json()
+    assert "children" in children_data
+    assert "links" in children_data
+    assert "numberReturned" in children_data
+    assert "numberMatched" in children_data
+
+    # Should have 2 children (collections)
+    assert len(children_data["children"]) == 2
+    assert children_data["numberReturned"] == 2
+    assert children_data["numberMatched"] == 2
+
+    # Check that all are collections
+    child_types = [child["type"] for child in children_data["children"]]
+    assert all(child_type == "Collection" for child_type in child_types)
+
+    # Check that we have the right collection IDs
+    returned_ids = [child["id"] for child in children_data["children"]]
+    for collection_id in collection_ids:
+        assert collection_id in returned_ids
+
+    # Check required links
+    links = children_data["links"]
+    link_rels = [link["rel"] for link in links]
+    assert "self" in link_rels
+    assert "root" in link_rels
+    assert "parent" in link_rels
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_children_type_filter_catalog(
+    catalogs_app_client, load_test_data
+):
+    """Test filtering children by type=Catalog (should return empty since no catalogs are children)."""
+    # Create a catalog
+    test_catalog = load_test_data("test_catalog.json")
+    catalog_id = f"test-catalog-{uuid.uuid4()}"
+    test_catalog["id"] = catalog_id
+
+    catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
+    assert catalog_resp.status_code == 201
+
+    # Create a collection in the catalog
+    test_collection = load_test_data("test_collection.json")
+    collection_id = f"test-collection-{uuid.uuid4()}"
+    test_collection["id"] = collection_id
+
+    coll_resp = await catalogs_app_client.post(
+        f"/catalogs/{catalog_id}/collections", json=test_collection
+    )
+    assert coll_resp.status_code == 201
+
+    # Get only catalog children (should be empty)
+    children_resp = await catalogs_app_client.get(
+        f"/catalogs/{catalog_id}/children?type=Catalog"
+    )
+    assert children_resp.status_code == 200
+
+    children_data = children_resp.json()
+    assert len(children_data["children"]) == 0
+    assert children_data["numberReturned"] == 0
+    assert children_data["numberMatched"] == 0
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_children_type_filter_collection(
+    catalogs_app_client, load_test_data
+):
+    """Test filtering children by type=Collection."""
+    # Create a catalog
+    test_catalog = load_test_data("test_catalog.json")
+    catalog_id = f"test-catalog-{uuid.uuid4()}"
+    test_catalog["id"] = catalog_id
+
+    catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
+    assert catalog_resp.status_code == 201
+
+    # Create a collection in the catalog
+    test_collection = load_test_data("test_collection.json")
+    collection_id = f"test-collection-{uuid.uuid4()}"
+    test_collection["id"] = collection_id
+
+    coll_resp = await catalogs_app_client.post(
+        f"/catalogs/{catalog_id}/collections", json=test_collection
+    )
+    assert coll_resp.status_code == 201
+
+    # Get only collection children
+    children_resp = await catalogs_app_client.get(
+        f"/catalogs/{catalog_id}/children?type=Collection"
+    )
+    assert children_resp.status_code == 200
+
+    children_data = children_resp.json()
+    assert len(children_data["children"]) == 1
+    assert children_data["children"][0]["type"] == "Collection"
+    assert children_data["children"][0]["id"] == collection_id
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_children_nonexistent_catalog(catalogs_app_client):
+    """Test getting children from a catalog that doesn't exist."""
+    resp = await catalogs_app_client.get("/catalogs/nonexistent-catalog/children")
+    assert resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_children_pagination(catalogs_app_client, load_test_data):
+    """Test pagination of children endpoint.
+
+    Follows the ``next`` link until it disappears and checks that every child
+    is returned exactly once, so a token that is ignored (which would replay
+    the first page forever) fails the test.
+    """
+    # Create a catalog
+    test_catalog = load_test_data("test_catalog.json")
+    catalog_id = f"test-catalog-{uuid.uuid4()}"
+    test_catalog["id"] = catalog_id
+
+    catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
+    assert catalog_resp.status_code == 201
+
+    # Create multiple collections in the catalog
+    collection_ids = []
+    for i in range(5):
+        test_collection = load_test_data("test_collection.json")
+        collection_id = f"test-collection-{uuid.uuid4()}-{i}"
+        test_collection["id"] = collection_id
+
+        coll_resp = await catalogs_app_client.post(
+            f"/catalogs/{catalog_id}/collections", json=test_collection
+        )
+        assert coll_resp.status_code == 201
+        collection_ids.append(collection_id)
+
+    from urllib.parse import parse_qs, urlparse
+
+    # Walk the pages with limit=2: 5 children should arrive as 2 + 2 + 1
+    page_sizes = []
+    seen_ids = []
+    url = f"/catalogs/{catalog_id}/children?limit=2"
+    while url is not None:
+        # A dropped token would replay page one forever; fail instead of hanging
+        assert len(page_sizes) < 5, "Pagination did not terminate"
+
+        page_resp = await catalogs_app_client.get(url)
+        assert page_resp.status_code == 200
+
+        page_data = page_resp.json()
+        assert page_data["numberReturned"] == len(page_data["children"])
+        assert page_data["numberMatched"] == 5
+        page_sizes.append(len(page_data["children"]))
+        seen_ids.extend(child["id"] for child in page_data["children"])
+
+        # Check for next link
+        next_link = None
+        for link in page_data["links"]:
+            if link.get("rel") == "next":
+                next_link = link
+                break
+
+        if next_link is None:
+            url = None
+        else:
+            # Parse the token from the next link URL
+            query_params = parse_qs(urlparse(next_link["href"]).query)
+            token = query_params.get("token", [None])[0]
+            assert token is not None, "Next link should contain token"
+            url = f"/catalogs/{catalog_id}/children?token={token}&limit=2"
+
+    assert page_sizes == [2, 2, 1]
+    # Every child exactly once, with no overlap between pages
+    assert len(seen_ids) == len(set(seen_ids))
+    assert sorted(seen_ids) == sorted(collection_ids)

From 9f6b0d79c27c0bb4a934bcedcec3bb9b70142897 Mon Sep 17 00:00:00 2001
From: jonhealy1
Date: Wed, 10 Dec 2025 14:43:22 +0800
Subject: [PATCH 3/3] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aa2b2181..b38892d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Added optional `/catalogs` route support to enable federated hierarchical catalog browsing and navigation.
[#547](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/547) - Added DELETE `/catalogs/{catalog_id}/collections/{collection_id}` endpoint to support removing collections from catalogs. When a collection belongs to multiple catalogs, it removes only the specified catalog from the collection's parent_ids. When a collection belongs to only one catalog, the collection is deleted entirely. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) - Added `parent_ids` internal field to collections to support multi-catalog hierarchies. Collections can now belong to multiple catalogs, with parent catalog IDs stored in this field for efficient querying and management. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) +- Added GET `/catalogs/{catalog_id}/children` endpoint implementing the STAC Children extension for efficient hierarchical catalog browsing. Supports type filtering (?type=Catalog|Collection), pagination, and returns numberReturned/numberMatched counts at the top level. [#558](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/558) ### Changed