Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Added

- Added CQL2 Abstract Syntax Tree (AST) structure for efficient query parsing and datetime-based indexes. [#560](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/560)

- Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532)

### Changed

### Fixed
Expand Down
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
- [Examples](#examples)
- [Performance](#performance)
- [Direct Response Mode](#direct-response-mode)
- [CQL2 JSON Search with AST-based Parsing](#cql2-json-search-with-ast-based-parsing)
- [Quick Start](#quick-start)
- [Installation](#installation)
- [Running Locally](#running-locally)
Expand Down Expand Up @@ -409,6 +410,31 @@ These examples provide practical reference implementations for various deploymen
- **Default setting**: `false` for safety.
- **More information**: See [issue #347](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/issues/347) for background and implementation details.


### CQL2 JSON Search with AST-based Parsing

SFEOS now uses an Abstract Syntax Tree (AST) in CQL2-JSON search queries for efficient query parsing and datetime extraction, enabling the selection and management of the appropriate searchable indexes.

#### AST-based Query Processing

The CQL2 implementation uses an Abstract Syntax Tree (AST) structure that replaces the previous dictionary-based processing. This enables:

1. **Structured Query Representation**: Queries are parsed into a tree structure with different node types
2. **Efficient Parameter Access**: Easy traversal and extraction of query parameters
3. **Optimized Index Selection**: Identification of the fields needed to select and manage the appropriate indexes

#### AST Node Types

The AST supports various node types representing different query operations:

- **Logical Nodes**: `AND`, `OR`, `NOT` operators for combining conditions
- **Comparison Nodes**: `=`, `<>`, `<`, `<=`, `>`, `>=`, `isNull` operations
- **Advanced Comparison Nodes**: `LIKE`, `BETWEEN`, `IN` operations
- **Spatial Nodes**: `s_intersects`, `s_contains`, `s_within`, `s_disjoint` for geospatial queries
- **Datetime Nodes**: Special handling for datetime range and exact value queries

The AST-based approach enables efficient extraction of datetime parameters (`datetime`, `start_datetime`, `end_datetime`) from complex queries.

## Quick Start

This section helps you get up and running with stac-fastapi-elasticsearch-opensearch quickly.
Expand Down
11 changes: 10 additions & 1 deletion stac_fastapi/core/stac_fastapi/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,8 @@ async def post_search(
search=search, intersects=getattr(search_request, "intersects")
)

collection_ids = getattr(search_request, "collections", None)

if hasattr(search_request, "query") and getattr(search_request, "query"):
query_fields = set(getattr(search_request, "query").keys())
await self.queryables_cache.validate(query_fields)
Expand All @@ -875,6 +877,13 @@ async def post_search(
query_fields = get_properties_from_cql2_filter(cql2_filter)
await self.queryables_cache.validate(query_fields)
search = await self.database.apply_cql2_filter(search, cql2_filter)
date_str = getattr(search, "_cql2_date_str", None)
collection_ids = getattr(search, "_cql2_collection_ids", None)
if date_str is not None:
datetime_parsed = format_datetime_range(date_str=date_str)
search, datetime_search = self.database.apply_datetime_filter(
search=search, datetime=datetime_parsed
)
except HTTPException:
raise
except Exception as e:
Expand Down Expand Up @@ -907,7 +916,7 @@ async def post_search(
limit=limit,
token=token_param,
sort=sort,
collection_ids=getattr(search_request, "collections", None),
collection_ids=collection_ids,
datetime_search=datetime_search,
)

Expand Down
62 changes: 61 additions & 1 deletion stac_fastapi/core/stac_fastapi/core/extensions/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
# defines spatial operators (S_INTERSECTS, S_CONTAINS, S_WITHIN, S_DISJOINT).
# """

from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict
from typing import Any, Dict, List, Optional

DEFAULT_QUERYABLES: Dict[str, Dict[str, Any]] = {
"id": {
Expand Down Expand Up @@ -90,3 +91,62 @@ class SpatialOp(str, Enum):
S_CONTAINS = "s_contains"
S_WITHIN = "s_within"
S_DISJOINT = "s_disjoint"


@dataclass
class CqlNode:
    """Common base type for every node of the CQL2 abstract syntax tree.

    The class declares no fields of its own; the concrete node types in this
    module subclass it and add the fields they need.
    """


@dataclass
class LogicalNode(CqlNode):
    """AST node for a logical operator (AND, OR, NOT) and its operands."""

    # The logical operator this node applies.
    op: LogicalOp
    # Sub-trees the operator is applied to.
    children: List["CqlNode"]


@dataclass
class ComparisonNode(CqlNode):
    """AST node for a basic comparison (=, <>, <, <=, >, >=, is null)."""

    # The comparison operator.
    op: ComparisonOp
    # Name of the field being compared.
    field: str
    # Value the field is compared against; no type is enforced here.
    value: Any


@dataclass
class AdvancedComparisonNode(CqlNode):
    """AST node for an advanced comparison (like, between, in)."""

    # The advanced comparison operator.
    op: AdvancedComparisonOp
    # Name of the field being compared.
    field: str
    # Operand(s) of the comparison; the shape is not constrained by this class.
    value: Any


@dataclass
class SpatialNode(CqlNode):
    """AST node for a spatial operator applied to a field and a geometry."""

    # The spatial operator.
    op: SpatialOp
    # Name of the field the operator is evaluated on.
    field: str
    # Geometry operand as a dictionary (presumably GeoJSON -- TODO confirm).
    geometry: Dict[str, Any]


@dataclass
class DateTimeRangeNode(CqlNode):
    """AST node for a datetime range query on a single field."""

    # Field the range applies to; defaults to the item datetime property.
    field: str = "properties.datetime"
    # Range bounds as strings; each defaults to None.
    start: Optional[str] = None
    end: Optional[str] = None


@dataclass
class DateTimeExactNode(CqlNode):
    """AST node for an exact-value datetime query on a single field."""

    # Field the value applies to; defaults to the item datetime property.
    field: str = "properties.datetime"
    # The datetime value as a string; defaults to None.
    value: Optional[str] = None
39 changes: 33 additions & 6 deletions stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
merge_to_operations,
operations_to_script,
)
from stac_fastapi.sfeos_helpers.filter import (
Cql2AstParser,
DatetimeOptimizer,
to_es_via_ast,
)
from stac_fastapi.sfeos_helpers.filter.datetime_optimizer import extract_from_ast
from stac_fastapi.sfeos_helpers.mappings import (
AGGREGATION_MAPPING,
COLLECTIONS_INDEX,
Expand Down Expand Up @@ -766,11 +772,11 @@ async def apply_cql2_filter(
self, search: Search, _filter: Optional[Dict[str, Any]]
):
"""
Apply a CQL2 filter to an Opensearch Search object.
Apply a CQL2 filter to an OpenSearch Search object.

This method transforms a dictionary representing a CQL2 filter into an Opensearch query
and applies it to the provided Search object. If the filter is None, the original Search
object is returned unmodified.
This method transforms a CQL2 filter dictionary into an OpenSearch query using
an AST tree-based approach. If the filter is None, the original Search object is returned
unmodified.

Args:
search (Search): The Opensearch Search object to which the filter will be applied.
Expand All @@ -784,8 +790,29 @@ async def apply_cql2_filter(
otherwise the original Search object.
"""
if _filter is not None:
es_query = filter_module.to_es(await self.get_queryables_mapping(), _filter)
search = search.filter(es_query)
queryables_mapping = await self.get_queryables_mapping()

try:
parser = Cql2AstParser(queryables_mapping)
ast = parser.parse(_filter)

optimizer = DatetimeOptimizer()
optimized_ast = optimizer.optimize_query_structure(ast)

date_str = extract_from_ast(optimized_ast, "datetime")
collection_ids = extract_from_ast(optimized_ast, "collection") or None

es_query = to_es_via_ast(queryables_mapping, optimized_ast)

search = search.filter(es_query)
search._cql2_date_str = date_str
search._cql2_collection_ids = collection_ids

except Exception:
# Fallback to dictionary-based approach
es_query = filter_module.to_es(queryables_mapping, _filter)
search = search.filter(es_query)
return search

return search

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
- cql2.py: CQL2 pattern conversion helpers
- transform.py: Query transformation functions
- client.py: Filter client implementation
- ast_parser.py: AST parser for CQL2 queries
- datetime_optimizer.py: Datetime optimization for query structure

When adding new functionality to this package, consider:
1. Will this code be used by both Elasticsearch and OpenSearch implementations?
Expand All @@ -22,6 +24,14 @@
- Parameter names should be consistent across similar functions
"""

from stac_fastapi.core.extensions.filter import (
AdvancedComparisonOp,
ComparisonOp,
LogicalOp,
)

from .ast_parser import Cql2AstParser
from .ast_transform import to_es_via_ast
from .client import EsAsyncBaseFiltersClient

# Re-export the main functions and classes for backward compatibility
Expand All @@ -31,6 +41,7 @@
cql2_like_to_es,
valid_like_substitutions,
)
from .datetime_optimizer import DatetimeOptimizer
from .transform import to_es, to_es_field

__all__ = [
Expand All @@ -40,5 +51,12 @@
"_replace_like_patterns",
"to_es_field",
"to_es",
"to_es_via_ast",
"EsAsyncBaseFiltersClient",
"Cql2AstParser",
"AdvancedComparisonOp",
"ComparisonOp",
"LogicalOp",
"DatetimeOptimizer",
"extract_from_ast",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""AST parser for CQL2 queries."""

import json
from typing import Any, Dict, Union

from stac_fastapi.core.extensions.filter import (
AdvancedComparisonNode,
AdvancedComparisonOp,
ComparisonNode,
ComparisonOp,
CqlNode,
LogicalNode,
LogicalOp,
SpatialNode,
SpatialOp,
)


class Cql2AstParser:
    """Parse CQL2-JSON expressions into an AST of ``CqlNode`` objects.

    Supported operators are the logical (``and``, ``or``, ``not``), comparison
    (``=``, ``<>``, ``<``, ``<=``, ``>``, ``>=``, ``isNull``), advanced
    comparison (``like``, ``between``, ``in``) and spatial (``s_intersects``,
    ``s_contains``, ``s_within``, ``s_disjoint``) families.

    Anything else, including malformed nodes, is rejected with ``ValueError``
    rather than silently producing ``None``, so a caller never receives a tree
    with missing conditions.
    """

    # Operator names grouped by the kind of AST node they produce.
    _LOGICAL_OPS = frozenset({"and", "or", "not"})
    _COMPARISON_OPS = frozenset({"=", "<>", "<", "<=", ">", ">=", "isNull"})
    _ADVANCED_OPS = frozenset({"like", "between", "in"})
    _SPATIAL_OPS = frozenset(
        {"s_intersects", "s_contains", "s_within", "s_disjoint"}
    )

    def __init__(self, queryables_mapping: Dict[str, Any]):
        """Initialize the CQL2 AST parser.

        Args:
            queryables_mapping: Queryables mapping, stored as-is on the
                instance. The parsing methods of this class do not read it.
        """
        self.queryables_mapping = queryables_mapping

    def parse(self, cql: Union[str, Dict[str, Any]]) -> "CqlNode":
        """Parse CQL2 into AST tree.

        Args:
            cql: CQL2 expression, either as a JSON string or as an already
                decoded dictionary.

        Returns:
            Root node of the AST tree.

        Raises:
            ValueError: If the expression is malformed or uses an operator
                this parser does not support. Invalid JSON text raises
                ``json.JSONDecodeError``, which is a ``ValueError`` subclass.
        """
        if isinstance(cql, str):
            cql = json.loads(cql)
        return self._parse_node(cql)

    @staticmethod
    def _field_name(operand: Any) -> str:
        """Return the field named by an operator's first operand.

        A ``{"property": name}`` object yields ``name``; any other operand is
        converted with ``str``.
        """
        if isinstance(operand, dict) and "property" in operand:
            return operand["property"]
        return str(operand)

    @staticmethod
    def _require_args(op_name: str, args: Any, minimum: int) -> None:
        """Raise ``ValueError`` unless ``args`` has at least ``minimum`` items."""
        if len(args) < minimum:
            raise ValueError(
                f"Operator {op_name!r} requires at least {minimum} argument(s), "
                f"got {args}"
            )

    def _parse_node(self, node: Dict[str, Any]) -> "CqlNode":
        """Parse a single CQL2 node (and, recursively, its operands) into AST.

        Args:
            node: Decoded CQL2-JSON object with an ``"op"`` key and an
                optional ``"args"`` list.

        Returns:
            The AST node matching the operator family of ``node``.

        Raises:
            ValueError: If ``node`` is not an operator object, has too few or
                wrongly typed arguments, or uses an unsupported operator.
        """
        if not isinstance(node, dict) or "op" not in node:
            raise ValueError(
                f"CQL2 node must be an object with an 'op' key, got {node!r}"
            )

        op_name = node["op"]
        # Guard before the set lookups below: an unhashable "op" would raise
        # TypeError instead of a clear validation error.
        if not isinstance(op_name, str):
            raise ValueError(f"CQL2 operator must be a string, got {op_name!r}")

        args = node.get("args", [])
        if not isinstance(args, (list, tuple)):
            raise ValueError(
                f"Operator {op_name!r} expects a list of arguments, got {args!r}"
            )

        if op_name in self._LOGICAL_OPS:
            logical_op = LogicalOp(op_name)
            # NOT is unary: only its first operand (if any) is parsed.
            operands = args[:1] if logical_op == LogicalOp.NOT else args
            return LogicalNode(
                op=logical_op,
                children=[self._parse_node(operand) for operand in operands],
            )

        if op_name in self._COMPARISON_OPS:
            self._require_args(op_name, args, 1)
            return ComparisonNode(
                op=ComparisonOp(op_name),
                field=self._field_name(args[0]),
                # A missing second operand is kept as None.
                value=args[1] if len(args) > 1 else None,
            )

        if op_name in self._ADVANCED_OPS:
            self._require_args(op_name, args, 1)
            field = self._field_name(args[0])
            advanced_op = AdvancedComparisonOp(op_name)

            if advanced_op == AdvancedComparisonOp.BETWEEN:
                if len(args) != 3:
                    raise ValueError(
                        f"BETWEEN operator requires (property, lower, upper), got {args}"
                    )
                value: Any = (args[1], args[2])
            elif advanced_op == AdvancedComparisonOp.IN:
                if len(args) < 2:
                    raise ValueError(
                        f"IN operator requires (property, list), got {args}"
                    )
                if not isinstance(args[1], list):
                    raise ValueError(f"IN operator expects list, got {type(args[1])}")
                value = args[1]
            elif advanced_op == AdvancedComparisonOp.LIKE:
                if len(args) != 2:
                    raise ValueError(
                        f"LIKE operator requires (property, pattern), got {args}"
                    )
                value = args[1]
            else:
                # Defensive: never leave ``value`` unbound.
                raise ValueError(f"Unsupported CQL2 operator: {op_name!r}")

            return AdvancedComparisonNode(op=advanced_op, field=field, value=value)

        if op_name in self._SPATIAL_OPS:
            # Both the field and the geometry operand are mandatory.
            self._require_args(op_name, args, 2)
            return SpatialNode(
                op=SpatialOp(op_name),
                field=self._field_name(args[0]),
                geometry=args[1],
            )

        raise ValueError(f"Unsupported CQL2 operator: {op_name!r}")
Loading