Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 111 additions & 3 deletions app/db/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,58 @@
import rich
import re
from app.db.database import Database
from app.models.clean_data import CLEANColumn
from app.models.query_params import CLEANECLookupQueryParams, CLEANSearchQueryParams, CLEANTypeaheadQueryParams
from app.models.clean_data import CLEANColumn, UniqueValueField
from app.models.query_params import (
CLEANECLookupQueryParams,
CLEANSearchQueryParams,
CLEANTypeaheadQueryParams,
CLEANUniqueValuesQueryParams,
SortOrder,
)


# Whitelist of columns that may be queried for distinct values.
# Every supported field lives in the same annotation table and its column name
# equals the enum member's value; only ``ncbi_taxid`` is flagged as non-text
# (the lookup code casts non-text columns to text before pattern matching).
UNIQUE_VALUE_CONFIG: Dict[UniqueValueField, Dict[str, Any]] = {
    field: {
        "table": "cleandb.predictions_uniprot_annot",
        "column": field.value,
        "is_text": text_column,
    }
    for field, text_column in (
        (UniqueValueField.accession, True),
        (UniqueValueField.organism, True),
        (UniqueValueField.protein_name, True),
        (UniqueValueField.gene_name, True),
        (UniqueValueField.uniprot_id, True),
        (UniqueValueField.curation_status, True),
        (UniqueValueField.enzyme_function, True),
        (UniqueValueField.ncbi_taxid, False),
    )
}

async def build_conditions(
params: CLEANSearchQueryParams,
Expand Down Expand Up @@ -195,4 +245,62 @@ async def get_ec_suggestions(db: Database, params: CLEANECLookupQueryParams

# Execute the query
records = await db.fetch(query, number_search, name_search)
return [{ 'ec_number': record['ec_number'], 'ec_name': record['ec_name'] } for record in records]
return [{ 'ec_number': record['ec_number'], 'ec_name': record['ec_name'] } for record in records]


async def get_unique_field_values(
    db: Database,
    params: CLEANUniqueValuesQueryParams,
) -> Dict[str, Any]:
    """Return one page of distinct values for a whitelisted field, plus the total.

    The table and column names are taken from ``UNIQUE_VALUE_CONFIG`` (never from
    user input); every user-supplied value is passed as a bound query argument.

    Args:
        db: Database handle exposing awaitable ``fetch`` and ``fetchval``.
        params: Field to inspect, optional substring ``search``, paging
            (``limit`` / ``offset``), ``sort`` direction and ``include_null``.

    Returns:
        ``{"values": [...], "total": <count>}`` where ``values`` is the requested
        page and ``total`` is the number of distinct values matching the same
        filters, ignoring paging.

    Raises:
        ValueError: If ``params.field_name`` has no entry in
            ``UNIQUE_VALUE_CONFIG``.
    """
    config = UNIQUE_VALUE_CONFIG.get(params.field_name)
    if not config:
        raise ValueError(f"Unsupported field for unique value lookup: {params.field_name}")

    table = config["table"]
    column = config["column"]
    is_text = config.get("is_text", True)

    where_clauses: List[str] = []
    filter_args: List[Any] = []

    if not params.include_null:
        where_clauses.append(f"{column} IS NOT NULL")
        if is_text:
            # Blank / whitespace-only strings are treated like missing values.
            where_clauses.append(f"TRIM({column}) <> ''")

    # A search that is empty after trimming is treated as "no search": an
    # ILIKE '%%' filter would otherwise silently discard NULL rows even when
    # include_null was requested.
    search_term = params.search.strip() if params.search else ""
    if search_term:
        # Escape LIKE metacharacters so the term is matched literally.
        # Backslash is PostgreSQL's default LIKE/ILIKE escape character, and it
        # must be escaped first so the escapes added below are not doubled.
        escaped_term = (
            search_term.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        filter_args.append(f"%{escaped_term}%")
        # Non-text columns are cast so that pattern matching applies to them.
        match_target = column if is_text else f"{column}::text"
        where_clauses.append(f"{match_target} ILIKE ${len(filter_args)}")

    where_sql = " AND ".join(where_clauses) if where_clauses else "TRUE"
    sort_direction = "ASC" if params.sort == SortOrder.ASC else "DESC"

    # LIMIT / OFFSET are bound as arguments rather than interpolated; their
    # placeholders follow whatever filter arguments were added above.
    limit_idx = len(filter_args) + 1
    offset_idx = limit_idx + 1
    distinct_query = f"""
        SELECT DISTINCT {column} AS value
        FROM {table}
        WHERE {where_sql}
        ORDER BY value {sort_direction}
        LIMIT ${limit_idx}
        OFFSET ${offset_idx}
    """
    records = await db.fetch(
        distinct_query, *filter_args, params.limit, params.offset
    )
    values = [record["value"] for record in records]

    # The total uses the same filters but no paging, so only the filter
    # arguments are passed.
    count_query = f"""
        SELECT COUNT(*) AS total
        FROM (
            SELECT DISTINCT {column}
            FROM {table}
            WHERE {where_sql}
        ) AS distinct_values
    """
    total = await db.fetchval(count_query, *filter_args)

    return {"values": values, "total": total}
48 changes: 47 additions & 1 deletion app/models/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import List, Literal, Optional
from typing import Any, List, Literal, Optional

from pydantic import BaseModel, Field

Expand All @@ -24,6 +24,19 @@ class CLEANColumn(Enum):
annot_ec_number_array = "annot_ec_number_array"


class UniqueValueField(str, Enum):
    """Fields that can be requested from the unique-values lookup.

    Members are ``str`` subclasses and each member's value is identical to its
    name.
    """

    # Identifier-like fields
    accession = "accession"
    organism = "organism"
    protein_name = "protein_name"
    gene_name = "gene_name"
    uniprot_id = "uniprot_id"

    # Annotation fields
    curation_status = "curation_status"
    enzyme_function = "enzyme_function"
    ncbi_taxid = "ncbi_taxid"


class CLEANDataBase(BaseModel):
"""Base model for CLEAN data."""

Expand Down Expand Up @@ -145,4 +158,37 @@ class CLEANECLookupResponse(BaseModel):
matches: List[CLEANECLookupMatch] = Field(
[],
description="List of matches for the EC lookup."
)


class CLEANUniqueValuesResponse(BaseModel):
    """Response body for a unique-values lookup.

    Carries the request parameters alongside the page of values and the total
    number of matching distinct values.
    """

    # --- request parameters reported back to the caller ---
    field_name: UniqueValueField = Field(
        ..., description="Field the unique values were retrieved from."
    )
    search: Optional[str] = Field(
        default=None, description="Optional search filter applied to the values."
    )
    sort: Literal["asc", "desc"] = Field(
        default="asc", description="Sort order applied to the returned values."
    )
    limit: int = Field(..., description="Maximum number of values returned.")
    offset: int = Field(
        ..., description="Number of values skipped before returning results."
    )

    # --- result payload ---
    total: int = Field(
        ..., description="Total number of unique values matching the filters."
    )
    values: List[Any] = Field(
        default_factory=list, description="Unique values for the requested field."
    )
41 changes: 41 additions & 0 deletions app/models/query_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pydantic import BaseModel, Field

from app.models.clean_data import UniqueValueField

class ResponseFormat(str, Enum):
"""Enum for response format options."""
Expand All @@ -11,6 +12,13 @@ class ResponseFormat(str, Enum):
CSV = "csv"


class SortOrder(str, Enum):
    """Direction in which a list response is ordered.

    Members are ``str`` subclasses whose values are the lowercase forms of
    their names.
    """

    ASC = "asc"  # ascending
    DESC = "desc"  # descending


class CLEANSearchQueryParams(BaseModel):
"""Query parameters for CLEAN data filtering."""

Expand Down Expand Up @@ -87,4 +95,37 @@ class CLEANECLookupQueryParams(BaseModel):
)
limit: Optional[int] = Field(
None, description="Maximum number of records to return"
)


class CLEANUniqueValuesQueryParams(BaseModel):
    """Validated parameters for a unique-values lookup.

    ``limit`` is constrained to 1..1000, ``offset`` must be non-negative, and
    ``search`` (when given) must contain at least one character.
    """

    # --- what to look up ---
    field_name: UniqueValueField = Field(
        ..., description="Field to retrieve unique values from."
    )
    search: Optional[str] = Field(
        default=None,
        min_length=1,
        description="Optional substring filter applied to the field values.",
    )

    # --- paging ---
    limit: int = Field(
        default=50,
        ge=1,
        le=1000,
        description="Maximum number of unique values to return.",
    )
    offset: int = Field(
        default=0,
        ge=0,
        description="Number of unique values to skip before returning results.",
    )

    # --- ordering and NULL handling ---
    sort: SortOrder = Field(
        default=SortOrder.ASC, description="Sort order for the returned values."
    )
    include_null: bool = Field(
        default=False, description="Whether to include NULL values in the response."
    )
102 changes: 99 additions & 3 deletions app/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,30 @@

from app.core.config import settings
from app.db.database import Database, get_db
from app.db.queries import get_ec_suggestions, get_filtered_data, get_total_count, get_typeahead_suggestions
from app.models.query_params import CLEANECLookupQueryParams, CLEANSearchQueryParams, CLEANTypeaheadQueryParams, ResponseFormat
from app.models.clean_data import CLEANDataBase, CLEANECLookupResponse, CLEANECLookupMatch, CLEANSearchResponse, CLEANTypeaheadResponse
from app.db.queries import (
get_ec_suggestions,
get_filtered_data,
get_total_count,
get_typeahead_suggestions,
get_unique_field_values,
)
from app.models.query_params import (
CLEANECLookupQueryParams,
CLEANSearchQueryParams,
CLEANTypeaheadQueryParams,
CLEANUniqueValuesQueryParams,
ResponseFormat,
SortOrder,
)
from app.models.clean_data import (
CLEANDataBase,
CLEANECLookupResponse,
CLEANECLookupMatch,
CLEANSearchResponse,
CLEANTypeaheadResponse,
CLEANUniqueValuesResponse,
UniqueValueField,
)

router = APIRouter(tags=["Search"])

Expand Down Expand Up @@ -223,6 +244,54 @@ async def get_data(
logger.error(f"Error getting data: {e}")
raise HTTPException(status_code=500, detail=f"Error retrieving data: {str(e)}")


def parse_unique_values_params(
    field_name: UniqueValueField = Query(
        ..., description="Field to retrieve unique values from."
    ),
    search: Optional[str] = Query(
        None,
        min_length=1,
        description="Optional substring filter applied to the field values.",
    ),
    limit: int = Query(
        50, ge=1, le=1000, description="Maximum number of unique values to return."
    ),
    offset: int = Query(
        0,
        ge=0,
        description="Number of unique values to skip before returning results.",
    ),
    sort: SortOrder = Query(
        SortOrder.ASC, description="Sort order for the returned values."
    ),
    include_null: bool = Query(
        False, description="Whether to include NULL values in the response."
    ),
) -> CLEANUniqueValuesQueryParams:
    """Collect the unique-values query-string arguments into a params model.

    Returns:
        A ``CLEANUniqueValuesQueryParams`` built from the received arguments.

    Raises:
        HTTPException: With status 400 if building the model raises any
            exception; the failure is logged first.
    """
    received = {
        "field_name": field_name,
        "search": search,
        "limit": limit,
        "offset": offset,
        "sort": sort,
        "include_null": include_null,
    }
    try:
        return CLEANUniqueValuesQueryParams(**received)
    except Exception as exc:
        logger.error(f"Error parsing unique value parameters: {exc}")
        raise HTTPException(
            status_code=400,
            detail=f"Invalid unique value parameters: {exc!s}",
        )

def parse_typeahead_params(
field_name: Literal['accession', 'organism', 'protein_name', 'gene_name', 'uniprot_id'] = Query(
'organism',
Expand Down Expand Up @@ -312,3 +381,30 @@ async def get_ec_lookup(
except Exception as e:
logger.error(f"Error getting data: {e}")
raise HTTPException(status_code=500, detail=f"Error retrieving data: {str(e)}")


@router.get("/unique-values", summary="Get unique values for a specific field.")
async def get_unique_values(
    params: CLEANUniqueValuesQueryParams = Depends(parse_unique_values_params),
    db: Database = Depends(get_db),
) -> CLEANUniqueValuesResponse:
    """
    Retrieve unique values for a given field. Supports basic filtering, pagination, and sorting.
    """
    # Only the lookup itself sits inside the try block. Building the response
    # model is kept outside because pydantic's ValidationError is a ValueError
    # subclass: a server-side failure while assembling the response would
    # otherwise be caught below and misreported to the client as a 400.
    try:
        result = await get_unique_field_values(db, params)
    except ValueError as e:
        # Raised by the query layer when the requested field is not supported.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error getting unique values: {e}")
        raise HTTPException(
            status_code=500, detail=f"Error retrieving data: {str(e)}"
        ) from e

    # The applied parameters are returned with the results.
    return CLEANUniqueValuesResponse(
        field_name=params.field_name,
        search=params.search,
        sort=params.sort.value,
        limit=params.limit,
        offset=params.offset,
        total=result["total"],
        values=result["values"],
    )