Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/models/efp_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def _schema(species: str, charset: str = "latin1") -> DatabaseSpec:
("root_Schaefer_lab", "root Schaefer lab"),
("rpatel", "rpatel"),
("seed_db", "seed db"),
("seedcoat", "oat"),
("seedcoat", "arabidopsis seedcoat"),
("selaginella", "selaginella"),
("shoot_apex", "arabidopsis"),
("silique", "arabidopsis"),
Expand Down
47 changes: 8 additions & 39 deletions api/resources/gene_expression.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
"""
Reena Obmina | BCB330 Project 2025-2026 | University of Toronto

REST endpoint for gene expression queries across all eFP databases.

Routes: GET /gene_expression/expression/<database>/<gene_id>

All gene IDs are validated by species before reaching the query layer.
Probeset conversion is applied automatically for microarray databases.
"""
from flask_restx import Namespace, Resource
from markupsafe import escape

from api.services.efp_data import query_efp_database_dynamic
from api.utils.bar_utils import BARUtils
from api.utils.gene_id_utils import (
CROSS_SPECIES_DATABASES,
DATABASE_SPECIES,
PROBESET_DATABASES,
convert_gene_to_probeset,
is_probeset_id,
normalize_gene_id,
validate_gene_id,
GeneIdUtils,
DATABASE_EFP_PROJECT,
)

gene_expression = Namespace(
Expand All @@ -46,38 +32,21 @@
)
class GeneExpression(Resource):
def get(self, database, gene_id):
"""Retrieve expression values for a gene from a given eFP database.
"""
"""Retrieve expression values for a gene from a given eFP database."""
database = str(escape(database))
gene_id = str(escape(gene_id))

# 1. Resolve database species and expected input species.
# Cross-species databases (e.g. phelipanche) accept an Arabidopsis AGI
# even though the database itself belongs to a different species.
species = DATABASE_SPECIES.get(database)
if species is None:
return BARUtils.error_exit(f"Unknown database '{database}'"), 400
input_species = CROSS_SPECIES_DATABASES.get(database, species)

# 2. If the caller already supplied a probeset ID, use it directly
if is_probeset_id(gene_id):
if GeneIdUtils.is_probeset_id(gene_id):
query_id = gene_id
else:
# 3. Validate gene ID format against the expected input species regex
if not validate_gene_id(gene_id, input_species):
return BARUtils.error_exit(f"Invalid {input_species} gene ID: '{gene_id}'"), 400

# 4. Normalise (e.g. strip maize transcript suffix _T##)
gene_id = normalize_gene_id(gene_id, species)

# 5. Microarray / non-direct databases need gene ID -> probeset conversion
if database in PROBESET_DATABASES:
probeset, err = convert_gene_to_probeset(gene_id, species, database)
if err:
return BARUtils.error_exit(err), 404
query_id = probeset
else:
query_id = gene_id
if not GeneIdUtils.validate_gene_for_database(gene_id, database):
label = DATABASE_EFP_PROJECT.get(database) or species or database
return BARUtils.error_exit(f"Invalid gene ID for {label}: '{gene_id}'"), 400
query_id = GeneIdUtils.normalize_gene_id(gene_id, species)

result = query_efp_database_dynamic(database, query_id)

Expand Down
469 changes: 414 additions & 55 deletions api/utils/bar_utils.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions api/utils/efp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ def is_efp_input_valid(efp, view, mode, gene_1, gene_2=None):

# Maybe this part could be improved
if efp == "efp_arabidopsis":
# Validate gene ids
if not BARUtils.is_arabidopsis_gene_valid(gene_1):
# Accept AGI format (AT1G01010) or microarray probeset IDs (267643_at)
if not BARUtils.is_efp_gene_valid(gene_1, "efp_arabidopsis"):
return False, "Gene 1 is invalid."

if mode == "Compare":
if not BARUtils.is_arabidopsis_gene_valid(gene_2):
if not BARUtils.is_efp_gene_valid(gene_2, "efp_arabidopsis"):
return False, "Gene 2 is invalid."

if efp == "efp_arachis":
Expand Down
Loading