From f0c5bca5f589117510a484f2ae828e93b475b7d5 Mon Sep 17 00:00:00 2001 From: Puja Trivedi Date: Tue, 14 Oct 2025 16:58:05 -0700 Subject: [PATCH] updated taxonomy query. now data contains additional information: accession_id, abbreviation, cell_type_set --- query_service/core/routers/query.py | 89 ++++++++++++++++++++++++----- query_service/core/shared.py | 37 ++---------- 2 files changed, 81 insertions(+), 45 deletions(-) diff --git a/query_service/core/routers/query.py b/query_service/core/routers/query.py index b49e374..358dbcb 100644 --- a/query_service/core/routers/query.py +++ b/query_service/core/routers/query.py @@ -68,22 +68,51 @@ async def get_taxonomy(): PREFIX rdf: PREFIX rdfs: PREFIX bican: + PREFIX DHBA: - SELECT ?id ?parent ?name ?hex + SELECT ?id ?name ?accession_id ?parentNode ?abbrNode ?abbrMeaning ?abbrTerm ?parcellationTerm ?geneAnnotation ?cellType ?hex ?setAccessionId WHERE { - GRAPH { - ?id a bican:CellTypeTaxon . - OPTIONAL { ?id bican:has_parent ?parent . } - OPTIONAL { ?id rdfs:label ?name . } - - # Find a DisplayColor node linked to this taxon - OPTIONAL { - ?colorNode a bican:DisplayColor ; - bican:is_color_for_taxon ?cid ; - bican:color_hex_triplet ?hex . - FILTER(STR(?id) = STR(?cid)) - } - } + # 1. Find all CellTypeTaxon nodes + ?id a bican:CellTypeTaxon . + + # 2. Get CellTypeTaxon name + OPTIONAL { ?id rdfs:label ?name . } + + # 3. Get CellTypeTaxon accession_id + OPTIONAL { ?id bican:accession_id ?accession_id . } + + # 4. Get CellTypeTaxon Parent Node + OPTIONAL { + ?id bican:has_parent ?parentNode . + } + + # 5. Get CellTypeTaxon Abbreviation Nodes + OPTIONAL { + ?id bican:has_abbreviation ?abbrNode . + ?abbrNode a bican:Abbreviation ; + bican:meaning ?abbrMeaning ; + bican:term ?abbrTerm . + OPTIONAL { ?abbrNode bican:denotes_parcellation_term ?parcellationTerm . } + OPTIONAL { ?abbrNode bican:denotes_gene_annotation ?geneAnnotation . } + OPTIONAL { ?abbrNode bican:denotes_cell_type ?cellType . } + } + + # 6. 
Get CellTypeTaxon Color Hex Triplet + OPTIONAL { + ?colorNode a bican:DisplayColor ; + bican:is_color_for_taxon ?cid ; + bican:color_hex_triplet ?hex . + FILTER(STR(?id) = STR(?cid)) + } + + # 7. Get CellTypeSet Node + OPTIONAL { + ?cellTypeSetNode a bican:CellTypeSet ; + bican:contains_taxon ?id ; + bican:accession_id ?setAccessionId . + } + + } """ response = fetch_data_gdb(query_taxonomy) @@ -98,4 +127,36 @@ async def get_taxonomy(): processed_taxonomy = taxonomy_postprocessing(response_taxonomy) return processed_taxonomy +#! TODO: Update lines 119-126 with this: +# data = {} +# for row in response: +# id_, name, accession_id, parentNode, abbrNode, abbrMeaning, abbrTerm, parcellationTerm, geneAnnotation, cellType, hex_, setAccessionId = row +# id_ = str(id_) +# name = str(name) if name else None +# accession_id = str(accession_id) if accession_id else None +# parentNode = str(parentNode) if parentNode else None +# abbrNode = str(abbrNode) if abbrNode else None +# abbrMeaning = str(abbrMeaning) if abbrMeaning else None +# abbrTerm = str(abbrTerm) if abbrTerm else None +# denotes = str(parcellationTerm) if parcellationTerm else None +# denotes = str(geneAnnotation) if geneAnnotation else denotes +# denotes = str(cellType) if cellType else denotes +# hex_ = str(hex_) if hex_ else None + +# if str(id_) in data: +# if abbrNode in data[id_]["abbreviations"]: +# if denotes: +# data[id_]["abbreviations"][abbrNode]["denotes"].append(denotes) +# else: +# data[id_]["abbreviations"][abbrNode] = {"term": abbrTerm, "meaning": abbrMeaning, "denotes": [denotes]} +# else: +# data[id_] = { +# "name": name, +# "accession_id": accession_id, +# "parent": parentNode, +# "abbreviations": dict({abbrNode: {"term": abbrTerm, "meaning": abbrMeaning, "denotes": [denotes]}}) if abbrNode else {}, +# "hex": hex_, +# "belongs_to_set": setAccessionId +# } + diff --git a/query_service/core/shared.py b/query_service/core/shared.py index 71a1072..a896259 100644 --- a/query_service/core/shared.py 
+++ b/query_service/core/shared.py @@ -443,49 +443,26 @@ def named_graph_metadata(named_graph_url, description): ) return named_graph_metadata -# def taxonomy_postprocessing(items): -# # going through the query output and create dictionary with parents_id and lists of childs ids -# taxon_dict = {} -# for tax_id, el in items.items(): -# if el['parent'] is None: -# par_id = "root" -# par_nm = "root" -# else: -# par_id = el['parent'] -# par_nm = items[par_id]["name"] - -# if par_id not in taxon_dict: -# taxon_dict[par_id] = {"meta": {"name": par_nm}, "childrens_id": [tax_id]} -# else: -# taxon_dict[par_id]["childrens_id"].append(tax_id) - - -# # creating a simple function for one level of children for testing the figure: -# fig_dict = {"name": "root", "nodeColor": "#ffffff", "children": []} -# for child_id in taxon_dict["root"]['childrens_id']: -# fig_dict["children"].append({"name": taxon_dict[child_id]["meta"]["name"], "nodeColor": "#ebb3a7", "children": []}) - -# return fig_dict def getting_childrens(items): # going through the query output and create dictionary with parents_id and lists of childs ids taxon_dict = {} for tax_id, el in items.items(): if el['parent'] is None: par_id = "root" - par_nm, par_col = "root", '#ffffff' + par_name, par_color, par_accession_id, par_abbreviations, par_belongs_to_set = "root", '#ffffff', None, [], None else: par_id = el['parent'] - par_nm, par_col = items[par_id]["name"], items[par_id]["hex"] - + par_name, par_color, par_accession_id, par_abbreviations, par_belongs_to_set = items[par_id]["name"], items[par_id]["hex"], items[par_id]["accession_id"], items[par_id]["abbreviations"].values(), items[par_id]["belongs_to_set"] + if par_id not in taxon_dict: - taxon_dict[par_id] = {"meta": {"name": par_nm, "color": par_col}, "childrens_id": [tax_id]} + taxon_dict[par_id] = {"meta": {"name": par_name, "color": par_color, "accession_id": par_accession_id, "abbreviations": list(par_abbreviations), "belongs_to_set": par_belongs_to_set}, 
"childrens_id": [tax_id]} else: taxon_dict[par_id]["childrens_id"].append(tax_id) # adding elements without children for tax_id, el in items.items(): if tax_id not in taxon_dict: - taxon_dict[tax_id] = {"meta": {"name": items[tax_id]["name"], "color": items[tax_id]["hex"]}, "childrens_id": []} + taxon_dict[tax_id] = {"meta": {"name": items[tax_id]["name"], "color": items[tax_id]["hex"], "accession_id": items[tax_id]["accession_id"], "abbreviations": list(items[tax_id]["abbreviations"].values()), "belongs_to_set": items[tax_id]["belongs_to_set"]}, "childrens_id": []} return taxon_dict @@ -505,12 +482,10 @@ def update_childrens(children_list, parent_id, taxon_children_dict): #print("child id", child_id) children_list_current = [] update_childrens(children_list_current, child_id, taxon_children_dict) - children_list.append({"name": taxon_children_dict[child_id]["meta"]["name"], "nodeColor": taxon_children_dict[child_id]["meta"]["color"], "children": children_list_current}) + children_list.append({"name": taxon_children_dict[child_id]["meta"]["name"], "nodeColor": taxon_children_dict[child_id]["meta"]["color"], "accession_id": taxon_children_dict[child_id]["meta"]["accession_id"], "abbreviations": taxon_children_dict[child_id]["meta"]["abbreviations"], "belongs_to_set": taxon_children_dict[child_id]["meta"]["belongs_to_set"], "children": children_list_current}) return else: return - - def taxonomy_postprocessing(items): taxon_children = getting_childrens(items)