-
Notifications
You must be signed in to change notification settings - Fork 2
updated taxonomy query #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,22 +68,51 @@ async def get_taxonomy(): | |
| PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> | ||
| PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> | ||
| PREFIX bican: <https://identifiers.org/brain-bican/vocab/> | ||
| PREFIX DHBA: <https://purl.brain-bican.org/ontology/dmbao/DMBA_> | ||
|
|
||
| SELECT ?id ?parent ?name ?hex | ||
| SELECT ?id ?name ?accession_id ?parentNode ?abbrNode ?abbrMeaning ?abbrTerm ?parcellationTerm ?geneAnnotation ?cellType ?hex ?setAccessionId | ||
| WHERE { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| GRAPH <http://hmbataxonomy20250927.com/> { | ||
| ?id a bican:CellTypeTaxon . | ||
| OPTIONAL { ?id bican:has_parent ?parent . } | ||
| OPTIONAL { ?id rdfs:label ?name . } | ||
|
|
||
| # Find a DisplayColor node linked to this taxon | ||
| OPTIONAL { | ||
| ?colorNode a bican:DisplayColor ; | ||
| bican:is_color_for_taxon ?cid ; | ||
| bican:color_hex_triplet ?hex . | ||
| FILTER(STR(?id) = STR(?cid)) | ||
| } | ||
| } | ||
| # 1. Find all CellTypeTaxon nodes | ||
| ?id a bican:CellTypeTaxon . | ||
|
|
||
| # 2. Get CellTypeTaxon name | ||
| OPTIONAL { ?id rdfs:label ?name . } | ||
|
|
||
| # 3. Get CellTypeTaxon accession_id | ||
| OPTIONAL { ?id bican:accession_id ?accession_id . } | ||
|
|
||
| # 4. Get CellTypeTaxon Parent Node | ||
| OPTIONAL { | ||
| ?id bican:has_parent ?parentNode . | ||
| } | ||
|
|
||
| # 5. Get CellTypeTaxon Abbreviation Nodes | ||
| OPTIONAL { | ||
| ?id bican:has_abbreviation ?abbrNode . | ||
| ?abbrNode a bican:Abbreviation ; | ||
| bican:meaning ?abbrMeaning ; | ||
| bican:term ?abbrTerm . | ||
| OPTIONAL { ?abbrNode bican:denotes_parcellation_term ?parcellationTerm . } | ||
| OPTIONAL { ?abbrNode bican:denotes_gene_annotation ?geneAnnotation . } | ||
| OPTIONAL { ?abbrNode bican:denotes_cell_type ?cellType . } | ||
| } | ||
|
|
||
| # 6. Get CellTypeTaxon Color Hex Triplet | ||
| OPTIONAL { | ||
| ?colorNode a bican:DisplayColor ; | ||
| bican:is_color_for_taxon ?cid ; | ||
| bican:color_hex_triplet ?hex . | ||
| FILTER(STR(?id) = STR(?cid)) | ||
| } | ||
|
|
||
| # 7. Get CellTypeSet Node | ||
| OPTIONAL { | ||
| ?cellTypeSetNode a bican:CellTypeSet ; | ||
| bican:contains_taxon ?id ; | ||
| bican:accession_id ?setAccessionId . | ||
| } | ||
|
|
||
|
|
||
| } | ||
| """ | ||
| response = fetch_data_gdb(query_taxonomy) | ||
|
|
@@ -98,4 +127,36 @@ async def get_taxonomy(): | |
| processed_taxonomy = taxonomy_postprocessing(response_taxonomy) | ||
| return processed_taxonomy | ||
|
|
||
| #! TODO: Update lines 119-126 with this: | ||
| # data = {} | ||
| # for row in response: | ||
| # id_, name, accession_id, parentNode, abbrNode, abbrMeaning, abbrTerm, parcellationTerm, geneAnnotation, cellType, hex_, setAccessionId = row | ||
| # id_ = str(id_) | ||
| # name = str(name) if name else None | ||
| # accession_id = str(accession_id) if accession_id else None | ||
| # parentNode = str(parentNode) if parentNode else None | ||
| # abbrNode = str(abbrNode) if abbrNode else None | ||
| # abbrMeaning = str(abbrMeaning) if abbrMeaning else None | ||
| # abbrTerm = str(abbrTerm) if abbrTerm else None | ||
| # denotes = str(parcellationTerm) if parcellationTerm else None | ||
| # denotes = str(geneAnnotation) if geneAnnotation else denotes | ||
| # denotes = str(cellType) if cellType else denotes | ||
| # hex_ = str(hex_) if hex_ else None | ||
|
|
||
| # if str(id_) in data: | ||
| # if abbrNode in data[id_]["abbreviations"]: | ||
| # if denotes: | ||
| # data[id_]["abbreviations"][abbrNode]["denotes"].append(denotes) | ||
| # else: | ||
| # data[id_]["abbreviations"][abbrNode] = {"term": abbrTerm, "meaning": abbrMeaning, "denotes": [denotes]} | ||
| # else: | ||
| # data[id_] = { | ||
| # "name": name, | ||
| # "accession_id": accession_id, | ||
| # "parent": parentNode, | ||
| # "abbreviations": dict({abbrNode: {"term": abbrTerm, "meaning": abbrMeaning, "denotes": [denotes]}}) if abbrNode else {}, | ||
| # "hex": hex_, | ||
| # "belongs_to_set": setAccessionId | ||
| # } | ||
|
|
||
|
Comment on lines
+130
to
+161
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This TODO correctly identifies that the data processing logic (lines 119-126) needs to be updated to match the new query. The current, old logic is still in place and will cause runtime errors because it references fields that are no longer returned by the query. Additionally, the proposed logic in this commented-out block has some issues:
Since I cannot comment on the unchanged lines (119-128), I'm providing a corrected implementation here. Please replace lines 119-128 with the following code:

```python
response_taxonomy = {}
for taxon_info in response["message"]["results"]["bindings"]:
    tax_id = taxon_info.get("id", {}).get("value")
    if not tax_id:
        continue
    if tax_id not in response_taxonomy:
        response_taxonomy[tax_id] = {
            "id": tax_id,
            "parent": taxon_info.get("parentNode", {}).get("value"),
            "name": taxon_info.get("name", {}).get("value"),
            "hex": taxon_info.get("hex", {}).get("value"),
            "accession_id": taxon_info.get("accession_id", {}).get("value"),
            "belongs_to_set": taxon_info.get("setAccessionId", {}).get("value"),
            "abbreviations": {}
        }
    abbr_node = taxon_info.get("abbrNode", {}).get("value")
    if abbr_node and abbr_node not in response_taxonomy[tax_id]["abbreviations"]:
        denotes = []
        if "parcellationTerm" in taxon_info:
            denotes.append(taxon_info["parcellationTerm"]["value"])
        if "geneAnnotation" in taxon_info:
            denotes.append(taxon_info["geneAnnotation"]["value"])
        if "cellType" in taxon_info:
            denotes.append(taxon_info["cellType"]["value"])
        response_taxonomy[tax_id]["abbreviations"][abbr_node] = {
            "term": taxon_info.get("abbrTerm", {}).get("value"),
            "meaning": taxon_info.get("abbrMeaning", {}).get("value"),
            "denotes": denotes
        }
processed_taxonomy = taxonomy_postprocessing(response_taxonomy)
return processed_taxonomy
```
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -443,49 +443,26 @@ def named_graph_metadata(named_graph_url, description): | |||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
| return named_graph_metadata | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # def taxonomy_postprocessing(items): | ||||||||||||||||||||||||
| # # going through the query output and create dictionary with parents_id and lists of childs ids | ||||||||||||||||||||||||
| # taxon_dict = {} | ||||||||||||||||||||||||
| # for tax_id, el in items.items(): | ||||||||||||||||||||||||
| # if el['parent'] is None: | ||||||||||||||||||||||||
| # par_id = "root" | ||||||||||||||||||||||||
| # par_nm = "root" | ||||||||||||||||||||||||
| # else: | ||||||||||||||||||||||||
| # par_id = el['parent'] | ||||||||||||||||||||||||
| # par_nm = items[par_id]["name"] | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # if par_id not in taxon_dict: | ||||||||||||||||||||||||
| # taxon_dict[par_id] = {"meta": {"name": par_nm}, "childrens_id": [tax_id]} | ||||||||||||||||||||||||
| # else: | ||||||||||||||||||||||||
| # taxon_dict[par_id]["childrens_id"].append(tax_id) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # # creating a simple function for one level of children for testing the figure: | ||||||||||||||||||||||||
| # fig_dict = {"name": "root", "nodeColor": "#ffffff", "children": []} | ||||||||||||||||||||||||
| # for child_id in taxon_dict["root"]['childrens_id']: | ||||||||||||||||||||||||
| # fig_dict["children"].append({"name": taxon_dict[child_id]["meta"]["name"], "nodeColor": "#ebb3a7", "children": []}) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # return fig_dict | ||||||||||||||||||||||||
| def getting_childrens(items): | ||||||||||||||||||||||||
| # going through the query output and create dictionary with parents_id and lists of childs ids | ||||||||||||||||||||||||
| taxon_dict = {} | ||||||||||||||||||||||||
| for tax_id, el in items.items(): | ||||||||||||||||||||||||
| if el['parent'] is None: | ||||||||||||||||||||||||
| par_id = "root" | ||||||||||||||||||||||||
| par_nm, par_col = "root", '#ffffff' | ||||||||||||||||||||||||
| par_name, par_color, par_accession_id, par_abbreviations, par_belongs_to_set = "root", '#ffffff', None, [], None | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line is quite long and assigns multiple variables at once, which can harm readability. To improve clarity and adhere to common style guidelines (like PEP 8's line length recommendations), consider breaking this into multiple assignment statements.
Suggested change
|
||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||
| par_id = el['parent'] | ||||||||||||||||||||||||
| par_nm, par_col = items[par_id]["name"], items[par_id]["hex"] | ||||||||||||||||||||||||
| par_name, par_color, par_accession_id, par_abbreviations, par_belongs_to_set = items[par_id]["name"], items[par_id]["hex"], items[par_id]["accession_id"], items[par_id]["abbreviations"].values(), items[par_id]["belongs_to_set"] | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to the line above, this line is very long and assigns many variables from a dictionary lookup. This can be hard to read and debug. Breaking it down into individual assignments would improve maintainability.
Suggested change
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| if par_id not in taxon_dict: | ||||||||||||||||||||||||
| taxon_dict[par_id] = {"meta": {"name": par_nm, "color": par_col}, "childrens_id": [tax_id]} | ||||||||||||||||||||||||
| taxon_dict[par_id] = {"meta": {"name": par_name, "color": par_color, "accession_id": par_accession_id, "abbreviations": list(par_abbreviations), "belongs_to_set": par_belongs_to_set}, "childrens_id": [tax_id]} | ||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||
| taxon_dict[par_id]["childrens_id"].append(tax_id) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # adding elements without children | ||||||||||||||||||||||||
| for tax_id, el in items.items(): | ||||||||||||||||||||||||
| if tax_id not in taxon_dict: | ||||||||||||||||||||||||
| taxon_dict[tax_id] = {"meta": {"name": items[tax_id]["name"], "color": items[tax_id]["hex"]}, "childrens_id": []} | ||||||||||||||||||||||||
| taxon_dict[tax_id] = {"meta": {"name": items[tax_id]["name"], "color": items[tax_id]["hex"], "accession_id": items[tax_id]["accession_id"], "abbreviations": list(items[tax_id]["abbreviations"].values()), "belongs_to_set": items[tax_id]["belongs_to_set"]}, "childrens_id": []} | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| return taxon_dict | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
@@ -505,12 +482,10 @@ def update_childrens(children_list, parent_id, taxon_children_dict): | |||||||||||||||||||||||
| #print("child id", child_id) | ||||||||||||||||||||||||
| children_list_current = [] | ||||||||||||||||||||||||
| update_childrens(children_list_current, child_id, taxon_children_dict) | ||||||||||||||||||||||||
| children_list.append({"name": taxon_children_dict[child_id]["meta"]["name"], "nodeColor": taxon_children_dict[child_id]["meta"]["color"], "children": children_list_current}) | ||||||||||||||||||||||||
| children_list.append({"name": taxon_children_dict[child_id]["meta"]["name"], "nodeColor": taxon_children_dict[child_id]["meta"]["color"], "accession_id": taxon_children_dict[child_id]["meta"]["accession_id"], "abbreviations": taxon_children_dict[child_id]["meta"]["abbreviations"], "belongs_to_set": taxon_children_dict[child_id]["meta"]["belongs_to_set"], "children": children_list_current}) | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line has become very long with the addition of new keys. To improve readability, consider creating the dictionary first and then appending it to the list. This also allows you to reduce repetition by getting the
Suggested change
|
||||||||||||||||||||||||
| return | ||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||
| return | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def taxonomy_postprocessing(items): | ||||||||||||||||||||||||
| taxon_children = getting_childrens(items) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `DHBA` prefix is defined but not used in the SPARQL query. It should be removed to avoid confusion and keep the code clean.