Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion src/ncbitaxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ def convert_synonyms(tax_id, synonyms):
return output


def convert_node(node, label, merged, synonyms, citations):
def convert_node(node, label, merged, synonyms, citations, divisions):
"""Given a node dictionary, a label string, and lists for merged, synonyms, and citations,
plus a divisions dictionary mapping division IDs to names,
return a Turtle string representing this tax_id."""
tax_id = node["tax_id"]
output = [f"NCBITaxon:{tax_id} a owl:Class"]
Expand Down Expand Up @@ -193,6 +194,13 @@ def convert_node(node, label, merged, synonyms, citations):
gc_id = node["genetic_code_id"]
if gc_id:
output.append(f'; oboInOwl:hasDbXref "GC_ID:{gc_id}"^^xsd:string')

div_id = node["division_id"]
if div_id and div_id in divisions:
division_name= escape_literal(divisions[div_id])
output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string')
# division_id = label_to_id(divisions[div_id])
# output.append(f'; ncbitaxon:has_division NCBITaxon:{division_id}')

for merge in merged:
output.append(f'; oboInOwl:hasAlternativeId "NCBITaxon:{merge}"^^xsd:string')
Expand Down Expand Up @@ -224,6 +232,7 @@ def convert(taxdmp_path, output_path, taxa=None):
synonyms = defaultdict(list)
merged = defaultdict(list)
citations = defaultdict(list)
divisions = defaultdict(str)
with open(output_path, "w") as output:
isodate = date.today().isoformat()
ncbi_date = date.today().replace(day=1)
Expand Down Expand Up @@ -299,7 +308,18 @@ def convert(taxdmp_path, output_path, taxa=None):
rdfs:subPropertyOf oboInOwl:SynonymTypeProperty .
"""))

output.write("""ncbitaxon:has_division a owl:AnnotationProperty
; rdfs:label "has division"^^xsd:string
; rdfs:comment "A metadata relation indicating taxonomic division (eg Bacteria, Eukaryota)"^^xsd:string
; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string
.
""")
with zipfile.ZipFile(taxdmp_path) as taxdmp:
with taxdmp.open("division.dmp") as dmp:
for line in io.TextIOWrapper(dmp):
div_id, _div_code, name, _comments , _ = split_line(line)
divisions[div_id] = name

with taxdmp.open("names.dmp") as dmp:
for line in io.TextIOWrapper(dmp):
tax_id, name, unique, name_class, _ = split_line(line)
Expand Down Expand Up @@ -365,6 +385,7 @@ def convert(taxdmp_path, output_path, taxa=None):
merged[tax_id],
synonyms[tax_id],
citations[tax_id],
divisions,
)
output.write(result)

Expand Down Expand Up @@ -424,6 +445,17 @@ def convert(taxdmp_path, output_path, taxa=None):
; obo:IAO_0100001 {rank_curie}
.
"""
# )
# # Add division classes
# for division_id, division_name in divisions.items():
# division_class_id = label_to_id(division_name)
# output.write(
# f"""NCBITaxon:{division_class_id} a owl:Class
# ; rdfs:label "{division_name}"^^xsd:string
# ; rdfs:subClassOf <http://purl.obolibrary.org/obo/NCBITaxon#_taxonomic_division>
# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string
# .
# """
)

parent_taxrank_id = "9000000" if label in pseudo_ranks else "0000000"
Expand Down Expand Up @@ -460,3 +492,12 @@ def main():

if __name__ == "__main__":
main()


# Commented-out chunk (lines 368 to 373):
# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string
# .

# <http://purl.obolibrary.org/obo/NCBITaxon#_taxonomic_division> a owl:Class
# ; rdfs:label "taxonomic division"^^xsd:string
# ; rdfs:comment "This is an abstract class for NCBI taxonomic divisions."^^xsd:string