-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathontology_processing.py
More file actions
66 lines (54 loc) · 2.01 KB
/
ontology_processing.py
File metadata and controls
66 lines (54 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from pathlib import Path
import pandas as pd
from functools import lru_cache
def build_snomed_filter(rel_file_path):
    """Build an annotation filter backed by the SNOMED CT Is-a hierarchy.

    The relationship file is read once; the returned function keeps only
    annotations whose ``label_id`` has one of the allowed root concepts
    among its ancestors.

    Args:
        rel_file_path: Path to a tab-separated RF2 relationship file
            (``sct2_Relationship_*.txt``). It must provide the columns
            ``typeId``, ``sourceId`` and ``destinationId``. When ``id`` and
            ``effectiveTime`` are present, only the most recent version of
            each relationship is used; when ``active`` is present, rows
            whose current state is not ``"1"`` are ignored.

    Returns:
        A function ``filter_annotations(my_dict)`` that returns a shallow
        copy of ``my_dict`` with a filtered ``"annotations"`` list.

    NOTE(review): only relationships found in this single file are
    considered. If the hierarchy is split over several RF2 files, ancestors
    defined elsewhere cannot be reached - confirm this file is sufficient.
    """
    is_a_type_id = "116680003"
    needed_columns = {
        "id",
        "effectiveTime",
        "active",
        "typeId",
        "sourceId",
        "destinationId",
    }

    # Only load the columns that are actually used; relationship files are
    # large and the remaining columns are never read.
    rels = pd.read_csv(
        rel_file_path,
        dtype=str,
        sep="\t",
        usecols=lambda column: column in needed_columns,
    )

    # A Full release lists every historical version of a relationship.
    # Keep the latest version per id (effectiveTime is YYYYMMDD, so a plain
    # string sort is chronological) before looking at its state.
    if {"id", "effectiveTime"} <= set(rels.columns):
        rels = rels.sort_values("effectiveTime", kind="stable")
        rels = rels.drop_duplicates(subset="id", keep="last")
    # Retired relationships must not contribute edges to the hierarchy.
    if "active" in rels.columns:
        rels = rels[rels["active"] == "1"]

    isa_rels = rels[rels["typeId"] == is_a_type_id]

    # Map child -> set of direct parents once, so each traversal step is a
    # dictionary lookup instead of a scan over the whole table.
    parents_of = {}
    for child, parent in zip(isa_rels["sourceId"], isa_rels["destinationId"]):
        parents_of.setdefault(child, set()).add(parent)

    # SNOMED top-level roots we allow
    allowed_roots = {
        "439401001",  # diagnosis
        "71388002",  # procedure
        "185361000000102",  # medication
    }

    @lru_cache(maxsize=None)
    def is_descendant(code):
        """Return True if any ancestor of ``code`` is an allowed root.

        The test is strict: a code that is itself an allowed root is only
        accepted if one of its own ancestors is also an allowed root.
        """
        pending = [code]
        seen = set()  # guards against cycles and repeated ancestors
        while pending:
            current = pending.pop()
            if current in seen:
                continue
            seen.add(current)
            direct_parents = parents_of.get(current, ())
            # If any parent is a desired top-level group -> accept
            if not allowed_roots.isdisjoint(direct_parents):
                return True
            pending.extend(direct_parents)
        return False

    def filter_annotations(my_dict):
        """Return a copy of ``my_dict`` keeping only accepted annotations.

        ``my_dict["annotations"]`` is a list of mappings, each carrying a
        SNOMED code under ``"label_id"`` (compared as a string). A missing
        or ``None`` list is treated as empty. The input is not modified.
        """
        result = my_dict.copy()
        result["annotations"] = [
            annot
            for annot in (my_dict.get("annotations") or ())
            if is_descendant(str(annot.get("label_id")))
        ]
        return result

    return filter_annotations
# Build the shared filter once, at module level, from the relationship file.
_RELATIONSHIP_FILE = "OntologyData/uk_sct2cl_41.2.0_20251119000001Z/SnomedCT_UKClinicalRF2_PRODUCTION_20251119T000001Z/Full/Terminology/sct2_Relationship_UKCLFull_GB1000000_20251119.txt"

print("Loading SNOMED filter...")
snomed_filter = build_snomed_filter(_RELATIONSHIP_FILE)
print("SNOMED filter loaded.")