Skip to content

Commit f2eb9c9

Browse files
committed
New Feature: SNOMED::ICD10CM Mapping Support
- Added a feature to convert the premade SNOMED-to-ICD10CM mappings provided by SNOMED into SSSOM format. General updates - util.py: Reorganized SSSOM_READ_FORMATS: the top half lists plain data formats and the bottom half lists special-case formats. Both halves of the list are sorted alphabetically.
1 parent da12568 commit f2eb9c9

File tree

2 files changed

+167
-4
lines changed

2 files changed

+167
-4
lines changed

sssom/parsers.py

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66
import typing
77
from collections import Counter
8+
from dateutil import parser as date_parser
89
from pathlib import Path
910
from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union, cast
1011
from urllib.request import urlopen
@@ -19,9 +20,11 @@
1920
from linkml_runtime.loaders.json_loader import JSONLoader
2021
from rdflib import Graph, URIRef
2122

22-
# from .sssom_datamodel import Mapping, MappingSet
23+
# TODO: PR comment: where matchtypeenum? can't find sssomschema, Mapping, or MappingSet. only MappingSetDataFrame
24+
# from .sssom_datamodel import Mapping, MappingSet, MatchTypeEnum
2325
from sssom_schema import Mapping, MappingSet
2426

27+
2528
from sssom.constants import (
2629
CONFIDENCE,
2730
CURIE_MAP,
@@ -261,6 +264,24 @@ def parse_obographs_json(
261264
)
262265

263266

267+
def parse_snomed_icd10cm_map_tsv(
    file_path: str,
    prefix_map: Optional[Dict[str, str]] = None,
    meta: Optional[Dict[str, str]] = None,
) -> MappingSetDataFrame:
    """Parse a SNOMED::ICD10CM mapping TSV file into a MappingSetDataFrame.

    The file is read into a pandas dataframe and handed to
    ``from_snomed_icd10cm_map_tsv``, which performs the actual conversion.

    :param file_path: The path to the SNOMED ICD10CM map TSV file
    :param prefix_map: an optional prefix map
    :param meta: an optional dictionary of metadata elements
    :return: A SSSOM MappingSetDataFrame
    """
    # Validate the path before attempting to read it.
    raise_for_bad_path(file_path)
    df = read_pandas(file_path)
    return from_snomed_icd10cm_map_tsv(df, prefix_map=prefix_map, meta=meta)
283+
284+
264285
def _get_prefix_map_and_metadata(
265286
prefix_map: Optional[PrefixMap] = None, meta: Optional[MetadataType] = None
266287
) -> Metadata:
@@ -666,6 +687,144 @@ def from_obographs(
666687
return to_mapping_set_dataframe(mdoc)
667688

668689

690+
def from_snomed_icd10cm_map_tsv(
    df: pd.DataFrame,
    prefix_map: Optional[PrefixMap] = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.

    Every row of ``df`` becomes one SSSOM mapping:

    - ``subject_id`` / ``subject_label``: ``SNOMED:<referencedComponentId>`` / ``referencedComponentName``
    - ``predicate_id`` / ``predicate_label``: ``SNOMED:<mapCategoryId>`` / ``mapCategoryName``
    - ``object_id`` / ``object_label``: ``ICD10CM:<mapTarget>`` / ``mapTargetName``
    - ``match_type``: always ``Unspecified``
    - ``mapping_date``: the date parsed from ``effectiveTime``
    - ``other``: pipe-delimited ``key=value`` pairs for ``id``, ``active``, ``moduleId``, ``refsetId``,
      ``mapGroup``, ``mapPriority``, ``mapRule`` and ``mapAdvice``

    :param df: A mappings dataframe
    :param prefix_map: A prefix map
    :param meta: A metadata dictionary
    :return: MappingSetDataFrame

    # Field descriptions
    # - Taken from: doc_Icd10cmMapReleaseNotes_Current-en-US_US1000124_20210901.pdf
    FIELD,DATA_TYPE,PURPOSE,Joe's comments
    - id,UUID,A 128 bit unsigned integer, uniquely identifying the map record,
    - effectiveTime,Time,Specifies the inclusive date at which this change becomes effective.,
    - active,Boolean,Specifies whether the member’s state was active (=1) or inactive (=0) from the nominal release date
    specified by the effectiveTime field.,
    - moduleId,SctId,Identifies the member version’s module. Set to a child of 900000000000443000|Module| within the
    metadata hierarchy.,The only value in the entire set is '5991000124107', which has label 'SNOMED CT to ICD-10-CM
    rule-based mapping module' (
    https://www.findacode.com/snomed/5991000124107--snomed-ct-to-icd-10-cm-rule-based-mapping-module.html).
    - refSetId,SctId,Set to one of the children of the |Complex map type| concept in the metadata hierarchy.,The only
    value in the entire set is '6011000124106', which has label 'ICD-10-CM complex map reference set' (
    https://www.findacode.com/snomed/6011000124106--icd-10-cm-complex-map-reference-set.html).
    - referencedComponentId,SctId,The SNOMED CT source concept ID that is the subject of the map record.,
    - mapGroup,Integer,An integer identifying a grouping of complex map records which will designate one map target at
    the time of map rule evaluation. Source concepts that require two map targets for classification will have two sets
    of map groups.,
    - mapPriority,Integer,Within a map group, the mapPriority specifies the order in which complex map records should be
    evaluated to determine the correct map target.,
    - mapRule,String,A machine-readable rule, (evaluating to either ‘true’ or ‘false’ at run-time) that indicates
    whether this map record should be selected within its map group.,
    - mapAdvice,String,Human-readable advice that may be employed by the software vendor to give an end-user advice on
    selection of the appropriate target code. This includes a) a summary statement of the map rule logic, b) a statement
    of any limitations of the map record and c) additional classification guidance for the coding professional.,
    - mapTarget,String,The target ICD-10 classification code of the map record.,
    - correlationId,SctId,A child of |Map correlation value| in the metadata hierarchy, identifying the correlation
    between the SNOMED CT concept and the target code.,
    - mapCategoryId,SctId,Identifies the SNOMED CT concept in the metadata hierarchy which is the MapCategory for the
    associated map record. This is a subtype of 447634004 |ICD-10 Map Category value|.,
    """
    prefix_map = _ensure_prefix_map(prefix_map)
    ms = _init_mapping_set(meta)

    # Source columns that have no dedicated SSSOM slot; they are preserved verbatim in the 'other' field.
    other_columns = (
        "id",
        "active",
        "moduleId",
        "refsetId",
        "mapGroup",
        "mapPriority",
        "mapRule",
        "mapAdvice",
    )

    # Open questions about the field mapping used below
    # -------------------------------------------------
    # predicate_id / predicate_modifier:
    #   - 'skos:exactMatch' was considered as predicate_id.
    #   - mapCategoryId: can use for mapping predicate? Or is correlationId more suitable? Or is there a SKOS
    #     predicate to map to in case where predicate is unknown? Most of these mappings are probably attempts at
    #     exact matches, but that can't be known for sure (at least not without using these fields to determine:
    #     mapGroup, mapPriority, mapRule, mapAdvice).
    #   - mapCategoryId,mapCategoryName: Only these in set: 447637006 "MAP SOURCE CONCEPT IS PROPERLY CLASSIFIED",
    #     447638001 "MAP SOURCE CONCEPT CANNOT BE CLASSIFIED WITH AVAILABLE DATA",
    #     447639009 "MAP OF SOURCE CONCEPT IS CONTEXT DEPENDENT"
    #   - predicate_modifier: Modifier for negating the predicate.
    #     See https://github.com/mapping-commons/sssom/issues/40
    #     Range: PredicateModifierEnum: (joe: only lists 'Not' as an option)
    #     Example: Not Negates the predicate, see documentation of predicate_modifier_enum
    #   - predicate_id <- mapAdvice? predicate_modifier <- mapAdvice?
    #     mapAdvice: Pipe-delimited qualifiers. Ex:
    #       "ALWAYS Q71.30 | CONSIDER LATERALITY SPECIFICATION"
    #       "IF LISSENCEPHALY TYPE 3 FAMILIAL FETAL AKINESIA SEQUENCE SYNDROME CHOOSE Q04.3 | MAP OF SOURCE CONCEPT
    #       IS CONTEXT DEPENDENT"
    #       "MAP SOURCE CONCEPT CANNOT BE CLASSIFIED WITH AVAILABLE DATA"
    #
    # match_type:
    #   - match_type <- mapRule? ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always
    #     shows TRUE. Does this mean skos:exactMatch could be used in these cases?
    #   - match_type <- correlationId? In officially downloaded SNOMED mappings, all of them had correlationId of
    #     447561005, which is 'unspecified':
    #     https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html
    #     If correlationId is indeed more appropriate for predicate_id, then there is probably no representative
    #     field for 'match_type'. Until that is decided, every mapping is emitted as 'Unspecified'.
    #
    # More fields (https://mapping-commons.github.io/sssom/Mapping/):
    #   - subject_category: absent
    #   - author_id: can this be "SNOMED"?
    #   - author_label: can this be "SNOMED"?
    #   - reviewer_id: can this be "SNOMED"?
    #   - reviewer_label: can this be "SNOMED"?
    #   - creator_id: can this be "SNOMED"?
    #   - creator_label: can this be "SNOMED"?
    #   - license: Is this something that can be determined?
    #   - subject_source: URL of some official page for SNOMED version used?
    #   - subject_source_version: Is this knowable?
    #   - objectCategory <= mapRule?
    #     mapRule: ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always shows TRUE.
    #     Does this mean skos:exactMatch could be used in these cases?
    #   - object_category / objectCategory:
    #     Description: The conceptual category to which the subject belongs to. This can be a string denoting
    #     the category or a term from a controlled vocabulary.
    #     Example: UBERON:0001062 (The CURIE of the Uberon term for "anatomical entity".)
    #   - object_source: URL of some official page for ICD10CM version used?
    #   - object_source_version: would this be "10CM" as in "ICD10CM"? Or something else? Or nothing?
    #   - mapping_provider: can this be "SNOMED"?
    #   - mapping_cardinality: Could 1:1 or 1:many or many:1 be determined based on:
    #     mapGroup, mapPriority, mapRule, mapAdvice?
    #   - match_term_type: What is this?
    #   - see_also: Should this be a URL to the SNOMED term?
    #   - comment: Description: Free text field containing either curator notes or text generated by tool providing
    #     additional informative information.

    mlist: List[Mapping] = []
    for _, row in df.iterrows():
        mdict = {
            "subject_id": f'SNOMED:{row["referencedComponentId"]}',
            "subject_label": row["referencedComponentName"],
            "predicate_id": f'SNOMED:{row["mapCategoryId"]}',
            "predicate_label": row["mapCategoryName"],
            "object_id": f'ICD10CM:{row["mapTarget"]}',
            "object_label": row["mapTargetName"],
            # The previous code built MatchTypeEnum('Unspecified') in both branches of a conditional on
            # correlationId, but MatchTypeEnum is not imported in this module (its import is commented out),
            # which would raise NameError on the first row.
            # NOTE(review): the plain string is passed instead; presumably the Mapping constructor coerces it
            # to the enum - confirm against the schema version in use.
            "match_type": "Unspecified",
            # str() first because effectiveTime may have been read as a number rather than text.
            "mapping_date": date_parser.parse(str(row["effectiveTime"])).date(),
            "other": "|".join(f"{k}={row[k]!s}" for k in other_columns),
        }
        mlist.append(_prepare_mapping(Mapping(**mdict)))

    ms.mappings = mlist
    _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
    doc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
    return to_mapping_set_dataframe(doc)
826+
827+
669828
# All from_* take as an input a python object (data frame, json, etc) and return a MappingSetDataFrame
670829
# All read_* take as an input a a file handle and return a MappingSetDataFrame (usually wrapping a from_* method)
671830

@@ -690,6 +849,9 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
690849
return parse_alignment_xml
691850
elif input_format == "obographs-json":
692851
return parse_obographs_json
852+
elif input_format == "snomed-icd10cm-map-tsv":
853+
return parse_snomed_icd10cm_map_tsv
854+
693855
else:
694856
raise Exception(f"Unknown input format: {input_format}")
695857

sssom/util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,13 @@
6969
PREFIX_MAP_KEY = "curie_map"
7070

7171
# Input formats accepted when reading mappings. Each group is kept in alphabetical order.
SSSOM_READ_FORMATS = [
    # Plain data formats
    "json",
    "owl",
    "rdf",
    "tsv",
    # Special-case formats
    "alignment-api-xml",
    "obographs-json",
    "snomed-icd10cm-map-tsv",
]
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir"]
8081

0 commit comments

Comments
 (0)