From 0fa0ff68ac1e956a65c7dc90af4a9794c16e8d2c Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 10 Sep 2025 14:03:15 +0200 Subject: [PATCH 1/3] Add FAIRness weights and calculate_fairness --- src/sssom/util.py | 62 ++++++++++++++++++++++++++++++++++++++++++++- tests/test_utils.py | 20 +++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 3dc20939..d45125a7 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -667,7 +667,7 @@ def filter_redundant_rows(df: pd.DataFrame, ignore_predicate: bool = False) -> p df = df[ df.apply( lambda x: x[CONFIDENCE] - >= max_conf[(x[SUBJECT_ID], x[OBJECT_ID], x[PREDICATE_ID])], + >= max_conf[(x[SUBJECT_ID], x[OBJECT_ID], x[PREDICATE_ID])], axis=1, ) ] @@ -1712,3 +1712,63 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True): except KeyError: # Option does not exist in this version of pandas pass + + +#: A mapping from slots to the weight they have for calculating the FAIRness of a mapping +FAIR_WEIGHTS: dict[str, float] = { + "publication_date": 1.0, + "mapping_justification": 1.0, + "curation_rule": 1.0, + "similarity_measure": 1.0, + "author_label": 1.0, + "subject_preprocessing": 1.0, + "confidence": 1.0, + "object_category": 1.0, + "subject_source_version": 1.0, + "license": 1.0, + "see_also": 1.0, + "mapping_source": 1.0, + "subject_match_field": 1.0, + "issue_tracker_item": 1.0, + "subject_label": 1.0, + "subject_source": 1.0, + "object_source": 1.0, + "object_id": 1.0, + "author_id": 1.0, + "object_source_version": 1.0, + "mapping_tool": 1.0, + "other": 1.0, + "reviewer_id": 1.0, + "reviewer_label": 1.0, + "predicate_label": 1.0, + "object_label": 1.0, + "object_preprocessing": 1.0, + "curation_rule_text": 1.0, + "creator_label": 1.0, + "predicate_id": 1.0, + "subject_id": 1.0, + "object_match_field": 1.0, + "mapping_tool_version": 1.0, + "subject_type": 1.0, + "mapping_cardinality": 1.0, + "similarity_score": 1.0, + "mapping_provider": 1.0, + "match_string": 1.0, + 
"predicate_modifier": 1.0, + "mapping_date": 1.0, + "object_type": 1.0, + "creator_id": 1.0, + "subject_category": 1.0, + "comment": 1.0, +} +FAIR_TOTAL_WEIGHT = sum(FAIR_WEIGHTS.values()) + + +def calculate_fairness(mapping: SSSOM_Mapping) -> float: + """Calculate FAIRness of a mapping.""" + s: float = sum( + weight + for key, weight in FAIR_WEIGHTS.items() + if getattr(mapping, key, None) + ) + return s / FAIR_TOTAL_WEIGHT diff --git a/tests/test_utils.py b/tests/test_utils.py index d2da2703..ee21581a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -27,7 +27,9 @@ from sssom.io import extract_iris from sssom.parsers import parse_sssom_table from sssom.util import ( + FAIR_WEIGHTS, MappingSetDataFrame, + _get_sssom_schema_object, filter_out_prefixes, filter_prefixes, get_dict_from_mapping, @@ -635,3 +637,21 @@ def test_infer_scoped_cardinality(self) -> None: expected = ["1:n", "1:n", "1:n", "1:n", "1:n", "1:n"] self.assertEqual(expected, list(msdf.df[MAPPING_CARDINALITY].values)) self.assertNotIn(CARDINALITY_SCOPE, msdf.df.columns) + + +class TestFAIRScore(unittest.TestCase): + """Test the FAIRness score.""" + + def test_complete_weighting(self) -> None: + """Test that there are weights for all fields.""" + missing = set(_get_sssom_schema_object().mapping_slots).difference(FAIR_WEIGHTS) + if missing: + msg = "\n".join(missing) + self.fail(msg=f"missing weights for mapping fields: {msg}") + + def test_mapping_weight(self) -> None: + """Test calculating the weight on a mapping.""" + m = SSSOM_Mapping( + + ) + From 77ed9ccc309a8a3b0d8770f209a0a7b1e4de7cee Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Sep 2025 14:18:09 +0200 Subject: [PATCH 2/3] Reformat calculate_fairness and test stub --- src/sssom/util.py | 8 ++------ tests/test_utils.py | 5 +---- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index d45125a7..0711b96e 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -667,7 +667,7 @@ def 
filter_redundant_rows(df: pd.DataFrame, ignore_predicate: bool = False) -> p df = df[ df.apply( lambda x: x[CONFIDENCE] - >= max_conf[(x[SUBJECT_ID], x[OBJECT_ID], x[PREDICATE_ID])], + >= max_conf[(x[SUBJECT_ID], x[OBJECT_ID], x[PREDICATE_ID])], axis=1, ) ] @@ -1766,9 +1766,5 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True): def calculate_fairness(mapping: SSSOM_Mapping) -> float: """Calculate FAIRness of a mapping.""" - s: float = sum( - weight - for key, weight in FAIR_WEIGHTS.items() - if getattr(mapping, key, None) - ) + s: float = sum(weight for key, weight in FAIR_WEIGHTS.items() if getattr(mapping, key, None)) return s / FAIR_TOTAL_WEIGHT diff --git a/tests/test_utils.py b/tests/test_utils.py index ee21581a..215fed7c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -651,7 +651,4 @@ def test_complete_weighting(self) -> None: def test_mapping_weight(self) -> None: """Test calculating the weight on a mapping.""" - m = SSSOM_Mapping( - - ) - + m = SSSOM_Mapping() From ec588bbc8ffafaaf2569a60c4ba6a233b4869289 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 18 Sep 2025 08:56:58 +0200 Subject: [PATCH 3/3] Add full test --- src/sssom/util.py | 59 ++++++++++++++++++++++++++------------------- tests/test_utils.py | 17 ++++++++++++++++- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 0711b96e..50a6f3a4 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -1716,50 +1716,56 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True): #: A mapping from slots to the weight they have for calculating the FAIRness of a mapping FAIR_WEIGHTS: dict[str, float] = { - "publication_date": 1.0, + # required + "object_id": 1.0, + "predicate_id": 1.0, + "subject_id": 1.0, "mapping_justification": 1.0, + # Not required, but important + "license": 1.0, + "author_id": 1.0, + "creator_id": 1.0, + "reviewer_id": 1.0, + "confidence": 1.0, + # Other optional slots (full weight) + "publication_date": 1.0, + 
"mapping_date": 1.0, + "issue_tracker_item": 1.0, "curation_rule": 1.0, + "curation_rule_text": 1.0, "similarity_measure": 1.0, - "author_label": 1.0, "subject_preprocessing": 1.0, - "confidence": 1.0, "object_category": 1.0, "subject_source_version": 1.0, - "license": 1.0, - "see_also": 1.0, "mapping_source": 1.0, "subject_match_field": 1.0, - "issue_tracker_item": 1.0, - "subject_label": 1.0, "subject_source": 1.0, "object_source": 1.0, - "object_id": 1.0, - "author_id": 1.0, "object_source_version": 1.0, - "mapping_tool": 1.0, - "other": 1.0, - "reviewer_id": 1.0, - "reviewer_label": 1.0, - "predicate_label": 1.0, - "object_label": 1.0, "object_preprocessing": 1.0, - "curation_rule_text": 1.0, - "creator_label": 1.0, - "predicate_id": 1.0, - "subject_id": 1.0, "object_match_field": 1.0, + "mapping_tool": 1.0, "mapping_tool_version": 1.0, "subject_type": 1.0, - "mapping_cardinality": 1.0, "similarity_score": 1.0, "mapping_provider": 1.0, "match_string": 1.0, - "predicate_modifier": 1.0, - "mapping_date": 1.0, "object_type": 1.0, - "creator_id": 1.0, "subject_category": 1.0, - "comment": 1.0, + # These give extra context, but are not critical + "predicate_label": 0.1, + "object_label": 0.1, + "subject_label": 0.1, + # These don't matter / are not actionable for FAIR + "comment": 0.0, + "other": 0.0, + "creator_label": 0.0, + "reviewer_label": 0.0, + "author_label": 0.0, + # These might not be relevant, so don't penalize if missing + "predicate_modifier": 0.0, + "mapping_cardinality": 0.0, + "see_also": 0.0, } FAIR_TOTAL_WEIGHT = sum(FAIR_WEIGHTS.values()) @@ -1767,4 +1773,7 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True): def calculate_fairness(mapping: SSSOM_Mapping) -> float: """Calculate FAIRness of a mapping.""" s: float = sum(weight for key, weight in FAIR_WEIGHTS.items() if getattr(mapping, key, None)) - return s / FAIR_TOTAL_WEIGHT + # TODO: penalize for using label fields instead of ID fields (not implemented yet) + + rv: float = s / FAIR_TOTAL_WEIGHT + return rv 
diff --git a/tests/test_utils.py b/tests/test_utils.py index 215fed7c..03df2822 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -30,6 +30,7 @@ FAIR_WEIGHTS, MappingSetDataFrame, _get_sssom_schema_object, + calculate_fairness, filter_out_prefixes, filter_prefixes, get_dict_from_mapping, @@ -651,4 +652,18 @@ def test_complete_weighting(self) -> None: def test_mapping_weight(self) -> None: """Test calculating the weight on a mapping.""" - m = SSSOM_Mapping() + m1 = SSSOM_Mapping( + subject_id="DOID:0050601", + predicate_id="skos:exactMatch", + object_id="UMLS:C1863204", + mapping_justification=SEMAPV.ManualMappingCuration.value, + ) + m2 = SSSOM_Mapping( + subject_id="DOID:0050601", + subject_label="ADULT syndrome", + predicate_id="skos:exactMatch", + object_id="UMLS:C1863204", + object_label="ADULT syndrome", + mapping_justification=SEMAPV.ManualMappingCuration.value, + ) + self.assertLess(calculate_fairness(m1), calculate_fairness(m2))