From 4eec9d1434ed6304f227cf3cba8aee448cc0d907 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 14 Jul 2022 11:44:06 -0400 Subject: [PATCH 1/3] draft function --- dandi/bids_utils.py | 30 ++++++++++++++++++++++++++++++ dandi/bids_validator_xs.py | 10 +++++++--- dandi/tests/test_bids_utils.py | 4 ++++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 dandi/tests/test_bids_utils.py diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py index 3e4cdbe09..a306eac59 100644 --- a/dandi/bids_utils.py +++ b/dandi/bids_utils.py @@ -1,3 +1,6 @@ +from dandi.bids_validator_xs import validate_bids +from dandi.dandiapi import DandiAPIClient + from .utils import pluralize @@ -75,3 +78,30 @@ def report_errors( bold=True, fg="green", ) + + +def summary(dandi_id): + import re + + with DandiAPIClient.for_dandi_instance("dandi") as client: + dandiset = client.get_dandiset(dandi_id) + path_list = [] + for asset in dandiset.get_assets(): + i = f"dummy/{asset.path}" + if "_photo" in i: + print( + "Fixing _photo file, https://github.com/dandisets/000108/issues/7" + ) + print(" - Pre-repair: ", i) + session = re.match( + ".*?/ses-(?P([a-zA-Z0-9]*?))/.*?", + "sub-MITU01/ses-20220311h18m03s49/micr/sub-MITU01_sample-20_photo.jpg", + ).groupdict()["session"] + i = i.replace("_sample", f"_ses-{session}_sample") + print(" + Post-repair: ", i) + path_list.append(i) + + result = validate_bids(path_list, dummy_paths=True) + print(result["match_listing"]) + print(result["path_tracking"]) + print(result.keys()) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index 3de0bde3d..6dd41d980 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -364,7 +364,7 @@ def load_all( def validate_all( - bids_paths, + paths_list, regex_schema, debug=False, ): @@ -398,7 +398,6 @@ def validate_all( """ tracking_schema = deepcopy(regex_schema) - paths_list = _get_paths(bids_paths) tracking_paths = deepcopy(paths_list) if debug: itemwise_results = [] @@ -695,6 +694,7 @@ def validate_bids( schema_version=None, debug=False, report_path=False, + dummy_paths=False, ): """ Validate paths according to BIDS schema. @@ -743,8 +743,12 @@ def validate_bids( bids_paths, schema_reference_root, schema_version ) regex_schema = load_all(bids_schema_dir) + if not dummy_paths: + paths_list = _get_paths(bids_paths) + else: + paths_list = bids_paths validation_result = validate_all( - bids_paths, + paths_list, regex_schema, debug=debug, ) diff --git a/dandi/tests/test_bids_utils.py b/dandi/tests/test_bids_utils.py new file mode 100644 index 000000000..eec621808 --- /dev/null +++ b/dandi/tests/test_bids_utils.py @@ -0,0 +1,4 @@ +def test_summary(): + from dandi.bids_utils import summary + + summary("000108") From 8a23c5c70c3b68e5e371749f870b156674259dce Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 14 Jul 2022 14:51:39 -0400 Subject: [PATCH 2/3] working draft --- dandi/bids_utils.py | 126 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 18 deletions(-) diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py index a306eac59..2edabf6b9 100644 --- a/dandi/bids_utils.py +++ b/dandi/bids_utils.py @@ -1,7 +1,9 @@ from dandi.bids_validator_xs import validate_bids from dandi.dandiapi import DandiAPIClient -from .utils import pluralize +from .utils import get_logger, pluralize + +lgr = get_logger() def is_valid( @@ -80,28 +82,116 @@ def report_errors( ) -def summary(dandi_id): - import re +def print_summary( + summary, + sections=[ + ["subject", "session"], + ["session", "subject"], + ["subject", "sample"], + ["subject", "stain"], + ], + max_detail=3, +): + out = "" + for section in sections: + base = section[0] + detail = section[1] + out += f"Here is the {base} to {detail} summary:\n" + for entry in summary[base + "_summary"]: + details = entry[detail + "s"] + detail_literal = f"{len(details)} (" + if len(details) > max_detail: + detail_literal += ", ".join(details[:max_detail]) + ", ...)" + else: + detail_literal += ", ".join(details) + ")" + out += f"\t-`{entry[base]}`\t{detail_literal}\n" + print(out) + + +def summary( + dandi_id, + entities=["subject", "session", "sample", "stain"], +): with DandiAPIClient.for_dandi_instance("dandi") as client: dandiset = client.get_dandiset(dandi_id) path_list = [] for asset in dandiset.get_assets(): i = f"dummy/{asset.path}" - if "_photo" in i: - print( - "Fixing _photo file, https://github.com/dandisets/000108/issues/7" - ) - print(" - Pre-repair: ", i) - session = re.match( - ".*?/ses-(?P([a-zA-Z0-9]*?))/.*?", - "sub-MITU01/ses-20220311h18m03s49/micr/sub-MITU01_sample-20_photo.jpg", - ).groupdict()["session"] - i = i.replace("_sample", f"_ses-{session}_sample") - print(" + Post-repair: ", i) - path_list.append(i) + if "sub-MITU01h3" in i and "sub-MITU01" in i: + lgr.warning("Fixing subject field inconsistencies:") + lgr.warning(" - Pre-repair: %s", i) + i = i.replace("sub-MITU01h3", "sub-MITU01") + lgr.warning(" + Post-repair: %s", i) + # ome.zarr support pending: + # https://github.com/dandi/dandi-cli/pull/1050 + if "ome.zarr" not in i: + path_list.append(i) result = validate_bids(path_list, dummy_paths=True) - print(result["match_listing"]) - print(result["path_tracking"]) - print(result.keys()) + for i in result["path_tracking"]: + lgr.warning("`%s` was not matched by any BIDS regex pattern.", i) + match_listing = result["match_listing"] + entity_sets = {} + for entity in entities: + entity_sets[entity] = set( + [i[entity] for i in match_listing if entity in i.keys()] + ) + # subjects = set([i["subject"] for i in match_listing if "subject" in i.keys()]) + # sessions = set([i["session"] for i in match_listing if "session" in i.keys()]) + # sessions = set([i["sample"] for i in match_listing if "sample" in i.keys()]) + + summary_full = {} + for entity in entities: + sub_summary = [] + for value in entity_sets[entity]: + entry = {} + entry[entity] = value + for _entity in entities: + if _entity == entity: + continue + entry[_entity + "s"] = list( + set( + [ + i[_entity] + for i in match_listing + if entity in i.keys() + and _entity in i.keys() + and i[entity] == value + ] + ) + ) + sub_summary.append(entry) + summary_full[entity + "_summary"] = sub_summary + + # subject_summary = [] + # for subject in subjects: + # entry = {} + # entry["subject"] = subject + # entry["sessions"] = set( + # [ + # i["session"] + # for i in match_listing + # if "subject" in i.keys() + # and "session" in i.keys() + # and i["subject"]==subject + # ] + # ) + # subject_summary.append(entry) + # summary_full["subject_summary"] = subject_summary + # session_summary = [] + # for session in sessions: + # entry = {} + # entry["session"] = session + # entry["subjects"] = set( + # [ + # i["subject"] for + # i in match_listing + # if "subject" in i.keys() + # and "session" in i.keys() + # and i["session"]==session + # ] + # ) + # session_summary.append(entry) + # summary_full["session_summary"] = session_summary + print_summary(summary_full) From 1ae0a3b2a8ab407b9f7347ba0d7e33c3b627fc87 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 14 Jul 2022 14:52:19 -0400 Subject: [PATCH 3/3] Dropped obsolete bits --- dandi/bids_utils.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py index 2edabf6b9..a1b13f4d4 100644 --- a/dandi/bids_utils.py +++ b/dandi/bids_utils.py @@ -137,9 +137,6 @@ def summary( entity_sets[entity] = set( [i[entity] for i in match_listing if entity in i.keys()] ) - # subjects = set([i["subject"] for i in match_listing if "subject" in i.keys()]) - # sessions = set([i["session"] for i in match_listing if "session" in i.keys()]) - # sessions = set([i["sample"] for i in match_listing if "sample" in i.keys()]) summary_full = {} for entity in entities: @@ -163,35 +160,4 @@ def summary( ) sub_summary.append(entry) summary_full[entity + "_summary"] = sub_summary - - # subject_summary = [] - # for subject in subjects: - # entry = {} - # entry["subject"] = subject - # entry["sessions"] = set( - # [ - # i["session"] - # for i in match_listing - # if "subject" in i.keys() - # and "session" in i.keys() - # and i["subject"]==subject - # ] - # ) - # subject_summary.append(entry) - # summary_full["subject_summary"] = subject_summary - # session_summary = [] - # for session in sessions: - # entry = {} - # entry["session"] = session - # entry["subjects"] = set( - # [ - # i["subject"] for - # i in match_listing - # if "subject" in i.keys() - # and "session" in i.keys() - # and i["session"]==session - # ] - # ) - # session_summary.append(entry) - # summary_full["session_summary"] = session_summary print_summary(summary_full)