diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py
index 3e4cdbe09..a1b13f4d4 100644
--- a/dandi/bids_utils.py
+++ b/dandi/bids_utils.py
@@ -1,4 +1,9 @@
-from .utils import pluralize
+from dandi.bids_validator_xs import validate_bids
+from dandi.dandiapi import DandiAPIClient
+
+from .utils import get_logger, pluralize
+
+lgr = get_logger()
 
 
 def is_valid(
@@ -75,3 +80,113 @@ def report_errors(
             bold=True,
             fg="green",
         )
+
+
+def print_summary(
+    summary,
+    sections=(
+        ("subject", "session"),
+        ("session", "subject"),
+        ("subject", "sample"),
+        ("subject", "stain"),
+    ),
+    max_detail=3,
+):
+    """
+    Print how the values of one BIDS entity relate to those of another.
+
+    Parameters
+    ----------
+    summary : dict
+        Maps keys of the form ``entity + "_summary"`` to lists of entries.
+        Each entry maps the entity name to one of its values and the plural
+        name of every other entity (e.g. ``"sessions"``) to the list of
+        values found together with it; `summary()` builds this structure.
+    sections : sequence of (str, str), optional
+        ``(base, detail)`` entity pairs. One section is printed per pair,
+        listing the `detail` values recorded for each `base` value.
+    max_detail : int, optional
+        Number of `detail` values spelled out per entry before the listing
+        is abbreviated with ``...``.
+    """
+    # The defaults are tuples so that they cannot be mutated between calls.
+    lines = []
+    for section in sections:
+        base, detail = section[0], section[1]
+        lines.append(f"Here is the {base} to {detail} summary:\n")
+        for entry in summary[base + "_summary"]:
+            details = entry[detail + "s"]
+            shown = ", ".join(details[:max_detail])
+            if len(details) > max_detail:
+                shown += ", ..."
+            lines.append(f"\t-`{entry[base]}`\t{len(details)} ({shown})\n")
+    print("".join(lines))
+
+
+def summary(
+    dandi_id,
+    entities=("subject", "session", "sample", "stain"),
+):
+    """
+    Print a summary of the BIDS entities used by the assets of a dandiset.
+
+    Asset paths are listed from the "dandi" instance (paths containing
+    "ome.zarr" are skipped) and validated with ``dummy_paths=True``; the
+    entity values of the matched paths are passed to `print_summary()`.
+
+    Parameters
+    ----------
+    dandi_id : str
+        Identifier of the dandiset, e.g. ``"000108"``.
+    entities : sequence of str, optional
+        Entities to cross-tabulate. `print_summary()` is called with its
+        default sections, so the entities those name must be included.
+    """
+    with DandiAPIClient.for_dandi_instance("dandi") as client:
+        dandiset = client.get_dandiset(dandi_id)
+        path_list = []
+        for asset in dandiset.get_assets():
+            path = f"dummy/{asset.path}"
+            # "sub-MITU01" is a substring of "sub-MITU01h3", so checking for
+            # the longer label alone is sufficient.
+            if "sub-MITU01h3" in path:
+                lgr.warning("Fixing subject field inconsistencies:")
+                lgr.warning(" - Pre-repair: %s", path)
+                path = path.replace("sub-MITU01h3", "sub-MITU01")
+                lgr.warning(" + Post-repair: %s", path)
+            # ome.zarr support pending:
+            # https://github.com/dandi/dandi-cli/pull/1050
+            if "ome.zarr" not in path:
+                path_list.append(path)
+
+    # The client is only needed for listing the asset paths.
+    result = validate_bids(path_list, dummy_paths=True)
+    for path in result["path_tracking"]:
+        lgr.warning("`%s` was not matched by any BIDS regex pattern.", path)
+    match_listing = result["match_listing"]
+
+    summary_full = {}
+    for entity in entities:
+        # Sorting makes the report reproducible; set iteration order is not.
+        # `key=str` keeps the ordering defined should a value not be a string.
+        values = sorted(
+            {m[entity] for m in match_listing if entity in m},
+            key=str,
+        )
+        sub_summary = []
+        for value in values:
+            entry = {entity: value}
+            for other in entities:
+                if other == entity:
+                    continue
+                entry[other + "s"] = sorted(
+                    {
+                        m[other]
+                        for m in match_listing
+                        if entity in m and other in m and m[entity] == value
+                    },
+                    key=str,
+                )
+            sub_summary.append(entry)
+        summary_full[entity + "_summary"] = sub_summary
+    print_summary(summary_full)
diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py
index 3de0bde3d..6dd41d980 100644
--- a/dandi/bids_validator_xs.py
+++ b/dandi/bids_validator_xs.py
@@ -364,7 +364,7 @@ def load_all(
 
 
 def validate_all(
-    bids_paths,
+    paths_list,
     regex_schema,
     debug=False,
 ):
@@ -398,7 +398,6 @@ def validate_all(
     """
 
     tracking_schema = deepcopy(regex_schema)
-    paths_list = _get_paths(bids_paths)
     tracking_paths = deepcopy(paths_list)
     if debug:
         itemwise_results = []
@@ -695,6 +694,7 @@ def validate_bids(
     schema_version=None,
     debug=False,
     report_path=False,
+    dummy_paths=False,
 ):
     """
     Validate paths according to BIDS schema.
@@ -743,8 +743,12 @@ def validate_bids(
         bids_paths, schema_reference_root, schema_version
     )
     regex_schema = load_all(bids_schema_dir)
+    if not dummy_paths:
+        paths_list = _get_paths(bids_paths)
+    else:
+        paths_list = bids_paths
     validation_result = validate_all(
-        bids_paths,
+        paths_list,
         regex_schema,
         debug=debug,
     )
diff --git a/dandi/tests/test_bids_utils.py b/dandi/tests/test_bids_utils.py
new file mode 100644
index 000000000..eec621808
--- /dev/null
+++ b/dandi/tests/test_bids_utils.py
@@ -0,0 +1,4 @@
+def test_summary():
+    from dandi.bids_utils import summary
+
+    summary("000108")