From f04ea143e31f4a7869bb5a058840a2fc657d57a6 Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Fri, 6 Feb 2026 12:58:02 +0100 Subject: [PATCH 1/9] initial rework --- .DS_Store | Bin 8196 -> 10244 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.DS_Store b/.DS_Store index 6854aca84f13eec81f0b7f785c8ade2729d84c5e..e1e3ea8ec5c4e996a4e15fe511b33a52dd9ef260 100644 GIT binary patch literal 10244 zcmeHMO>7%Q6n>K?*v?;_G)Y?(B&(qUsUgIaw5mX=<0eAkuO*I3sQOG68T0YOOzp^Brc}uLEJhtp69Er^#mhNf(r1HSn-Zn&^rgNFQ+(oZGQ$)Ymi#FjaLMyMq?okN6`Br-;f+ND#{4fsg4l~yhU;`q*ZJVW5Un`w6^vz;=?6hrpnqldVWH2f^AQxXWtctyvw;NW)mAnOiBAH62is^&b zuOG|KPGzzukKdTeT+e1_j!k8bAD_8#Bb6MVeDsN>;<{P0>{sCenh17U8uEW{-(N#7 z5`>CdHcK#*F_9^wyb*XTzHWK^Q##%9In_TfI5fOBJuo5s*X`Pv$p2y-b$!{p0$-7+ceMGuHhNBb^fB%?&-BfYt4p6w%|;ir`&V8 zzN3*JWe(RE{m2~rrmA5_5uB{mGf-RRS3w5n*Sk=i>zxr`bP3Ab=prvWj z?;!b%wR!;+lV3DeWq;YSEgekqr6QWlJrh8Zbd=_(Ko{v{YSP>EK7B}E(l>OQexo~L zNKA;s;;5Jsv*Luvi8-+(E{Ij3i7P&TPwOdoAdnXj;i3~+YBjwq{@#uVCzigxiH~0K zH<5^(a7yWpQjH=bZm;&grt`k#~Jieb@1}Y{(L7Lw#R}3ybjklAvd4kxtVBJ&&BZ5J7X16-E!Y z6%%EY1V*PaqD|9ygx7_A4y@!tMW;tlh?ppfxh%I-D`a^SwqgX543s1~%A6uHLj#3O zUEaBzbun@`3H^|;4sIK|-OeSLEQ3_7Aq$~*6V_|cTbF)xD5PX*6qYaq>{h5Wn}!!W zVo^!;xCgme42QDXu*B7&(T0ayI&$fTf*pQDN=N2Z)+|L!Slv0AM^t>Bqft(L13wwgKb zv9`yzH^blm7rOud{~q%yHaCty9DzR*0ns~OnCIH2b=HeAW1pZ5u2o#z7}xZvWWkLg yJT5=S;|G6^mj_sk&F~yI^m74i`V=E*fBQcJwm&Mw=l}TpFaKjD>%{-l`Tq~wke9&# delta 116 zcmZn(XmOBWU|?W$DortDU;r^WfEYvza8FDWo2aMAD7!IWH$S87W*&hZjFW!|PM+)} zqBS{Ota-AUc;DoCat#wxMK`PSJYW`N2I>L=32q?a3Nm10;& Date: Tue, 10 Feb 2026 10:49:24 +0100 Subject: [PATCH 2/9] major restructuring --- .DS_Store | Bin 10244 -> 10244 bytes CITATION.cff | 30 ++ MRI_schema.json | 360 ++++++++++++++++++ benchmarks/test_performance.py | 62 --- codemeta.json | 72 ++++ configs/config_data_cleaning.json | 4 - configs/config_dicom_file_validation.json | 5 - dicom_mapping_script.py | 10 - example/metadata_maps/map_full_path.json | 48 --- jammato/__init__.py | 21 - jammato/analyse_study.py | 68 ---- 
jammato/attribute_inserter.py | 154 -------- jammato/attribute_mapper.py | 259 ------------- jammato/cache_schemas.py | 42 -- jammato/data_cleaning.py | 97 ----- jammato/dicom_mapping.py | 167 -------- jammato/dicom_reader.py | 112 ------ jammato/metadata_reader.py | 90 ----- jammato/schema_reader.py | 184 --------- jammato/schemas_collector.py | 41 -- jammato/version.py | 1 - mapping_cli.py | 111 ++++++ mixed_output.json | 1 + plugin_wrapper.py | 10 + requirements-dev.txt | 19 +- requirements.txt | 14 +- somesy.toml | 35 ++ src/IO/InputReader.py | 91 +++++ src/IO/MapfileReader.py | 82 ++++ src/IO/MappingAbortionError.py | 5 + src/IO/OutputWriter.py | 31 ++ src/Preprocessor.py | 141 +++++++ {benchmarks => src}/__init__.py | 0 src/config.py | 14 + src/model/ImageMD.py | 18 + src/model/SchemaConcepts/MRI_Image.py | 49 +++ src/model/SchemaConcepts/Schema_Concept.py | 59 +++ .../codegen/SchemaClasses_MRI.py | 240 ++++++++++++ src/model/SchemaConcepts/codegen/codegen.md | 21 + src/parser/ImageParser.py | 20 + src/parser/ParserFactory.py | 19 + src/parser/impl/MRI_Parser.py | 226 +++++++++++ src/parser/mapping_util.py | 165 ++++++++ src/resources/maps/mapping/README.md | 90 +++++ src/resources/maps/mapping/__init__.py | 9 + src/resources/maps/mapping/map_full_path.json | 48 +++ .../maps/mapping}/map_mixed_path.json | 0 .../maps/mapping}/map_relative_path.json | 0 .../maps/mapping}/map_study_only.json | 0 src/util.py | 211 ++++++++++ 50 files changed, 2186 insertions(+), 1370 deletions(-) create mode 100644 CITATION.cff create mode 100644 MRI_schema.json delete mode 100644 benchmarks/test_performance.py create mode 100644 codemeta.json delete mode 100644 configs/config_data_cleaning.json delete mode 100644 configs/config_dicom_file_validation.json delete mode 100644 dicom_mapping_script.py delete mode 100644 example/metadata_maps/map_full_path.json delete mode 100644 jammato/__init__.py delete mode 100644 jammato/analyse_study.py delete mode 100644 
jammato/attribute_inserter.py delete mode 100644 jammato/attribute_mapper.py delete mode 100644 jammato/cache_schemas.py delete mode 100644 jammato/data_cleaning.py delete mode 100644 jammato/dicom_mapping.py delete mode 100644 jammato/dicom_reader.py delete mode 100644 jammato/metadata_reader.py delete mode 100644 jammato/schema_reader.py delete mode 100644 jammato/schemas_collector.py delete mode 100644 jammato/version.py create mode 100644 mapping_cli.py create mode 100644 mixed_output.json create mode 100644 plugin_wrapper.py create mode 100644 somesy.toml create mode 100644 src/IO/InputReader.py create mode 100644 src/IO/MapfileReader.py create mode 100644 src/IO/MappingAbortionError.py create mode 100644 src/IO/OutputWriter.py create mode 100644 src/Preprocessor.py rename {benchmarks => src}/__init__.py (100%) create mode 100644 src/config.py create mode 100644 src/model/ImageMD.py create mode 100644 src/model/SchemaConcepts/MRI_Image.py create mode 100644 src/model/SchemaConcepts/Schema_Concept.py create mode 100644 src/model/SchemaConcepts/codegen/SchemaClasses_MRI.py create mode 100644 src/model/SchemaConcepts/codegen/codegen.md create mode 100644 src/parser/ImageParser.py create mode 100644 src/parser/ParserFactory.py create mode 100644 src/parser/impl/MRI_Parser.py create mode 100644 src/parser/mapping_util.py create mode 100644 src/resources/maps/mapping/README.md create mode 100644 src/resources/maps/mapping/__init__.py create mode 100644 src/resources/maps/mapping/map_full_path.json rename {example/metadata_maps => src/resources/maps/mapping}/map_mixed_path.json (100%) rename {example/metadata_maps => src/resources/maps/mapping}/map_relative_path.json (100%) rename {example/metadata_maps => src/resources/maps/mapping}/map_study_only.json (100%) create mode 100644 src/util.py diff --git a/.DS_Store b/.DS_Store index e1e3ea8ec5c4e996a4e15fe511b33a52dd9ef260..47364e43c3ff80370962a67b920993b66ddf72a8 100644 GIT binary patch delta 676 
zcmZn(XbG6$&uFnRU^hRb#bzFX#f*|X49*Om3?U4TK~2dL8#cxcAhUXNwag(#3C8C@b|uU)K<_aF ztuA6np3EvPFAQZTl@}L40|2*oxw{dXH$RZL!x#(Ios(`DoSdIq0F-86U{O^;D9g=v zaexhpcw{Fp6c^Tn+E5l;l$VpAmk!j+2*iBAKu87pq68R>MZkc` nW5}BvE2BBNL~hl_)NaPj>SWA3zn delta 901 zcmZn(XbG6$&uFtTU^hRb&1N2f#fB40#O63>gf$42cXy4A~6Do;mr+NjdpR z3=9kcKdjG)y$YNl?swXGiFgQ6sw*ahyL3#@=b-DR2E>N>Mgv=x7-wIrC#1XgR z6ePv?)fQwRJ0M{4V?nEMcA&rVfxb&)$Ye-I_ZOoQ&{|}Fi8G`C)q!;6GQ@*Jqky3R zD3S-%8_!S-lqmx83K&Wz9}!cX94_R+xO4IXp+auS>S|*{Qym3EOS8#urDPbpCv!>5 zGxltb5Z=zj&(4qq4Axw*aU~4-lO3ew8QCXG2}*G=u&Am4UBEusPEcyHtt>Z&Qh!-- zpwd1;sRRagph5-)ZU!%)S-wD+$PfZ_W<1bosSM?)(Z$3BR5=eRx^U=9Dlaa8Mg-g& zhL#3EZy1>(ydm9#+uqHs!gm;zaTs3~T$GoSpO+5Q&N$gcY|7+3F~P|%#STo?moQ|5 zxN&ou#39DXvm^v3TS|#ez9ud>`Hw93 None: - benchmark.pedantic( - schema_reader, - args=(schema_reader_instance,), - rounds=NUM_ROUNDS, - iterations=NUM_ITERATIONS - ) -def test_dicom_reader(benchmark: Any) -> None: - benchmark.pedantic( - dicom_reader, - args=(dicom_file,), - rounds=NUM_ROUNDS, - iterations=NUM_ITERATIONS - ) -def test_Attribute_Mapper(benchmark: Any) -> None: - benchmark.pedantic( - Attribute_Mapper, - args=(dicom_object, map_json_path), - rounds=NUM_ROUNDS, - iterations=NUM_ITERATIONS - ) -def test_mri_inserter(benchmark: Any) -> None: - benchmark.pedantic( - mri_inserter, - args=(schema_skeleton, list(schema_skeleton.keys()), study_map, None), - rounds=NUM_ROUNDS, - iterations=NUM_ITERATIONS - ) -def test_dicom_mapping(benchmark: Any) -> None: - benchmark.pedantic( - dicom_mapping_class, - args=(map_json_path, dicom_file_zipped, ["perImage"]), - rounds=NUM_ROUNDS, - iterations=NUM_ITERATIONS - ) \ No newline at end of file diff --git a/codemeta.json b/codemeta.json new file mode 100644 index 0000000..e459f82 --- /dev/null +++ b/codemeta.json @@ -0,0 +1,72 @@ +{ + "@context": [ + "https://doi.org/10.5063/schema/codemeta-2.0", + "https://w3id.org/software-iodata", + "https://raw.githubusercontent.com/jantman/repostatus.org/master/badges/latest/ontology.jsonld", + 
"https://schema.org", + "https://w3id.org/software-types" + ], + "@type": "SoftwareSourceCode", + "author": [ + { + "@type": "Person", + "givenName": "Nicolas", + "familyName": "Blumenröhr", + "@id": "https://orcid.org/0009-0007-0235-4995", + "identifier": "https://orcid.org/0009-0007-0235-4995" + }, + { + "@type": "Person", + "givenName": "Rossella", + "familyName": "Aversa", + "@id": "https://orcid.org/0000-0003-2534-0063", + "identifier": "https://orcid.org/0000-0003-2534-0063" + } + ], + "name": "JaMMaTo", + "description": "The software JaMMaTo (JSON Metadata Mapping Tool) is a metadata mapping tool based on Python, and is used for mapping metadata from a proprietary file format schema to a JSON format schema.", + "version": "v4.0.0rc", + "keywords": [ + "MRI", + "DICOM", + "metadata", + "extraction", + "schema" + ], + "maintainer": [ + { + "@type": "Person", + "givenName": "Gabin Thibaut", + "familyName": "Oumbe Tekam", + "@id": "https://orcid.org/0000-0002-6745-3464", + "identifier": "https://orcid.org/0000-0002-6745-3464" + }, + { + "@type": "Person", + "givenName": "Germaine", + "familyName": "Götzelmann", + "@id": "https://orcid.org/0000-0003-3974-3728", + "identifier": "https://orcid.org/0000-0003-3974-3728" + } + ], + "license": [ + "https://spdx.org/licenses/Apache-2.0" + ], + "codeRepository": "https://github.com/kit-data-manager/JaMMaTo", + "contributor": [ + { + "@type": "Person", + "givenName": "Gabin Thibaut", + "familyName": "Oumbe Tekam", + "@id": "https://orcid.org/0000-0002-6745-3464", + "identifier": "https://orcid.org/0000-0002-6745-3464" + }, + { + "@type": "Person", + "givenName": "Germaine", + "familyName": "Götzelmann", + "@id": "https://orcid.org/0000-0003-3974-3728", + "identifier": "https://orcid.org/0000-0003-3974-3728" + } + ] +} \ No newline at end of file diff --git a/configs/config_data_cleaning.json b/configs/config_data_cleaning.json deleted file mode 100644 index 398b229..0000000 --- a/configs/config_data_cleaning.json +++ 
/dev/null @@ -1,4 +0,0 @@ -{ - "merge_date_time": [["studyDate", "studyTime"], "studyDateTime"], - "merge_min_max_values": [["smallestImagePixelValue", "largestImagePixelValue"], ""] -} diff --git a/configs/config_dicom_file_validation.json b/configs/config_dicom_file_validation.json deleted file mode 100644 index 67611dc..0000000 --- a/configs/config_dicom_file_validation.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "file_study_instance_uid":"studyInstanceUid", - "file_sop_instance_uid":"sopInstanceUid", - "file_series_instance_uid":"seriesInstanceUid" -} diff --git a/dicom_mapping_script.py b/dicom_mapping_script.py deleted file mode 100644 index 5bf6b35..0000000 --- a/dicom_mapping_script.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys -import jammato.dicom_mapping - -map_json_path = sys.argv[1] - -metadata_files_location = sys.argv[2] - -mapped_metadata = sys.argv[3] - -jammato.Dicom_Mapping(map_json_path, metadata_files_location, mapped_metadata) diff --git a/example/metadata_maps/map_full_path.json b/example/metadata_maps/map_full_path.json deleted file mode 100644 index 44f0234..0000000 --- a/example/metadata_maps/map_full_path.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "uri": "https://metarepo.nffa.eu/api/v1/schemas/mri_schema?version=7", - "study": { - "study.studyID": "studyInstanceUid" , - "study.studyTitle": "studyDescription", - "study.studyDateTime": "studyDateTime", - "study.program": "softwareVersions", - "study.user.name": "referringPhysiciansName", - "study.user.affiliation.institutionName": "institutionName", - "study.user.affiliation.institutionAcronym": "institutionCodeSequence", - "study.user.affiliation.institutionDepartment": "institutionalDepartmentName", - "study.user.affiliation.institutionID": "institutionalDepartmentTypeCodeSequence", - "study.user.email": "personsTelecomInformation", - "study.sample.sampleName": "patientsName", - "study.sample.sampleID": "patientId", - "study.sample.sampleSize.value": "patientsSize", - 
"study.sample.sampleWeight.value": "patientsWeight", - "study.sample.measurementConditions.value": "magneticFieldStrength", - "study.instrument.instrumentName": "stationName", - "study.instrument.instrumentID": "deviceSerialNumber", - "study.instrument.instrumentManufacturer.manufacturerName": "manufacturer", - "study.instrument.instrumentManufacturer.modelName": "manufacturersModelName", - "study.instrument.instrumentManufacturer.manufacturerID": "manufacturersDeviceClassUID" - }, - "series": { - "study.series.seriesID": "seriesInstanceUid", - "study.series.seriesTitle": "seriesDescription", - "study.series.sequenceProtocol.sequenceProtocolName": "protocolName", - "study.series.sequenceProtocol.effectiveEchoTime.value": "effectiveEchoTime", - "study.series.sequenceProtocol.repetitionTime.value": "repetitionTime", - "study.series.sequenceProtocol.flipAngle.value": "flipAngle", - "study.series.images.allImages.numberOfImages": "numberOfFrames", - "study.series.images.allImages.imageOrientation": "imageOrientationpatient", - "study.series.images.allImages.pixelSpacing.value": "pixelSpacing", - "study.series.images.allImages.sliceThickness.value": "sliceThickness", - "study.series.images.allImages.imageSize.rows": "rows", - "study.series.images.allImages.imageSize.columns": "columns", - "study.series.images.allImages.pixelRange.pixelBandwidth.value": "pixelBandwidth", - "study.series.images.allImages.pixelRange.pixelRepresentation":"pixelRepresentation", - "study.series.images.allImages.pixelRange.largestImagePixelValue":"largestImagePixelValue", - "study.series.images.allImages.pixelRange.smallestImagePixelValue": "smallestImagePixelValue" - - }, - "perImage": { - "study.series.images.perImage.imageNumber": "instackPositionNumber", - "study.series.images.perImage.sampleImagePosition": "imagePositionpatient" - } -} diff --git a/jammato/__init__.py b/jammato/__init__.py deleted file mode 100644 index 15896bd..0000000 --- a/jammato/__init__.py +++ /dev/null @@ -1,21 
+0,0 @@ -from .dicom_mapping import Dicom_Mapping -from .dicom_reader import Dicom_Reader -from .attribute_inserter import Attribute_Inserter -from .metadata_reader import Metadata_Reader -from .schema_reader import Schema_Reader -from .attribute_mapper import Attribute_Mapper -from .cache_schemas import Cache_Schemas -from .schemas_collector import Schemas_Collector - -__all__ = [ - 'Dicom_Mapping', - 'Dicom_Reader', - 'Attribute_Inserter', - 'Map_MRI_Schema', - 'Schema_Reader', - 'Attribute_Mapper', - 'Cache_Schemas', - 'Cache_Schemas', - 'Metadata_Reader', - 'Schemas_Collector' -] \ No newline at end of file diff --git a/jammato/analyse_study.py b/jammato/analyse_study.py deleted file mode 100644 index 55e5b58..0000000 --- a/jammato/analyse_study.py +++ /dev/null @@ -1,68 +0,0 @@ -import json -from typing import Any -from .dicom_reader import Dicom_Reader - -class Analyse_Study(): - def __init__(self, config_dicom_file_validation): - self.configuration_dicom_file_validation(config_dicom_file_validation) - self.all_sop_instance_uids=[] - self.study_instance_uid = None - self.all_series_instance_uids= [] - self.series_id_dict={} - - def configuration_dicom_file_validation(self, config_dicom_file_validation: str) -> None: - """Configuration of the attributes used in the dicom file validation. - - Args: - config_dicom_file_validation (str): The direction to the config file. - """ - with open(config_dicom_file_validation, 'r') as f: - dicom_file_validation_attributes = json.load(f) - self.file_study_instance_uid=list(dicom_file_validation_attributes.values())[0] - self.file_sop_instance_uid=list(dicom_file_validation_attributes.values())[1] - self.file_series_instance_uid=list(dicom_file_validation_attributes.values())[2] - return - - def set_series(self, series_id: str, series_attributes: Any) -> None: - """Add the attributes for each series in a dictionary containing the series id as key and the object/dictionary with attributes as value. 
- - Args: - series_id (str): The string of the series id. - series_attributes (Any): The object or dictionary containing the attributes of a series. - """ - self.series_id_dict[series_id]=series_attributes - return - - def get_series(self, series_id: str) -> Any: - """Retrieves the object/dictionary containing the attributes of a series using the series id. - - Args: - series_id (str): The string of the series id. - - Returns: - Any: The object/dictionary containing the attributes. - """ - return self.series_id_dict[series_id] - - def analyse_study(self, series: Dicom_Reader) -> bool: - """Validate that all dicom files (series) of a study have the same Study Instance UID. - - Args: - dicom_object (MetadataReader): The object that contains the dicom metadata attributes. - - Raises: - Exception: Strings are not the same. - - Returns: - bool: Returns the boolean of the assessment if the ids are duplicates. - """ - if (self.study_instance_uid != None) and (self.study_instance_uid != series.__dict__[self.file_study_instance_uid]): - raise Exception('Files are not from the same study') - else: - self.study_instance_uid = series.__dict__[self.file_study_instance_uid] - - duplicate_sop_elements= series.__dict__[self.file_sop_instance_uid] in self.all_sop_instance_uids - duplicate_series_elements= series.__dict__[self.file_series_instance_uid] in self.all_series_instance_uids - self.all_sop_instance_uids.append(series.__dict__[self.file_sop_instance_uid]) - self.all_series_instance_uids.append(series.__dict__[self.file_series_instance_uid]) - return duplicate_sop_elements, duplicate_series_elements \ No newline at end of file diff --git a/jammato/attribute_inserter.py b/jammato/attribute_inserter.py deleted file mode 100644 index 4dd069f..0000000 --- a/jammato/attribute_inserter.py +++ /dev/null @@ -1,154 +0,0 @@ -from typing import Any -import logging -from .attribute_mapper import Attribute_Mapper - - -class Attribute_Inserter(): - - def __init__(self, schema_skeleton: 
dict, key_list: list, map: object) -> None: - """Takes the schema structure as dictionary and the metadata map to assign the metadata values to the schema attributes - at the proper hirarchy level of the JSON schema. - - Args: - schema (dict): The dictionary containing the skeleton of the target schema. - key_list (list): The list that contains all keys of the schema skeleton at the first hierarchy level. - map (object): The object that represents the mapped attributes. - """ - self.schema_skeleton = schema_skeleton - self.key_list = key_list - self.map = map - - def fill_json_object(self, json_object: dict, key_list: list, attributes_object: object): - """Takes the JSON object of the schema structure, the key list of the current schema hierarchy level and the attributes object. - Calls the proper method to insert the attribute value, based on the attribute type. Has in addition a functionality for inserting - the value of the main key string of an JSON object containing value and unit properties. - - Args: - json_object (dict): The dictionary containing the skeleton of the target schema. - key_list (list): The list that contains all keys of the schema skeleton at the first hierarchy level. - attributes_object (object): The object that contains the mapped attributes. - - Returns: - dict: The dictionary that represents the filled JSON object. 
- """ - new_dict = {} - - for key in key_list: - if (isinstance(json_object[key], str)) or (isinstance(json_object[key], tuple)): - if key in attributes_object.keys(): - new_dict[key] = self.get_json_type(json_object[key], attributes_object[key], key) - else: - new_dict[key] = self.get_json_type(json_object[key], json_object[key], key) - elif isinstance(json_object[key], dict): - if key in attributes_object: - if isinstance(attributes_object[key], Attribute_Mapper): - sub_dict = self.fill_json_object(json_object[key], list(json_object[key].keys()), attributes_object[key].__dict__) - else: - sub_dict = self.fill_json_object(json_object[key], list(json_object[key].keys()), attributes_object[key]) - else: - sub_dict = self.fill_json_object(json_object[key], list(json_object[key].keys()), attributes_object) - if len(sub_dict) > 0: - new_dict[key] = sub_dict - elif isinstance(json_object[key], list): - filled_array=[] - if key in attributes_object: - filled_array = self.fill_json_array(json_object, key, json_object[key], attributes_object[key]) - else: - pass - if len(filled_array) > 0: - new_dict[key] = filled_array - else: - pass - else: - pass - return new_dict - - def fill_json_array(self, json_object: dict, json_object_property: str, json_array: list, attributes: Any) -> list: - """Takes the json object of the schema structure, the property that contains the json array as value, the json array and the attributes, either as object, or list of objects. - Parses the array in the schema structure and calls the proper method to insert the attribute value, based on the attribute type. - - Args: - json_object (dict): The dictionary that represents the schema skeleton object. - json_object_property (str): The string property that contains the json array as value. - json_array (list): The list that represents the schema skeleton array. - attributes (Any): The object that contains the mapped attributes, or a list of objects that contain the mapped attributes. 
- - Returns: - list: The list that represents the filled json array. - """ - if not isinstance(attributes, list): - if json_object_property in attributes.__dict__.keys(): - attributes = attributes.__dict__[json_object_property] - try: - json_array = json_array * len(attributes) - except TypeError as e: - logging.warning(e) - pass - - new_list = [] - - for list_item, list_index in zip(json_array, range(0, len(json_array))): - if (isinstance(list_item, str)) or (isinstance(list_item, tuple)): - try: - new_list.append(self.get_json_type(list_item, attributes[list_index], None)) - except TypeError as e: - logging.warning(e) - pass - elif isinstance(list_item, dict): - if isinstance(attributes[list_index], Attribute_Mapper): - new_list.append(self.fill_json_object(list_item, list( - list_item.keys()), attributes[list_index].__dict__)) - else: - new_list.append(self.fill_json_object(list_item, list( - list_item.keys()), attributes[list_index])) - elif isinstance(list_item, list): - new_list.append(self.fill_json_array(json_object, json_object_property, list_item, attributes[list_index])) - else: - pass - return new_list - - def get_json_type(self, data_type: str, attribute: str, key) -> Any: - """Takes an attribute and its data type. Confirms the primitive data types of the mapped attribute values and assigns these values to the schema attribute. The correct hirarchial - position has been reached through the methods above. - - Args: - data_type (str): The string that representes the data type for an attribute. - attribute (str): The string that represents the value of the mapped attribute. - - Returns: - Any: The value of the mapped attribute as the correct data type. 
- """ - try: - if (isinstance(data_type, tuple) and not (isinstance(attribute, tuple))): - if (isinstance(attribute, list)) and ("" in data_type): - return attribute - else: - for element in data_type: - if element in ["", "", "", "", ""]: - return (self.get_json_type(element, attribute, None)) - else: - pass - logging.warning(f'incorrect type provided for property \"{key}\", expected {data_type} but received {type(attribute)}, returning original value.') - return attribute - elif isinstance(attribute, tuple): - logging.warning(f'No value provided for property \"{key}\", returning original value.') - return attribute - elif data_type == "": - return int(attribute) - elif data_type == "": - return bool(attribute) - elif data_type == "": - return "null" - elif data_type == "": - return float(attribute) - elif data_type == "": - return str(attribute) - else: - logging.warning(f'incorrect type provided for property \"{key}\", expected {data_type} but received {type(attribute)}, returning original value.') - return attribute - except TypeError: - logging.warning(f'incorrect type provided for property \"{key}\", expected {data_type} but received {type(attribute)}, returning original value.') - return attribute - except ValueError: - logging.warning(f'incorrect value provided for property \"{key}\", expected {data_type} but received {type(attribute)}, returning original value.') - return attribute \ No newline at end of file diff --git a/jammato/attribute_mapper.py b/jammato/attribute_mapper.py deleted file mode 100644 index 81743ca..0000000 --- a/jammato/attribute_mapper.py +++ /dev/null @@ -1,259 +0,0 @@ -from typing import Any -import logging -from nested_lookup import nested_lookup - -class Attribute_Mapper(): - - def __init__(self, **kwargs) -> None: - """Instantiates the class and updates the attributes. 
- """ - if kwargs: - self.__dict__.update(kwargs) - else: - pass - - def update_object_attributes(self, attributes: dict) -> None: - """Updates the key-value pairs of the object attributes dictionary. - - Args: - attributes (dict): Attributes to be added to the object dictionary. - """ - self.__dict__.update(attributes) - - return - - @classmethod - def mapping_from_object(cls, metadata_attributes: dict, map_dict: dict, map_main_attribute: str, object_names: list) -> dict: - """Takes the metadata object as dictionary and the corresponding map dictionary to insert the proper values for a main attribute from the - map dictionary. - - Args: - metadata_attributes (dict): Dictionary of attributes from the metadata object. - map_dict (dict): Dictionary of the map for the origin and target schemas. - map_main_attribute (str): The main attribute of the map dictionary (usually study, series and perImage) for which all attributes should be mapped. - - Returns: - dict: The dictionary with the mapped attributes. Passes to the __init__ function, which updates the attributes. - """ - mapped_attribues={} - for key, value in map_dict[map_main_attribute].items(): - if value in metadata_attributes: - if "." 
in key: - split_keys=key.split(".") - split_keys.append(metadata_attributes[value]) - split_keys=cls.remove_from_list(split_keys, object_names) - for index in range(len(split_keys)-1, 1, -1): - temp_key_dict={} - temp_key_dict[split_keys[index-1]]= split_keys[index] - split_keys.remove(split_keys[index]) - split_keys.remove(split_keys[index-1]) - split_keys.append(temp_key_dict) - if split_keys[0] in mapped_attribues: - merged_attribute_object=cls.merge_mapped_attributes(mapped_attribues[split_keys[0]], temp_key_dict) - mapped_attribues[split_keys[0]]=merged_attribute_object - else: - attribute_object=Attribute_Mapper() - attribute_object.update_object_attributes(temp_key_dict) - mapped_attribues[split_keys[0]]=attribute_object - else: - mapped_attribues[key]= metadata_attributes[value] - else: - pass - return mapped_attribues - - @classmethod - def merge_mapped_attributes(cls, existing_attributes: Any, new_attributes: Any, list_attribute=None) -> object: - """Merges the mapped attributes that belong to the same hirarchy in the target schema, by adding them to the same object. - - Args: - existing_attributes (Any): The object, or dictionary containing the mapped attributes of the same schema hierarchy. - new_attributes (Any): The object, or dictionary containing new attributes that need to be merged with the (object) dictionary of mapped attributes. - list_attribute (_type_, optional): In case a list attribute exists that needs to be added to the attribute dictionary. Defaults to None. - - Returns: - object: The object containing the merged new and existing mapped attributes. 
- """ - temp_dict={} - if isinstance(existing_attributes, Attribute_Mapper): - attribute_dict=existing_attributes.__dict__ - else: - attribute_dict=existing_attributes - if isinstance(new_attributes, Attribute_Mapper): - new_attributes=new_attributes.__dict__ - else: - pass - if list(new_attributes.keys())[0] == list_attribute: - if list_attribute in attribute_dict: - attribute_dict[list_attribute].append(new_attributes[list_attribute]) - else: - attribute_dict[list_attribute]=[new_attributes[list_attribute]] - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(attribute_dict) - return attribute_object_1 - elif list(new_attributes.keys())[0] in (attribute_dict.keys()): - attribute_dict[list(new_attributes.keys())[0]]=cls.merge_mapped_attributes(attribute_dict[list(new_attributes.keys())[0]], new_attributes[list(new_attributes.keys())[0]], list_attribute) - if not isinstance(attribute_dict, Attribute_Mapper): - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(attribute_dict) - else: - temp_dict[list(new_attributes.keys())[0]]=attribute_dict - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(temp_dict) - return attribute_object_1 - elif list_attribute!=None: - if list_attribute in attribute_dict: - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(new_attributes) - attribute_dict[list_attribute].append(attribute_object_1) - else: - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(new_attributes) - attribute_dict[list_attribute]=[attribute_object_1] - attribute_object_2=Attribute_Mapper() - attribute_object_2.update_object_attributes(attribute_dict) - return attribute_object_2 - else: - attribute_dict.update(new_attributes) - attribute_object_1=Attribute_Mapper() - attribute_object_1.update_object_attributes(attribute_dict) - return attribute_object_1 - - @classmethod - def remove_from_list(cls, 
list1: list, list2: list) -> list: - """Compares the values between list1 and list2 and appends matching values in a new list which is returned. - - Args: - list1 (list): The first list. - list2 (list): The second list. - - Returns: - list: The list that contains the values which are contained in both lists. - """ - new_list=[] - for element in list1: - if element in list2: - pass - else: - new_list.append(element) - return new_list - - def get_data_type(self, target_attribute: str, split_target_attribute: str=None, main_target_attribute: str=None) -> Any: - """Looks up the schema in the object instance to retreive the value of the target attribute provided to the function. - - Args: - target_attribute (str): The attribute of which the value should be looked up in the schema. - split_target_attribute (str, optional): The attribute from the end of a string representing a sequence of attributes refering to the schema hierarchy. Defaults to None. - main_target_attribute (str, optional): In case of a string representing a sequence of attributes, this value represents the original attribute of which value has to be searched in the schema. Defaults to None. - - Returns: - Any: The value of the attribute in the schema and the attribute. - """ - - if "." 
in target_attribute: - split_target_attribute=target_attribute.split(".") - main_target_attribute=split_target_attribute[-1] - attribute_type=nested_lookup(split_target_attribute[-1], self.schema_skeleton) - else: - attribute_type=nested_lookup(target_attribute, self.schema_skeleton) - main_target_attribute=target_attribute - - if len(attribute_type)>1: - try: - split_target_attribute.pop() - main_target_attribute=target_attribute - attribute_type, main_target_attribute=self.get_data_type(split_target_attribute[-1], split_target_attribute, main_target_attribute)#check again main attribute target - except TypeError as e: - logging.error(f"attribute {attribute_type[0]} occurs multiple times and full path is not provided in the metadata map.") - except AttributeError as e: - logging.error(f"attribute {attribute_type[0]} occurs multiple times and full path is not provided in the metadata map.") - else: - pass - if isinstance(attribute_type[0], dict): - attribute_type=nested_lookup(main_target_attribute, attribute_type[0]) - return attribute_type[0] - else: - return attribute_type[0], main_target_attribute - - @classmethod - def nested_list_level(cls, lst: list) -> int: - """Returns the nesting level of a list. - - Args: - lst (list): The nested list. - - Returns: - int: The level of the list nesting. - """ - if isinstance(lst, list): - return 1 + max(cls.nested_list_level(item) for item in lst) - else: - return 0 - - @classmethod - def nested_attributes_map_search(cls, attributes_map: dict, target_attribute: str) -> Any: - """Searches a nested dictionary for a certain target attribute. - - Args: - attributes_map (dict): The nested dictionary to be searched. - target_attribute (str): The target attribute in the dictionary. - - Returns: - Any: The level of the dictionary where the attribute is located at. 
- """ - for attribute in attributes_map: - if (attribute==target_attribute) or target_attribute in attributes_map: - return attributes_map - elif isinstance(attributes_map[attribute], Attribute_Mapper): - return cls.nested_attributes_map_search(attributes_map[attribute].__dict__, target_attribute) - elif isinstance(attributes_map[attribute], dict): - return cls.nested_attributes_map_search(attributes_map[attribute], target_attribute) - else: - logging.warning(f"{target_attribute} not found in {attributes_map}.") - pass - return attributes_map - - @classmethod - def nested_attributes_map_modification(cls, attributes_map, element_series_attribute): - for attribute in attributes_map: - if attribute in element_series_attribute: - attributes_map[attribute]=element_series_attribute[attribute] - elif isinstance(attributes_map[attribute], Attribute_Mapper): - attributes_map[attribute].__dict__.update(cls.nested_attributes_map_modification(attributes_map[attribute].__dict__, element_series_attribute)) - return attributes_map - elif isinstance(attributes_map[attribute], dict): - attributes_map[attribute].update(cls.nested_attributes_map_modification(attributes_map[attribute], element_series_attribute)) - return attributes_map - else: - logging.warning(f"{element_series_attribute} not found in {attributes_map}.") - pass - return attributes_map - - def type_assessment(self, original_attributes_map, map_dict, map_attribute): - number_of_elements=0 - all_types={} - for target_attribute in map_dict[map_attribute]: - attribute_type, target_attribute=self.get_data_type(target_attribute) - if not target_attribute in original_attributes_map: - attributes_map=self.nested_attributes_map_search(original_attributes_map, target_attribute) - if not target_attribute in attributes_map: - continue - else: - pass - else: - attributes_map=original_attributes_map - if (isinstance(attributes_map[target_attribute], list)) and (self.nested_list_level(attributes_map[target_attribute]) > 
self.nested_list_level(attribute_type)): - number_of_elements=len(attributes_map[target_attribute]) - elif (isinstance(attributes_map[target_attribute], list)) and (self.nested_list_level(attributes_map[target_attribute]) == self.nested_list_level(attribute_type)): - number_of_elements=1 - else: - logging.warning(f"no correct type for attribute: {target_attribute}") - if isinstance(attribute_type, list): - attribute_type=str(list) - elif isinstance(attribute_type, dict): - attribute_type=str(dict) - elif isinstance(attribute_type, tuple): - attribute_type=str(dict) - else: - pass - all_types[target_attribute]=attribute_type - return all_types, number_of_elements \ No newline at end of file diff --git a/jammato/cache_schemas.py b/jammato/cache_schemas.py deleted file mode 100644 index 9b9553e..0000000 --- a/jammato/cache_schemas.py +++ /dev/null @@ -1,42 +0,0 @@ -import urllib.request -import logging -import json -import ssl -from .schemas_collector import schemas_collector_instance - -class Cache_Schemas(): - - def __init__(self, json_schema: dict) -> None: - """Class instantiation. - """ - self.json_schema = json_schema - - @classmethod - def cache_schema(cls, map_dict: dict) -> dict: - """Cache, or set the schema that is referenced by the map json document via the uri key and return it as dictionary. - - Args: - map_dict (dict): _description_ - - Raises: - KeyError: No uri as key provided in the input dictionary. - - Returns: - dict: The schema as dictionary. 
- """ - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE - - try: - if schemas_collector_instance.get_uri(map_dict["uri"]): - json_schema = schemas_collector_instance.get_schema(map_dict["uri"]) - else: - with urllib.request.urlopen(map_dict["uri"], context=ctx) as url: - json_schema = json.load(url) - schemas_collector_instance.add_schema(map_dict["uri"], json_schema) - - except KeyError as e: - logging.error("Schema not accessible.") - - return cls(json_schema) \ No newline at end of file diff --git a/jammato/data_cleaning.py b/jammato/data_cleaning.py deleted file mode 100644 index c4122ca..0000000 --- a/jammato/data_cleaning.py +++ /dev/null @@ -1,97 +0,0 @@ -import json -from datetime import datetime -import os -from typing import Any -from collections.abc import Iterable - -class Data_Cleaning(): - - def __init__(self) -> None: - """Instantiates the Data_Cleaning class and creates an empty attribute attribute_dict to be updatet. - """ - self.config={} - - @classmethod - def merge_date_time(cls, args: list, object_iterable: Any) -> dict: - """Merge date and time values from two attributes into one attribute value. - - Args: - args (list): Names of the original and merged attributes. - attribute_dict (dict): The dictionry containing the key-value pairs of the attributes to be merged. - - Returns: - dict: The updatet dictionary containing the merged attribute key-value pair. 
- """ - for object in object_iterable: - input_args=args[0] - output_args=args[1] - merged_date_time = object.__dict__[input_args[0]] + " " - merged_date_time += object.__dict__[input_args[1]] - try: - merged_date_time = datetime.strptime( - merged_date_time, '%Y%m%d %H%M%S').isoformat() - except: - merged_date_time=merged_date_time - object.__dict__.pop(input_args[0]) - object.__dict__.pop(input_args[1]) - object.__dict__[output_args]=merged_date_time - return object_iterable - - @classmethod - def merge_min_max_values(cls, args: list, object_iterable: Any) -> dict: - input_args=args[0] - min_value=min([object.__dict__[input_args[0]] for object in object_iterable]) - max_value=max([object.__dict__[input_args[1]] for object in object_iterable]) - for object in object_iterable: - object.__dict__[input_args[0]]=min_value - object.__dict__[input_args[1]]=max_value - return object_iterable - - @classmethod - def transfer_to_list(cls, iterable_atribute: Iterable) -> list: - """Transfers an iterable attribute to a list. - - Args: - iterable_atribute (Iterable): The iterable attribute to be converted. - - Returns: - list: The resulting list. - """ - new_list=[] - for value in iterable_atribute: - if isinstance(value, list): - new_list.append(cls.iterate_list(value)) - else: - new_list.append(value) - return new_list - - def set_attributes_from_config(self, flag, config_file_path: str=os.getcwd()+"/configs/config_data_cleaning.json",) -> None: - """Uses the JSON config file for the data cleaning to assess the methods and respective attributes that need to be cleaned. Attributes - and methods are then iteratively called and executed. The object instance attribute attribute_dict is then updatet after each process. - - Args: - config_file_path (str, optional): The relative path to the config file directory. Defaults to os.getcwd()+"/configs/config_data_cleaning.json". 
- """ - with open(config_file_path, 'r') as f: - config_file = json.load(f) - for func_string, args in config_file.items(): - self.store_config(func_string, args) - func = getattr(Data_Cleaning, func_string) - temp_attribute_dict=func(args, self.attributes_dict) - self.attributes_dict=temp_attribute_dict - return - - def load_attributes(self, attributes_dict: dict) -> None: - """Load a dictionary to update the attribute_dict object instance attribute. - - Args: - attributes_dict (dict): The dictionary containing the key-value pairs for the data cleaning setps. - """ - self.attributes_dict=attributes_dict - return - - def store_config(self, func_string, args): - self.config.update({func_string: args}) - return - -data_cleaning_instance=Data_Cleaning() diff --git a/jammato/dicom_mapping.py b/jammato/dicom_mapping.py deleted file mode 100644 index 08f728e..0000000 --- a/jammato/dicom_mapping.py +++ /dev/null @@ -1,167 +0,0 @@ -import json -import os -import logging -from .schema_reader import Schema_Reader -from .dicom_reader import Dicom_Reader -from .cache_schemas import Cache_Schemas -from .analyse_study import Analyse_Study -from .data_cleaning import data_cleaning_instance -from .metadata_reader import Metadata_Reader -from .attribute_mapper import Attribute_Mapper -from .attribute_inserter import Attribute_Inserter - -class Dicom_Mapping(): - - def __init__(self, map_json_path: str, metadata_files_location: str, mapped_metadata: str='mapped_metadata.json', config_dicom_file_validation: str=os.getcwd()+"/configs/config_dicom_file_validation.json") -> None: - """Instantiates the class, loads the map dictionary from JSON, instantiates all attributes to the object and executes the steps for mapping. - - Args: - map_json (json): A json based map of the attribute assignments for mapping. - metadata_files_location (str): The directory where the dicom files of a study are stored. - mapped_metadata (str, optional): The resulting json file. 
Defaults to 'mapped_metadata.json'. - """ - with open(map_json_path, 'r') as f: - map_dict = json.load(f) - self.map_dict=map_dict - self.metadata_files_location = metadata_files_location - self.mapped_metadata=mapped_metadata - self.config_dicom_file_validation=config_dicom_file_validation - self.analyse_study_instance=Analyse_Study(config_dicom_file_validation) - data_cleaning_instance - self.execute_steps(map_dict, metadata_files_location, mapped_metadata) - - def execute_steps(self, map_dict: dict, metadata_files_location: str, mapped_metadata: str) -> None: - """Executes all steps for mapping a dicom study to a json schema, i.e. download or cache the target schema, extract the schema skeleton, read the metadata from the DICOM files and extract metadata, - map the attributes provided in the JSON metadata map, create the mapped metadata object, fill the schema skeleton with the attributes in the mapped metadata object and finally store the result as JSON document. - - Args: - map_dict (dict): The map of the attribute assignments for mapping as a dictionary. - metadata_files_location (str): The directory where the dicom files of a study are stored. - mapped_metadata (str): The resulting json file. 
- """ - json_schema = Cache_Schemas.cache_schema(map_dict).json_schema - schema_skeleton = Schema_Reader(json_schema) - self.schema_skeleton = schema_skeleton.json_object_search(schema_skeleton.schema) - dicom_object = Metadata_Reader(metadata_files_location, self.config_dicom_file_validation) - dicom_series_list = dicom_object.all_dicom_series - study_map = Attribute_Mapper.mapping_from_object(dicom_series_list[0].__dict__, map_dict, list(map_dict.keys())[1], []) - if len(list(map_dict.keys())) > 2: - merged_series_dict=self.map_and_merge_series(dicom_series_list, map_dict) - for merged_series in merged_series_dict.values(): - - if list(map_dict.keys())[1] in study_map: - merged_study_map=Attribute_Mapper.merge_mapped_attributes(study_map[list(map_dict.keys())[1]], merged_series, list(map_dict.keys())[2]) - else: - merged_study_map=Attribute_Mapper.merge_mapped_attributes(study_map, merged_series, list(map_dict.keys())[2]) - merged_study_map={list(map_dict.keys())[1]: merged_study_map} - else: - pass - - map_schema = Attribute_Inserter(self.schema_skeleton, list(self.schema_skeleton.keys()), merged_study_map) - filled_schema = map_schema.fill_json_object(map_schema.schema_skeleton, map_schema.key_list, map_schema.map) - with open(mapped_metadata, 'w') as f: - json.dump(filled_schema, f) - - return - - def map_and_merge_series(self, dicom_series_list: list, map_dict: dict) -> dict: - """Mapping and merging of the series that are part of a MRI study. - - Args: - dicom_series_list (list): The list of metadata objects from the metadata extraction of the DICOM files. - map_dict (dict): The JSON map that contains the attribute assignments of the origin and target attributes. - - Returns: - dict: The dictionary that contains the mapped and merged metadata attributes. 
- """ - merged_series_dict={} - if len(list(map_dict.keys())) == 4: - for raw_series in dicom_series_list: - duplicate_sop_elements, duplicate_series_elements=self.analyse_study_instance.analyse_study(raw_series) - if (duplicate_sop_elements==False) and (duplicate_series_elements==True): - series_map={list(map_dict.keys())[2]: self.analyse_study_instance.get_series(raw_series.__dict__[self.analyse_study_instance.file_series_instance_uid])}#### - merged_series_map=self.series_extension(map_dict, list(map_dict.keys())[3], raw_series, series_map) - elif (duplicate_sop_elements==True) and (duplicate_series_elements==True): - merged_series_map=Attribute_Mapper() - pass - else: - series_map = Attribute_Mapper.mapping_from_object(raw_series.__dict__, map_dict, list(map_dict.keys())[2], [list(map_dict.keys())[1]]) - merged_series_map=self.series_extension(map_dict, list(map_dict.keys())[3], raw_series, series_map) - self.analyse_study_instance.set_series(raw_series.__dict__[self.analyse_study_instance.file_series_instance_uid], merged_series_map) - merged_series_dict[raw_series.__dict__[self.analyse_study_instance.file_series_instance_uid]]=merged_series_map - else: - for raw_series in dicom_series_list: - duplicate_sop_elements, duplicate_series_elements=self.analyse_study_instance.analyse_study(raw_series) - if (duplicate_sop_elements==True) and (duplicate_series_elements==True): - series_map=Attribute_Mapper() - pass - else: - series_map = Attribute_Mapper.mapping_from_object(raw_series.__dict__, map_dict, list(map_dict.keys())[2], [list(map_dict.keys())[1]]) - self.analyse_study_instance.set_series(raw_series.__dict__[self.analyse_study_instance.file_series_instance_uid], series_map) - merged_series_dict[raw_series.__dict__[self.analyse_study_instance.file_series_instance_uid]]=series_map - - return merged_series_dict - - def series_extension(self, map_dict: dict, map_attribute: str, series: Dicom_Reader, series_map) -> list: - """Extends the mapped attributes of a 
series object by an attribute that has a list of objects as values, using the keywords of the provided map. - - Args: - map_dict (dict): Map that contains the attribute assignments for the dicom metadata and the schema. - map_attribute (str): The attribute in the map that contains the mapping assignments. - series (DicomReader): The series which is extended. - - Returns: - list: A list of objects with the mapped attributes. - """ - attribute_value=None - target_attributes={} - for origin_attribute in list(map_dict[map_attribute].values()): - try: - attribute_value=series.__dict__[origin_attribute] - except KeyError as e: - continue - target_attributes[origin_attribute]=attribute_value - attributes_map = Attribute_Mapper.mapping_from_object(target_attributes, map_dict, map_attribute, [list(map_dict.keys())[1], list(map_dict.keys())[2]]) - - assess_type=Attribute_Mapper(**{"schema_skeleton": self.schema_skeleton}) - all_types, number_of_elements=assess_type.type_assessment(attributes_map, map_dict, map_attribute) - - for key in all_types: - new_attributes_map={} - if not key in attributes_map: - new_attributes_map.update(assess_type.nested_attributes_map_search(attributes_map, key)) - else: - pass - - if len(new_attributes_map) > 0: - pass - else: - new_attributes_map=attributes_map - - for element in range(0, number_of_elements): - element_series_attribute={} - - for series_attribute, attribute_value in new_attributes_map.items(): - if (isinstance(attribute_value, list)) and (((isinstance(attribute_value[0], list)) and (str(type(attribute_value[0]))== all_types[series_attribute])) or ((isinstance(attribute_value[0], list))==False) and (str(type(attribute_value[0])) == all_types[series_attribute])): - element_series_attribute[series_attribute]=attribute_value[element] - - elif isinstance(attribute_value, list) == False: - element_series_attribute[series_attribute]=element_series_attribute[series_attribute]=attribute_value - - elif (isinstance(attribute_value, list)) 
and (str(type(attribute_value)) == all_types[series_attribute]): - element_series_attribute[series_attribute]=element_series_attribute[series_attribute]=attribute_value - - else: - logging.error(f"The value for attribute {series_attribute} does not correspond to the required value in the schema.") - - if set(element_series_attribute.keys()).issubset(set(attributes_map.keys())): - pass - else: - element_series_attribute=assess_type.nested_attributes_map_modification(attributes_map, element_series_attribute) - - if list(map_dict.keys())[2] in series_map: - merged_series_map=assess_type.merge_mapped_attributes(series_map[list(map_dict.keys())[2]], element_series_attribute, list(map_dict.keys())[3]) - else: - merged_series_map=assess_type.merge_mapped_attributes(series_map, element_series_attribute, list(map_dict.keys())[3]) - - return merged_series_map \ No newline at end of file diff --git a/jammato/dicom_reader.py b/jammato/dicom_reader.py deleted file mode 100644 index e170fcd..0000000 --- a/jammato/dicom_reader.py +++ /dev/null @@ -1,112 +0,0 @@ -import pydicom -import re -import logging -from .data_cleaning import data_cleaning_instance - -class Dicom_Reader(): - - def __init__(self, dicom_file: str) -> None: - """Takes a dicom file as input and converts the file into a Python object using the pydicom module. - - Args: - dicom_file (str): String that contains the location to the dicom file. - """ - try: - self.pydicom_file = pydicom.dcmread(dicom_file) - except pydicom.errors.InvalidDicomError as e: - logging.error("InvalidDicomError for file: %s %s", dicom_file, e) - raise - except FileNotFoundError as e: - logging.error("FileNotFoundError for file: %s %s", dicom_file, e) - raise - - @classmethod - def pydicom_object_search(cls, dataset: pydicom) -> dict: - """Takes as input a pydicom object and searches its attributes. Puts the attributes which contain a string as value, or a list of values into a dictionary. 
- - Args: - dataset (pydicom): The pydicom object that contains all attributes of a dicom file in a nested structure. - - Returns: - dict: The dictionary that contains the dicom attributes in a flat structure as key-value pairs. - """ - sub_dict = {} - for attribute in dataset: - if isinstance(attribute, pydicom.Dataset): - sub_dict=cls.pydicom_object_search(attribute) - elif isinstance(attribute.value, pydicom.Sequence): - name=cls.name_standardization(attribute.name) - if len(attribute.value) > 1: - mergedsub_dict={} - for value in attribute.value: - subsub_dict=(cls.pydicom_object_search(value)) - mergedsub_dict=cls.merge_dict_keys(subsub_dict, mergedsub_dict) - sub_dict.update(mergedsub_dict) - - else: - sub_dict.update(cls.pydicom_object_search(attribute.value)) - else: - name=cls.name_standardization(attribute.name) - if isinstance(attribute.value, pydicom.multival.MultiValue): - sub_dict[name]=data_cleaning_instance.transfer_to_list(attribute.value) - elif isinstance(attribute.value, pydicom.valuerep.PersonName): - sub_dict[name]=str(attribute.value) - elif isinstance(attribute.value, pydicom.uid.UID): - sub_dict[name]=str(attribute.value) - elif isinstance(attribute.value, pydicom.valuerep.DSfloat): - sub_dict[name]=str(attribute.value) - elif isinstance(attribute.value, pydicom.valuerep.IS): - sub_dict[name]=str(attribute.value) - else: - sub_dict[name] = attribute.value - return sub_dict - - @classmethod - def name_standardization(cls, attribute: str) -> str: - """Takes a string of a dicom attribute as input and standardizes it after defined criteria. - - Args: - attribute (str): The attribute string that should be standardized. - - Returns: - str: The attribute string after standardization. 
- """ - name = attribute.split() - if len(name) == 1: - name = name[0].lower() - else: - subname = "" - for letter in name[1:]: - subname += letter.capitalize() - name = name[0].lower() + subname - name = re.sub('[^A-Za-z0-9]+', '', name) - return name - - @classmethod - def merge_dict_keys(cls, subsub_dict: dict, mergedsub_dict: dict) -> dict: - """Takes as input a two dictionaries that contain attributes of a repeating attribute sequence in the pydicom file. It merges the file contains and returns one dictionary that - has a list of values from both dictionaries for each key in the new dictionary. - - Args: - subsub_dict (dict): The dictionary with the new values for each key. - mergedsub_dict (str): The dictioanry with the current values in a list for each key. - - Returns: - dict: The merged dictionary of both input dictionaries. - """ - new_mergedsub_dict={} - dict_keys = [key for key in subsub_dict.keys()] - try: - for key in dict_keys: - if isinstance(mergedsub_dict[key], list): - if (isinstance(subsub_dict[key], list)) and (isinstance(mergedsub_dict[key][0], list)==False): - mergedsub_dict[key]=[mergedsub_dict[key]] - else: - pass - mergedsub_dict[key].append(subsub_dict[key]) - new_mergedsub_dict[key] = mergedsub_dict[key] - else: - new_mergedsub_dict[key] = [mergedsub_dict[key], subsub_dict[key]] - except KeyError as e: - new_mergedsub_dict = subsub_dict - return new_mergedsub_dict \ No newline at end of file diff --git a/jammato/metadata_reader.py b/jammato/metadata_reader.py deleted file mode 100644 index d52c49b..0000000 --- a/jammato/metadata_reader.py +++ /dev/null @@ -1,90 +0,0 @@ -import os -import logging -import zipfile -import pydicom -from typing import Any - -from .dicom_reader import Dicom_Reader -from .analyse_study import Analyse_Study -from .data_cleaning import data_cleaning_instance - -class Metadata_Reader(): - - def __init__(self, metadata_document_directory: str, config_dicom_file_validation) -> None: - """Takes the path to either a 
single document that contains the metadata for mapping, or - to a folder that contains multiple corresponding files with metadata. - - Args: - metadata_document_directory (str): String path to the metadata files. - """ - self.analyse_study_instance=Analyse_Study(config_dicom_file_validation) - self.all_dicom_series_dict={} - self.all_dicom_series = [] - file_name, file_extension = os.path.splitext( - metadata_document_directory) - - if file_extension == ".zip": - file_name, file_extension = os.path.splitext( - metadata_document_directory) - with zipfile.ZipFile(metadata_document_directory) as dataset: - for file in range(1, len(dataset.filelist)): - with dataset.open(dataset.filelist[file].filename) as file: - datasetFileName, dataset_file_extension = os.path.splitext( - file.name) - self.evaluate_file_type( - file, dataset_file_extension) - - for value in self.all_dicom_series_dict.values(): - if len(value)>1: - self.all_dicom_series.extend(self.post_read_processing(value, flag="all")) - else: - self.all_dicom_series.extend(self.post_read_processing(value, flag="single")) - - elif type(file_extension) == type(str()): - self.evaluate_file_type(metadata_document_directory, file_extension) - self.all_dicom_series.extend(self.post_read_processing(list(self.all_dicom_series_dict.values())[0], flag="single")) - else: - logging.error("No valid metadata file path.") - raise FileNotFoundError("No valid metadata file path.") - return - - def evaluate_file_type(self, file: str, file_extension: str) -> None: - """Takes the file path and the file extension and evaluates their type in order to call the corresponding class for metadata - extraction. - - Args: - file_num (str): String path to the metadata file. - file_extension (str): String of the file type. 
- """ - - if file_extension == ".dcm": - try: - dicom_series = Dicom_Reader(file) - sub_dict=Dicom_Reader.pydicom_object_search(dicom_series.pydicom_file) - dicom_series.__dict__.update(sub_dict) - del dicom_series.pydicom_file - - duplicate_sop_elements, duplicate_series_elements=self.analyse_study_instance.analyse_study(dicom_series) - if (duplicate_sop_elements==False) and (duplicate_series_elements==True): - self.all_dicom_series_dict[dicom_series.__dict__[self.analyse_study_instance.file_series_instance_uid]].append(dicom_series) - else: - self.all_dicom_series_dict[dicom_series.__dict__[self.analyse_study_instance.file_series_instance_uid]]=[dicom_series] - - except pydicom.errors.InvalidDicomError as e: - pass - except FileNotFoundError as e: - pass - else: - logging.error("File format is not supported.") - return - - def post_read_processing(self, attributes: Any, flag): - data_cleaning_instance.load_attributes(attributes) - if flag=="single": - data_cleaning_instance.set_attributes_from_config(flag="single") - elif flag=="all": - data_cleaning_instance.set_attributes_from_config(flag="all") - else: - raise("No correct flag provided.") - return data_cleaning_instance.attributes_dict - \ No newline at end of file diff --git a/jammato/schema_reader.py b/jammato/schema_reader.py deleted file mode 100644 index d9d00a1..0000000 --- a/jammato/schema_reader.py +++ /dev/null @@ -1,184 +0,0 @@ -import logging -from typing import Any - -class Schema_Reader(): - - def __init__(self, schema: dict) -> None: - """Instantiates the class that searches the schema structure of the provided dictionary and builds a schema skeleton. When instantiated, the json definitions are first set. - - Args: - schema (dict): The json schema document as dictionary. 
- """ - if "$defs" in schema.keys(): - self.definitions = schema["$defs"] - elif "definitions" in schema.keys(): - self.definitions = schema["definitions"] - else: - self.definitions = None - self.schema = schema - - def json_definition_search(self, definition: dict) -> dict: - """Takes a dictionary as input that represents the reference to a json definition as content. Returns the skeleton of this content as a dictionary. - - Args: - definition (dict): The reference content as dictionary. - - Returns: - dict: The skeleton of the reference content as dictionary. - """ - properties = None - if "$ref" in definition: - if definition["$ref"].startswith("#"): - keyword = definition["$ref"].split("/")[-1:][0] - sub_properties = self.json_definition_search(self.definitions[keyword]) - properties = sub_properties - else: - path = definition["$ref"] - logging.warning("No correct definition path for " + path) - elif "oneOf" in definition: - sub_properties=self.one_of_search(definition) - properties = sub_properties - elif definition["type"] == "array": - sub_properties = self.json_array_search(definition["items"]) - properties = sub_properties - elif definition["type"] == "object": - sub_properties = self.json_object_search(definition) - properties = sub_properties - else: - properties = self.json_type_search(definition["type"]) - return properties - - def json_array_search(self, property: dict) -> list: - """Takes a dictionary as input that represents the array type content of a json document. Returns the skeleton of this content as a list. - - Args: - property (dict): The array type content as dictionary. - - Returns: - list: The skeleton of the array type content as list. 
- """ - properties = None - if "$ref" in property: - if property["$ref"].startswith("#"): - keyword = property["$ref"].split("/")[-1:][0] - sub_properties = [self.json_definition_search(self.definitions[keyword])] - properties = sub_properties - else: - path = property["$ref"] - logging.warning("No correct definition path for " + path) - elif "oneOf" in property: - sub_properties=self.one_of_search(property) - properties = sub_properties - elif property["type"] == "array": - sub_properties = [self.json_array_search(property["items"])] - properties = sub_properties - elif property["type"] == "object": - sub_properties = [self.json_object_search(property)] - properties = sub_properties - else: - sub_properties = [self.json_type_search(property["type"])] - properties = sub_properties - return properties - - def json_object_search(self, property: dict) -> dict: - """Takes a dictionary as input that represents the object type content of a json document. Returns the skeleton of this content as a dictionary. - - Args: - property (dict): The object type content as dictionary. - - Returns: - dict: The skeleton of the object type content as list. 
- """ - properties = {} - for i in property["properties"].items(): - if "$ref" in i[1]: - if i[1]["$ref"].startswith("#"): - keyword = i[1]["$ref"].split("/")[-1:][0] - sub_properties = self.json_definition_search(self.definitions[keyword]) - properties[i[0]] = sub_properties - else: - path = i[1]["$ref"] - logging.warning("No correct definition path for " + path) - elif "oneOf" in i[1]: - sub_properties=self.one_of_search(i[1]) - properties[i[0]] = sub_properties - elif i[1]["type"] == "array": - sub_properties = self.json_array_search(i[1]["items"]) - properties[i[0]] = sub_properties - elif i[1]["type"] == "object": - sub_properties = self.json_object_search(i[1]) - properties[i[0]] = sub_properties - else: - if i[0] == "value": - properties[i[0]] = self.json_type_search(i[1]["type"]) - elif i[0] == "unit": - try: - properties[i[0]] = i[1]["default"] - except KeyError: - logging.warning("No default unit") - properties[i[0]] = self.json_type_search(i[1]["type"]) - else: - properties[i[0]] = self.json_type_search(i[1]["type"]) - return properties - - def json_type_search(self, type: str) -> Any: - """Takes a string as input that represents the type of a json document, i.e. a primitive data type or a string. - The input string is altered and returned either as a string, or a list, if multiple types are present - - Args: - type (str): The string of the json type. - - Returns: - Any: The string of the Python type, or a list of Python types as strings. 
- """ - if type == "integer": - return "" - elif type == "string": - return "" - elif type == "number": - return "" - elif type == "boolean": - return "" - elif type == "null": - return "" - elif type == "array": - return "" - elif isinstance(type, list): - multiple_types = [] - for j in type: - multiple_types.append(self.json_type_search(j)) - return tuple(multiple_types) - else: - logging.warning("Type Error") - return None - - def one_of_search(self, property: dict) -> list: - """Takes a dictionary as input that represents the oneOf type content of a json document, which is an array of multiple possible values. - Returns the skeleton of this content as a list. - - Args: - property (dict): The oneOf type content as dictionary. - - Returns: - list: The skeleton of the oneOf type content as list. - """ - sub_properties=[] - for i in property["oneOf"]: - if "items" in i: - sub_sub_properties = self.json_array_search(i["items"]) - sub_properties.append(sub_sub_properties) - properties=sub_properties - elif "properties" in i: - sub_sub_properties = self.json_object_search(i) - sub_properties.append(sub_sub_properties) - properties=sub_properties - elif "type" in i: - sub_sub_properties = self.json_type_search(i["type"]) - sub_properties.append(sub_sub_properties) - properties=sub_properties - else: - sub_properties=[None] - properties=sub_properties - logging.warning("No correct definition for \"oneOf\" attribute") - return properties - diff --git a/jammato/schemas_collector.py b/jammato/schemas_collector.py deleted file mode 100644 index 495f8f8..0000000 --- a/jammato/schemas_collector.py +++ /dev/null @@ -1,41 +0,0 @@ -class Schemas_Collector(): - - def __init__(self) -> None: - """Instantiates the class and creates an empty dictionary as attribute. - """ - self.schemas = {} - - def add_schema(self, uri: str, schema: dict) -> None: - """Takes as input the string of the URI that resolves to a schema and the schema as dictionary. 
Creates a new entry in the - object dictionary, e.g. attribute schemas, where URI is the key and the schema dictionary is the value. - - Args: - uri (str): The string of the schema URI. - schema (dict): The dictionary of the JSON schema. - """ - self.schemas[uri] = schema - - def get_uri(self, uri: str) -> bool: - """Takes the string of the URI to a schema. - - Args: - uri (str): The string of the schema URI. - - Returns: - bool: Returns the boolean if the URI exists in the dictionary of the class attribute schemas. - """ - return uri in self.schemas - - def get_schema(self, uri: str) -> dict: - """Takes the URI for a schema as string. - - Args: - uri (str): The string of the schema URI. - - Returns: - dict: Returns the JSON schema from the class attribute schemas as dictionary. - """ - - return self.schemas[uri] - -schemas_collector_instance = Schemas_Collector() diff --git a/jammato/version.py b/jammato/version.py deleted file mode 100644 index 528787c..0000000 --- a/jammato/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "3.0.0" diff --git a/mapping_cli.py b/mapping_cli.py new file mode 100644 index 0000000..5eba514 --- /dev/null +++ b/mapping_cli.py @@ -0,0 +1,111 @@ +import argparse +import json +import logging +import os +import sys +import zipfile +import shutil +from pathlib import Path + +from src.IO.MappingAbortionError import MappingAbortionError +from src.IO.InputReader import InputReader +from src.IO.OutputWriter import OutputWriter + +# Make log level configurable from ENV, defaults to INFO level +logging.basicConfig( + level=os.environ.get('LOGLEVEL', 'INFO').upper() +) + +def run_cli(): + parser = argparse.ArgumentParser(description='JaMMaTo DICOM Mapper - following tomo_mapper architecture') + parser.add_argument('-i', '--input', required=True, help='Input DICOM file or zip file') + parser.add_argument('-m', '--mapping', required=True, help='Mapping file path') + parser.add_argument('-o', '--output', required=True, help='Output JSON file 
def run_mri_mapper(args):
    """
    Map one input file, or every file inside a ZIP archive, and write a single JSON output.

    For a ZIP input each archive member is mapped on its own; members that fail are
    skipped with a log message and the remaining results are merged into one document.
    The process exits with status 1 when the archive holds no usable file, when no
    member could be mapped, or when the mapping is aborted.

    :param args: parsed command line namespace providing ``input``, ``mapping`` and ``output``
    :return: None
    """
    # Local import keeps this block self-contained; only the ZIP branch needs it.
    import tempfile

    argdict = vars(args)
    input_source = argdict.get('input')
    map_source = argdict.get('mapping')
    output_path = argdict.get('output')

    try:
        if not zipfile.is_zipfile(input_source):
            result = process_input(input_source, map_source)
            OutputWriter.writeOutput(result, output_path)
            return

        combined_results = {}
        success_count = 0

        # A private temporary directory avoids extracting into (and afterwards
        # deleting) a folder next to the input that may already exist, and it is
        # removed on every exit path, including sys.exit().
        with tempfile.TemporaryDirectory(prefix="jammato_") as temp_dir:
            logging.info(f"Extracting ZIP to temporary folder: {temp_dir}")

            with zipfile.ZipFile(input_source, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Collect all files (filter if needed, e.g., by extension).
            # The macOS resource folder is matched as a path component of the
            # archive member, not as a substring of the whole path.
            extracted_files = [
                candidate
                for candidate in Path(temp_dir).rglob("*")
                if candidate.is_file()
                and "__MACOSX" not in candidate.relative_to(temp_dir).parts
            ]

            if not extracted_files:
                logging.error("No valid files found in zip archive. Aborting")
                sys.exit(1)

            for file in extracted_files:
                file_path = file.with_suffix('')
                logging.info(f"Processing extracted file: {file_path}")
                input_file = str(file)
                try:
                    result = process_input(input_file, map_source)
                except MappingAbortionError as e:
                    logging.warning(f"Skipping file {input_file} due to mapping error: {e}")
                    continue
                except Exception:
                    logging.exception(f"Unexpected error processing file {input_file}")
                    continue

                # Results are merged in memory instead of being written to and read
                # back from "<stem>.json" files in the current working directory.
                # NOTE(review): dict.update replaces top-level keys, so if every
                # file yields the same top-level key (e.g. "study") only the last
                # file's content survives - confirm whether a deep merge is intended.
                combined_results.update(result)
                success_count += 1

        logging.info(f"The temporary folder '{temp_dir}' has been deleted.")

        if success_count == 0:
            logging.error("No files could be processed successfully. Aborting.")
            sys.exit(1)

        logging.info(f"In total {success_count} file(s) were successfully processed.")
        OutputWriter.writeOutput(combined_results, output_path)

    except MappingAbortionError as e:
        logging.error(f"Mapping abortion error for {input_source}: {e}")
        sys.exit(1)
{"value": 0.2, "unit": "mm"}, "imageSize": {"rows": 64, "columns": 64}, "pixelRange": {"pixelBandwidth": {"value": 781.25, "unit": "Hz"}, "smallestImagePixelValue": 25, "largestImagePixelValue": 30850, "pixelRepresentation": 1}}, "perImage": [{"imageNumber": "", "sampleImagePosition": [-2.5344828, -2.5259516, 0.00865073]}]}}]}} \ No newline at end of file diff --git a/plugin_wrapper.py b/plugin_wrapper.py new file mode 100644 index 0000000..cebe41d --- /dev/null +++ b/plugin_wrapper.py @@ -0,0 +1,10 @@ +# run_mapping.py +import sys +from mapping_cli import run_cli + +if __name__ == "__main__": + # Extract arguments from the command line + sys.argv = ["mapping_cli"] + sys.argv[1:] + + # Call the run_cli function + run_cli() \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index f3f56b0..1ac64d6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,9 +2,9 @@ --requirements requirements.txt #Testing -pytest -codecov -pytest-cov +pytest >= 7.4 +pytest-cov >= 6.0 +pytest-mock >= 3.14.0 pre-commit pytest-benchmark @@ -16,10 +16,21 @@ isort black pre-commit autopep8 +pydantic[email] >= 2.10 +xmltodict >= 0.14 +requests +validators >= 0.34 +jsonpath-ng >= 1.7 +deepmerge >= 2.0 +magika >= 0.5.1 +numpy +pandas +h5py +pydicom #Documentation mkdocs mkdocstrings mkdocstrings[python] mkdocs-material -Pygments +Pygments \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 08f0875..d51b176 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -#Runtime requirements Could also specify versions, e.g. 
jsonschema>=1.10 pydicom jsonschema urllib3 @@ -6,3 +5,16 @@ datetime typing zipp nested_lookup +pydantic[email] >= 2.10 +xmltodict >= 0.14 +requests +validators >= 0.34 +jsonpath-ng >= 1.7 +deepmerge >= 2.0 +magika >= 0.5.1 +numpy +pandas +h5py +pytest >= 7.4 +pytest-cov >= 6.0 +pytest-mock >= 3.14.0 diff --git a/somesy.toml b/somesy.toml new file mode 100644 index 0000000..733218d --- /dev/null +++ b/somesy.toml @@ -0,0 +1,35 @@ +[project] +name = "JaMMaTo" +version = "v4.0.0rc" +description = "The software JaMMaTo (JSON Metadata Mapping Tool) is a metadata mapping tool based on Python, and is used for mapping metadata from a proprietary file format schema to a JSON format schema." + +keywords = ["MRI", "DICOM", "metadata", "extraction", "schema"] +license = "Apache-2.0" +repository = "https://github.com/kit-data-manager/JaMMaTo" + +# This is you, the proud author of your project: +[[project.people]] +given-names = "Gabin Thibaut" +family-names = "Oumbe Tekam" +orcid = "https://orcid.org/0000-0002-6745-3464" +maintainer = true # currently maintains the project (i.e. is a contact person) + +[[project.people]] +given-names = "Germaine" +family-names = "Götzelmann" +orcid = "https://orcid.org/0000-0003-3974-3728" +maintainer = true # currently maintains the project (i.e. is a contact person) + +[[project.people]] +given-names = "Nicolas" +family-names = "Blumenröhr" +orcid = "https://orcid.org/0009-0007-0235-4995" +author = true # is a full author of the project (i.e. appears in citations) +publication_author = true + +[[project.people]] +given-names = "Rossella" +family-names = "Aversa" +orcid = "https://orcid.org/0000-0003-2534-0063" +author = true # is a full author of the project (i.e. 
appears in citations) +publication_author = true diff --git a/src/IO/InputReader.py b/src/IO/InputReader.py new file mode 100644 index 0000000..208491a --- /dev/null +++ b/src/IO/InputReader.py @@ -0,0 +1,91 @@ +import os +import logging +from typing import Dict, Any + +from src.IO.MappingAbortionError import MappingAbortionError +from src.IO.MapfileReader import MapFileReader +from src.model.ImageMD import ImageMD +from src.model.SchemaConcepts.MRI_Image import MRI_Image +from src.parser.ParserFactory import ParserFactory +from src.parser.impl.MRI_Parser import MRI_Parser +from src.util import input_to_dict, get_filetype_with_magica +from src.parser.mapping_util import map_a_dict +from src.Preprocessor import Preprocessor + + +class InputReader: + """ + The input reader for MRI data following tomo_mapper architecture. + + Implementation concept: + - fail early: z.b errors in mapping file can be handled before starting to extract any file content. + - reject with error + - warn about unusual input + """ + + def __init__(self, map_path, input_path): + logging.info("Preparing MRI parser based on mapping file and input.") + + # Read and parse the mapping file + self.mapping_dict = MapFileReader.read_mapfile(map_path) + + # Parse different sections for different purposes + self.study_mapping = MapFileReader.parse_mapinfo_for_study(self.mapping_dict) + self.series_mapping = MapFileReader.parse_mapinfo_for_series(self.mapping_dict) + self.perImage_mapping = MapFileReader.parse_mapinfo_for_perImage(self.mapping_dict) + + # Validate input file exists + if not os.path.exists(input_path): + logging.error("Input file {} does not exist. 
@staticmethod
def get_applicable_parsers(input_path):
    """
    Filters the available image parsers to those applicable to the input file format.

    :param input_path: file path to input
    :return: list of parser names that can handle the provided input format
    """
    filetype = get_filetype_with_magica(input_path)
    logging.debug("Determined input type: {}".format(filetype))

    available_parsers = []
    for parser_name, parser_class in ParserFactory.available_img_parsers.items():
        expected = parser_class.expected_input_format()
        if not expected:
            # A parser that declares no format can never match.
            continue
        # A parser may declare one mimetype string or a collection of them.
        # Wrap a bare string so that `in` is an exact comparison and not a
        # substring search inside the mimetype.
        if isinstance(expected, str):
            expected = [expected]
        if filetype in expected:
            available_parsers.append(parser_name)
    return available_parsers
class MapFileReader:
    """
    This class provides utility functions reading and checking the user-provided map for MRI data
    """

    @staticmethod
    def read_mapfile(filepath) -> dict:
        """
        Load local or remote map file into dict

        :param filepath: local absolute path, local relative path or remote (absolute) URI
        :return: file content as dict
        :raises MappingAbortionError: if the file cannot be fetched, found, decoded or parsed
        """
        logging.info("Reading map file content")
        try:
            return load_json(filepath)
        except HTTPError as e:
            logging.error("Tried loading remote mapping file: {}".format(filepath))
            logging.error(e)
            raise MappingAbortionError("Map file loading failed.") from e
        except FileNotFoundError as e:
            logging.error("Local map file does not exist: {}".format(filepath))
            logging.error(e)
            raise MappingAbortionError("Map file loading failed.") from e
        except UnicodeDecodeError as e:
            logging.error("Unable to load map file as json. Please check file and file encoding")
            # The exception text names the offending byte position.
            logging.error(e)
            raise MappingAbortionError("Map file loading failed.") from e
        except JSONDecodeError as e:
            logging.error("Unable to load map file as json. Please check file structure")
            # The exception text names the line and column of the syntax error.
            logging.error(e)
            raise MappingAbortionError("Map file loading failed.") from e

    @staticmethod
    def _get_section(mapping_dict: dict, section: str):
        """
        Return one top-level section of the mapping, or an empty dict with a warning if it is absent

        :param mapping_dict: the full mapping dictionary
        :param section: name of the top-level key to look up
        :return: the value stored under the key, or {} when the key is missing
        """
        if section in mapping_dict:
            return mapping_dict[section]
        logging.warning("No {} section found in mapping file".format(section))
        return {}

    @staticmethod
    def parse_mapinfo_for_study(mapping_dict: dict):
        """
        Parse mapping dictionary to extract study-related mapping information

        :param mapping_dict: the full mapping dictionary
        :return: study mapping dictionary
        """
        return MapFileReader._get_section(mapping_dict, 'study')

    @staticmethod
    def parse_mapinfo_for_series(mapping_dict: dict):
        """
        Parse mapping dictionary to extract series-related mapping information

        :param mapping_dict: the full mapping dictionary
        :return: series mapping dictionary
        """
        return MapFileReader._get_section(mapping_dict, 'series')

    @staticmethod
    def parse_mapinfo_for_perImage(mapping_dict: dict):
        """
        Parse mapping dictionary to extract per-image-related mapping information

        :param mapping_dict: the full mapping dictionary
        :return: perImage mapping dictionary
        """
        return MapFileReader._get_section(mapping_dict, 'perImage')
class OutputWriter:
    """
    Output writer for MRI data following tomo_mapper architecture
    """

    @staticmethod
    def writeOutput(metadata_dict, output_path):
        """
        Write the metadata dictionary to a JSON file

        Missing parent directories of ``output_path`` are created. Any failure is
        logged and then re-raised to the caller.

        :param metadata_dict: the metadata dictionary to write
        :param output_path: path to the output file
        """
        try:
            # Serialize before touching the file system: a value that cannot be
            # encoded then fails here and cannot leave a truncated file behind.
            payload = json.dumps(metadata_dict, indent=4, ensure_ascii=False)

            # Ensure output directory exists; exist_ok avoids the race between
            # checking for the directory and creating it.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            with open(output_path, 'w', encoding="utf-8") as json_file:
                json_file.write(payload)

            logging.info(f"Output successfully written to {output_path}")

        except Exception as e:
            logging.error(f"Failed to write output to {output_path}: {e}")
            raise
expected_types.get(field_path, None) + + @staticmethod + def normalize_unit(input_value) -> str: + if input_value in Preprocessor.unit_normalization.keys(): + return Preprocessor.unit_normalization[input_value] + return input_value + + @staticmethod + def normalize_all_units(input_dict): + """ + Inplace normalization of all values in fields "unit" + :param input_dict: dictionary to replace units in + :return: None + """ + unit_fields = Preprocessor.parser.parse("$..unit") + unit_matches = [m for m in unit_fields.find(input_dict)] + for m in unit_matches: + if type(m.value) != str: continue #TODO: should this be possible? + original_value = m.value + if not Preprocessor.unit_normalization.get(original_value): continue + + normalized_value = Preprocessor.unit_normalization[original_value] + if normalized_value != original_value: + m.full_path.update(input_dict, normalized_value) + + @staticmethod + def normalize_datetime(input_value) -> str: + if type(input_value) == dict: + if not input_value.get("Date") and input_value.get("Time"): + logging.warning("Encountered complex date field, but cannot interpret it") + return input_value + input_value = input_value.get("Date") + " " + input_value.get("Time") + output_value = parse_datetime(input_value) + if type(output_value) == datetime: + return output_value.isoformat() + return input_value + + @staticmethod + def normalize_all_datetimes(input_dict): + fields_for_normalization = ["creationTime", "startTime", "endTime"] #we could do it more generically but may want to limit it to specific fields + + for f in fields_for_normalization: + date_fields = Preprocessor.parser.parse("$.." 
@staticmethod
def normalize_all_numbers(input_dict):
    """
    In-place conversion of numeric strings into integers or floats, but checks if it's an appropriate field.

    String values are converted only when get_expected_type names "int_type" or
    "float_type" for their path. Non-empty numpy arrays are replaced by a float copy.
    Values that cannot be converted are left untouched and a warning is logged.

    :param input_dict: dictionary to convert numeric values in
    :return: None
    """
    number_fields = Preprocessor.parser.parse("$..*")  # Traverse all fields
    converters = {"int_type": int, "float_type": float}

    for match in number_fields.find(input_dict):
        original_value = match.value
        current_field = str(match.full_path)

        # Handle type conversions if needed (e.g.: int_type, float_type)
        if isinstance(original_value, str):
            expected_type = Preprocessor.get_expected_type(current_field)
            converter = converters.get(expected_type)
            if converter is None:
                continue
            try:
                match.full_path.update(input_dict, converter(original_value))
            except ValueError:
                logging.warning(f"Error while trying to convert '{original_value}' into {expected_type} for field {current_field}")
            continue

        # Check if the value is a numpy array
        if isinstance(original_value, np.ndarray) and original_value.size > 0:
            try:
                # The element-wise variant always ended in a float array but called
                # np.isnan on string elements, which raises an uncaught TypeError.
                converted_value = original_value.astype(float)
            except (TypeError, ValueError):
                logging.warning(f"Error while converting numpy array values for field {current_field}")
                continue
            match.full_path.update(input_dict, converted_value)
class MappingConfig:
    """
    Class-level holder for the working directory used by the mapper
    """

    # Shared on the class itself; stays None until set_working_dir is called.
    working_dir = None

    @classmethod
    def set_working_dir(cls, working_dir):
        """
        Store the working directory in normalized form

        :param working_dir: path of the working directory
        :return: None
        """
        normalized = os.path.normpath(working_dir)
        cls.working_dir = normalized

    @classmethod
    def get_working_dir(cls):
        """
        :return: the stored working directory, or None if none has been set
        """
        return cls.working_dir
# Custom deserializer for datetime fields
def parse_datetime(value: str):
    """
    Try to parse a date string in one of three known non-ISO layouts

    The layout is chosen from the separators found in the text:
    "/" selects month/day/year, "." selects day.month.year, anything else is
    tried as day, abbreviated month name, year. All layouts expect a
    "HH:MM:SS" time after a single space.

    :param value: the raw value of a datetime field
    :return: a datetime on success; otherwise the unchanged input, so that the
             regular validator can still try to interpret it
    """
    # Field values are not guaranteed to be text; pass anything else through
    # instead of failing on the separator checks below.
    if not isinstance(value, str):
        return value

    if "/" in value:
        layout = "%m/%d/%Y %H:%M:%S"
    elif "." in value:
        layout = "%d.%m.%Y %H:%M:%S"
    else:
        # specific handling of expected date format that usual validator cannot handle
        layout = "%d %b %Y %H:%M:%S"

    try:
        return datetime.strptime(value, layout)
    except ValueError:
        # none of the special layouts applies - leave it to the normal validator
        return value
Affiliation(BaseModel): + institutionName: Optional[str] = None + institutionAcronym: Optional[str] = None + institutionDepartment: Optional[str] = None + institutionID: Optional[str] = None + + +class User(BaseModel): + name: Optional[str] = None + role: Annotated[ + Optional[str], + Field( + description="Role of the user to be selected from: *data curator, *instrument scientist, *team leader, *team member" + ), + ] = None + affiliation: Optional[Affiliation] = None + email: Optional[str] = None + + +class Unit(Enum): + nm = "nm" + um = "um" + mm = "mm" + cm = "cm" + m = "m" + + +class SampleSize(BaseModel): + value: Optional[float] = None + unit: Optional[Unit] = "m" + + +class Unit1(Enum): + kg = "kg" + g = "g" + + +class SampleWeight(BaseModel): + value: Optional[float] = None + unit: Optional[Unit1] = "kg" + + +class Unit2(Enum): + T = "T" + + +class MeasurementConditions(BaseModel): + value: Optional[float] = None + unit: Optional[Unit2] = "T" + + +class Sample(BaseModel): + sampleName: Optional[str] = None + sampleID: Optional[str] = None + sampleSize: Annotated[Optional[SampleSize], Field(description="Length or size of the sample, in meters.")] = None + sampleWeight: Annotated[Optional[SampleWeight], Field(description="Weight of the sample, in kilograms.")] = None + measurementConditions: Annotated[ + Optional[MeasurementConditions], Field(description="Nominal field strength of the MR Magnet, in Tesla.") + ] = None + + +class InstrumentManufacturer(BaseModel): + manufacturerName: Optional[str] = None + modelName: Optional[str] = None + manufacturerID: Optional[str] = None + + +class Instrument(BaseModel): + instrumentName: Optional[str] = None + instrumentID: Optional[str] = None + instrumentManufacturer: Optional[InstrumentManufacturer] = None + + +class Unit3(Enum): + ms = "ms" + + +class EffectiveEchoTime(BaseModel): + value: Optional[Union[float, List[Any]]] = None + unit: Optional[Unit3] = "ms" + + +class RepetitionTime(BaseModel): + value: 
Optional[Union[float, List[Any]]] = None + unit: Optional[Unit3] = "ms" + + +class Unit5(Enum): + degree = "degree" + field_ = "°" + + +class FlipAngle(BaseModel): + value: Optional[Union[float, List[Any]]] = None + unit: Optional[Unit5] = "degree" + + +class SequenceProtocol(BaseModel): + sequenceProtocolName: Optional[str] = None + effectiveEchoTime: Annotated[ + Optional[EffectiveEchoTime], + Field( + description="Time in ms between the middle of the excitation pulse and the peak of the echo signal produced" + ), + ] = None + repetitionTime: Annotated[ + Optional[RepetitionTime], + Field( + description="Time in ms between the beginning of a pulse sequence and the beginning of the subsequent (essentially identical) pulse sequence" + ), + ] = None + flipAngle: Annotated[ + Optional[FlipAngle], + Field( + description="The amount of rotation in degree the net magnetization experiences during application of a pulse" + ), + ] = None + + +class Unit6(Enum): + mm = "mm" + + +class PixelSpacing(BaseModel): + value: Optional[List[float]] = None + unit: Optional[Unit6] = "mm" + + +class SliceThickness(BaseModel): + value: Optional[float] = None + unit: Optional[Unit6] = "mm" + + +class ImageSize(BaseModel): + rows: Optional[int] = None + columns: Optional[int] = None + + +class Unit8(Enum): + Hz = "Hz" + + +class PixelBandwidth(BaseModel): + value: Optional[float] = None + unit: Optional[Unit8] = "Hz" + + +class PixelRange(BaseModel): + pixelBandwidth: Optional[PixelBandwidth] = None + smallestImagePixelValue: Optional[int] = None + largestImagePixelValue: Optional[int] = None + pixelRepresentation: Optional[int] = None + + +class AllImages(BaseModel): + numberOfImages: Optional[str] = None + imageOrientation: Optional[List[Union[float, List[Any]]]] = None + pixelSpacing: Optional[PixelSpacing] = None + sliceThickness: Optional[SliceThickness] = None + imageSize: Optional[ImageSize] = None + pixelRange: Optional[PixelRange] = None + + +class PerImageItem(BaseModel): + 
imageNumber: Optional[int] = None + sampleImagePosition: Optional[List[Union[float, List[Any]]]] = None + + +class Images(BaseModel): + allImages: Optional[AllImages] = None + perImage: Optional[List[PerImageItem]] = None + + +class Series(BaseModel): + seriesID: Optional[str] = None + seriesTitle: Optional[str] = None + sequenceProtocol: Annotated[ + Optional[SequenceProtocol], + Field( + description="The parameters of the Instrument which define the conditions under which one or more Measurements are performed and a Series is generated. Usually, there is a set of Protocols that are applied on a routine basis. The choice of a Protocol mainly depends on the Sample, the Instrument and the research question addressed by an Experiment" + ), + ] = None + images: Annotated[ + Optional[Images], + Field( + description="The Fourier transformation of spatially encoded signals collected in k-space (Raw Data) as an array of pixel values representing the intensity of mainly the H-atom signals from the Sample." + ), + ] = None + + +class Study(BaseModel): + studyID: Annotated[Optional[str], Field(description="ID of the Study")] = None + studyTitle: Annotated[Optional[str], Field(description="Title of the Study")] = None + studyDateTime: Optional[datetime] = None + program: Annotated[ + Optional[str], Field(description="Details concerning the program (or software) used for aquisition") + ] = None + user: Annotated[ + Optional[User], Field(description="Contact information of the user responsible for the measurement") + ] = None + sample: Annotated[ + Optional[Sample], + Field( + description="A physical object (or a collection of objects) which is considered as a single discrete, uniquely identified unit which is exposed to the Instrument during one or more Measurements. MRI Samples are usually managed or being placed in a container. 
The term is equivalent to Specimen" + ), + ] = None + instrument: Annotated[ + Optional[Instrument], + Field( + description="Identifiable piece of equipment used by one or more Users to perform one or more Measurements and to generate Raw Data. The Instrument is located in a laboratory hosted by an institution" + ), + ] = None + series: Annotated[ + Optional[List[Series]], + Field( + description="A set of one or more Images or Spectra (i.e. Processed Data) generated after the processing of Raw Data collected during Measurements along the z-axis of a Sample as part of the same Study and stored in one or multiple DICOM Files" + ), + ] = None + + +class MagneticResonanceImagingMriSchema(BaseModel): + study: Annotated[ + Optional[Study], + Field( + description="A collection of one or more Series that are scientifically related (depending on the research context) for the purpose of the Experiment" + ), + ] = None diff --git a/src/model/SchemaConcepts/codegen/codegen.md b/src/model/SchemaConcepts/codegen/codegen.md new file mode 100644 index 0000000..fe3ff15 --- /dev/null +++ b/src/model/SchemaConcepts/codegen/codegen.md @@ -0,0 +1,21 @@ +To allow for alignment with the relevant schemas, the classes for the mapper are autogenerated from schema +If a new schema version has to be supported, please regenerate the classes in the SchemaClasses.py +Do not adapt the classes in a major way, any change will be lost on new generation. + +Generation command used (on project root) + +``` +datamodel-codegen \ + --encoding utf-8 \ + --field-constraints \ + --target-python-version 3.12 \ + --output-model-type pydantic_v2.BaseModel \ + --force-optional \ + --use-annotated \ + --input ./MRI_schema.json \ + --input-file-type jsonschema \ + --output ./src/model/SchemaConcepts/codegen/SchemaClasses_MRI.py +``` + +Notes on used flags +- `--force-optional` since the mapper inputs do not provide all data required by the schema, we use a less strict version where everything is optional. 
This allows for leveraging some validation while ignoring missing values \ No newline at end of file diff --git a/src/parser/ImageParser.py b/src/parser/ImageParser.py new file mode 100644 index 0000000..1ae0c2f --- /dev/null +++ b/src/parser/ImageParser.py @@ -0,0 +1,20 @@ +import enum +from abc import ABC, abstractmethod + +from src.model.ImageMD import ImageMD + +class ImageParser(ABC): + + @staticmethod + @abstractmethod + def expected_input_format() -> str: + """ + Return expected input format of parser. This can be used to determine if a parser is applicable to the given input. + :return: mimetype string for input format (such as application/octet-stream) + """ + pass + + @abstractmethod + def parse(self, file_path, mapping) -> tuple[ImageMD, str]: + pass + diff --git a/src/parser/ParserFactory.py b/src/parser/ParserFactory.py new file mode 100644 index 0000000..a236078 --- /dev/null +++ b/src/parser/ParserFactory.py @@ -0,0 +1,19 @@ +import logging + +from src.parser.impl.MRI_Parser import MRI_Parser + + +class ParserFactory: + + available_img_parsers = { + "MRI_Parser": MRI_Parser + } + + @staticmethod + def create_img_parser(parser_name, **kwargs): + parser_class = ParserFactory.available_img_parsers.get(parser_name) + if parser_class: + return parser_class(**kwargs) + else: + logging.error("Parser not available: {}. 
Available parsers: {}".format(parser_name, list(ParserFactory.available_img_parsers.keys()))) + raise ValueError(f"Parser {parser_name} not found") \ No newline at end of file diff --git a/src/parser/impl/MRI_Parser.py b/src/parser/impl/MRI_Parser.py new file mode 100644 index 0000000..2f57a27 --- /dev/null +++ b/src/parser/impl/MRI_Parser.py @@ -0,0 +1,226 @@ +import logging +from typing import Optional +import json +import ast +import re + +from PIL import Image + +from src.Preprocessor import Preprocessor +from src.model.ImageMD import ImageMD +from src.model.SchemaConcepts.MRI_Image import MRI_Image +from src.parser.ImageParser import ImageParser +from src.parser.mapping_util import map_a_dict +from src.resources.maps.mapping import mriparser_full, mriparser_mixed, mriparser_relative, mriparser_study +from src.util import input_to_dict +import configparser + + + +class MRI_Parser(ImageParser): + + internal_mapping = None + #expected_input = "application/octet-stream" + + def __init__(self): + m = json.loads(mriparser_full.read_text()) + self.internal_mapping = m + + @staticmethod + def expected_input_format(): + return ["application/octet-stream", "application/x-hdf5", "application/dicom", "application/x-iso9660-image"] + + def parse(self, file_path, mapping) -> tuple[ImageMD, str]: + input_md = self._read_input_file(file_path) + + if not input_md: + logging.warning("No metadata extractable from {}".format(file_path)) + return None, None + + if not mapping and not self.internal_mapping: + logging.error("No mapping provided for image parsing. 
Aborting") + exit(1) + mapping_dict = mapping if mapping else self.internal_mapping + image_md = map_a_dict(input_md, mapping_dict) + + #Preprocessor.normalize_all_datetimes(image_md) + Preprocessor.normalize_all_numbers(image_md) + Preprocessor.normalize_all_units(image_md) + Preprocessor.normalize_gas_names(image_md) + + # Custom preprocessing to handle string-to-list conversion and add units + self._fix_data_types_and_units(image_md) + + # Create MRI_Image object from the mapped data + # The mapping result has a nested structure that needs to be reorganized + organized_data = {} + + # The mapping result has everything under 'study' key + if 'study' in image_md and isinstance(image_md['study'], dict): + study_data = image_md['study'] + + # Extract study-level fields (excluding series) + study_fields = {} + series_data = None + perImage_data = None + + for key, value in study_data.items(): + if key == 'series': + series_data = value + else: + study_fields[key] = value + + # Extract perImage from series data if it exists + if series_data and isinstance(series_data, dict): + if 'images' in series_data and 'perImage' in series_data['images']: + perImage_data = series_data['images']['perImage'] + # Remove perImage from series to avoid duplication + del series_data['images']['perImage'] + if not series_data['images']: # Remove empty images dict + del series_data['images'] + + if study_fields: + organized_data['study'] = study_fields + + if series_data: + organized_data['series'] = series_data + + if perImage_data: + # Keep perImage as dict for MRI_Image, will convert to list in as_schema_class + organized_data['perImage'] = perImage_data + + # Handle any other top-level keys + for key, value in image_md.items(): + if key != 'study': + organized_data[key] = value + + mri_image = MRI_Image(**organized_data) + image_from_md = ImageMD(image_metadata=mri_image, filePath="") + + return image_from_md, image_md + + def _fix_data_types_and_units(self, data): + """ + Custom 
preprocessing to fix string-to-list conversion and add proper units + """ + + def process_dict(d): + if isinstance(d, dict): + for key, value in d.items(): + d[key] = process_dict(value) + elif isinstance(d, list): + return [process_dict(item) for item in d] + elif isinstance(d, str): + # Convert string representations of lists to actual lists + if d.startswith('[') and d.endswith(']'): + try: + return ast.literal_eval(d) + except: + # Try to extract numbers from the string + numbers = re.findall(r'-?\d+\.?\d*', d) + if numbers: + return [float(n) if '.' in n else int(n) for n in numbers] + return d + return d + + process_dict(data) + + # Add specific units based on field names + self._add_units(data) + + # Format specific fields + self._format_study_datetime(data) + self._fix_program_field(data) + + def _add_units(self, data): + """ + Add appropriate units to numeric values based on field names + """ + def add_units_recursive(d, path=""): + if isinstance(d, dict): + for key, value in d.items(): + new_path = f"{path}.{key}" if path else key + add_units_recursive(value, new_path) + elif isinstance(d, list): + for i, item in enumerate(d): + add_units_recursive(item, f"{path}[{i}]") + elif isinstance(d, (int, float)) and not isinstance(d, bool): + # Add units based on the field path + if any(field in path.lower() for field in ['echotime', 'repetitiontime', 'flipangle']): + if 'echotime' in path.lower() or 'repetitiontime' in path.lower(): + d = {'value': d, 'unit': 'ms'} + elif 'flipangle' in path.lower(): + d = {'value': d, 'unit': 'degree'} + elif 'slicethickness' in path.lower(): + d = {'value': d, 'unit': 'mm'} + elif 'pixelbandwidth' in path.lower(): + d = {'value': d, 'unit': 'Hz'} + elif 'pixelspacing' in path.lower(): + d = {'value': d, 'unit': 'mm'} + elif 'magneticfieldstrength' in path.lower(): + d = {'value': d, 'unit': 'T'} + elif 'weight' in path.lower(): + d = {'value': d, 'unit': 'kg'} + + add_units_recursive(data) + + def _format_study_datetime(self, 
data): + """ + Format studyDateTime field to ISO format + """ + def format_datetime_recursive(d): + if isinstance(d, dict): + for key, value in d.items(): + if key == 'studyDateTime' and isinstance(value, str): + # Format DICOM date (YYYYMMDD) to ISO datetime + if len(value) == 8 and value.isdigit(): + date_str = value + # Try to get time from StudyTime if available + time_str = "10:39:05" # Default time from your example + d[key] = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}T{time_str}" + else: + d[key] = value + else: + format_datetime_recursive(value) + elif isinstance(d, list): + for item in d: + format_datetime_recursive(item) + + format_datetime_recursive(data) + + def _fix_program_field(self, data): + """ + Convert program field from list to string representation + """ + def fix_program_recursive(d): + if isinstance(d, dict): + for key, value in d.items(): + if key == 'program' and isinstance(value, list): + # Convert list to string representation + d[key] = str(value) + else: + fix_program_recursive(value) + elif isinstance(d, list): + for item in d: + fix_program_recursive(item) + + fix_program_recursive(data) + + def _read_input_file(self, file_path) -> Optional[dict]: + """ + :param file_path: image file path + :return: data from extracted image file as dict + """ + + # Read the .nxs file + md = file_path + + output_dict = {} + parsed_dict = input_to_dict(md) + + if parsed_dict is None: + logging.error(f"Not able to parse {md}.") + return None + + output_dict.update(parsed_dict) + return output_dict diff --git a/src/parser/mapping_util.py b/src/parser/mapping_util.py new file mode 100644 index 0000000..0a8e9ff --- /dev/null +++ b/src/parser/mapping_util.py @@ -0,0 +1,165 @@ +import logging +import re +import typing +import numpy as np +from jsonpath_ng.ext.parser import ExtentedJsonPathParser +from src.IO.MappingAbortionError import MappingAbortionError +import re + +parser = ExtentedJsonPathParser() + +def escape_pathelements(dotted_path): + 
funct_match = re.search(r"`(.+?)`", dotted_path) + if funct_match: + function_name = funct_match.group(1) + if function_name == "arithmetic": + return dotted_path.replace(f"`{function_name}`", "FUNCTIONPLACEHOLDER") + + path_elements = dotted_path.split(".") + escaped_elements = [] + for pe in path_elements: + if not pe: + continue + if "[" in pe: + to_escape, to_keep = pe.split("[", 1) + escaped = f"'{to_escape}'" + pe = escaped + "[" + to_keep + else: + pe = f"'{pe}'" + if pe == "'FUNCTIONPLACEHOLDER'": + pe = "`arithmetic`" + escaped_elements.append(pe) + return ".".join(escaped_elements) + +def flatten_dict(d, parent_key="", sep="."): + flattened = {} + for k, v in d.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict) and v: + flattened.update(flatten_dict(v, new_key, sep)) + else: + flattened[new_key] = v + return flattened + +def extract_base_path(path: str): + match = re.match(r"^(.*)\.(\w+\[\-?\d+\])$", path) + if match: + base_path, sort_function = match.groups() + return base_path, sort_function + return path, None + +def apply_arithmetic(myArray): + minValue = np.nanmin(myArray) + maxValue = np.nanmax(myArray) + avgValue = (minValue + maxValue) / 2. 
+ + arithmetic = [round(el, 3) if not np.isnan(el) else el for el in [minValue, maxValue, avgValue]] + + arithmetic_dict = {'min_value': arithmetic[0], 'max_value': arithmetic[1], 'avg_value': arithmetic[2]} + return arithmetic_dict + + +def get_matching_keys(original_path_template,input_dict): + # Check if the original_path_template contains (`*`) + if '*' in original_path_template: + # Extract the prefix like 'entry.sample.gas_flux_' + prefix = original_path_template.split('*')[0] + suffix = original_path_template.split('*')[-1] + matching_keys = [k for k in flatten_dict(input_dict).keys() if k.startswith(prefix) and k.endswith(suffix)] # Find all keys in the original_dict that match the prefix + else: + matching_keys = [original_path_template] + return matching_keys + +def create_unified_dict(mapping, input_dict): + output_dict = {} + + for k, v in mapping.items(): + escaped_v = escape_pathelements(v) + v1, v2 = extract_base_path(escaped_v) + + # Handle ARITHMETIC paths + if v2: + index = 0 if '[0]' in v2 else -1 if '[-1]' in v2 else 1 if '[1]' in v2 else None + exprIN = parser.parse(v1) + exprOUT = parser.parse(k) + values = [m.value for m in exprIN.find(input_dict)] + + try: + if values and len(values[0]) > 0: + arithmetic_result = apply_arithmetic(values[0]) + if index == 0: + result = arithmetic_result.get("avg_value") + elif index == -1: + result = arithmetic_result.get("min_value") + elif index == 1: + result = arithmetic_result.get("max_value") + else: + logging.warning("Unsupported index: {}, used in path: {}".format(index, v2)) + continue + + exprOUT.update_or_create(output_dict, result) + else: + logging.warning("Found a value equivalent to None. 
path: {}, value: {}".format(v, values[0])) + except Exception as e: + logging.error("Unexpected error: {} at path: {}, values: {}".format(e, v, values)) + continue # Skip rest since this path is handled + + # Handle (*) mapping + if "*" in k: + v_list = [escape_pathelements(el) for el in get_matching_keys(v, input_dict)] + exprIN_list = [parser.parse(el) for el in v_list] + values = [] + for exprIN in exprIN_list: + val = [ + m.value.item() if isinstance(m.value, np.ndarray) and len(m.value) == 1 else m.value + for m in exprIN.find(input_dict) + ] + values.extend(val) + else: + exprIN = parser.parse(escaped_v) + exprOUT = parser.parse(k) + values = [ + m.value.item() if isinstance(m.value, np.ndarray) and len(m.value) == 1 else m.value + for m in exprIN.find(input_dict) + ] + + if not values: + logging.warning(f"Mapping defined but no corresponding value found in input dict: {v}") + continue + + # Handle regular output + if "*" not in k: + try: + if not all(isinstance(x, typing.Hashable) for x in values): + logging.warning("Found multiple complex values in input dict, but output target is not a list. Only the first value will be used.") + else: + assert len(set(values)) == 1 + except AssertionError: + logging.error(f"Found multiple values in input dict, but output target is not a list. Aborting. Input path: {v}, values: {values}") + raise MappingAbortionError("Mapping input to output format failed. Mapping not applicable.") + + try: + if len(values) > 0: + exprOUT.update_or_create(output_dict, values[0]) + else: + logging.warning("Found a value equivalent to None. path: {}, value: {}".format(v, values[0])) + except Exception as e: + logging.error("Unexpected error: {} at path: {}, values: {}".format(e, v, values)) + else: + for i, value in enumerate(values): + if value: + indexed_expr = parser.parse(k.replace('*', str(i))) + indexed_expr.update_or_create(output_dict, value) + else: + logging.warning("Found a value equivalent to None. 
path: {}, value: {}".format(v, value)) + + if not output_dict: + logging.error("No output was produced by applying map to input. Was the correct mapping used?") + raise MappingAbortionError("Mapping input to output format failed. Mapping not applicable.") + + logging.info(f"Successfully mapped {len(output_dict)} fields from input") + return output_dict + + +def map_a_dict(input_dict, mapping_dict): + return create_unified_dict(mapping_dict, input_dict) \ No newline at end of file diff --git a/src/resources/maps/mapping/README.md b/src/resources/maps/mapping/README.md new file mode 100644 index 0000000..7e1de19 --- /dev/null +++ b/src/resources/maps/mapping/README.md @@ -0,0 +1,90 @@ +# Input - Output Path Mapping + +The map file designed for metadata mapping defines the relation between input path and output path in the resulting json. +All read-in information is read in into a json / dict structure for unified mapping definition. + +## Map File + +All maps are expected to be in a json format with a list of key: value pairs, where key is a string representing a dotted path in the input data, +and value is a string representing the corresponding dotted path in the output format. + +```json +{ + "path.to.input1": "path.to.output1", + "path.to.input2": "path.to.output2" +} +``` + +## Basic Mapping + +The mapping definition allows for the following relations: + +- 1-to-1: path to a single value on the left to single value on the right (see line 1 and 2 in example below) +- n-to-n: path to a list element on the left is put to a corresponding list element on the right (see line 3 in example below) +- n-to-1: path to list elements on the left is put into a single field on the right. 
This should usually only be used for duplicate entries that need to be extracted into a single structure (see line 4 in example below) +- 1-to-n: path to a single value on the left, following a pattern recognized by the preprocessor, is mapped to a corresponding list element on the right (see line 5 in the example below) + +```json +{ + "path.to.input_value": "path.to.output_value", + "path.to.input_list[3].value": "path.to.output_value", + "path.to.input_list[*].value": "path.to.output_list[*].value", + "path.to.input_list[*].value": "path.to.output_value", + "path.to.input_*.value": "path.to.output_list[*].value", +} +``` + +Type conversion is done automatically and schema-compliant if possible. This functionality mainly relies on the core functionality provided by `pydantic`. +The conversion strategy is non-strict, for example mapping values like 'off' or 'no' to boolean true/false values, if expected by the output schema. + +The internal preprocessing provides additional conversion such as simple mapping of common unit representations. This handling at the moment is by no means complete and may need future extension. + +## Advanced mapping + +Besides mapping complete input values to an output field, there is in this case the need to make some arithmetic operations on the input values like finding the maximum, minimum or average value from an input array. To allow this, a substitution function has been created, identified by the [extension of jsonpath-ng](https://github.com/h2non/jsonpath-ng?tab=readme-ov-file#extensions) and developed in the preprocessor. This allows a regex-based definition directly attached to the input path. Make sure to include the backticks, otherwise it will not be recognized as a function attachment. 
+ +*Example* + +Input: +``` +{ + "path.to.value": np.array([1.0, np.nan, 2.0]) +} +``` + +Map (regex pattern and capture group): +``` +{ + "path.to.value.`arithmetic`[-1]": "path.to.min_value", + "path.to.value.`arithmetic`[1]": "path.to.max_value", + "path.to.value.`arithmetic`[0]": "path.to.average_value" +} +``` + +Output: +``` +{ + "path.to.min_value": 1.0, + "path.to.max_value": 2.0, + "path.to.average_value": 1.5 +} +``` + +## Mapping Examples + +To explore the approach for various vendors and input formats, check the files in this folder. Currently, only one has been developed (**vendor:** Elettra-Sincrotrone Trieste synchrotron, **input format:** neXus), but more can be added as needed. + +### FAQ + +**I want to do something more complicated on the data than defined above, how do I do that?** + +> The map file approach tries to provide a way to define and document input handling separately and explicitly to help with extension without coding. It is, however, conceptually limited in its capabilities. +More complicated parsing likely needs handling in code instead. This is the case for **arithmetic operations**. Feel free to open an issue to discuss further needs. + +**What does * mean in the input or output paths?** + +> When * is in an input path, it dynamically resolves all matching keys and applies the mapping to each in an output path with a result like [output0, output1, ...] + +**What is `arithmetic`?** + +> It’s not a general-purpose function or expression evaluator. It’s a reserved keyword (like a pseudo-function) embedded inside input paths in the mapping file to indicate that a certain field should be statistically reduced before being placed into the output. 
diff --git a/src/resources/maps/mapping/__init__.py b/src/resources/maps/mapping/__init__.py new file mode 100644 index 0000000..ee1d48d --- /dev/null +++ b/src/resources/maps/mapping/__init__.py @@ -0,0 +1,9 @@ +from importlib import resources + +files = resources.files(__name__) + +mriparser_full = files.joinpath("map_full_path.json") +mriparser_mixed = files.joinpath("map_mixed_path.json") +mriparser_relative = files.joinpath("map_relative_path.json") +mriparser_study = files.joinpath("map_study_only.json") + diff --git a/src/resources/maps/mapping/map_full_path.json b/src/resources/maps/mapping/map_full_path.json new file mode 100644 index 0000000..d7af803 --- /dev/null +++ b/src/resources/maps/mapping/map_full_path.json @@ -0,0 +1,48 @@ +{ + "uri": "https://metarepo.nffa.eu/api/v1/schemas/mri_schema?version=7", + "study": { + "study.studyID": "StudyInstanceUID" , + "study.studyTitle": "StudyDescription", + "study.studyDateTime": "StudyDate", + "study.program": "SoftwareVersions", + "study.user.name": "ReferringPhysicianName", + "study.user.affiliation.institutionName": "InstitutionName", + "study.user.affiliation.institutionAcronym": "InstitutionCodeSequence", + "study.user.affiliation.institutionDepartment": "InstitutionalDepartmentName", + "study.user.affiliation.institutionID": "InstitutionalDepartmentTypeCodeSequence", + "study.user.email": "PersonsTelecomInformation", + "study.sample.sampleName": "PatientName", + "study.sample.sampleID": "PatientID", + "study.sample.sampleSize.value": "PatientsSize", + "study.sample.sampleWeight.value": "PatientWeight", + "study.sample.measurementConditions.value": "MagneticFieldStrength", + "study.instrument.instrumentName": "StationName", + "study.instrument.instrumentID": "DeviceSerialNumber", + "study.instrument.instrumentManufacturer.manufacturerName": "Manufacturer", + "study.instrument.instrumentManufacturer.modelName": "ManufacturerModelName", + "study.instrument.instrumentManufacturer.manufacturerID": 
"ManufacturersDeviceClassUID" + }, + "series": { + "study.series.seriesID": "SeriesInstanceUID", + "study.series.seriesTitle": "SeriesDescription", + "study.series.sequenceProtocol.sequenceProtocolName": "ProtocolName", + "study.series.sequenceProtocol.effectiveEchoTime.value": "EchoTime", + "study.series.sequenceProtocol.repetitionTime.value": "RepetitionTime", + "study.series.sequenceProtocol.flipAngle.value": "FlipAngle", + "study.series.images.allImages.numberOfImages": "NumberOfFrames", + "study.series.images.allImages.imageOrientation": "ImageOrientationPatient", + "study.series.images.allImages.pixelSpacing.value": "PixelSpacing", + "study.series.images.allImages.sliceThickness.value": "SliceThickness", + "study.series.images.allImages.imageSize.rows": "Rows", + "study.series.images.allImages.imageSize.columns": "Columns", + "study.series.images.allImages.pixelRange.pixelBandwidth.value": "PixelBandwidth", + "study.series.images.allImages.pixelRange.pixelRepresentation":"PixelRepresentation", + "study.series.images.allImages.pixelRange.largestImagePixelValue":"LargestImagePixelValue", + "study.series.images.allImages.pixelRange.smallestImagePixelValue": "SmallestImagePixelValue" + + }, + "perImage": { + "study.series.images.perImage.imageNumber": "InstanceNumber", + "study.series.images.perImage.sampleImagePosition": "ImagePositionPatient" + } +} diff --git a/example/metadata_maps/map_mixed_path.json b/src/resources/maps/mapping/map_mixed_path.json similarity index 100% rename from example/metadata_maps/map_mixed_path.json rename to src/resources/maps/mapping/map_mixed_path.json diff --git a/example/metadata_maps/map_relative_path.json b/src/resources/maps/mapping/map_relative_path.json similarity index 100% rename from example/metadata_maps/map_relative_path.json rename to src/resources/maps/mapping/map_relative_path.json diff --git a/example/metadata_maps/map_study_only.json b/src/resources/maps/mapping/map_study_only.json similarity index 100% rename from 
example/metadata_maps/map_study_only.json rename to src/resources/maps/mapping/map_study_only.json diff --git a/src/util.py b/src/util.py new file mode 100644 index 0000000..ee569a4 --- /dev/null +++ b/src/util.py @@ -0,0 +1,211 @@ +import json +import logging +from pathlib import Path + +import h5py +import pydicom + +from magika import Magika +import os +import tempfile +import time +from json import JSONDecodeError +from typing import Optional +import configparser +import numpy as np + +import requests +import zipfile + +from src.IO.MappingAbortionError import MappingAbortionError + +def robust_textfile_read(filepath): + try: + with open(filepath, 'r', encoding="utf-8") as file: + return file.read() + except UnicodeDecodeError: + try: + with open(filepath, 'r', encoding="latin1") as file: + return file.read() + except UnicodeDecodeError: + logging.error("Unable to determine file encoding. Aborting.") + #TODO: since it is not clear who calls this function for what, it may make more sense to raise a unified error to handle instead of error for exit + raise MappingAbortionError("File loading failed due to encoding.") + +def load_json(source) -> Optional[dict]: + """ + Load JSON data from a local file path or a web URL. + + :param source: A string representing either a local file path or a web URL. + :return: Parsed JSON data. 
+ """ + if source.startswith('http://') or source.startswith('https://'): + response = requests.get(source) + response.raise_for_status() # Raise an error for bad status codes + return response.json() + else: + return json.loads(robust_textfile_read(source)) + +def is_zipfile(filepath): + return zipfile.is_zipfile(filepath) + +def extract_zip_file(zip_file_path): + """ + extracts files of zip to a temporary directory + :param zip_file_path: local file path to zip file + :return: (path to contained emxml file, path to tmp dir) or (None, None) if no emxml file was found + """ + temp_dir = tempfile.mkdtemp() + + start_time = time.time() # Start time + logging.info(f"Extracting {zip_file_path}...") + + target_dir = None + + with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: + total_items = len(zip_ref.namelist()) + + for index, file_name in enumerate(zip_ref.namelist(), start=1): + file_path = os.path.join(temp_dir, file_name) + zip_ref.extract(file_name, temp_dir) + + end_time = time.time() # End time + total_time = end_time - start_time + + logging.info(f"Total time taken to process: {total_time:.2f} seconds.") + return temp_dir + +def strip_workdir_from_path(workdirpath, fullpath): + if fullpath.startswith(workdirpath): + return fullpath.replace(workdirpath, ".", 1) + logging.debug("Unable to remove working directory from given path. Returning unchanged path") + return fullpath + +def _import_nxs_as_dict(obj, group=''): + """ + Recursive function to travel all over the Nexus file tree and extract all data and metadata as a dictionary. 
+ Inputs: + obj: h5py (object) + Output: + inputFile (dictionary) + """ + inputFile = {} + + if isinstance(obj, h5py.Group): + for key in obj.keys(): # Iterate through all items in the group + full_directory = f"{group}.{key.strip()}" if group else key + inputFile.update(_import_nxs_as_dict(obj[key], full_directory)) + elif isinstance(obj, h5py.Dataset): + try: + # Get dataset information + dataset_info = { + 'name': obj.name, + 'attributes': dict(obj.attrs) # Attributes of the dataset + } + + # Extract the contents of the dataset (Handle scalar and array datasets) + if isinstance(obj[()], np.ndarray): + dataset_info['value'] = obj[()] + else: + dataset_info['value'] = obj[()].decode('utf-8') + + inputFile[group] = dataset_info # Add dataset info to the main dictionary + except Exception as e: + logging.warning(f"Error processing dataset {group}: {e}") + return {key.replace('/', '.'): value for key, value in inputFile.items()} + + +def _flat_to_nested_dict(flat_dict): + nested_dict = {} + + for flat_key, value in flat_dict.items(): + keys = flat_key.split('.') # Split the key by dots + current_level = nested_dict + + for key in keys[:-1]: + if key not in current_level: + current_level[key] = {} + current_level = current_level[key] + + current_level[keys[-1]] = value # Assign the value to the last key + + return nested_dict + +def _dicom_to_nested_dict(ds): + result = {} + + for elem in ds: + # Skip PixelData type (large binary) + if elem.VR in ("OB", "OW", "OF", "OD", "UN"): + continue + + key = elem.keyword or elem.name or str(elem.tag) + + # Nested type - Sequence + if elem.VR == "SQ": + result[key] = [_dicom_to_nested_dict(item) for item in elem.value] + continue + + val = elem.value + + # JSON-friendly conversion + if isinstance(val, (bytes, bytearray)): + result[key] = f"<{len(val)} bytes>" + elif isinstance(val, (list, tuple)): + result[key] = [str(v) for v in val] + elif not isinstance(val, (str, int, float, bool)) and val is not None: + result[key] = 
str(val) + else: + result[key] = val + + return result + +def input_to_dict(stringPayload) -> Optional[dict]: + if type(stringPayload) is not str: + return None + #print("--------im trying--------------", stringPayload) + try: + # Check if it's a file path vs JSON content + if os.path.exists(stringPayload): + # It's a file path, detect file type + filetype = get_filetype_with_magica(stringPayload) + logging.debug(f"Detected filetype: {filetype} for file: {stringPayload}") + elif stringPayload.startswith("{"): + # It's JSON content + try: #JSON + logging.info("Reading json content was successful!") + return json.loads(stringPayload) + except JSONDecodeError: + logging.debug("Reading input as json not successful") + return None + else: + # Not a file and not JSON, can't process + logging.debug(f"Cannot process input: {stringPayload}") + return None + + if filetype in ["application/octet-stream", "application/x-hdf5"]: + try: #NXS + with h5py.File(stringPayload, 'r') as f: + logging.info("Reading neXus/hdf5 file was successful!") + return _flat_to_nested_dict(_import_nxs_as_dict(f)) + except Exception as e: + logging.debug(f"Error reading Nexus/hdf5 file: {e}") + if filetype in ["application/dicom", "application/x-iso9660-image"]: + try: #DICOM + ds = pydicom.dcmread(stringPayload) + logging.info("Reading dicom file was successful!") + return _dicom_to_nested_dict(ds) + except Exception as e: + logging.debug(f"Error reading DICOM file: {e}") + except Exception as e: + logging.warning("Best effort input reading failed with unexpected error. 
Input malformed?") + logging.error(e) + +def normalize_path(pathString): + if "\\" in pathString: return os.path.join(*pathString.split("\\")) + return pathString + +def get_filetype_with_magica(filepath): + m = Magika() + res = m.identify_path(Path(filepath)) + return res.output.mime_type From 17a3d2389744bc827d87e5e7ab9f60f91615b908 Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Mon, 2 Mar 2026 11:32:32 +0100 Subject: [PATCH 3/9] move preprocessing logic from parser to preprocessor and clean up code --- .DS_Store | Bin 10244 -> 10244 bytes src/Preprocessor.py | 86 ++++++++-------------- src/parser/impl/MRI_Parser.py | 134 +++------------------------------- 3 files changed, 43 insertions(+), 177 deletions(-) diff --git a/.DS_Store b/.DS_Store index 47364e43c3ff80370962a67b920993b66ddf72a8..eef4c14fda63daefdb148f114768a5cae2997c18 100644 GIT binary patch delta 66 zcmZn(XbG6$&uFwUU^hRb(Pkb2X12-oVp)@o#kWk>mvGv=TH-a!=1a1sOdAU-7&o&k MaG*#}UM(>d0Pj{7x&QzG delta 495 zcmZn(XbG6$&uFnRU^hRb#bzD>W;Q_{hBSs!h8!S_2jYB&^vVCkr5)e>2LlEW;9+9H8Cl44IQ>i78I56x$*z0yMuAXiEW@jAuvz@{@t=T#${E zCy2>0F}|CuB<`!v3sh1Glm;1<50usevOpqv3`wZYVk%-_VC37JBL0oVk`HKdD$o@r jKtB`#P0a&Bkc;#{TG6yIA^T=?xQqqUW_AURAr)Q#Dhh;C diff --git a/src/Preprocessor.py b/src/Preprocessor.py index 9103de1..aa81c85 100644 --- a/src/Preprocessor.py +++ b/src/Preprocessor.py @@ -1,6 +1,7 @@ import logging import numpy as np from datetime import datetime +import re from jsonpath_ng.parser import JsonPathParser @@ -25,17 +26,6 @@ class Preprocessor: 'Mins': 'min' } - @staticmethod - def get_expected_type(field_path): - - expected_types = { - "entry.entry_identifier": "string_type", - "entry.instrument.monochromator.grating.period.value": "int_type", - "entry.sample.gas_flux[*].value": "float_type" - } - - return expected_types.get(field_path, None) - @staticmethod def normalize_unit(input_value) -> str: if input_value in Preprocessor.unit_normalization.keys(): @@ -86,56 +76,42 @@ def normalize_all_datetimes(input_dict): 
m.full_path.update(input_dict, normalized_value) @staticmethod - def normalize_all_numbers(input_dict): + def normalize_string_lists(input_dict): """ - In-place conversion of numeric strings into integers or floats, but checks if it's an appropriate field. - :param input_dict: dictionary to convert numeric values in + Convert string representations of lists to actual lists. + :param input_dict: dictionary to convert string lists in :return: None """ - number_fields = Preprocessor.parser.parse("$..*") # Traverse all fields - - for match in number_fields.find(input_dict): + + all_fields = Preprocessor.parser.parse("$..*") + + for match in all_fields.find(input_dict): original_value = match.value current_field = str(match.full_path) - expected_type = Preprocessor.get_expected_type(current_field) - #print("<<<<>>>> ",original_value) - - # Handle type conversions if needed (e.g.: int_type, float_type) + if isinstance(original_value, str): - try: - if expected_type == "int_type": # Convert only if it's a valid integer-like string - converted_value = int(original_value) + # Convert string representations of lists to actual lists + if original_value.startswith('[') and original_value.endswith(']'): + try: + converted_value = ast.literal_eval(original_value) match.full_path.update(input_dict, converted_value) - elif expected_type == "float_type": # Convert only if it's a valid float-like string - converted_value = float(original_value) - match.full_path.update(input_dict, converted_value) - except ValueError: - logging.warning(f"Error while trying to convert '{original_value}' into {expected_type} for field {current_field}") - continue - - # Check if the value is a numpy array - if isinstance(original_value, np.ndarray) and original_value.size > 0: - try: - converted_value = np.array([int(x) if isinstance(x, (int, str)) and not np.isnan(x) - else float(x) if isinstance(x, (float, str)) and not np.isnan(x) - else x - for x in original_value], dtype=float) - - 
match.full_path.update(input_dict, converted_value) - except ValueError: - logging.warning(f"Error while converting numpy array values for field {current_field}") - continue + except: + # Try to extract numbers from the string + numbers = re.findall(r'-?\d+\.?\d*', original_value) + if numbers: + converted_value = [float(n) if '.' in n else int(n) for n in numbers] + match.full_path.update(input_dict, converted_value) @staticmethod - def normalize_gas_names(input_dict): - gas_fields = Preprocessor.parser.parse("$..gas_name") - - for match in gas_fields.find(input_dict): - original_value = match.value - # Extract gas name if it's stored incorrectly (e.g., "/entry/sample/gas_flux_C2H4") - if isinstance(original_value, str) and "/" in original_value: - possible_gas = original_value.split("_")[-1] - match.full_path.update(input_dict, possible_gas) - else: - logging.warning(f"Unexpected gas name format: {original_value}") - + def normalize_program_field(input_dict): + """ + Convert program field from list to string representation for schema compatibility. + :param input_dict: dictionary to convert program field in + :return: None + """ + program_fields = Preprocessor.parser.parse("$..program") + + for match in program_fields.find(input_dict): + if isinstance(match.value, list): + converted_value = str(match.value) + match.full_path.update(input_dict, converted_value) \ No newline at end of file diff --git a/src/parser/impl/MRI_Parser.py b/src/parser/impl/MRI_Parser.py index 2f57a27..6734df1 100644 --- a/src/parser/impl/MRI_Parser.py +++ b/src/parser/impl/MRI_Parser.py @@ -1,11 +1,8 @@ import logging from typing import Optional import json -import ast import re -from PIL import Image - from src.Preprocessor import Preprocessor from src.model.ImageMD import ImageMD from src.model.SchemaConcepts.MRI_Image import MRI_Image @@ -41,19 +38,18 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]: logging.error("No mapping provided for image parsing. 
Aborting") exit(1) mapping_dict = mapping if mapping else self.internal_mapping + + Preprocessor.normalize_all_datetimes(input_md) + image_md = map_a_dict(input_md, mapping_dict) - #Preprocessor.normalize_all_datetimes(image_md) - Preprocessor.normalize_all_numbers(image_md) Preprocessor.normalize_all_units(image_md) - Preprocessor.normalize_gas_names(image_md) - - # Custom preprocessing to handle string-to-list conversion and add units - self._fix_data_types_and_units(image_md) + Preprocessor.normalize_string_lists(image_md) + Preprocessor.normalize_program_field(image_md) # Create MRI_Image object from the mapped data # The mapping result has a nested structure that needs to be reorganized - organized_data = {} + ac_md_format = {} # The mapping result has everything under 'study' key if 'study' in image_md and isinstance(image_md['study'], dict): @@ -80,132 +76,25 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]: del series_data['images'] if study_fields: - organized_data['study'] = study_fields + ac_md_format['study'] = study_fields if series_data: - organized_data['series'] = series_data + ac_md_format['series'] = series_data if perImage_data: # Keep perImage as dict for MRI_Image, will convert to list in as_schema_class - organized_data['perImage'] = perImage_data + ac_md_format['perImage'] = perImage_data # Handle any other top-level keys for key, value in image_md.items(): if key != 'study': - organized_data[key] = value + ac_md_format[key] = value - mri_image = MRI_Image(**organized_data) + mri_image = MRI_Image(**ac_md_format) image_from_md = ImageMD(image_metadata=mri_image, filePath="") return image_from_md, image_md - def _fix_data_types_and_units(self, data): - """ - Custom preprocessing to fix string-to-list conversion and add proper units - """ - - def process_dict(d): - if isinstance(d, dict): - for key, value in d.items(): - d[key] = process_dict(value) - elif isinstance(d, list): - return [process_dict(item) for item in d] - elif 
isinstance(d, str): - # Convert string representations of lists to actual lists - if d.startswith('[') and d.endswith(']'): - try: - return ast.literal_eval(d) - except: - # Try to extract numbers from the string - numbers = re.findall(r'-?\d+\.?\d*', d) - if numbers: - return [float(n) if '.' in n else int(n) for n in numbers] - return d - return d - - process_dict(data) - - # Add specific units based on field names - self._add_units(data) - - # Format specific fields - self._format_study_datetime(data) - self._fix_program_field(data) - - def _add_units(self, data): - """ - Add appropriate units to numeric values based on field names - """ - def add_units_recursive(d, path=""): - if isinstance(d, dict): - for key, value in d.items(): - new_path = f"{path}.{key}" if path else key - add_units_recursive(value, new_path) - elif isinstance(d, list): - for i, item in enumerate(d): - add_units_recursive(item, f"{path}[{i}]") - elif isinstance(d, (int, float)) and not isinstance(d, bool): - # Add units based on the field path - if any(field in path.lower() for field in ['echotime', 'repetitiontime', 'flipangle']): - if 'echotime' in path.lower() or 'repetitiontime' in path.lower(): - d = {'value': d, 'unit': 'ms'} - elif 'flipangle' in path.lower(): - d = {'value': d, 'unit': 'degree'} - elif 'slicethickness' in path.lower(): - d = {'value': d, 'unit': 'mm'} - elif 'pixelbandwidth' in path.lower(): - d = {'value': d, 'unit': 'Hz'} - elif 'pixelspacing' in path.lower(): - d = {'value': d, 'unit': 'mm'} - elif 'magneticfieldstrength' in path.lower(): - d = {'value': d, 'unit': 'T'} - elif 'weight' in path.lower(): - d = {'value': d, 'unit': 'kg'} - - add_units_recursive(data) - - def _format_study_datetime(self, data): - """ - Format studyDateTime field to ISO format - """ - def format_datetime_recursive(d): - if isinstance(d, dict): - for key, value in d.items(): - if key == 'studyDateTime' and isinstance(value, str): - # Format DICOM date (YYYYMMDD) to ISO datetime - if 
len(value) == 8 and value.isdigit(): - date_str = value - # Try to get time from StudyTime if available - time_str = "10:39:05" # Default time from your example - d[key] = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}T{time_str}" - else: - d[key] = value - else: - format_datetime_recursive(value) - elif isinstance(d, list): - for item in d: - format_datetime_recursive(item) - - format_datetime_recursive(data) - - def _fix_program_field(self, data): - """ - Convert program field from list to string representation - """ - def fix_program_recursive(d): - if isinstance(d, dict): - for key, value in d.items(): - if key == 'program' and isinstance(value, list): - # Convert list to string representation - d[key] = str(value) - else: - fix_program_recursive(value) - elif isinstance(d, list): - for item in d: - fix_program_recursive(item) - - fix_program_recursive(data) - def _read_input_file(self, file_path) -> Optional[dict]: """ :param file_path: image file path @@ -217,6 +106,7 @@ def _read_input_file(self, file_path) -> Optional[dict]: output_dict = {} parsed_dict = input_to_dict(md) + print("+++++++ ",parsed_dict) if parsed_dict is None: logging.error(f"Not able to parse {md}.") From 4ab2546764c228810cb1c6e0d3eb93244946a23f Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Mon, 2 Mar 2026 11:58:52 +0100 Subject: [PATCH 4/9] handle standardization of names field --- src/resources/maps/mapping/map_full_path.json | 78 +++++++++---------- src/util.py | 40 ++++++++-- 2 files changed, 72 insertions(+), 46 deletions(-) diff --git a/src/resources/maps/mapping/map_full_path.json b/src/resources/maps/mapping/map_full_path.json index d7af803..d56b541 100644 --- a/src/resources/maps/mapping/map_full_path.json +++ b/src/resources/maps/mapping/map_full_path.json @@ -1,48 +1,48 @@ { "uri": "https://metarepo.nffa.eu/api/v1/schemas/mri_schema?version=7", "study": { - "study.studyID": "StudyInstanceUID" , - "study.studyTitle": "StudyDescription", - "study.studyDateTime": "StudyDate", 
- "study.program": "SoftwareVersions", - "study.user.name": "ReferringPhysicianName", - "study.user.affiliation.institutionName": "InstitutionName", - "study.user.affiliation.institutionAcronym": "InstitutionCodeSequence", - "study.user.affiliation.institutionDepartment": "InstitutionalDepartmentName", - "study.user.affiliation.institutionID": "InstitutionalDepartmentTypeCodeSequence", - "study.user.email": "PersonsTelecomInformation", - "study.sample.sampleName": "PatientName", - "study.sample.sampleID": "PatientID", - "study.sample.sampleSize.value": "PatientsSize", - "study.sample.sampleWeight.value": "PatientWeight", - "study.sample.measurementConditions.value": "MagneticFieldStrength", - "study.instrument.instrumentName": "StationName", - "study.instrument.instrumentID": "DeviceSerialNumber", - "study.instrument.instrumentManufacturer.manufacturerName": "Manufacturer", - "study.instrument.instrumentManufacturer.modelName": "ManufacturerModelName", - "study.instrument.instrumentManufacturer.manufacturerID": "ManufacturersDeviceClassUID" + "study.studyID": "studyInstanceUid" , + "study.studyTitle": "studyDescription", + "study.studyDateTime": "studyDateTime", + "study.program": "softwareVersions", + "study.user.name": "referringPhysiciansName", + "study.user.affiliation.institutionName": "institutionName", + "study.user.affiliation.institutionAcronym": "institutionCodeSequence", + "study.user.affiliation.institutionDepartment": "institutionalDepartmentName", + "study.user.affiliation.institutionID": "institutionalDepartmentTypeCodeSequence", + "study.user.email": "personsTelecomInformation", + "study.sample.sampleName": "patientsName", + "study.sample.sampleID": "patientId", + "study.sample.sampleSize.value": "patientsSize", + "study.sample.sampleWeight.value": "patientsWeight", + "study.sample.measurementConditions.value": "magneticFieldStrength", + "study.instrument.instrumentName": "stationName", + "study.instrument.instrumentID": "deviceSerialNumber", + 
"study.instrument.instrumentManufacturer.manufacturerName": "manufacturer", + "study.instrument.instrumentManufacturer.modelName": "manufacturersModelName", + "study.instrument.instrumentManufacturer.manufacturerID": "manufacturersDeviceClassUID" }, "series": { - "study.series.seriesID": "SeriesInstanceUID", - "study.series.seriesTitle": "SeriesDescription", - "study.series.sequenceProtocol.sequenceProtocolName": "ProtocolName", - "study.series.sequenceProtocol.effectiveEchoTime.value": "EchoTime", - "study.series.sequenceProtocol.repetitionTime.value": "RepetitionTime", - "study.series.sequenceProtocol.flipAngle.value": "FlipAngle", - "study.series.images.allImages.numberOfImages": "NumberOfFrames", - "study.series.images.allImages.imageOrientation": "ImageOrientationPatient", - "study.series.images.allImages.pixelSpacing.value": "PixelSpacing", - "study.series.images.allImages.sliceThickness.value": "SliceThickness", - "study.series.images.allImages.imageSize.rows": "Rows", - "study.series.images.allImages.imageSize.columns": "Columns", - "study.series.images.allImages.pixelRange.pixelBandwidth.value": "PixelBandwidth", - "study.series.images.allImages.pixelRange.pixelRepresentation":"PixelRepresentation", - "study.series.images.allImages.pixelRange.largestImagePixelValue":"LargestImagePixelValue", - "study.series.images.allImages.pixelRange.smallestImagePixelValue": "SmallestImagePixelValue" + "study.series.seriesID": "seriesInstanceUid", + "study.series.seriesTitle": "seriesDescription", + "study.series.sequenceProtocol.sequenceProtocolName": "protocolName", + "study.series.sequenceProtocol.effectiveEchoTime.value": "effectiveEchoTime", + "study.series.sequenceProtocol.repetitionTime.value": "repetitionTime", + "study.series.sequenceProtocol.flipAngle.value": "flipAngle", + "study.series.images.allImages.numberOfImages": "numberOfFrames", + "study.series.images.allImages.imageOrientation": "imageOrientationpatient", + 
"study.series.images.allImages.pixelSpacing.value": "pixelSpacing", + "study.series.images.allImages.sliceThickness.value": "sliceThickness", + "study.series.images.allImages.imageSize.rows": "rows", + "study.series.images.allImages.imageSize.columns": "columns", + "study.series.images.allImages.pixelRange.pixelBandwidth.value": "pixelBandwidth", + "study.series.images.allImages.pixelRange.pixelRepresentation":"pixelRepresentation", + "study.series.images.allImages.pixelRange.largestImagePixelValue":"largestImagePixelValue", + "study.series.images.allImages.pixelRange.smallestImagePixelValue": "smallestImagePixelValue" }, "perImage": { - "study.series.images.perImage.imageNumber": "InstanceNumber", - "study.series.images.perImage.sampleImagePosition": "ImagePositionPatient" + "study.series.images.perImage.imageNumber": "instackPositionNumber", + "study.series.images.perImage.sampleImagePosition": "imagePositionpatient" } -} +} \ No newline at end of file diff --git a/src/util.py b/src/util.py index ee569a4..7e27325 100644 --- a/src/util.py +++ b/src/util.py @@ -1,6 +1,7 @@ import json import logging from pathlib import Path +import re import h5py import pydicom @@ -134,19 +135,24 @@ def _flat_to_nested_dict(flat_dict): def _dicom_to_nested_dict(ds): result = {} - for elem in ds: - # Skip PixelData type (large binary) - if elem.VR in ("OB", "OW", "OF", "OD", "UN"): + for attribute in ds: + # Skip PixelData type (large binary) - VR (Value Representation) + if attribute.VR in ("OB", "OW", "OF", "OD", "UN"): continue - key = elem.keyword or elem.name or str(elem.tag) + # Use standardized names to match mapping file expectations + keyword = attribute.keyword + name = attribute.name + standardized = name_standardization(name) if name else None + + key = standardized or keyword or str(attribute.tag) # Nested type - Sequence - if elem.VR == "SQ": - result[key] = [_dicom_to_nested_dict(item) for item in elem.value] + if attribute.VR == "SQ": + result[key] = 
[_dicom_to_nested_dict(attribute) for item in attribute.value] continue - val = elem.value + val = attribute.value # JSON-friendly conversion if isinstance(val, (bytes, bytearray)): @@ -209,3 +215,23 @@ def get_filetype_with_magica(filepath): m = Magika() res = m.identify_path(Path(filepath)) return res.output.mime_type + +def name_standardization(attribute: str) -> str: + """Takes a string of a dicom attribute as input and standardizes it after defined criteria. + + Args: + attribute (str): The attribute string that should be standardized. + + Returns: + str: The attribute string after standardization. + """ + name = attribute.split() + if len(name) == 1: + name = name[0].lower() + else: + subname = "" + for letter in name[1:]: + subname += letter.capitalize() + name = name[0].lower() + subname + name = re.sub('[^A-Za-z0-9]+', '', name) + return name From 88dd3f622a026f3e822e81f3896cf6cb5b860b51 Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Tue, 3 Mar 2026 12:17:45 +0100 Subject: [PATCH 5/9] update parse_datetime() and preprocessor to handle this format: '20210616 103905' --- .DS_Store | Bin 10244 -> 10244 bytes src/Preprocessor.py | 27 +++++++++++++++++---- src/model/SchemaConcepts/Schema_Concept.py | 3 +++ src/parser/impl/MRI_Parser.py | 1 + src/util.py | 2 +- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/.DS_Store b/.DS_Store index eef4c14fda63daefdb148f114768a5cae2997c18..fd6ed1f8396175ec8a0836f62889b07e12591ec0 100644 GIT binary patch delta 206 zcmZn(XbG6$&uFqSU^hRb$z~pb#f(PW4Dmo%z);DM%#aAg84Rf)5zn0bS`z VSNO*=*-3iRznO)%@%j9R$F_Wi@t^*2w GWds1%2NQMx diff --git a/src/Preprocessor.py b/src/Preprocessor.py index aa81c85..8666ddb 100644 --- a/src/Preprocessor.py +++ b/src/Preprocessor.py @@ -1,6 +1,6 @@ import logging import numpy as np -from datetime import datetime +from datetime import datetime, timezone import re from jsonpath_ng.parser import JsonPathParser @@ -53,19 +53,36 @@ def normalize_all_units(input_dict): @staticmethod def 
normalize_datetime(input_value) -> str: if type(input_value) == dict: - if not input_value.get("Date") and input_value.get("Time"): + if not input_value.get("Date") and input_value.get("Time"): # Not possible to handle only Time logging.warning("Encountered complex date field, but cannot interpret it") return input_value + if input_value.get("Date") and not input_value.get("Time"): # Handle only Date + input_value["Time"] = "00:00:00" + logging.info("Input with date information but no time information found. Setting time to 00:00:00") input_value = input_value.get("Date") + " " + input_value.get("Time") output_value = parse_datetime(input_value) if type(output_value) == datetime: - return output_value.isoformat() + if output_value.tzinfo: + output_value = output_value.astimezone(timezone.utc) # datetime has timezone info, convert it to UTC + else: + output_value = output_value.replace(tzinfo=timezone.utc) # No timezone, assume it's already in UTC + return output_value.isoformat().replace("+00:00", "Z") return input_value @staticmethod def normalize_all_datetimes(input_dict): - fields_for_normalization = ["creationTime", "startTime", "endTime"] #we could do it more generically but may want to limit it to specific fields - + # Handle studyDate + studyTime -> studyDateTime combination + if isinstance(input_dict, dict) and 'studyDate' in input_dict and 'studyTime' in input_dict: + # Create dict format that normalize_datetime expects + datetime_dict = { + "Date": input_dict['studyDate'], + "Time": input_dict['studyTime'] + } + combined_datetime = Preprocessor.normalize_datetime(datetime_dict) + input_dict['studyDateTime'] = combined_datetime + + # Handle other datetime fields with original logic + fields_for_normalization = ["creationTime", "startTime", "endTime"] for f in fields_for_normalization: date_fields = Preprocessor.parser.parse("$.." 
+ f) date_matches = [m for m in date_fields.find(input_dict)] diff --git a/src/model/SchemaConcepts/Schema_Concept.py b/src/model/SchemaConcepts/Schema_Concept.py index 6dfd651..9553a7e 100644 --- a/src/model/SchemaConcepts/Schema_Concept.py +++ b/src/model/SchemaConcepts/Schema_Concept.py @@ -13,6 +13,9 @@ def parse_datetime(value: str): return datetime.strptime(value, "%m/%d/%Y %H:%M:%S") if "." in value: return datetime.strptime(value, '%d.%m.%Y %H:%M:%S') + # Handle DICOM combined format: 'YYYYMMDD HHMMSS' + if len(value) == 15 and value[8] == ' ' and value[:8].isdigit() and value[9:].isdigit(): + return datetime.strptime(value, "%Y%m%d %H%M%S") return datetime.strptime(value, '%d %b %Y %H:%M:%S')#specific handling of expected date format that usual validator cannot handle except ValueError: return value #not a German date - lets hope that the normal validator can handle it diff --git a/src/parser/impl/MRI_Parser.py b/src/parser/impl/MRI_Parser.py index 6734df1..b004743 100644 --- a/src/parser/impl/MRI_Parser.py +++ b/src/parser/impl/MRI_Parser.py @@ -39,6 +39,7 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]: exit(1) mapping_dict = mapping if mapping else self.internal_mapping + # Normalize datetimes before mapping to create studyDateTime field Preprocessor.normalize_all_datetimes(input_md) image_md = map_a_dict(input_md, mapping_dict) diff --git a/src/util.py b/src/util.py index 7e27325..3b6d2b3 100644 --- a/src/util.py +++ b/src/util.py @@ -149,7 +149,7 @@ def _dicom_to_nested_dict(ds): # Nested type - Sequence if attribute.VR == "SQ": - result[key] = [_dicom_to_nested_dict(attribute) for item in attribute.value] + result[key] = [_dicom_to_nested_dict(item) for item in attribute.value] continue val = attribute.value From adf87af1b634a8b77dead9a1018e2753b5c1eb4e Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Tue, 3 Mar 2026 15:33:43 +0100 Subject: [PATCH 6/9] add mappingservice-plugin --- .DS_Store | Bin 10244 -> 12292 bytes 
mappingservice-plugin/build.gradle | 61 +++++ .../gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 43583 bytes .../gradle/wrapper/gradle-wrapper.properties | 7 + mappingservice-plugin/gradlew | 252 ++++++++++++++++++ mappingservice-plugin/gradlew.bat | 94 +++++++ .../integrationtests/basic.hurl | 71 +++++ mappingservice-plugin/settings.gradle | 1 + .../apeHeplugin/JaMMaToPlugin.java | 54 ++++ tests/SampleData/MRIm1.dcm | Bin 0 -> 9886 bytes 10 files changed, 540 insertions(+) create mode 100644 mappingservice-plugin/build.gradle create mode 100644 mappingservice-plugin/gradle/wrapper/gradle-wrapper.jar create mode 100644 mappingservice-plugin/gradle/wrapper/gradle-wrapper.properties create mode 100755 mappingservice-plugin/gradlew create mode 100644 mappingservice-plugin/gradlew.bat create mode 100644 mappingservice-plugin/integrationtests/basic.hurl create mode 100644 mappingservice-plugin/settings.gradle create mode 100644 mappingservice-plugin/src/main/java/edu/kit/datamanager/apeHeplugin/JaMMaToPlugin.java create mode 100644 tests/SampleData/MRIm1.dcm diff --git a/.DS_Store b/.DS_Store index fd6ed1f8396175ec8a0836f62889b07e12591ec0..6908bad4b70720eac4c2405e093b1ebe7752dd52 100644 GIT binary patch delta 679 zcmZn(Xh~3DU|?W$DortDV9)?EIe-{M3-ADifgAbS6fu+ml_vvPxo{JeCaHH?#=h;5e@ zVaNx%s|4si5E;*q0^}zH*||Vh=S}`6CdJ0E38;A9}DPTJ7!pjZ4MNk#_Udlm(g5S1oS!Ie=uNRm^?$|nhemXtUwF|8r(p_6*(Pj rOnlEgnP0_H0wfKLYmmP{G$_hJG{@u&l{u4_OV8dsT{M7^n$7|Ma^I7l delta 175 zcmZokXbF&DU|?W$DortDU{C-uIe-{M3-C-V6q~3gIoUvmMHI+qFacslpg2QLx?yl~ zer~~LL8fo4n-!QgGHzz)VBugi+03J0$2^%?%zg7j1I);sZ4^*AhUp0a03ZfkTpPKzB5ne*Rh;z&m+OX1aX1J + def generatedResourcesDir = project.layout.buildDirectory.dir(["resources", "main"].join(File.separator)) + def outputFile = generatedResourcesDir.map { it.file("dicom2json.properties") } + + t.destinationFile = outputFile.get().asFile + t.property("version", project.version) +} + +//resolveMainClassName.dependsOn("generateVersionProps") + +jar { + 
dependsOn(generateVersionProps) + archiveFileName +} + +//bootJar { +// enabled = false +//} \ No newline at end of file diff --git a/mappingservice-plugin/gradle/wrapper/gradle-wrapper.jar b/mappingservice-plugin/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..a4b76b9530d66f5e68d973ea569d8e19de379189 GIT binary patch literal 43583 zcma&N1CXTcmMvW9vTb(Rwr$&4wr$(C?dmSu>@vG-+vuvg^_??!{yS%8zW-#zn-LkA z5&1^$^{lnmUON?}LBF8_K|(?T0Ra(xUH{($5eN!MR#ZihR#HxkUPe+_R8Cn`RRs(P z_^*#_XlXmGv7!4;*Y%p4nw?{bNp@UZHv1?Um8r6)Fei3p@ClJn0ECfg1hkeuUU@Or zDaPa;U3fE=3L}DooL;8f;P0ipPt0Z~9P0)lbStMS)ag54=uL9ia-Lm3nh|@(Y?B`; zx_#arJIpXH!U{fbCbI^17}6Ri*H<>OLR%c|^mh8+)*h~K8Z!9)DPf zR2h?lbDZQ`p9P;&DQ4F0sur@TMa!Y}S8irn(%d-gi0*WxxCSk*A?3lGh=gcYN?FGl z7D=Js!i~0=u3rox^eO3i@$0=n{K1lPNU zwmfjRVmLOCRfe=seV&P*1Iq=^i`502keY8Uy-WNPwVNNtJFx?IwAyRPZo2Wo1+S(xF37LJZ~%i)kpFQ3Fw=mXfd@>%+)RpYQLnr}B~~zoof(JVm^^&f zxKV^+3D3$A1G;qh4gPVjhrC8e(VYUHv#dy^)(RoUFM?o%W-EHxufuWf(l*@-l+7vt z=l`qmR56K~F|v<^Pd*p~1_y^P0P^aPC##d8+HqX4IR1gu+7w#~TBFphJxF)T$2WEa zxa?H&6=Qe7d(#tha?_1uQys2KtHQ{)Qco)qwGjrdNL7thd^G5i8Os)CHqc>iOidS} z%nFEDdm=GXBw=yXe1W-ShHHFb?Cc70+$W~z_+}nAoHFYI1MV1wZegw*0y^tC*s%3h zhD3tN8b=Gv&rj}!SUM6|ajSPp*58KR7MPpI{oAJCtY~JECm)*m_x>AZEu>DFgUcby z1Qaw8lU4jZpQ_$;*7RME+gq1KySGG#Wql>aL~k9tLrSO()LWn*q&YxHEuzmwd1?aAtI zBJ>P=&$=l1efe1CDU;`Fd+_;&wI07?V0aAIgc(!{a z0Jg6Y=inXc3^n!U0Atk`iCFIQooHqcWhO(qrieUOW8X(x?(RD}iYDLMjSwffH2~tB z)oDgNBLB^AJBM1M^c5HdRx6fBfka`(LD-qrlh5jqH~);#nw|iyp)()xVYak3;Ybik z0j`(+69aK*B>)e_p%=wu8XC&9e{AO4c~O1U`5X9}?0mrd*m$_EUek{R?DNSh(=br# z#Q61gBzEpmy`$pA*6!87 zSDD+=@fTY7<4A?GLqpA?Pb2z$pbCc4B4zL{BeZ?F-8`s$?>*lXXtn*NC61>|*w7J* z$?!iB{6R-0=KFmyp1nnEmLsA-H0a6l+1uaH^g%c(p{iT&YFrbQ$&PRb8Up#X3@Zsk zD^^&LK~111%cqlP%!_gFNa^dTYT?rhkGl}5=fL{a`UViaXWI$k-UcHJwmaH1s=S$4 z%4)PdWJX;hh5UoK?6aWoyLxX&NhNRqKam7tcOkLh{%j3K^4Mgx1@i|Pi&}<^5>hs5 zm8?uOS>%)NzT(%PjVPGa?X%`N2TQCKbeH2l;cTnHiHppPSJ<7y-yEIiC!P*ikl&!B 
z%+?>VttCOQM@ShFguHVjxX^?mHX^hSaO_;pnyh^v9EumqSZTi+#f&_Vaija0Q-e*| z7ulQj6Fs*bbmsWp{`auM04gGwsYYdNNZcg|ph0OgD>7O}Asn7^Z=eI>`$2*v78;sj-}oMoEj&@)9+ycEOo92xSyY344^ z11Hb8^kdOvbf^GNAK++bYioknrpdN>+u8R?JxG=!2Kd9r=YWCOJYXYuM0cOq^FhEd zBg2puKy__7VT3-r*dG4c62Wgxi52EMCQ`bKgf*#*ou(D4-ZN$+mg&7$u!! z-^+Z%;-3IDwqZ|K=ah85OLwkO zKxNBh+4QHh)u9D?MFtpbl)us}9+V!D%w9jfAMYEb>%$A;u)rrI zuBudh;5PN}_6J_}l55P3l_)&RMlH{m!)ai-i$g)&*M`eN$XQMw{v^r@-125^RRCF0 z^2>|DxhQw(mtNEI2Kj(;KblC7x=JlK$@78`O~>V!`|1Lm-^JR$-5pUANAnb(5}B}JGjBsliK4& zk6y(;$e&h)lh2)L=bvZKbvh@>vLlreBdH8No2>$#%_Wp1U0N7Ank!6$dFSi#xzh|( zRi{Uw%-4W!{IXZ)fWx@XX6;&(m_F%c6~X8hx=BN1&q}*( zoaNjWabE{oUPb!Bt$eyd#$5j9rItB-h*5JiNi(v^e|XKAj*8(k<5-2$&ZBR5fF|JA z9&m4fbzNQnAU}r8ab>fFV%J0z5awe#UZ|bz?Ur)U9bCIKWEzi2%A+5CLqh?}K4JHi z4vtM;+uPsVz{Lfr;78W78gC;z*yTch~4YkLr&m-7%-xc ztw6Mh2d>_iO*$Rd8(-Cr1_V8EO1f*^@wRoSozS) zy1UoC@pruAaC8Z_7~_w4Q6n*&B0AjOmMWa;sIav&gu z|J5&|{=a@vR!~k-OjKEgPFCzcJ>#A1uL&7xTDn;{XBdeM}V=l3B8fE1--DHjSaxoSjNKEM9|U9#m2<3>n{Iuo`r3UZp;>GkT2YBNAh|b z^jTq-hJp(ebZh#Lk8hVBP%qXwv-@vbvoREX$TqRGTgEi$%_F9tZES@z8Bx}$#5eeG zk^UsLBH{bc2VBW)*EdS({yw=?qmevwi?BL6*=12k9zM5gJv1>y#ML4!)iiPzVaH9% zgSImetD@dam~e>{LvVh!phhzpW+iFvWpGT#CVE5TQ40n%F|p(sP5mXxna+Ev7PDwA zamaV4m*^~*xV+&p;W749xhb_X=$|LD;FHuB&JL5?*Y2-oIT(wYY2;73<^#46S~Gx| z^cez%V7x$81}UWqS13Gz80379Rj;6~WdiXWOSsdmzY39L;Hg3MH43o*y8ibNBBH`(av4|u;YPq%{R;IuYow<+GEsf@R?=@tT@!}?#>zIIn0CoyV!hq3mw zHj>OOjfJM3F{RG#6ujzo?y32m^tgSXf@v=J$ELdJ+=5j|=F-~hP$G&}tDZsZE?5rX ztGj`!S>)CFmdkccxM9eGIcGnS2AfK#gXwj%esuIBNJQP1WV~b~+D7PJTmWGTSDrR` zEAu4B8l>NPuhsk5a`rReSya2nfV1EK01+G!x8aBdTs3Io$u5!6n6KX%uv@DxAp3F@{4UYg4SWJtQ-W~0MDb|j-$lwVn znAm*Pl!?Ps&3wO=R115RWKb*JKoexo*)uhhHBncEDMSVa_PyA>k{Zm2(wMQ(5NM3# z)jkza|GoWEQo4^s*wE(gHz?Xsg4`}HUAcs42cM1-qq_=+=!Gk^y710j=66(cSWqUe zklbm8+zB_syQv5A2rj!Vbw8;|$@C!vfNmNV!yJIWDQ>{+2x zKjuFX`~~HKG~^6h5FntRpnnHt=D&rq0>IJ9#F0eM)Y-)GpRjiN7gkA8wvnG#K=q{q z9dBn8_~wm4J<3J_vl|9H{7q6u2A!cW{bp#r*-f{gOV^e=8S{nc1DxMHFwuM$;aVI^ zz6A*}m8N-&x8;aunp1w7_vtB*pa+OYBw=TMc6QK=mbA-|Cf* 
zvyh8D4LRJImooUaSb7t*fVfih<97Gf@VE0|z>NcBwBQze);Rh!k3K_sfunToZY;f2 z^HmC4KjHRVg+eKYj;PRN^|E0>Gj_zagfRbrki68I^#~6-HaHg3BUW%+clM1xQEdPYt_g<2K+z!$>*$9nQ>; zf9Bei{?zY^-e{q_*|W#2rJG`2fy@{%6u0i_VEWTq$*(ZN37|8lFFFt)nCG({r!q#9 z5VK_kkSJ3?zOH)OezMT{!YkCuSSn!K#-Rhl$uUM(bq*jY? zi1xbMVthJ`E>d>(f3)~fozjg^@eheMF6<)I`oeJYx4*+M&%c9VArn(OM-wp%M<-`x z7sLP1&3^%Nld9Dhm@$3f2}87!quhI@nwd@3~fZl_3LYW-B?Ia>ui`ELg z&Qfe!7m6ze=mZ`Ia9$z|ARSw|IdMpooY4YiPN8K z4B(ts3p%2i(Td=tgEHX z0UQ_>URBtG+-?0E;E7Ld^dyZ;jjw0}XZ(}-QzC6+NN=40oDb2^v!L1g9xRvE#@IBR zO!b-2N7wVfLV;mhEaXQ9XAU+>=XVA6f&T4Z-@AX!leJ8obP^P^wP0aICND?~w&NykJ#54x3_@r7IDMdRNy4Hh;h*!u(Ol(#0bJdwEo$5437-UBjQ+j=Ic>Q2z` zJNDf0yO6@mr6y1#n3)s(W|$iE_i8r@Gd@!DWDqZ7J&~gAm1#~maIGJ1sls^gxL9LLG_NhU!pTGty!TbhzQnu)I*S^54U6Yu%ZeCg`R>Q zhBv$n5j0v%O_j{QYWG!R9W?5_b&67KB$t}&e2LdMvd(PxN6Ir!H4>PNlerpBL>Zvyy!yw z-SOo8caEpDt(}|gKPBd$qND5#a5nju^O>V&;f890?yEOfkSG^HQVmEbM3Ugzu+UtH zC(INPDdraBN?P%kE;*Ae%Wto&sgw(crfZ#Qy(<4nk;S|hD3j{IQRI6Yq|f^basLY; z-HB&Je%Gg}Jt@={_C{L$!RM;$$|iD6vu#3w?v?*;&()uB|I-XqEKqZPS!reW9JkLewLb!70T7n`i!gNtb1%vN- zySZj{8-1>6E%H&=V}LM#xmt`J3XQoaD|@XygXjdZ1+P77-=;=eYpoEQ01B@L*a(uW zrZeZz?HJsw_4g0vhUgkg@VF8<-X$B8pOqCuWAl28uB|@r`19DTUQQsb^pfqB6QtiT z*`_UZ`fT}vtUY#%sq2{rchyfu*pCg;uec2$-$N_xgjZcoumE5vSI{+s@iLWoz^Mf; zuI8kDP{!XY6OP~q5}%1&L}CtfH^N<3o4L@J@zg1-mt{9L`s^z$Vgb|mr{@WiwAqKg zp#t-lhrU>F8o0s1q_9y`gQNf~Vb!F%70f}$>i7o4ho$`uciNf=xgJ>&!gSt0g;M>*x4-`U)ysFW&Vs^Vk6m%?iuWU+o&m(2Jm26Y(3%TL; zA7T)BP{WS!&xmxNw%J=$MPfn(9*^*TV;$JwRy8Zl*yUZi8jWYF>==j~&S|Xinsb%c z2?B+kpet*muEW7@AzjBA^wAJBY8i|#C{WtO_or&Nj2{=6JTTX05}|H>N2B|Wf!*3_ z7hW*j6p3TvpghEc6-wufFiY!%-GvOx*bZrhZu+7?iSrZL5q9}igiF^*R3%DE4aCHZ zqu>xS8LkW+Auv%z-<1Xs92u23R$nk@Pk}MU5!gT|c7vGlEA%G^2th&Q*zfg%-D^=f z&J_}jskj|Q;73NP4<4k*Y%pXPU2Thoqr+5uH1yEYM|VtBPW6lXaetokD0u z9qVek6Q&wk)tFbQ8(^HGf3Wp16gKmr>G;#G(HRBx?F`9AIRboK+;OfHaLJ(P>IP0w zyTbTkx_THEOs%Q&aPrxbZrJlio+hCC_HK<4%f3ZoSAyG7Dn`=X=&h@m*|UYO-4Hq0 z-Bq&+Ie!S##4A6OGoC~>ZW`Y5J)*ouaFl_e9GA*VSL!O_@xGiBw!AF}1{tB)z(w%c 
zS1Hmrb9OC8>0a_$BzeiN?rkPLc9%&;1CZW*4}CDDNr2gcl_3z+WC15&H1Zc2{o~i) z)LLW=WQ{?ricmC`G1GfJ0Yp4Dy~Ba;j6ZV4r{8xRs`13{dD!xXmr^Aga|C=iSmor% z8hi|pTXH)5Yf&v~exp3o+sY4B^^b*eYkkCYl*T{*=-0HniSA_1F53eCb{x~1k3*`W zr~};p1A`k{1DV9=UPnLDgz{aJH=-LQo<5%+Em!DNN252xwIf*wF_zS^!(XSm(9eoj z=*dXG&n0>)_)N5oc6v!>-bd(2ragD8O=M|wGW z!xJQS<)u70m&6OmrF0WSsr@I%T*c#Qo#Ha4d3COcX+9}hM5!7JIGF>7<~C(Ear^Sn zm^ZFkV6~Ula6+8S?oOROOA6$C&q&dp`>oR-2Ym3(HT@O7Sd5c~+kjrmM)YmgPH*tL zX+znN>`tv;5eOfX?h{AuX^LK~V#gPCu=)Tigtq9&?7Xh$qN|%A$?V*v=&-2F$zTUv z`C#WyIrChS5|Kgm_GeudCFf;)!WH7FI60j^0o#65o6`w*S7R@)88n$1nrgU(oU0M9 zx+EuMkC>(4j1;m6NoGqEkpJYJ?vc|B zOlwT3t&UgL!pX_P*6g36`ZXQ; z9~Cv}ANFnJGp(;ZhS(@FT;3e)0)Kp;h^x;$*xZn*k0U6-&FwI=uOGaODdrsp-!K$Ac32^c{+FhI-HkYd5v=`PGsg%6I`4d9Jy)uW0y%) zm&j^9WBAp*P8#kGJUhB!L?a%h$hJgQrx!6KCB_TRo%9{t0J7KW8!o1B!NC)VGLM5! zpZy5Jc{`r{1e(jd%jsG7k%I+m#CGS*BPA65ZVW~fLYw0dA-H_}O zrkGFL&P1PG9p2(%QiEWm6x;U-U&I#;Em$nx-_I^wtgw3xUPVVu zqSuKnx&dIT-XT+T10p;yjo1Y)z(x1fb8Dzfn8e yu?e%!_ptzGB|8GrCfu%p?(_ zQccdaaVK$5bz;*rnyK{_SQYM>;aES6Qs^lj9lEs6_J+%nIiuQC*fN;z8md>r_~Mfl zU%p5Dt_YT>gQqfr@`cR!$NWr~+`CZb%dn;WtzrAOI>P_JtsB76PYe*<%H(y>qx-`Kq!X_; z<{RpAqYhE=L1r*M)gNF3B8r(<%8mo*SR2hu zccLRZwGARt)Hlo1euqTyM>^!HK*!Q2P;4UYrysje@;(<|$&%vQekbn|0Ruu_Io(w4#%p6ld2Yp7tlA`Y$cciThP zKzNGIMPXX%&Ud0uQh!uQZz|FB`4KGD?3!ND?wQt6!n*f4EmCoJUh&b?;B{|lxs#F- z31~HQ`SF4x$&v00@(P+j1pAaj5!s`)b2RDBp*PB=2IB>oBF!*6vwr7Dp%zpAx*dPr zb@Zjq^XjN?O4QcZ*O+8>)|HlrR>oD*?WQl5ri3R#2?*W6iJ>>kH%KnnME&TT@ZzrHS$Q%LC?n|e>V+D+8D zYc4)QddFz7I8#}y#Wj6>4P%34dZH~OUDb?uP%-E zwjXM(?Sg~1!|wI(RVuxbu)-rH+O=igSho_pDCw(c6b=P zKk4ATlB?bj9+HHlh<_!&z0rx13K3ZrAR8W)!@Y}o`?a*JJsD+twZIv`W)@Y?Amu_u zz``@-e2X}27$i(2=9rvIu5uTUOVhzwu%mNazS|lZb&PT;XE2|B&W1>=B58#*!~D&) zfVmJGg8UdP*fx(>Cj^?yS^zH#o-$Q-*$SnK(ZVFkw+er=>N^7!)FtP3y~Xxnu^nzY zikgB>Nj0%;WOltWIob|}%lo?_C7<``a5hEkx&1ku$|)i>Rh6@3h*`slY=9U}(Ql_< zaNG*J8vb&@zpdhAvv`?{=zDedJ23TD&Zg__snRAH4eh~^oawdYi6A3w8<Ozh@Kw)#bdktM^GVb zrG08?0bG?|NG+w^&JvD*7LAbjED{_Zkc`3H!My>0u5Q}m!+6VokMLXxl`Mkd=g&Xx 
z-a>m*#G3SLlhbKB!)tnzfWOBV;u;ftU}S!NdD5+YtOjLg?X}dl>7m^gOpihrf1;PY zvll&>dIuUGs{Qnd- zwIR3oIrct8Va^Tm0t#(bJD7c$Z7DO9*7NnRZorrSm`b`cxz>OIC;jSE3DO8`hX955ui`s%||YQtt2 z5DNA&pG-V+4oI2s*x^>-$6J?p=I>C|9wZF8z;VjR??Icg?1w2v5Me+FgAeGGa8(3S z4vg*$>zC-WIVZtJ7}o9{D-7d>zCe|z#<9>CFve-OPAYsneTb^JH!Enaza#j}^mXy1 z+ULn^10+rWLF6j2>Ya@@Kq?26>AqK{A_| zQKb*~F1>sE*=d?A?W7N2j?L09_7n+HGi{VY;MoTGr_)G9)ot$p!-UY5zZ2Xtbm=t z@dpPSGwgH=QtIcEulQNI>S-#ifbnO5EWkI;$A|pxJd885oM+ zGZ0_0gDvG8q2xebj+fbCHYfAXuZStH2j~|d^sBAzo46(K8n59+T6rzBwK)^rfPT+B zyIFw)9YC-V^rhtK`!3jrhmW-sTmM+tPH+;nwjL#-SjQPUZ53L@A>y*rt(#M(qsiB2 zx6B)dI}6Wlsw%bJ8h|(lhkJVogQZA&n{?Vgs6gNSXzuZpEyu*xySy8ro07QZ7Vk1!3tJphN_5V7qOiyK8p z#@jcDD8nmtYi1^l8ml;AF<#IPK?!pqf9D4moYk>d99Im}Jtwj6c#+A;f)CQ*f-hZ< z=p_T86jog%!p)D&5g9taSwYi&eP z#JuEK%+NULWus;0w32-SYFku#i}d~+{Pkho&^{;RxzP&0!RCm3-9K6`>KZpnzS6?L z^H^V*s!8<>x8bomvD%rh>Zp3>Db%kyin;qtl+jAv8Oo~1g~mqGAC&Qi_wy|xEt2iz zWAJEfTV%cl2Cs<1L&DLRVVH05EDq`pH7Oh7sR`NNkL%wi}8n>IXcO40hp+J+sC!W?!krJf!GJNE8uj zg-y~Ns-<~D?yqbzVRB}G>0A^f0!^N7l=$m0OdZuqAOQqLc zX?AEGr1Ht+inZ-Qiwnl@Z0qukd__a!C*CKuGdy5#nD7VUBM^6OCpxCa2A(X;e0&V4 zM&WR8+wErQ7UIc6LY~Q9x%Sn*Tn>>P`^t&idaOEnOd(Ufw#>NoR^1QdhJ8s`h^|R_ zXX`c5*O~Xdvh%q;7L!_!ohf$NfEBmCde|#uVZvEo>OfEq%+Ns7&_f$OR9xsihRpBb z+cjk8LyDm@U{YN>+r46?nn{7Gh(;WhFw6GAxtcKD+YWV?uge>;+q#Xx4!GpRkVZYu zzsF}1)7$?%s9g9CH=Zs+B%M_)+~*j3L0&Q9u7!|+T`^O{xE6qvAP?XWv9_MrZKdo& z%IyU)$Q95AB4!#hT!_dA>4e@zjOBD*Y=XjtMm)V|+IXzjuM;(l+8aA5#Kaz_$rR6! 
zj>#&^DidYD$nUY(D$mH`9eb|dtV0b{S>H6FBfq>t5`;OxA4Nn{J(+XihF(stSche7$es&~N$epi&PDM_N`As;*9D^L==2Q7Z2zD+CiU(|+-kL*VG+&9!Yb3LgPy?A zm7Z&^qRG_JIxK7-FBzZI3Q<;{`DIxtc48k> zc|0dmX;Z=W$+)qE)~`yn6MdoJ4co;%!`ddy+FV538Y)j(vg}5*k(WK)KWZ3WaOG!8 z!syGn=s{H$odtpqFrT#JGM*utN7B((abXnpDM6w56nhw}OY}0TiTG1#f*VFZr+^-g zbP10`$LPq_;PvrA1XXlyx2uM^mrjTzX}w{yuLo-cOClE8MMk47T25G8M!9Z5ypOSV zAJUBGEg5L2fY)ZGJb^E34R2zJ?}Vf>{~gB!8=5Z) z9y$>5c)=;o0HeHHSuE4U)#vG&KF|I%-cF6f$~pdYJWk_dD}iOA>iA$O$+4%@>JU08 zS`ep)$XLPJ+n0_i@PkF#ri6T8?ZeAot$6JIYHm&P6EB=BiaNY|aA$W0I+nz*zkz_z zkEru!tj!QUffq%)8y0y`T&`fuus-1p>=^hnBiBqD^hXrPs`PY9tU3m0np~rISY09> z`P3s=-kt_cYcxWd{de@}TwSqg*xVhp;E9zCsnXo6z z?f&Sv^U7n4`xr=mXle94HzOdN!2kB~4=%)u&N!+2;z6UYKUDqi-s6AZ!haB;@&B`? z_TRX0%@suz^TRdCb?!vNJYPY8L_}&07uySH9%W^Tc&1pia6y1q#?*Drf}GjGbPjBS zbOPcUY#*$3sL2x4v_i*Y=N7E$mR}J%|GUI(>WEr+28+V z%v5{#e!UF*6~G&%;l*q*$V?&r$Pp^sE^i-0$+RH3ERUUdQ0>rAq2(2QAbG}$y{de( z>{qD~GGuOk559Y@%$?N^1ApVL_a704>8OD%8Y%8B;FCt%AoPu8*D1 zLB5X>b}Syz81pn;xnB}%0FnwazlWfUV)Z-~rZg6~b z6!9J$EcE&sEbzcy?CI~=boWA&eeIa%z(7SE^qgVLz??1Vbc1*aRvc%Mri)AJaAG!p z$X!_9Ds;Zz)f+;%s&dRcJt2==P{^j3bf0M=nJd&xwUGlUFn?H=2W(*2I2Gdu zv!gYCwM10aeus)`RIZSrCK=&oKaO_Ry~D1B5!y0R=%!i2*KfXGYX&gNv_u+n9wiR5 z*e$Zjju&ODRW3phN925%S(jL+bCHv6rZtc?!*`1TyYXT6%Ju=|X;6D@lq$8T zW{Y|e39ioPez(pBH%k)HzFITXHvnD6hw^lIoUMA;qAJ^CU?top1fo@s7xT13Fvn1H z6JWa-6+FJF#x>~+A;D~;VDs26>^oH0EI`IYT2iagy23?nyJ==i{g4%HrAf1-*v zK1)~@&(KkwR7TL}L(A@C_S0G;-GMDy=MJn2$FP5s<%wC)4jC5PXoxrQBFZ_k0P{{s@sz+gX`-!=T8rcB(=7vW}^K6oLWMmp(rwDh}b zwaGGd>yEy6fHv%jM$yJXo5oMAQ>c9j`**}F?MCry;T@47@r?&sKHgVe$MCqk#Z_3S z1GZI~nOEN*P~+UaFGnj{{Jo@16`(qVNtbU>O0Hf57-P>x8Jikp=`s8xWs^dAJ9lCQ z)GFm+=OV%AMVqVATtN@|vp61VVAHRn87}%PC^RAzJ%JngmZTasWBAWsoAqBU+8L8u z4A&Pe?fmTm0?mK-BL9t+{y7o(7jm+RpOhL9KnY#E&qu^}B6=K_dB}*VlSEiC9fn)+V=J;OnN)Ta5v66ic1rG+dGAJ1 z1%Zb_+!$=tQ~lxQrzv3x#CPb?CekEkA}0MYSgx$Jdd}q8+R=ma$|&1a#)TQ=l$1tQ z=tL9&_^vJ)Pk}EDO-va`UCT1m#Uty1{v^A3P~83_#v^ozH}6*9mIjIr;t3Uv%@VeW zGL6(CwCUp)Jq%G0bIG%?{_*Y#5IHf*5M@wPo6A{$Um++Co$wLC=J1aoG93&T7Ho}P 
z=mGEPP7GbvoG!uD$k(H3A$Z))+i{Hy?QHdk>3xSBXR0j!11O^mEe9RHmw!pvzv?Ua~2_l2Yh~_!s1qS`|0~0)YsbHSz8!mG)WiJE| z2f($6TQtt6L_f~ApQYQKSb=`053LgrQq7G@98#igV>y#i==-nEjQ!XNu9 z~;mE+gtj4IDDNQJ~JVk5Ux6&LCSFL!y=>79kE9=V}J7tD==Ga+IW zX)r7>VZ9dY=V&}DR))xUoV!u(Z|%3ciQi_2jl}3=$Agc(`RPb z8kEBpvY>1FGQ9W$n>Cq=DIpski};nE)`p3IUw1Oz0|wxll^)4dq3;CCY@RyJgFgc# zKouFh!`?Xuo{IMz^xi-h=StCis_M7yq$u) z?XHvw*HP0VgR+KR6wI)jEMX|ssqYvSf*_3W8zVTQzD?3>H!#>InzpSO)@SC8q*ii- z%%h}_#0{4JG;Jm`4zg};BPTGkYamx$Xo#O~lBirRY)q=5M45n{GCfV7h9qwyu1NxOMoP4)jjZMxmT|IQQh0U7C$EbnMN<3)Kk?fFHYq$d|ICu>KbY_hO zTZM+uKHe(cIZfEqyzyYSUBZa8;Fcut-GN!HSA9ius`ltNebF46ZX_BbZNU}}ZOm{M2&nANL9@0qvih15(|`S~z}m&h!u4x~(%MAO$jHRWNfuxWF#B)E&g3ghSQ9|> z(MFaLQj)NE0lowyjvg8z0#m6FIuKE9lDO~Glg}nSb7`~^&#(Lw{}GVOS>U)m8bF}x zVjbXljBm34Cs-yM6TVusr+3kYFjr28STT3g056y3cH5Tmge~ASxBj z%|yb>$eF;WgrcOZf569sDZOVwoo%8>XO>XQOX1OyN9I-SQgrm;U;+#3OI(zrWyow3 zk==|{lt2xrQ%FIXOTejR>;wv(Pb8u8}BUpx?yd(Abh6? zsoO3VYWkeLnF43&@*#MQ9-i-d0t*xN-UEyNKeyNMHw|A(k(_6QKO=nKMCxD(W(Yop zsRQ)QeL4X3Lxp^L%wzi2-WVSsf61dqliPUM7srDB?Wm6Lzn0&{*}|IsKQW;02(Y&| zaTKv|`U(pSzuvR6Rduu$wzK_W-Y-7>7s?G$)U}&uK;<>vU}^^ns@Z!p+9?St1s)dG zK%y6xkPyyS1$~&6v{kl?Md6gwM|>mt6Upm>oa8RLD^8T{0?HC!Z>;(Bob7el(DV6x zi`I)$&E&ngwFS@bi4^xFLAn`=fzTC;aimE^!cMI2n@Vo%Ae-ne`RF((&5y6xsjjAZ zVguVoQ?Z9uk$2ON;ersE%PU*xGO@T*;j1BO5#TuZKEf(mB7|g7pcEA=nYJ{s3vlbg zd4-DUlD{*6o%Gc^N!Nptgay>j6E5;3psI+C3Q!1ZIbeCubW%w4pq9)MSDyB{HLm|k zxv-{$$A*pS@csolri$Ge<4VZ}e~78JOL-EVyrbxKra^d{?|NnPp86!q>t<&IP07?Z z^>~IK^k#OEKgRH+LjllZXk7iA>2cfH6+(e&9ku5poo~6y{GC5>(bRK7hwjiurqAiZ zg*DmtgY}v83IjE&AbiWgMyFbaRUPZ{lYiz$U^&Zt2YjG<%m((&_JUbZcfJ22(>bi5 z!J?<7AySj0JZ&<-qXX;mcV!f~>G=sB0KnjWca4}vrtunD^1TrpfeS^4dvFr!65knK zZh`d;*VOkPs4*-9kL>$GP0`(M!j~B;#x?Ba~&s6CopvO86oM?-? 
zOw#dIRc;6A6T?B`Qp%^<U5 z19x(ywSH$_N+Io!6;e?`tWaM$`=Db!gzx|lQ${DG!zb1Zl&|{kX0y6xvO1o z220r<-oaS^^R2pEyY;=Qllqpmue|5yI~D|iI!IGt@iod{Opz@*ml^w2bNs)p`M(Io z|E;;m*Xpjd9l)4G#KaWfV(t8YUn@A;nK^#xgv=LtnArX|vWQVuw3}B${h+frU2>9^ z!l6)!Uo4`5k`<<;E(ido7M6lKTgWezNLq>U*=uz&s=cc$1%>VrAeOoUtA|T6gO4>UNqsdK=NF*8|~*sl&wI=x9-EGiq*aqV!(VVXA57 zw9*o6Ir8Lj1npUXvlevtn(_+^X5rzdR>#(}4YcB9O50q97%rW2me5_L=%ffYPUSRc z!vv?Kv>dH994Qi>U(a<0KF6NH5b16enCp+mw^Hb3Xs1^tThFpz!3QuN#}KBbww`(h z7GO)1olDqy6?T$()R7y%NYx*B0k_2IBiZ14&8|JPFxeMF{vW>HF-Vi3+ZOI=+qP}n zw(+!WcTd~4ZJX1!ZM&y!+uyt=&i!+~d(V%GjH;-NsEEv6nS1TERt|RHh!0>W4+4pp z1-*EzAM~i`+1f(VEHI8So`S`akPfPTfq*`l{Fz`hS%k#JS0cjT2mS0#QLGf=J?1`he3W*;m4)ce8*WFq1sdP=~$5RlH1EdWm|~dCvKOi4*I_96{^95p#B<(n!d?B z=o`0{t+&OMwKcxiBECznJcfH!fL(z3OvmxP#oWd48|mMjpE||zdiTBdWelj8&Qosv zZFp@&UgXuvJw5y=q6*28AtxZzo-UUpkRW%ne+Ylf!V-0+uQXBW=5S1o#6LXNtY5!I z%Rkz#(S8Pjz*P7bqB6L|M#Er{|QLae-Y{KA>`^} z@lPjeX>90X|34S-7}ZVXe{wEei1<{*e8T-Nbj8JmD4iwcE+Hg_zhkPVm#=@b$;)h6 z<<6y`nPa`f3I6`!28d@kdM{uJOgM%`EvlQ5B2bL)Sl=|y@YB3KeOzz=9cUW3clPAU z^sYc}xf9{4Oj?L5MOlYxR{+>w=vJjvbyO5}ptT(o6dR|ygO$)nVCvNGnq(6;bHlBd zl?w-|plD8spjDF03g5ip;W3Z z><0{BCq!Dw;h5~#1BuQilq*TwEu)qy50@+BE4bX28+7erX{BD4H)N+7U`AVEuREE8 z;X?~fyhF-x_sRfHIj~6f(+^@H)D=ngP;mwJjxhQUbUdzk8f94Ab%59-eRIq?ZKrwD z(BFI=)xrUlgu(b|hAysqK<}8bslmNNeD=#JW*}^~Nrswn^xw*nL@Tx!49bfJecV&KC2G4q5a!NSv)06A_5N3Y?veAz;Gv+@U3R% z)~UA8-0LvVE{}8LVDOHzp~2twReqf}ODIyXMM6=W>kL|OHcx9P%+aJGYi_Om)b!xe zF40Vntn0+VP>o<$AtP&JANjXBn7$}C@{+@3I@cqlwR2MdwGhVPxlTIcRVu@Ho-wO` z_~Or~IMG)A_`6-p)KPS@cT9mu9RGA>dVh5wY$NM9-^c@N=hcNaw4ITjm;iWSP^ZX| z)_XpaI61<+La+U&&%2a z0za$)-wZP@mwSELo#3!PGTt$uy0C(nTT@9NX*r3Ctw6J~7A(m#8fE)0RBd`TdKfAT zCf@$MAxjP`O(u9s@c0Fd@|}UQ6qp)O5Q5DPCeE6mSIh|Rj{$cAVIWsA=xPKVKxdhg zLzPZ`3CS+KIO;T}0Ip!fAUaNU>++ZJZRk@I(h<)RsJUhZ&Ru9*!4Ptn;gX^~4E8W^TSR&~3BAZc#HquXn)OW|TJ`CTahk+{qe`5+ixON^zA9IFd8)kc%*!AiLu z>`SFoZ5bW-%7}xZ>gpJcx_hpF$2l+533{gW{a7ce^B9sIdmLrI0)4yivZ^(Vh@-1q zFT!NQK$Iz^xu%|EOK=n>ug;(7J4OnS$;yWmq>A;hsD_0oAbLYhW^1Vdt9>;(JIYjf 
zdb+&f&D4@4AS?!*XpH>8egQvSVX`36jMd>$+RgI|pEg))^djhGSo&#lhS~9%NuWfX zDDH;3T*GzRT@5=7ibO>N-6_XPBYxno@mD_3I#rDD?iADxX`! zh*v8^i*JEMzyN#bGEBz7;UYXki*Xr(9xXax(_1qVW=Ml)kSuvK$coq2A(5ZGhs_pF z$*w}FbN6+QDseuB9=fdp_MTs)nQf!2SlROQ!gBJBCXD&@-VurqHj0wm@LWX-TDmS= z71M__vAok|@!qgi#H&H%Vg-((ZfxPAL8AI{x|VV!9)ZE}_l>iWk8UPTGHs*?u7RfP z5MC&=c6X;XlUzrz5q?(!eO@~* zoh2I*%J7dF!!_!vXoSIn5o|wj1#_>K*&CIn{qSaRc&iFVxt*^20ngCL;QonIS>I5^ zMw8HXm>W0PGd*}Ko)f|~dDd%;Wu_RWI_d;&2g6R3S63Uzjd7dn%Svu-OKpx*o|N>F zZg=-~qLb~VRLpv`k zWSdfHh@?dp=s_X`{yxOlxE$4iuyS;Z-x!*E6eqmEm*j2bE@=ZI0YZ5%Yj29!5+J$4h{s($nakA`xgbO8w zi=*r}PWz#lTL_DSAu1?f%-2OjD}NHXp4pXOsCW;DS@BC3h-q4_l`<))8WgzkdXg3! zs1WMt32kS2E#L0p_|x+x**TFV=gn`m9BWlzF{b%6j-odf4{7a4y4Uaef@YaeuPhU8 zHBvRqN^;$Jizy+ z=zW{E5<>2gp$pH{M@S*!sJVQU)b*J5*bX4h>5VJve#Q6ga}cQ&iL#=(u+KroWrxa%8&~p{WEUF0il=db;-$=A;&9M{Rq`ouZ5m%BHT6%st%saGsD6)fQgLN}x@d3q>FC;=f%O3Cyg=Ke@Gh`XW za@RajqOE9UB6eE=zhG%|dYS)IW)&y&Id2n7r)6p_)vlRP7NJL(x4UbhlcFXWT8?K=%s7;z?Vjts?y2+r|uk8Wt(DM*73^W%pAkZa1Jd zNoE)8FvQA>Z`eR5Z@Ig6kS5?0h;`Y&OL2D&xnnAUzQz{YSdh0k zB3exx%A2TyI)M*EM6htrxSlep!Kk(P(VP`$p0G~f$smld6W1r_Z+o?=IB@^weq>5VYsYZZR@` z&XJFxd5{|KPZmVOSxc@^%71C@;z}}WhbF9p!%yLj3j%YOlPL5s>7I3vj25 z@xmf=*z%Wb4;Va6SDk9cv|r*lhZ`(y_*M@>q;wrn)oQx%B(2A$9(74>;$zmQ!4fN; z>XurIk-7@wZys<+7XL@0Fhe-f%*=(weaQEdR9Eh6>Kl-EcI({qoZqyzziGwpg-GM#251sK_ z=3|kitS!j%;fpc@oWn65SEL73^N&t>Ix37xgs= zYG%eQDJc|rqHFia0!_sm7`@lvcv)gfy(+KXA@E{3t1DaZ$DijWAcA)E0@X?2ziJ{v z&KOYZ|DdkM{}t+@{@*6ge}m%xfjIxi%qh`=^2Rwz@w0cCvZ&Tc#UmCDbVwABrON^x zEBK43FO@weA8s7zggCOWhMvGGE`baZ62cC)VHyy!5Zbt%ieH+XN|OLbAFPZWyC6)p z4P3%8sq9HdS3=ih^0OOlqTPbKuzQ?lBEI{w^ReUO{V?@`ARsL|S*%yOS=Z%sF)>-y z(LAQdhgAcuF6LQjRYfdbD1g4o%tV4EiK&ElLB&^VZHbrV1K>tHTO{#XTo>)2UMm`2 z^t4s;vnMQgf-njU-RVBRw0P0-m#d-u`(kq7NL&2T)TjI_@iKuPAK-@oH(J8?%(e!0Ir$yG32@CGUPn5w4)+9@8c&pGx z+K3GKESI4*`tYlmMHt@br;jBWTei&(a=iYslc^c#RU3Q&sYp zSG){)V<(g7+8W!Wxeb5zJb4XE{I|&Y4UrFWr%LHkdQ;~XU zgy^dH-Z3lmY+0G~?DrC_S4@=>0oM8Isw%g(id10gWkoz2Q%7W$bFk@mIzTCcIB(K8 
zc<5h&ZzCdT=9n-D>&a8vl+=ZF*`uTvQviG_bLde*k>{^)&0o*b05x$MO3gVLUx`xZ z43j+>!u?XV)Yp@MmG%Y`+COH2?nQcMrQ%k~6#O%PeD_WvFO~Kct za4XoCM_X!c5vhRkIdV=xUB3xI2NNStK*8_Zl!cFjOvp-AY=D;5{uXj}GV{LK1~IE2 z|KffUiBaStRr;10R~K2VVtf{TzM7FaPm;Y(zQjILn+tIPSrJh&EMf6evaBKIvi42-WYU9Vhj~3< zZSM-B;E`g_o8_XTM9IzEL=9Lb^SPhe(f(-`Yh=X6O7+6ALXnTcUFpI>ekl6v)ZQeNCg2 z^H|{SKXHU*%nBQ@I3It0m^h+6tvI@FS=MYS$ZpBaG7j#V@P2ZuYySbp@hA# ze(kc;P4i_-_UDP?%<6>%tTRih6VBgScKU^BV6Aoeg6Uh(W^#J^V$Xo^4#Ekp ztqQVK^g9gKMTHvV7nb64UU7p~!B?>Y0oFH5T7#BSW#YfSB@5PtE~#SCCg3p^o=NkMk$<8- z6PT*yIKGrvne7+y3}_!AC8NNeI?iTY(&nakN>>U-zT0wzZf-RuyZk^X9H-DT_*wk= z;&0}6LsGtfVa1q)CEUPlx#(ED@-?H<1_FrHU#z5^P3lEB|qsxEyn%FOpjx z3S?~gvoXy~L(Q{Jh6*i~=f%9kM1>RGjBzQh_SaIDfSU_9!<>*Pm>l)cJD@wlyxpBV z4Fmhc2q=R_wHCEK69<*wG%}mgD1=FHi4h!98B-*vMu4ZGW~%IrYSLGU{^TuseqVgV zLP<%wirIL`VLyJv9XG_p8w@Q4HzNt-o;U@Au{7%Ji;53!7V8Rv0^Lu^Vf*sL>R(;c zQG_ZuFl)Mh-xEIkGu}?_(HwkB2jS;HdPLSxVU&Jxy9*XRG~^HY(f0g8Q}iqnVmgjI zfd=``2&8GsycjR?M%(zMjn;tn9agcq;&rR!Hp z$B*gzHsQ~aXw8c|a(L^LW(|`yGc!qOnV(ZjU_Q-4z1&0;jG&vAKuNG=F|H?@m5^N@ zq{E!1n;)kNTJ>|Hb2ODt-7U~-MOIFo%9I)_@7fnX+eMMNh>)V$IXesJpBn|uo8f~#aOFytCT zf9&%MCLf8mp4kwHTcojWmM3LU=#|{3L>E}SKwOd?%{HogCZ_Z1BSA}P#O(%H$;z7XyJ^sjGX;j5 zrzp>|Ud;*&VAU3x#f{CKwY7Vc{%TKKqmB@oTHA9;>?!nvMA;8+Jh=cambHz#J18x~ zs!dF>$*AnsQ{{82r5Aw&^7eRCdvcgyxH?*DV5(I$qXh^zS>us*I66_MbL8y4d3ULj z{S(ipo+T3Ag!+5`NU2sc+@*m{_X|&p#O-SAqF&g_n7ObB82~$p%fXA5GLHMC+#qqL zdt`sJC&6C2)=juQ_!NeD>U8lDVpAOkW*khf7MCcs$A(wiIl#B9HM%~GtQ^}yBPjT@ z+E=|A!Z?A(rwzZ;T}o6pOVqHzTr*i;Wrc%&36kc@jXq~+w8kVrs;%=IFdACoLAcCAmhFNpbP8;s`zG|HC2Gv?I~w4ITy=g$`0qMQdkijLSOtX6xW%Z9Nw<;M- zMN`c7=$QxN00DiSjbVt9Mi6-pjv*j(_8PyV-il8Q-&TwBwH1gz1uoxs6~uU}PrgWB zIAE_I-a1EqlIaGQNbcp@iI8W1sm9fBBNOk(k&iLBe%MCo#?xI$%ZmGA?=)M9D=0t7 zc)Q0LnI)kCy{`jCGy9lYX%mUsDWwsY`;jE(;Us@gmWPqjmXL+Hu#^;k%eT>{nMtzj zsV`Iy6leTA8-PndszF;N^X@CJrTw5IIm!GPeu)H2#FQitR{1p;MasQVAG3*+=9FYK zw*k!HT(YQorfQj+1*mCV458(T5=fH`um$gS38hw(OqVMyunQ;rW5aPbF##A3fGH6h z@W)i9Uff?qz`YbK4c}JzQpuxuE3pcQO)%xBRZp{zJ^-*|oryTxJ-rR+MXJ)!f=+pp 
z10H|DdGd2exhi+hftcYbM0_}C0ZI-2vh+$fU1acsB-YXid7O|=9L!3e@$H*6?G*Zp z%qFB(sgl=FcC=E4CYGp4CN>=M8#5r!RU!u+FJVlH6=gI5xHVD&k;Ta*M28BsxfMV~ zLz+@6TxnfLhF@5=yQo^1&S}cmTN@m!7*c6z;}~*!hNBjuE>NLVl2EwN!F+)0$R1S! zR|lF%n!9fkZ@gPW|x|B={V6x3`=jS*$Pu0+5OWf?wnIy>Y1MbbGSncpKO0qE(qO=ts z!~@&!N`10S593pVQu4FzpOh!tvg}p%zCU(aV5=~K#bKi zHdJ1>tQSrhW%KOky;iW+O_n;`l9~omqM%sdxdLtI`TrJzN6BQz+7xOl*rM>xVI2~# z)7FJ^Dc{DC<%~VS?@WXzuOG$YPLC;>#vUJ^MmtbSL`_yXtNKa$Hk+l-c!aC7gn(Cg ze?YPYZ(2Jw{SF6MiO5(%_pTo7j@&DHNW`|lD`~{iH+_eSTS&OC*2WTT*a`?|9w1dh zh1nh@$a}T#WE5$7Od~NvSEU)T(W$p$s5fe^GpG+7fdJ9=enRT9$wEk+ZaB>G3$KQO zgq?-rZZnIv!p#>Ty~}c*Lb_jxJg$eGM*XwHUwuQ|o^}b3^T6Bxx{!?va8aC@-xK*H ztJBFvFfsSWu89%@b^l3-B~O!CXs)I6Y}y#0C0U0R0WG zybjroj$io0j}3%P7zADXOwHwafT#uu*zfM!oD$6aJx7+WL%t-@6^rD_a_M?S^>c;z zMK580bZXo1f*L$CuMeM4Mp!;P@}b~$cd(s5*q~FP+NHSq;nw3fbWyH)i2)-;gQl{S zZO!T}A}fC}vUdskGSq&{`oxt~0i?0xhr6I47_tBc`fqaSrMOzR4>0H^;A zF)hX1nfHs)%Zb-(YGX;=#2R6C{BG;k=?FfP?9{_uFLri~-~AJ;jw({4MU7e*d)?P@ zXX*GkNY9ItFjhwgAIWq7Y!ksbMzfqpG)IrqKx9q{zu%Mdl+{Dis#p9q`02pr1LG8R z@As?eG!>IoROgS!@J*to<27coFc1zpkh?w=)h9CbYe%^Q!Ui46Y*HO0mr% zEff-*$ndMNw}H2a5@BsGj5oFfd!T(F&0$<{GO!Qdd?McKkorh=5{EIjDTHU`So>8V zBA-fqVLb2;u7UhDV1xMI?y>fe3~4urv3%PX)lDw+HYa;HFkaLqi4c~VtCm&Ca+9C~ zge+67hp#R9`+Euq59WhHX&7~RlXn=--m8$iZ~~1C8cv^2(qO#X0?vl91gzUKBeR1J z^p4!!&7)3#@@X&2aF2-)1Ffcc^F8r|RtdL2X%HgN&XU-KH2SLCbpw?J5xJ*!F-ypZ zMG%AJ!Pr&}`LW?E!K~=(NJxuSVTRCGJ$2a*Ao=uUDSys!OFYu!Vs2IT;xQ6EubLIl z+?+nMGeQQhh~??0!s4iQ#gm3!BpMpnY?04kK375e((Uc7B3RMj;wE?BCoQGu=UlZt!EZ1Q*auI)dj3Jj{Ujgt zW5hd~-HWBLI_3HuO) zNrb^XzPsTIb=*a69wAAA3J6AAZZ1VsYbIG}a`=d6?PjM)3EPaDpW2YP$|GrBX{q*! 
z$KBHNif)OKMBCFP5>!1d=DK>8u+Upm-{hj5o|Wn$vh1&K!lVfDB&47lw$tJ?d5|=B z^(_9=(1T3Fte)z^>|3**n}mIX;mMN5v2F#l(q*CvU{Ga`@VMp#%rQkDBy7kYbmb-q z<5!4iuB#Q_lLZ8}h|hPODI^U6`gzLJre9u3k3c#%86IKI*^H-@I48Bi*@avYm4v!n0+v zWu{M{&F8#p9cx+gF0yTB_<2QUrjMPo9*7^-uP#~gGW~y3nfPAoV%amgr>PSyVAd@l)}8#X zR5zV6t*uKJZL}?NYvPVK6J0v4iVpwiN|>+t3aYiZSp;m0!(1`bHO}TEtWR1tY%BPB z(W!0DmXbZAsT$iC13p4f>u*ZAy@JoLAkJhzFf1#4;#1deO8#8d&89}en&z!W&A3++^1(;>0SB1*54d@y&9Pn;^IAf3GiXbfT`_>{R+Xv; zQvgL>+0#8-laO!j#-WB~(I>l0NCMt_;@Gp_f0#^c)t?&#Xh1-7RR0@zPyBz!U#0Av zT?}n({(p?p7!4S2ZBw)#KdCG)uPnZe+U|0{BW!m)9 zi_9$F?m<`2!`JNFv+w8MK_K)qJ^aO@7-Ig>cM4-r0bi=>?B_2mFNJ}aE3<+QCzRr*NA!QjHw# z`1OsvcoD0?%jq{*7b!l|L1+Tw0TTAM4XMq7*ntc-Ived>Sj_ZtS|uVdpfg1_I9knY z2{GM_j5sDC7(W&}#s{jqbybqJWyn?{PW*&cQIU|*v8YGOKKlGl@?c#TCnmnAkAzV- zmK={|1G90zz=YUvC}+fMqts0d4vgA%t6Jhjv?d;(Z}(Ep8fTZfHA9``fdUHkA+z3+ zhh{ohP%Bj?T~{i0sYCQ}uC#5BwN`skI7`|c%kqkyWIQ;!ysvA8H`b-t()n6>GJj6xlYDu~8qX{AFo$Cm3d|XFL=4uvc?Keb zzb0ZmMoXca6Mob>JqkNuoP>B2Z>D`Q(TvrG6m`j}-1rGP!g|qoL=$FVQYxJQjFn33lODt3Wb1j8VR zlR++vIT6^DtYxAv_hxupbLLN3e0%A%a+hWTKDV3!Fjr^cWJ{scsAdfhpI)`Bms^M6 zQG$waKgFr=c|p9Piug=fcJvZ1ThMnNhQvBAg-8~b1?6wL*WyqXhtj^g(Ke}mEfZVM zJuLNTUVh#WsE*a6uqiz`b#9ZYg3+2%=C(6AvZGc=u&<6??!slB1a9K)=VL zY9EL^mfyKnD zSJyYBc_>G;5RRnrNgzJz#Rkn3S1`mZgO`(r5;Hw6MveN(URf_XS-r58Cn80K)ArH4 z#Rrd~LG1W&@ttw85cjp8xV&>$b%nSXH_*W}7Ch2pg$$c0BdEo-HWRTZcxngIBJad> z;C>b{jIXjb_9Jis?NZJsdm^EG}e*pR&DAy0EaSGi3XWTa(>C%tz1n$u?5Fb z1qtl?;_yjYo)(gB^iQq?=jusF%kywm?CJP~zEHi0NbZ);$(H$w(Hy@{i>$wcVRD_X|w-~(0Z9BJyh zhNh;+eQ9BEIs;tPz%jSVnfCP!3L&9YtEP;svoj_bNzeGSQIAjd zBss@A;)R^WAu-37RQrM%{DfBNRx>v!G31Z}8-El9IOJlb_MSoMu2}GDYycNaf>uny z+8xykD-7ONCM!APry_Lw6-yT>5!tR}W;W`C)1>pxSs5o1z#j7%m=&=7O4hz+Lsqm` z*>{+xsabZPr&X=}G@obTb{nPTkccJX8w3CG7X+1+t{JcMabv~UNv+G?txRqXib~c^Mo}`q{$`;EBNJ;#F*{gvS12kV?AZ%O0SFB$^ zn+}!HbmEj}w{Vq(G)OGAzH}R~kS^;(-s&=ectz8vN!_)Yl$$U@HNTI-pV`LSj7Opu zTZ5zZ)-S_{GcEQPIQXLQ#oMS`HPu{`SQiAZ)m1at*Hy%3xma|>o`h%E%8BEbi9p0r zVjcsh<{NBKQ4eKlXU|}@XJ#@uQw*$4BxKn6#W~I4T<^f99~(=}a`&3(ur8R9t+|AQ 
zWkQx7l}wa48-jO@ft2h+7qn%SJtL%~890FG0s5g*kNbL3I&@brh&f6)TlM`K^(bhr zJWM6N6x3flOw$@|C@kPi7yP&SP?bzP-E|HSXQXG>7gk|R9BTj`e=4de9C6+H7H7n# z#GJeVs1mtHhLDmVO?LkYRQc`DVOJ_vdl8VUihO-j#t=0T3%Fc1f9F73ufJz*adn*p zc%&vi(4NqHu^R>sAT_0EDjVR8bc%wTz#$;%NU-kbDyL_dg0%TFafZwZ?5KZpcuaO54Z9hX zD$u>q!-9`U6-D`E#`W~fIfiIF5_m6{fvM)b1NG3xf4Auw;Go~Fu7cth#DlUn{@~yu z=B;RT*dp?bO}o%4x7k9v{r=Y@^YQ^UUm(Qmliw8brO^=NP+UOohLYiaEB3^DB56&V zK?4jV61B|1Uj_5fBKW;8LdwOFZKWp)g{B%7g1~DgO&N& z#lisxf?R~Z@?3E$Mms$$JK8oe@X`5m98V*aV6Ua}8Xs2#A!{x?IP|N(%nxsH?^c{& z@vY&R1QmQs83BW28qAmJfS7MYi=h(YK??@EhjL-t*5W!p z^gYX!Q6-vBqcv~ruw@oMaU&qp0Fb(dbVzm5xJN%0o_^@fWq$oa3X?9s%+b)x4w-q5Koe(@j6Ez7V@~NRFvd zfBH~)U5!ix3isg`6be__wBJp=1@yfsCMw1C@y+9WYD9_C%{Q~7^0AF2KFryfLlUP# zwrtJEcH)jm48!6tUcxiurAMaiD04C&tPe6DI0#aoqz#Bt0_7_*X*TsF7u*zv(iEfA z;$@?XVu~oX#1YXtceQL{dSneL&*nDug^OW$DSLF0M1Im|sSX8R26&)<0Fbh^*l6!5wfSu8MpMoh=2l z^^0Sr$UpZp*9oqa23fcCfm7`ya2<4wzJ`Axt7e4jJrRFVf?nY~2&tRL* zd;6_njcz01c>$IvN=?K}9ie%Z(BO@JG2J}fT#BJQ+f5LFSgup7i!xWRKw6)iITjZU z%l6hPZia>R!`aZjwCp}I zg)%20;}f+&@t;(%5;RHL>K_&7MH^S+7<|(SZH!u zznW|jz$uA`P9@ZWtJgv$EFp>)K&Gt+4C6#*khZQXS*S~6N%JDT$r`aJDs9|uXWdbg zBwho$phWx}x!qy8&}6y5Vr$G{yGSE*r$^r{}pw zVTZKvikRZ`J_IJrjc=X1uw?estdwm&bEahku&D04HD+0Bm~q#YGS6gp!KLf$A{%Qd z&&yX@Hp>~(wU{|(#U&Bf92+1i&Q*-S+=y=3pSZy$#8Uc$#7oiJUuO{cE6=tsPhwPe| zxQpK>`Dbka`V)$}e6_OXKLB%i76~4N*zA?X+PrhH<&)}prET;kel24kW%+9))G^JI zsq7L{P}^#QsZViX%KgxBvEugr>ZmFqe^oAg?{EI=&_O#e)F3V#rc z8$4}0Zr19qd3tE4#$3_f=Bbx9oV6VO!d3(R===i-7p=Vj`520w0D3W6lQfY48}!D* z&)lZMG;~er2qBoI2gsX+Ts-hnpS~NYRDtPd^FPzn!^&yxRy#CSz(b&E*tL|jIkq|l zf%>)7Dtu>jCf`-7R#*GhGn4FkYf;B$+9IxmqH|lf6$4irg{0ept__%)V*R_OK=T06 zyT_m-o@Kp6U{l5h>W1hGq*X#8*y@<;vsOFqEjTQXFEotR+{3}ODDnj;o0@!bB5x=N z394FojuGOtVKBlVRLtHp%EJv_G5q=AgF)SKyRN5=cGBjDWv4LDn$IL`*=~J7u&Dy5 zrMc83y+w^F&{?X(KOOAl-sWZDb{9X9#jrQtmrEXD?;h-}SYT7yM(X_6qksM=K_a;Z z3u0qT0TtaNvDER_8x*rxXw&C^|h{P1qxK|@pS7vdlZ#P z7PdB7MmC2}%sdzAxt>;WM1s0??`1983O4nFK|hVAbHcZ3x{PzytQLkCVk7hA!Lo` zEJH?4qw|}WH{dc4z%aB=0XqsFW?^p=X}4xnCJXK%c#ItOSjdSO`UXJyuc8bh^Cf}8 
z@Ht|vXd^6{Fgai8*tmyRGmD_s_nv~r^Fy7j`Bu`6=G)5H$i7Q7lvQnmea&TGvJp9a|qOrUymZ$6G|Ly z#zOCg++$3iB$!6!>215A4!iryregKuUT344X)jQb3|9qY>c0LO{6Vby05n~VFzd?q zgGZv&FGlkiH*`fTurp>B8v&nSxNz)=5IF$=@rgND4d`!AaaX;_lK~)-U8la_Wa8i?NJC@BURO*sUW)E9oyv3RG^YGfN%BmxzjlT)bp*$<| zX3tt?EAy<&K+bhIuMs-g#=d1}N_?isY)6Ay$mDOKRh z4v1asEGWoAp=srraLW^h&_Uw|6O+r;wns=uwYm=JN4Q!quD8SQRSeEcGh|Eb5Jg8m zOT}u;N|x@aq)=&;wufCc^#)5U^VcZw;d_wwaoh9$p@Xrc{DD6GZUqZ ziC6OT^zSq@-lhbgR8B+e;7_Giv;DK5gn^$bs<6~SUadiosfewWDJu`XsBfOd1|p=q zE>m=zF}!lObA%ePey~gqU8S6h-^J2Y?>7)L2+%8kV}Gp=h`Xm_}rlm)SyUS=`=S7msKu zC|T!gPiI1rWGb1z$Md?0YJQ;%>uPLOXf1Z>N~`~JHJ!^@D5kSXQ4ugnFZ>^`zH8CAiZmp z6Ms|#2gcGsQ{{u7+Nb9sA?U>(0e$5V1|WVwY`Kn)rsnnZ4=1u=7u!4WexZD^IQ1Jk zfF#NLe>W$3m&C^ULjdw+5|)-BSHwpegdyt9NYC{3@QtMfd8GrIWDu`gd0nv-3LpGCh@wgBaG z176tikL!_NXM+Bv#7q^cyn9$XSeZR6#!B4JE@GVH zoobHZN_*RF#@_SVYKkQ_igme-Y5U}cV(hkR#k1c{bQNMji zU7aE`?dHyx=1`kOYZo_8U7?3-7vHOp`Qe%Z*i+FX!s?6huNp0iCEW-Z7E&jRWmUW_ z67j>)Ew!yq)hhG4o?^z}HWH-e=es#xJUhDRc4B51M4~E-l5VZ!&zQq`gWe`?}#b~7w1LH4Xa-UCT5LXkXQWheBa2YJYbyQ zl1pXR%b(KCXMO0OsXgl0P0Og<{(@&z1aokU-Pq`eQq*JYgt8xdFQ6S z6Z3IFSua8W&M#`~*L#r>Jfd6*BzJ?JFdBR#bDv$_0N!_5vnmo@!>vULcDm`MFU823 zpG9pqjqz^FE5zMDoGqhs5OMmC{Y3iVcl>F}5Rs24Y5B^mYQ;1T&ks@pIApHOdrzXF z-SdX}Hf{X;TaSxG_T$0~#RhqKISGKNK47}0*x&nRIPtmdwxc&QT3$8&!3fWu1eZ_P zJveQj^hJL#Sn!*4k`3}(d(aasl&7G0j0-*_2xtAnoX1@9+h zO#c>YQg60Z;o{Bi=3i7S`Ic+ZE>K{(u|#)9y}q*j8uKQ1^>+(BI}m%1v3$=4ojGBc zm+o1*!T&b}-lVvZqIUBc8V}QyFEgm#oyIuC{8WqUNV{Toz`oxhYpP!_p2oHHh5P@iB*NVo~2=GQm+8Yrkm2Xjc_VyHg1c0>+o~@>*Qzo zHVBJS>$$}$_4EniTI;b1WShX<5-p#TPB&!;lP!lBVBbLOOxh6FuYloD%m;n{r|;MU3!q4AVkua~fieeWu2 zQAQ$ue(IklX6+V;F1vCu-&V?I3d42FgWgsb_e^29ol}HYft?{SLf>DrmOp9o!t>I^ zY7fBCk+E8n_|apgM|-;^=#B?6RnFKlN`oR)`e$+;D=yO-(U^jV;rft^G_zl`n7qnM zL z*-Y4Phq+ZI1$j$F-f;`CD#|`-T~OM5Q>x}a>B~Gb3-+9i>Lfr|Ca6S^8g*{*?_5!x zH_N!SoRP=gX1?)q%>QTY!r77e2j9W(I!uAz{T`NdNmPBBUzi2{`XMB^zJGGwFWeA9 z{fk33#*9SO0)DjROug+(M)I-pKA!CX;IY(#gE!UxXVsa)X!UftIN98{pt#4MJHOhY 
zM$_l}-TJlxY?LS6Nuz1T<44m<4i^8k@D$zuCPrkmz@sdv+{ciyFJG2Zwy&%c7;atIeTdh!a(R^QXnu1Oq1b42*OQFWnyQ zWeQrdvP|w_idy53Wa<{QH^lFmEd+VlJkyiC>6B#s)F;w-{c;aKIm;Kp50HnA-o3lY z9B~F$gJ@yYE#g#X&3ADx&tO+P_@mnQTz9gv30_sTsaGXkfNYXY{$(>*PEN3QL>I!k zp)KibPhrfX3%Z$H6SY`rXGYS~143wZrG2;=FLj50+VM6soI~up_>fU(2Wl@{BRsMi zO%sL3x?2l1cXTF)k&moNsHfQrQ+wu(gBt{sk#CU=UhrvJIncy@tJX5klLjgMn>~h= zg|FR&;@eh|C7`>s_9c~0-{IAPV){l|Ts`i=)AW;d9&KPc3fMeoTS%8@V~D8*h;&(^>yjT84MM}=%#LS7shLAuuj(0VAYoozhWjq z4LEr?wUe2^WGwdTIgWBkDUJa>YP@5d9^Rs$kCXmMRxuF*YMVrn?0NFyPl}>`&dqZb z<5eqR=ZG3>n2{6v6BvJ`YBZeeTtB88TAY(x0a58EWyuf>+^|x8Qa6wA|1Nb_p|nA zWWa}|z8a)--Wj`LqyFk_a3gN2>5{Rl_wbW?#by7&i*^hRknK%jwIH6=dQ8*-_{*x0j^DUfMX0`|K@6C<|1cgZ~D(e5vBFFm;HTZF(!vT8=T$K+|F)x3kqzBV4-=p1V(lzi(s7jdu0>LD#N=$Lk#3HkG!a zIF<7>%B7sRNzJ66KrFV76J<2bdYhxll0y2^_rdG=I%AgW4~)1Nvz=$1UkE^J%BxLo z+lUci`UcU062os*=`-j4IfSQA{w@y|3}Vk?i;&SSdh8n+$iHA#%ERL{;EpXl6u&8@ zzg}?hkEOUOJt?ZL=pWZFJ19mI1@P=$U5*Im1e_8Z${JsM>Ov?nh8Z zP5QvI!{Jy@&BP48%P2{Jr_VgzW;P@7)M9n|lDT|Ep#}7C$&ud&6>C^5ZiwKIg2McPU(4jhM!BD@@L(Gd*Nu$ji(ljZ<{FIeW_1Mmf;76{LU z-ywN~=uNN)Xi6$<12A9y)K%X|(W0p|&>>4OXB?IiYr||WKDOJPxiSe01NSV-h24^L z_>m$;|C+q!Mj**-qQ$L-*++en(g|hw;M!^%_h-iDjFHLo-n3JpB;p?+o2;`*jpvJU zLY^lt)Un4joij^^)O(CKs@7E%*!w>!HA4Q?0}oBJ7Nr8NQ7QmY^4~jvf0-`%waOLn zdNjAPaC0_7c|RVhw)+71NWjRi!y>C+Bl;Z`NiL^zn2*0kmj5gyhCLCxts*cWCdRI| zjsd=sT5BVJc^$GxP~YF$-U{-?kW6r@^vHXB%{CqYzU@1>dzf#3SYedJG-Rm6^RB7s zGM5PR(yKPKR)>?~vpUIeTP7A1sc8-knnJk*9)3t^e%izbdm>Y=W{$wm(cy1RB-19i za#828DMBY+ps#7Y8^6t)=Ea@%Nkt)O6JCx|ybC;Ap}Z@Zw~*}3P>MZLPb4Enxz9Wf zssobT^(R@KuShj8>@!1M7tm|2%-pYYDxz-5`rCbaTCG5{;Uxm z*g=+H1X8{NUvFGzz~wXa%Eo};I;~`37*WrRU&K0dPSB$yk(Z*@K&+mFal^?c zurbqB-+|Kb5|sznT;?Pj!+kgFY1#Dr;_%A(GIQC{3ct|{*Bji%FNa6c-thbpBkA;U zURV!Dr&X{0J}iht#-Qp2=xzuh(fM>zRoiGrYl5ttw2#r34gC41CCOC31m~^UPTK@s z6;A@)7O7_%C)>bnAXerYuAHdE93>j2N}H${zEc6&SbZ|-fiG*-qtGuy-qDelH(|u$ zorf8_T6Zqe#Ub!+e3oSyrskt_HyW_^5lrWt#30l)tHk|j$@YyEkXUOV;6B51L;M@=NIWZXU;GrAa(LGxO%|im%7F<-6N;en0Cr zLH>l*y?pMwt`1*cH~LdBPFY_l;~`N!Clyfr;7w<^X;&(ZiVdF1S5e(+Q%60zgh)s4 
zn2yj$+mE=miVERP(g8}G4<85^-5f@qxh2ec?n+$A_`?qN=iyT1?U@t?V6DM~BIlBB z>u~eXm-aE>R0sQy!-I4xtCNi!!qh?R1!kKf6BoH2GG{L4%PAz0{Sh6xpuyI%*~u)s z%rLuFl)uQUCBQAtMyN;%)zFMx4loh7uTfKeB2Xif`lN?2gq6NhWhfz0u5WP9J>=V2 zo{mLtSy&BA!mSzs&CrKWq^y40JF5a&GSXIi2= z{EYb59J4}VwikL4P=>+mc6{($FNE@e=VUwG+KV21;<@lrN`mnz5jYGASyvz7BOG_6(p^eTxD-4O#lROgon;R35=|nj#eHIfJBYPWG>H>`dHKCDZ3`R{-?HO0mE~(5_WYcFmp8sU?wr*UkAQiNDGc6T zA%}GOLXlOWqL?WwfHO8MB#8M8*~Y*gz;1rWWoVSXP&IbKxbQ8+s%4Jnt?kDsq7btI zCDr0PZ)b;B%!lu&CT#RJzm{l{2fq|BcY85`w~3LSK<><@(2EdzFLt9Y_`;WXL6x`0 zDoQ?=?I@Hbr;*VVll1Gmd8*%tiXggMK81a+T(5Gx6;eNb8=uYn z5BG-0g>pP21NPn>$ntBh>`*})Fl|38oC^9Qz>~MAazH%3Q~Qb!ALMf$srexgPZ2@&c~+hxRi1;}+)-06)!#Mq<6GhP z-Q?qmgo${aFBApb5p}$1OJKTClfi8%PpnczyVKkoHw7Ml9e7ikrF0d~UB}i3vizos zXW4DN$SiEV9{faLt5bHy2a>33K%7Td-n5C*N;f&ZqAg#2hIqEb(y<&f4u5BWJ>2^4 z414GosL=Aom#m&=x_v<0-fp1r%oVJ{T-(xnomNJ(Dryv zh?vj+%=II_nV+@NR+(!fZZVM&(W6{6%9cm+o+Z6}KqzLw{(>E86uA1`_K$HqINlb1 zKelh3-jr2I9V?ych`{hta9wQ2c9=MM`2cC{m6^MhlL2{DLv7C^j z$xXBCnDl_;l|bPGMX@*tV)B!c|4oZyftUlP*?$YU9C_eAsuVHJ58?)zpbr30P*C`T z7y#ao`uE-SOG(Pi+`$=e^mle~)pRrdwL5)N;o{gpW21of(QE#U6w%*C~`v-z0QqBML!!5EeYA5IQB0 z^l01c;L6E(iytN!LhL}wfwP7W9PNAkb+)Cst?qg#$n;z41O4&v+8-zPs+XNb-q zIeeBCh#ivnFLUCwfS;p{LC0O7tm+Sf9Jn)~b%uwP{%69;QC)Ok0t%*a5M+=;y8j=v z#!*pp$9@!x;UMIs4~hP#pnfVc!%-D<+wsG@R2+J&%73lK|2G!EQC)O05TCV=&3g)C!lT=czLpZ@Sa%TYuoE?v8T8`V;e$#Zf2_Nj6nvBgh1)2 GZ~q4|mN%#X literal 0 HcmV?d00001 diff --git a/mappingservice-plugin/gradle/wrapper/gradle-wrapper.properties b/mappingservice-plugin/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..9355b41 --- /dev/null +++ b/mappingservice-plugin/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/mappingservice-plugin/gradlew 
b/mappingservice-plugin/gradlew new file mode 100755 index 0000000..f5feea6 --- /dev/null +++ b/mappingservice-plugin/gradlew @@ -0,0 +1,252 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. 
+# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s +' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/mappingservice-plugin/gradlew.bat b/mappingservice-plugin/gradlew.bat new file mode 100644 index 0000000..9d21a21 --- /dev/null +++ b/mappingservice-plugin/gradlew.bat @@ -0,0 +1,94 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/mappingservice-plugin/integrationtests/basic.hurl b/mappingservice-plugin/integrationtests/basic.hurl new file mode 100644 index 0000000..a57c589 --- /dev/null +++ b/mappingservice-plugin/integrationtests/basic.hurl @@ -0,0 +1,71 @@ +# ---------------------------------------------------------- +# Check if the mapping service is reachable +#----------------------------------------------------------- +GET {{host}} +HTTP 200 + +# ---------------------------------------------------------- +# Get mapping types from the mapping service +# to fetch the first available id +# ---------------------------------------------------------- +GET {{host}}/api/v1/mappingAdministration/plugins +HTTP 200 +[Captures] +mappingType: regex "(?s)\"id\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*[^}]*?\"name\"\\s*:\\s*\"Dicom2JSON\"" + +# ---------------- BEGIN Test Mapping for JaMMaTo ----------- + +# ---------------------------------------------------------- +# Register a dummy mapping with embedded JSON +# ---------------------------------------------------------- +POST {{host}}/api/v1/mappingAdministration/ +Content-Type: multipart/form-data; boundary=boundary +[Options] +variable: id1=0 +``` +--boundary +Content-Disposition: form-data; name="record"; 
filename="blob" +Content-Type: application/json + +{"mappingId":"{{id1}}","mappingType": "{{mappingType}}","title":"JaMMaTo from CI test","description":"","acl":[]} +--boundary +Content-Disposition: form-data; name="document"; filename="blob" +Content-Type: application/json + +{ + "study": { + "study.studyID": "studyInstanceUid" + } +} +--boundary-- +``` +HTTP 201 + +# ---------------------------------------------------------- +# Execute mapping with test data +# ---------------------------------------------------------- +POST {{host}}/api/v1/mappingExecution/{{id1}} +[MultipartFormData] +document: file,./tests/sampleData/MRIm1.dcm; +HTTP 200 +[Captures] +result1: body +[Asserts] +body contains "study" # Expect some result structure containing "study" + +# ---------------------------------------------------------- +# Get the ETag of the registered mapping +# ---------------------------------------------------------- +GET {{host}}/api/v1/mappingAdministration/{{id1}} +HTTP 200 +[Captures] +etag: header "ETag" # Capture Etag used for delete + +# ----------------------------------------------------------- +# Delete the registered mapping +# ----------------------------------------------------------- +DELETE {{host}}/api/v1/mappingAdministration/{{id1}} +If-Match: {{etag}} +HTTP 204 # Expect a successful deletion + +# ---------------- END Test Mapping for JaMMaTo ---------------- diff --git a/mappingservice-plugin/settings.gradle b/mappingservice-plugin/settings.gradle new file mode 100644 index 0000000..a69a131 --- /dev/null +++ b/mappingservice-plugin/settings.gradle @@ -0,0 +1 @@ +rootProject.name = 'JaMMaToPlugin' \ No newline at end of file diff --git a/mappingservice-plugin/src/main/java/edu/kit/datamanager/apeHeplugin/JaMMaToPlugin.java b/mappingservice-plugin/src/main/java/edu/kit/datamanager/apeHeplugin/JaMMaToPlugin.java new file mode 100644 index 0000000..5f8875d --- /dev/null +++ 
b/mappingservice-plugin/src/main/java/edu/kit/datamanager/apeHeplugin/JaMMaToPlugin.java @@ -0,0 +1,54 @@ +package edu.kit.datamanager.jammatoplugin; + +import edu.kit.datamanager.mappingservice.plugins.AbstractPythonMappingPlugin; +import java.nio.file.Path; + +public class JaMMaToPlugin extends AbstractPythonMappingPlugin{ + + private static final String REPOSITORY = "https://github.com/kit-data-manager/JaMMaTo"; + + + public JaMMaToPlugin() { + super("Dicom2JSON", REPOSITORY); + } + + @Override + public String name() { + return "Dicom2JSON"; + } + + @Override + public String description() { + return "The software JaMMaTo (JSON Metadata Mapping Tool) is a metadata mapping tool based on Python and used for mapping metadata from a Dicom input to a JSON format schema. "; + } + + @Override + public String[] inputTypes() { + return new String[]{ + "application/octet-stream", + "application/x-hdf5", + "application/dicom", + "application/x-iso9660-image" + }; + } + + @Override + public String[] outputTypes() { + return new String[]{ + "application/json" + }; + } + + @Override + public String[] getCommandArray(Path workingDir, Path mappingFile, Path inputFile, Path outputFile) { + return new String[]{ + workingDir + "/plugin_wrapper.py", + "-m", + mappingFile.toString(), + "-i", + inputFile.toString(), + "-o", + outputFile.toString() + }; + } +} diff --git a/tests/SampleData/MRIm1.dcm b/tests/SampleData/MRIm1.dcm new file mode 100644 index 0000000000000000000000000000000000000000..51cd75c44b965bb518b67408e5a70f3443dc0a75 GIT binary patch literal 9886 zcmc(EXLuAxu;6r>nOy}WL=Zu;5CW80t-`K^1QL=!0_CiYvpc)9NmaZG)QliR)VO& zQWY1eh*T+~6^bZDWHg1NBWVvEdSn(F(^9jGdK;tT6ahTLGpU6wnB0QQ zo|!o*S-QM}%vnXwfH~C$}_(I z53=%1z<;h97Zt}d?f$c^QmT0-@V~Sb|H_pjI$jaWGg1G>V{~jZovZx!R!np>&*V`p zFjQuNN)4H8Dq#(imFvrNuWN8w>jJtvstaozRRP%rnE^d3%^6fbLn)qTdS?~|WM&u9 z_7o;Bhh?fN>*^b53&6q*mI+fR6^*)_;?+1dr6e;mGghJSqI=djHl@l^XEk|IsT#2v 
z4Rw7?mg)d6M)AK%;l(6><|(%ky)(*q0h8RipxK7P7-qtO_>m>K^~-|WD=oJQQ-&s6_iE?>%BBh`XzUC)95 zU#js&{S@Z;QeCGOGISr?f9_GH`7%LNhwlHTv4kF3DTNunRIh1;m~yP7bBBJ0)l?(C z%rFYOQ_EDV74dP=>eyJ?H-h#-CW=BMKVzw?BBPXQm0F>qeWNIKilsEiP^_GZii%Xl z#HpecQNGMLCX>#Li}7Wqeuf%p%%Gz%Lmi01Gbzk6DmC?sSYIk+quwYcE-og9&IzFH zNG8uzXDV^jJF2S!QpDN@3iWA(8{H;K_rD-NcTk!NI*VmKi%DBvKE;JR-KoYWnek2>otu-Jno*FOovsWBrcyRylPT8wKOpK!g-si2 z(5+X-HnRR1praYkINFPj2LA!U&rlQEhdx7%E3u%Ej%!6B?lUyJUocZZIfINMAebqn zkmqL}l{5@SMJZ!qmBCCAm7tN=8$)PZEu{t#z%aAn5Eu+g$T3vN#>1xg0PF$Zz@hLY zcunTOwXhdPH~v!Z^9kPN$?w5MEavW;2}B)&%;o(5hze4IL;))U0?%~OorfSR0eL5G8j%O zz&lh8`lDn}#!cdW#kWWt_!ISj_t7jg5s!yYxZ7YZdI&h!8<^QyxHT-2|HIy97BW%n z2{anMU{cU| zxIcO*^XFRNc(x24#A;N)ErqAJGw?Uq6GyX`;S#FJX819DLClZ^H}DsHAv%s&HXN-d zT>+2Yv1){1TbP8+d<#;dJ&4w$=h%ZD&h4??tu4Z;Ud8F~kP06)U@pgHp6 zb%?=(*e=k(?#CA3KwIES_y^|5*U*Rijr)oD2{B|FxDE_(E}~XK9)LI8GNdIEHv(S4 zi&*H9>iK|M+H6~!?tYJg!`np$__w*&3b*j#{NY(%!~3&^Y#M&Td_{ic9>bAr1@3}6 zpzBoLc4(}uCG@`lueRFbE$YG>C7U$W)`1#BHrqw^#T}H*3*aeN zhPuLeL?WG$7v}>%f|KwK7e_Wz=^acB?8{7)$SK4@cFV}UukK?#= zy<@V*t-NVDUp7xSP1sY`&5~y9Yy8EyLHx1z&Q?*>kA~pM=o425CP6d)0WK%knD4=5 z>g^B@rIy?mc7(q(mtZq^jr!kKI2zxFr+^=B$3?Io(O}LEp2TD1?*i%#b82tfL(H=+ z`=mqFri$JbvmCcg8dEousccm5oL)nV@9KUK4qBQRYjufYwWhdmg}G8l?Uv_tjlBX+ zZ~@>k+QOd%1>8S)DhQ+T?mb+Ne}^Hc8a)RJ(v!S^%lVD4LDm+`28VGLdV#~qHl`dOAWlRL4T;Mm*8t( zAbepMujy!t(_RYNC6A&06;3?yP#Q1pg9IRgTdR#@p8!|1RF2`6>5d z>45U3+V+MumdAp>-eg!=_(B^Yl^7a|7aPkHck;WKYU~7U$xbE+cg6ikN2DT7I1)}` zdxB436aL8lLh7j}$k8!Ig*7mgOa#Ng+~)JO4ccMlznc8byR}oTNzS8{E^(42Q7m?R zZAq|iv+orL=;CsuoLKE>p{?H2JW;2mZ+e1S93BW5)D2mXZ?^8Vm82u5nqnbr7RF!)*{E%}a_!ZZ7# zx)D}~vBomU*3sEo1omUnaWPMdx6ar6k@;s%c=68~mvMz~Qp^$chCRY?gTEofG^qGo z;VbK#vOc=bh5d;yz6$n3f=cjo+#UJDMW_dw#4N;vU`zBnh~-bg*4#oo5@)d{@QkeS zeOy1P*9OHizYk5yo1JPai+O9PsoF07VTqGo3cMr2($0C?;xcX0yXC*l&NjUlKN(*c zdKqV$rWsxvu4}XPqsohgeBE(_)zF|->gQ(>up6|+WoSBCgLKsXmcpkr4*iDTpjgru zE+FekIhV>kf_`uecfy?rqdF(KS$uD|@M->|=Mu75&Z2p--&va)u9?mVK-yy7qYXAC zns8oqrmbYG_O51^m@9hg>Z~1&X^v^eYvOsW!Z1Q3THclm(yj6^)3a_LVLrM8oy3Ra 
zz}JjB4nz_*0wwmtWvmR1g!R0D8$ck`Vrxn7wo$Rks`2siJC;8|d>tY0VJ zY7gOS`AA`~C0nmqapebl7@szoHcIg%w^*${qW<&h5 z&I{tl#6~6ePt+^AawR0sZLs=DO@v{k^h8r)Y-=2C_%`1wBO+&(;ht`&si}2_^^|Z= z{MmF!ztEU!?x2Y{oCXcH#c)6*38|0PBer z&EVc}5pGigHN6_Tj8N+0&PJ<}bjtL^X)33}kos4Lm;6w69NwuvW4Uep)xio=ZKb9c zx^MFD7XD+16T9e(bxrhYOAGx)<07M7Y$*hoSBq*h<;1Aeyfdo}Z>0x42^k93vQ1%Y zau{3%nQ%Je54YpvK*N6rV!6BAYOKsRMQ3;WG1|~&e~dG+TB%8}r;Ja|>9SkBUA`7} zAO*?}wKDx;%U%6N!Km4rH=)VPUfetG=5 z`2OmSu|Zvibs3a?IRSM`Y}Wz5kUec(V;XPluTM4-%O&F-{miVY{Hx`AwQkzK4bQbo z(>B8h^C_F4ooh83;`F(i5&9f$gfP$Ouc<2=r@fIS!BMC?gW(dm6P-sJNfx+`E>Npj z$S=jUT=OhVjugLGIZriNy)iKyhdWCXE*W$#EDU(Bv;XHX#iwngQv~{J{{E4ON zVv?`yxvj^gd`H1vjZ8DVY+%u}vJNG!i*6MhESXtcQaq_7K6gyXoRWdXkMr7SZxmhD zt_i#^zlGe;H#i<%gt@XkXeIqV>xVkwdXy^{%W(Iw#IZ4_5@5VB;f#81;&k=)ZmCIs zsB6-GQ7k}X*p}|KMM-&nd4J@~3V+TTl6fITo3SnTNnUKunBt5a&!XD}y9&N4epT>1 z>t5cAJe}t2f?Y)=Il)EqvVSTV*=thiisZiVDtW0mw)WhABSxiNzw7XUAD)W<1WWvR%NFE^}TNI<4Tg3e&G$K;rwO`%bylJD!G;CRq!d-zjsVp zm-LF>b;XAYel6`+L<&C?cPe0VTjf2=Q|GTM?Nf*fwikEGYg>4zq_FJUqGS0R^OHPY zkUeNS{09t2hoBjh(@3}mcIO(&V>$^juzJou(!Q9Nt% zGdVR%owv5q?4#9LQ}aW`oe|aS7~G0HVyeM6crS`%y3zHAu=y>AIEsoo+MA|ibh{mG zNW9;*S<-FQr{q3K15{E`hODZ)lHQhJrjAtx^tzTMBo3kxniuLvx*17f}mOail zVr%P3Jx_jSH-nj@W3#?0AEB=jdJA`rr)*P&)xvs;FBMmt zPg__^Pg{4vDy_E86?<36t=YCM(vQxI-V50ea5ibjc1OPw70m(@=uY_OKQ#BW=%j19 zF4i+bV7smU=Jrj3{M+nfjhx)A2A)$u$0D)4<)XvWI$V3$%(>XA`C>+izWk2$M^~J5 z!H{HHYVoi{iTy3Z%>yi>D=MU^(t6>fsg3a$>AEzk@}k7mZL;mHn;v*tb{xf{I+}N> zxMF;Q&j)+Rm&TL!P-(WoViu&f_F?w*xtoI0npDenz^2})Ctz)EC69o5p3JKCg>pWpqRfB$WjE~ni{)60`RkH24bF#_!Cve3-JRMcWTDOYBtwq)m zlGX9EX@l{5v%izr%PKcnS5)4&de&XD5AWAI?ik;iRny&HgC?N)G$Sh@=URA+6UFnE z`9ihXUEC#hb8HhInU>fl*f`63Gm`S9pKQNdR~heBpGjQhzCe}-pR#ArR9QGwk!>Up zjb~E1*?gv-cfMVkBebZPV*S&uHw-uXNONshU14@8HLI9sUR0NBAK33|!eF-({NFGO ze8izBfS-iF#!o#I&c%+KwrY#U(jbnK%+gwmpDn@JT1vM4V{hsxw*6|o>3D1}_yWp( zAe0@JwMF413tnb)q(;`9m*e*E2#oM(fZNkS<$V1tn~&wbd5^udW2{~4xaIiAcB68p zw6Ct#7GFO^-Ij#oG!%u}&}?}bH;-S2txb;D7g^p~v(0DiW?`9WvE#5!>k#a>oWq}*(vy+J!P0)CHWY)kMnJOfj)z+DF!vbX%1q;6F&HCwD* 
z>_v_yV$X_f(^&f;N5870HcKrKulI?LAB7`OQ(OxYU~|3=4#Vl>ntOkHvh7dlwmH+W zO5e*KXVzFUY+ap)Z6|Edb+0nRvbE-ZhXgl3&e1Qm2x608MUTN|TmbE`72_Z$m}X!H zn8WoVXYge~*psjpoWLHa845#p zITakoG}RoDQl+NOqpn}AN6k&_@1%#$@5GIcw{-6x3g1?CPib&d@dr5zdL z1y}JMa9^6eB!jzP70@AHJb<+_Kf#`|SAxIosAZ=i(Q;56YCcqV&2q28DP4D-w!N?W zBIHN;6QXB3(QjWC$B=h4PrSo!%`2}dwWd18n%~=wS-q{IsC(P9Io# z!*<#6+`@Z0>!@;zok$@kA$!H3xf=A_xdQ7W&X*z9fZq?Wm9k(rg zopsh1_FiJ8?Uv;~%&!hQ$?cP3>(jZFx?eNvQnU@w0Wby{tN340Sgl2k@dPh;zo}-B1>}MI znn+xkZW-uUXPoJHBAgTdvTd(iVL547C<*p29G%*3_T0s9XIR<0Hh;&=4h`uzH#9n8 zNLWQgdhm(PADca3&U0@tA+gK{<|@0GS;&6sl4l=LzR{-F?~@$DGv@>`yE5JqT5ZjG zMC#yvREMs?9q=Q&6<4q{`@lxGmBKFTO?qZNcU>^`uti9NTm!9nHNQzKr5*a-wD$Fy z?SRgB9E?qF|1z{XbVcOakhfvQkqzO2p`Kv|{yFT+KgU~OHLY&kf!oM}PFt&z%BI;q zSfgxr^j7OM({&pVOX?;^^_T6S+0Z2DO)Ez4$RYBSJY&k`Q%H?=lLMJA2rB1N>9tg0 zKkpju+9L&-;$6e@yR)~@byx&D1qCbi1`iJ()Uj3POJPUCo_173E(m-Y;ny~Xq_BNt z!CV=PVh=FooVw{<^L*>ymUhxEW1+doRw?~c;b!s`|7y04J;(NC0HVWwv9ptC#&}u~hsZdb@T@nU-$S{`$Pk-f}PU z9hxj(82lmZO2pByxQIVPoZ;=2V>;Oszl0S=jEOi3FtXA}Tmq}mZ)6#qEEkO$+h)@$ z>5QSdDM~matu#)mSrK;BgLiuY#?o)Ep6mb+iY`KltASgX0F>Y!m11=*)Gw;;X*njj zTl1a4&h-^0=~AChsS&bw@@MP-S!GC7%=aO7Wnt)r$hDog&|@KILbr!!c1jISVhh0% zT19!qXwV?I7CiuJZ40s3u+i{`d4aW~knE@zrfKK%E);^R0Sip{Dl?8+;RI?4-=Sq> zE-Ys`pCc81#xnCdag}AFw9>gooNehW7D?`g!>}Xv#$T~gP)X;8kQWidI){cm3A2X3 z3GJw0!>>fnYhQ~8viCtpwmWfWJMt~~5|4#~+IZCTr;un~BP_6N)6KP;B7(_W{G3?0 zyUas87)C+|_?vx*c2aG;;ohS@?1{7|^#;Mm+FUHQwY25h`?~V%OB{P_-I~_nw_pa> z(5g~7HOLsXDkLW|K5~Y_GmJ#kb=cMUkWV1D&@BR9WdlGJJi?~P*80UeIQ5U2QFQ^|$x7YAWs-*H@jl z7F4-aF!d+G637Gj@1O^FDL6{GTyY{KHR4q0iHJW!(}QP+P7IyFhvFFYhS;%<4Q6NX zi2EsNj@Zf6U3_6$ZGggQZIE@me+Eo*Tgz<4KhoN=4*v-=SQo8yrf~!C4m1{)^6z-P z+v84u*S4}GIRqOj(uJydCLdQ&@$dKs{)@!&oLgS#v(Vpydvan@`Q_Eg}a4$y^4y%uGcxn9Rl_e{h(jm+31NhVN`Qq)6$W^Gfw2 z+psD}UVm~BUqJQTBTy2cbyF@yuc4z!>zLxy1Yh9>cf5gBy?T(VLY; zvD3?2i0t>EJjglrymtB%m=~6gbNegVL?FIfO{9UeN zhw)8t0@ss3?v&Z^5!vK$_p1HkF>5bwn zVXPBL>_J-NuVjZ|1?t7rvZ4G78nNHNr>HsRkQ|(W$8iM_`0zRmn zn<{X(Ft_Hat)br=82|+f*JmKTO-ztciR&TERbMWb6vO5_!R;vbSh1 
z+C}`}Xug!GA)UZ7#sD1n2}nXa*k15Fc?gf;b?gNEn$+PY_$V`wTwr>zE0KubkR@0r zFDJR-#l2VS#p**1m-_rsvB>&Vnqruk|E+$B=pn&cx4P36<1#!uck!rk%VwuA?ZJE$ z%?{@-!gyo?dGHr}8nhyE)Ld4AHgO|q&btl)XohO}HS`U0f!+maNFH2=zJ>v?8D{Az zrUGh`1H$1y>}IgdEflqHQija@kF6g@F6fGKy)|~7vB0-iacb|ZAJbbzg?te%@6U#^ zbFe%A1s;Qw@oiYe4Pj=2nW!iG2bji>N8drc?0Y;7{!QAW?lkVMhP&x4&>QST+tD+L>~gp+zW z93O?=xGh-3IU$R7!8_bpdf&VjEk{@3VNCB^NePE=A$$a1!{cBBm_Uv(xo8}{Rl5pr zfw^!lD8((|DKZirgEN2}#iG$@Ja~^gf^SI#DkFZl57ENOs5N?mcX0b*4{{9_;;-Ba zspd~H>(DRgA?yX+v62MzCinfD7_$9N9oQC Date: Mon, 9 Mar 2026 10:47:24 +0100 Subject: [PATCH 7/9] add test for inputreader --- .DS_Store | Bin 12292 -> 12292 bytes tests/conftest.py | 134 --------------------------- tests/io_tests/__init__.py | 0 tests/io_tests/test_inputreader.py | 54 +++++++++++ tests/test_attribute_mapping.py | 21 ----- tests/test_dicom_reader.py | 38 -------- tests/test_map_mri_schema.py | 36 ------- tests/test_map_schema.py | 49 ---------- tests/test_metadata_reader.py | 10 -- tests/test_metadata_schema_reader.py | 131 -------------------------- tests/test_schema_collector.py | 15 --- 11 files changed, 54 insertions(+), 434 deletions(-) delete mode 100644 tests/conftest.py create mode 100644 tests/io_tests/__init__.py create mode 100644 tests/io_tests/test_inputreader.py delete mode 100644 tests/test_attribute_mapping.py delete mode 100644 tests/test_dicom_reader.py delete mode 100644 tests/test_map_mri_schema.py delete mode 100644 tests/test_map_schema.py delete mode 100644 tests/test_metadata_reader.py delete mode 100644 tests/test_metadata_schema_reader.py delete mode 100644 tests/test_schema_collector.py diff --git a/.DS_Store b/.DS_Store index 6908bad4b70720eac4c2405e093b1ebe7752dd52..27bc7231815949b6a9e633ad7b38234df5dffcbc 100644 GIT binary patch delta 255 zcmZokXi1nL#l*rjQO20ja$>;t#R^Q!lMQrOCUY|yZ?+P+$++29=`AA<0|Nsi5CZ`R z_hvyRahA>MJWrSzWhM(S>P>bOIy~7&*lF_#VQp5%os;`T3pFIGs|_s;%ybmY4NPiv z6sj$a40IGsjLm9mIXOg?^{s>AvvYFu^1CPJDa$kVY+fT7&$d~Cse_48W;2h%V#du+ 
z6%$w{*Q#xt{6tM}vK`}*$rr_fWPrA_g5AKN!40Hc85jhB4rAeBU{C-#>OJ#hejQ5z cpgX}%1rkgk)srK17EeAdo;G>9>N<2I0hcR5WB>pF delta 430 zcmZokXi1nL#l*-lQO20jc4ENx$p$(slet-pC%;oW!o-lYd7~sV8z&V&;)e0~$FfoDjY&H=zW1aj{ zZRchc@pv{y+08r(iy4i$8RCHuq%D~t5r{K@HUULY4PaCP8ld+d3>Y@AR!m_rfP$<*3 None: - - def mock_dcmread(file): - pydicom_file=Monk_Pydicom_Object() - return pydicom_file - - monkeypatch.setattr(pydicom, "dcmread", mock_dcmread) - - return_validate_type=[True, False, True, False, False, False, False, False, True, False, False] - validate_type_mock=Mock(side_effect=return_validate_type) - monkeypatch.setattr(Dicom_Reader, "validate_type", validate_type_mock) - - return_name_standardization=["standardizedName","studyDate", "studyTime", "standardizedName", "standardizedName"] - name_standardization_mock=Mock(side_effect=return_name_standardization) - monkeypatch.setattr(Dicom_Reader, "name_standardization", name_standardization_mock) - - return_merge_dict_keys=[{"pixelData": "val1"}, {"pixelData": ["val1", "val2"]}] - merge_dict_keys_mock=Mock(side_effect=return_merge_dict_keys) - monkeypatch.setattr(Dicom_Reader, "merge_dict_keys", merge_dict_keys_mock) - -@pytest.fixture -def test_metadata_reader_isdir(monkeypatch: pytest.MonkeyPatch) -> None: - return_is_file=[False, True] - is_file_mock=Mock(side_effect=return_is_file) - monkeypatch.setattr(os.path, "isfile", is_file_mock) - - return_is_dir=[True, False] - is_dir_mock=Mock(side_effect=return_is_dir) - monkeypatch.setattr(os.path, "isdir", is_dir_mock) - - return_listdir=[["dicom_file"]] - listdir_mock=Mock(side_effect=return_listdir) - monkeypatch.setattr(os, "listdir", listdir_mock) - - return_splittext=[("filename", ".dcm")] - splittext_mock=Mock(side_effect=return_splittext) - monkeypatch.setattr(os.path, "splitext", splittext_mock) - - return_Dicom_Reader=[None] - Dicom_Reader_mock=Mock(side_effect=return_Dicom_Reader) - monkeypatch.setattr(Dicom_Reader, "__init__", Dicom_Reader_mock) - -class Monk_Zipfile_name(IterMixin): - - 
def __init__(self): - self.filename="dummy_name" - -class Monk_Zipfile(IterMixin): - - def __init__(self): - - self.filelist=[Monk_Zipfile_name()] - - def __enter__(self): - return Monk_Zipfile() - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def open(self): - pass - -@pytest.fixture -def test_metadata_reader_isfile(monkeypatch: pytest.MonkeyPatch) -> None: - return_is_file=[True] - is_file_mock=Mock(side_effect=return_is_file) - monkeypatch.setattr(os.path, "isfile", is_file_mock) - - return_is_dir=[False] - is_dir_mock=Mock(side_effect=return_is_dir) - monkeypatch.setattr(os.path, "isdir", is_dir_mock) - - return_listdir=[["dicom_file"]] - listdir_mock=Mock(side_effect=return_listdir) - monkeypatch.setattr(os, "listdir", listdir_mock) - - return_splittext=[("filename", ".dcm")] - splittext_mock=Mock(side_effect=return_splittext) - monkeypatch.setattr(os.path, "splitext", splittext_mock) - - return_Dicom_Reader=[None] - Dicom_Reader_mock=Mock(side_effect=return_Dicom_Reader) - monkeypatch.setattr(Dicom_Reader, "__init__", Dicom_Reader_mock) - - def mock_zipfile(file): - zipfile=Monk_Zipfile() - return zipfile - - monkeypatch.setattr(zipfile, "ZipFile", mock_zipfile) - - return_open=["dummy_file"] - open_mock=Mock(side_effect=return_open) - monkeypatch.setattr(Monk_Zipfile, "open", open_mock) \ No newline at end of file diff --git a/tests/io_tests/__init__.py b/tests/io_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/io_tests/test_inputreader.py b/tests/io_tests/test_inputreader.py new file mode 100644 index 0000000..46206f2 --- /dev/null +++ b/tests/io_tests/test_inputreader.py @@ -0,0 +1,54 @@ +import os +import pytest + +from src.IO.InputReader import InputReader +from src.parser.impl.MRI_Parser import MRI_Parser + + +class TestInputReader: + + def set_up_sample_data(self): + dir_to_testscript = os.path.split(__file__)[0] + + test_path = os.path.join(dir_to_testscript, "../../example/dicom_files/") + return 
test_path + + def test_get_applicable_mriparser(self): + tp = self.set_up_sample_data() + + dicomfile = os.path.join(tp, "./MRIm1.dcm") + + parsers = InputReader.get_applicable_parsers(dicomfile) + assert len(parsers) >= 1 + + dicomfile = os.path.join(tp, "./MRIm1.dcm") + + parsers = InputReader.get_applicable_parsers(dicomfile) + assert len(parsers) >= 1 + + def test_get_applicable_parsers_with_extension(self): + tp = self.set_up_sample_data() + + dicomfile = os.path.join(tp, "./MRIm1.dcm") + + parsers = InputReader.get_applicable_parsers(dicomfile) + assert len(parsers) >= 1 + assert "MRI_Parser" in parsers + + def test_get_applicable_parsers_wo_extension(self): + tp = self.set_up_sample_data() + + # Create a copy of the DICOM file without extension + dicomfile_with_ext = os.path.join(tp, "./MRIm1.dcm") + dicomfile_wo_ext = os.path.join(tp, "./MRIm1") + + # Copy the file without extension + import shutil + shutil.copy2(dicomfile_with_ext, dicomfile_wo_ext) + + parsers = InputReader.get_applicable_parsers(dicomfile_wo_ext) + assert len(parsers) >= 1 + + # Clean up + if os.path.exists(dicomfile_wo_ext): + os.unlink(dicomfile_wo_ext) diff --git a/tests/test_attribute_mapping.py b/tests/test_attribute_mapping.py deleted file mode 100644 index 4eb2155..0000000 --- a/tests/test_attribute_mapping.py +++ /dev/null @@ -1,21 +0,0 @@ -import pytest -from jammato.attribute_mapper import Attribute_Mapper -class Dicom_Object(): - def __init__(self) -> None: - self.attribute1 = "value1" - self.attribute2 = "value2" - self.attribute3 = "value3" - -dicom_object=Dicom_Object() -map_dict={"object": {"attribute1": "attribute1_1", "attribute2": "attribute2_1", "attribute3": "attribute3_1", "attribute4": "attribute4_1"}} -study_map = Attribute_Mapper.mapping_from_object(dicom_object.__dict__, map_dict, "object") -kwargs={"additional_attribute": {"attribute4_1": "value4"}} -study_map.update_map(**kwargs) -@pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"attribute1_1": "value1", 
"attribute2_1": "value2", "attribute3_1": "value3", "additional_attribute": {"attribute4_1": "value4"}}, study_map.__dict__), - ) -) -def test_name_standardization(exp_res: dict, inp: dict) -> None: - assert exp_res == inp \ No newline at end of file diff --git a/tests/test_dicom_reader.py b/tests/test_dicom_reader.py deleted file mode 100644 index a0f8e83..0000000 --- a/tests/test_dicom_reader.py +++ /dev/null @@ -1,38 +0,0 @@ -import pytest -from jammato.dicom_reader import Dicom_Reader - - -def test_dicom_reader(test_dicom_reader: object) -> dict: - test_dicom_file=Dicom_Reader("fakepath") - assert list(test_dicom_file.__dict__.keys())==(["studyDateTime", "standardizedName"]) - - -def test_dicom_reader_raises(file=None) -> dict: - with pytest.raises(TypeError): - _=Dicom_Reader(file) - -def test_dicom_reader_raises(file="fake_path") -> dict: - with pytest.raises(FileNotFoundError): - _=Dicom_Reader(file) - -@pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ("standardizedname", "Standardized_name"), - ("standardizedName", "Standardized Name"), - ) -) -def test_name_standardization(test_dicom_reader: object, exp_res: dict, inp: str) -> None: - test_dicom_file=Dicom_Reader("fakepath") - assert exp_res == test_dicom_file.name_standardization(inp) - -@pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key1": ["val2", "val1"]}, ({"key1": "val1"}, {"key1": "val2"})), - ({"key1": ["val2", "val3", "val1"]}, ({"key1": "val1"}, {"key1": ["val2", "val3"]})), - ) -) -def test_merge_dict_keys(test_dicom_reader: object, exp_res: dict, inp: dict) -> None: - test_dicom_file=Dicom_Reader("fakepath") - assert exp_res == test_dicom_file.merge_dict_keys(inp) diff --git a/tests/test_map_mri_schema.py b/tests/test_map_mri_schema.py deleted file mode 100644 index dce1ac5..0000000 --- a/tests/test_map_mri_schema.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from jammato.map_mri_schema import Map_MRI_Schema - -class Class_for_testing1(): - def __init__(self) -> None: - 
self.key8_1="8" - -class Class_for_testing2(): - def __init__(self) -> None: - self.key1="1" - self.key2="2" - self.key3="3.0" - self.key4="True" - self.key5="None" - self.key6_1="6" - self.key7=["7"] - self.key8=[Class_for_testing1()] - self.key9="9" - self.key10="10" - self.key11=["11"] - -dummy_map=Class_for_testing2() -test_schema1 = Map_MRI_Schema({"key1": "int", "key2": "str", "key3": "float", "key4": "bool", "key5": "None", "key6": {"key6_1": "str"}, "key7": ["str"], "key8": [{"key8_1": "str"}], - "key9": ("str", "int"), "key10": {"value": "str", "unit": "default_unit"}, "key11": {"value": ["str"], "unit": "default_unit"}, "key12": 12}, - ["key1", "key2", "key3", "key4", "key5", "key6", "key7", "key8", "key9", "key10", "key11", "key12"], dummy_map, None) - -@ pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key1": 1, "key2": "2", "key3": 3.0, "key4": True, "key5": "null", "key6": {"key6_1": "6"}, "key7": ["7"], "key8": [{"key8_1": "8"}], "key9": "9", "key10": {"value": "10", "unit": "default_unit"}, "key11": {"value": ["11"], "unit": "default_unit"}}, - test_schema1.fill_json_object(test_schema1.schema_skeleton, test_schema1.key_list, test_schema1.map, test_schema1.main_key)), - ) -) -def test_json_object_search(exp_res: dict, inp: dict) -> None: - assert exp_res == inp \ No newline at end of file diff --git a/tests/test_map_schema.py b/tests/test_map_schema.py deleted file mode 100644 index 7c267b7..0000000 --- a/tests/test_map_schema.py +++ /dev/null @@ -1,49 +0,0 @@ -import pytest - -from jammato.attribute_inserter import Attribute_Inserter - -class Class_for_testing1(): - def __init__(self) -> None: - self.key8_1="8" - -class Class_for_testing2(): - def __init__(self) -> None: - self.key1="1" - self.key2="2" - self.key3="3.0" - self.key4="True" - self.key5="None" - self.key6_1="6" - self.key7=["7"] - self.key8=[Class_for_testing1()] - self.key9=[["9"]] - self.key10="10" - self.key11=[] - -dummy_map=Class_for_testing2() -test_schema1 = 
Attribute_Inserter({"key1": "int", "key2": "str", "key3": "float", "key4": "bool", "key5": "None", "key6": {"key6_1": "str"}, "key7": ["str"], "key8": [{"key8_1": "str"}], "key9": [["str"]], - "key10": ("str", "int")}, - ["key1", "key2", "key3", "key4", "key5", "key6", "key7", "key8", "key9", "key10"], dummy_map) -test_schema2 = Attribute_Inserter({"key1": "int", "key2_1": "str"}, - ["key1", "key2_1"], dummy_map) -test_schema3 = Attribute_Inserter({"key1": "int", "key11": ["str"]}, - ["key1", "key11"], dummy_map) -test_schema4 = Attribute_Inserter({"key1": "int", "key11": [None]}, -["key1", "key11"], dummy_map) - -@ pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key1": 1, "key2": "2", "key3": 3.0, "key4": True, "key5": "null", "key6": {"key6_1": "6"}, "key7": ["7"], "key8": [{"key8_1": "8"}], "key9": [["9"]], - "key10": "10"}, - test_schema1.fill_json_object(test_schema1.schema_skeleton, test_schema1.key_list, test_schema1.map)), - ({"key1": 1}, - test_schema2.fill_json_object(test_schema2.schema_skeleton, test_schema2.key_list, test_schema2.map)), - ({"key1": 1}, - test_schema3.fill_json_object(test_schema3.schema_skeleton, test_schema3.key_list, test_schema3.map)), - ({"key1": 1}, - test_schema4.fill_json_object(test_schema4.schema_skeleton, test_schema4.key_list, test_schema4.map)), - ) -) -def test_fill_object(exp_res: dict, inp: dict) -> None: - assert exp_res == inp \ No newline at end of file diff --git a/tests/test_metadata_reader.py b/tests/test_metadata_reader.py deleted file mode 100644 index 9e0cfa6..0000000 --- a/tests/test_metadata_reader.py +++ /dev/null @@ -1,10 +0,0 @@ -from jammato.metadata_reader import Metadata_Reader -from jammato.dicom_reader import Dicom_Reader - -def test_dicom_reader_isdir(test_metadata_reader_isdir: object) -> object: - test=Metadata_Reader("fakepath") - assert isinstance(test.all_dicom_series[0], Dicom_Reader) - -def test_dicom_reader_isfile(test_metadata_reader_isfile: object) -> object: - 
test=Metadata_Reader("fakepath") - assert isinstance(test.all_dicom_series[0], Dicom_Reader) \ No newline at end of file diff --git a/tests/test_metadata_schema_reader.py b/tests/test_metadata_schema_reader.py deleted file mode 100644 index e6bdea6..0000000 --- a/tests/test_metadata_schema_reader.py +++ /dev/null @@ -1,131 +0,0 @@ -import pytest -# from Classes.dicomReader import DicomReader - -from jammato.schema_reader import Schema_Reader - -test_schema1 = Schema_Reader( - {"properties": {"key1": {"type": "integer"}}}) -test_schema2 = Schema_Reader( - {"properties": {"key2": {"type": "number"}}}) -test_schema3 = Schema_Reader( - {"properties": {"key3": {"type": "string"}}}) -test_schema4 = Schema_Reader( - {"properties": {"key4": {"type": "boolean"}}}) -test_schema5 = Schema_Reader( - {"properties": {"key5": {"type": "string"}}, "$defs": {"key_d5_1": {"type": "string"}}}) -test_schema6 = Schema_Reader( - {"properties": {"key6": {"type": "string"}}, "definitions": {"key_d6_1": {"type": "string"}}}) -test_schema7 = Schema_Reader( - {"properties": {"key7": {"type": "string"}}, "definitions": {"key_d7_1": {"type": "object", "properties": {"key_d7_11": {"type": "string"}}}}}) -test_schema8 = Schema_Reader( - {"properties": {"key8": {"type": "string"}}, "definitions": {"key_d8_1": {"type": "array", "items": {"type": "string"}}}}) -test_schema9 = Schema_Reader( - {"properties": {"key9": {"type": "integer"}, "key9_1": {"type": "number"}, "key9_2": {"type": "boolean"}, "key9_3": {"$ref": "#/definitions/key_d9_1"}}, "definitions": {"key_d9_1": {"type": "string"}}}) -test_schema10 = Schema_Reader( - {"properties": {"key_10": {"$ref": "#/definitions/key_d10_1"}}, "definitions": {"key_d10_1": {"$ref": "#/definitions/key_d10_2"}, "key_d10_2": {"type": "string"}}}) -test_schema11 = Schema_Reader( - {"properties": {"key_11": {"$ref": "#/definitions/key_d11_1"}}, "definitions": {"key_d11_1": {"type": "array", "items": {"type": "string"}}}}) -test_schema12 = Schema_Reader( - 
{"properties": {"key_12": {"$ref": "#/definitions/key_d12_1"}}, "definitions": {"key_d12_1": {"type": "object", "properties": {"key_d12_11": {"type": "string"}}}}}) -test_schema13 = Schema_Reader( - {"properties": {"key_13": {"type": "array", "items": {"$ref": "#/definitions/key_d13_1"}}}, "definitions": {"key_d13_1": {"type": "string"}}}) -test_schema14 = Schema_Reader( - {"properties": {"key_14": {"type": "array", "items": {"oneOf": [{"type": "string"}, {"type": "integer"}]}}}}) -test_schema15 = Schema_Reader( - {"properties": {"key_15": {"type": "array", "items": {"oneOf": [{"properties": {"key_15_1": {"type": "string"}}}]}}}}) -test_schema16 = Schema_Reader( - {"properties": {"key_16": {"type": "array", "items": {"oneOf": [{"items": {"type": "string"}}]}}}}) -test_schema17 = Schema_Reader( - {"properties": {"key_17": {"type": "object", "properties": {"key_17_1": {"oneOf": [{"type": "string"}, {"type": "integer"}]}}}}}) -test_schema18 = Schema_Reader( - {"properties": {"key_18": {"type": "object", "properties": {"value": {"type": "string"}, "unit": {"type": "string", "default": "someUnit"}}}}}) -test_schema19 = Schema_Reader( - {"properties": {"key_19": {"$ref": "#/definitions/key_d19_1"}}, "definitions": {"key_d19_1": {"oneOf": [{"type": "string"}, {"type": "integer"}]}}}) -test_schema20 = Schema_Reader( - {"properties": {"key_20": {"type": "array", "items": { "type": "array", "items": {"type": "string"}}}}}) -test_schema21 = Schema_Reader( - {"properties": {"key_21": {"type": "array", "items": { "type": "object", "properties": {"key_21_1": {"type": "string"}}}}}}) -test_schema22 = Schema_Reader( - {"properties": {"key_22": {"type": "object", "properties": {"value": {"type": "string"}, "unit": {"type": "string"}}}}}) -test_schema23 = Schema_Reader( - {"properties": {"key23": {"type": "null"}}}) -test_schema1_1 = Schema_Reader( - {"properties": {"key11": {"type": "int"}}}) -test_schema2_1 = Schema_Reader( - {"properties": {"key21": {"type": "float"}}}) 
-test_schema3_1 = Schema_Reader( - {"properties": {"key31": {"type": "str"}}}) -test_schema4_1 = Schema_Reader( - {"properties": {"key41": {"type": "bool"}}}) -test_schema5_1 = Schema_Reader( - {"properties": {"key51": {"type": ["int", "float", "str", "bool"]}}}) -test_schema6_1 = Schema_Reader( - {"properties": {"key_61": {"$ref": "#/definitions/key_d61_1"}}, "definitions": {"key_d61_1": {"$ref": "/definitions/key_d61_2"}, "key_d61_2": {"type": "string"}}}) -test_schema7_1 = Schema_Reader( - {"properties": {"key_71": {"type": "array", "items": {"$ref": "/definitions/key_d7_1"}}}, "definitions": {"key_d7_1": {"type": "string"}}}) -test_schema8_1 = Schema_Reader( - {"properties": {"key_81": {"type": "array", "items": {"oneOf": [{"key_81_1": {"type": "string"}}]}}}}) -test_schema9_1 = Schema_Reader( - {"properties": {"key_91": {"$ref": "/definitions/key_d91_1"}}}) - -@ pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key1": "int"}, test_schema1.json_object_search(test_schema1.schema)), - ({"key2": "float"}, test_schema2.json_object_search(test_schema2.schema)), - ({"key3": "str"}, test_schema3.json_object_search(test_schema3.schema)), - ({"key4": "bool"}, test_schema4.json_object_search(test_schema4.schema)), - ({"key9": "int", "key9_1": "float", "key9_2": "bool", "key9_3": "str"}, test_schema9.json_object_search(test_schema9.schema)), - ({"key_10": "str"}, test_schema10.json_object_search(test_schema10.schema)), - ({"key_11": ["str"]}, test_schema11.json_object_search(test_schema11.schema)), - ({"key_12": {"key_d12_11": "str"}}, test_schema12.json_object_search(test_schema12.schema)), - ({"key_13": ["str"]}, test_schema13.json_object_search(test_schema13.schema)), - ({"key_14": ["str", "int"]}, test_schema14.json_object_search(test_schema14.schema)), - ({"key_15": [{"key_15_1": "str"}]}, test_schema15.json_object_search(test_schema15.schema)), - ({"key_16": [["str"]]}, test_schema16.json_object_search(test_schema16.schema)), - ({"key_17": {"key_17_1": ["str", 
"int"]}}, test_schema17.json_object_search(test_schema17.schema)), - ({"key_18": {"value": "str", "unit": "someUnit"}}, test_schema18.json_object_search(test_schema18.schema)), - ({"key_19": ["str", "int"]}, test_schema19.json_object_search(test_schema19.schema)), - ({"key_20": [["str"]]}, test_schema20.json_object_search(test_schema20.schema)), - ({"key_21": [{"key_21_1": "str"}]}, test_schema21.json_object_search(test_schema21.schema)), - ({"key_22": {"value": "str", "unit": "str"}}, test_schema22.json_object_search(test_schema22.schema)), - ({"key23": None}, test_schema23.json_object_search(test_schema23.schema)) - ) -) -def test_json_object_search(exp_res: dict, inp: dict) -> None: - assert inp == exp_res - -@ pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key_d5_1": {"type": "string"}}, test_schema5.definitions), - ({"key_d6_1": {"type": "string"}}, test_schema6.definitions), - ({"key_d7_1": {"properties": {"key_d7_11": {"type": "string"}}, "type": "object"}}, test_schema7.definitions), - ({"key_d8_1": {"items": {"type": "string"}, "type": "array"}}, test_schema8.definitions), - - ) -) -def test_definitions(exp_res: dict, inp: dict) -> None: - assert inp == exp_res - - -@ pytest.mark.parametrize( - ("exp_res", "inp"), - ( - ({"key11": None}, test_schema1_1.json_object_search(test_schema1_1.schema)), - ({"key21": None}, test_schema2_1.json_object_search(test_schema2_1.schema)), - ({"key31": None}, test_schema3_1.json_object_search(test_schema3_1.schema)), - ({"key41": None}, test_schema4_1.json_object_search(test_schema4_1.schema)), - ({"key51": (None, None, None, None)}, test_schema5_1.json_object_search(test_schema5_1.schema)), - ({"key_61": None}, test_schema6_1.json_object_search(test_schema6_1.schema)), - ({"key_71": None}, test_schema7_1.json_object_search(test_schema7_1.schema)), - ({"key_81": [None]}, test_schema8_1.json_object_search(test_schema8_1.schema)), - ({}, test_schema9_1.json_object_search(test_schema9_1.schema)) - ) -) -def 
test_schema_false_return(exp_res: dict, inp: Schema_Reader) -> None: - assert inp == exp_res - -@ pytest.mark.skip(reason="not implemented") -def testOther() -> None: - pass diff --git a/tests/test_schema_collector.py b/tests/test_schema_collector.py deleted file mode 100644 index e5caf62..0000000 --- a/tests/test_schema_collector.py +++ /dev/null @@ -1,15 +0,0 @@ -import pytest -from jammato.schemas_collector import Schemas_Collector - -schemasCollectorInstance=Schemas_Collector() -schemasCollectorInstance.add_schema("uri", {"attribute1": "value1"}) - -@pytest.mark.parametrize( - ("exp_res", "inp"), - ( - (True, schemasCollectorInstance.get_uri("uri")), - ({"attribute1": "value1"}, schemasCollectorInstance.get_schema("uri")), - ) -) -def test_name_standardization(exp_res: dict, inp: dict) -> None: - assert exp_res == inp \ No newline at end of file From 6b13109aa4fb5c86700f6f00976c370a814b3778 Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Mon, 9 Mar 2026 10:48:50 +0100 Subject: [PATCH 8/9] add test for mapfilereader --- tests/io_tests/test_mapfilereader.py | 80 ++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tests/io_tests/test_mapfilereader.py diff --git a/tests/io_tests/test_mapfilereader.py b/tests/io_tests/test_mapfilereader.py new file mode 100644 index 0000000..93eaf33 --- /dev/null +++ b/tests/io_tests/test_mapfilereader.py @@ -0,0 +1,80 @@ +import os +import pytest + +from src.IO.MapfileReader import MapFileReader + + +class TestMapfileReader: + + def set_up_sample_data(self): + dir_to_testscript = os.path.split(__file__)[0] + test_path = os.path.join(dir_to_testscript, "../../src/resources/maps/mapping/") + return test_path + + def test_read_mapfile_valid(self): + """Test reading a valid mapping file.""" + tp = self.set_up_sample_data() + mapfile = os.path.join(tp, "map_full_path.json") + + mapping_dict = MapFileReader.read_mapfile(mapfile) + + assert isinstance(mapping_dict, dict) + assert 'study' in mapping_dict + 
assert 'series' in mapping_dict + assert 'perImage' in mapping_dict + + def test_parse_mapinfo_for_study(self): + """Test parsing study section from mapping.""" + tp = self.set_up_sample_data() + mapfile = os.path.join(tp, "map_full_path.json") + + mapping_dict = MapFileReader.read_mapfile(mapfile) + study_mapping = MapFileReader.parse_mapinfo_for_study(mapping_dict) + + assert isinstance(study_mapping, dict) + assert 'study.studyID' in study_mapping + assert 'study.studyTitle' in study_mapping + + def test_parse_mapinfo_for_series(self): + """Test parsing series section from mapping.""" + tp = self.set_up_sample_data() + mapfile = os.path.join(tp, "map_full_path.json") + + mapping_dict = MapFileReader.read_mapfile(mapfile) + series_mapping = MapFileReader.parse_mapinfo_for_series(mapping_dict) + + assert isinstance(series_mapping, dict) + assert 'study.series.seriesID' in series_mapping + assert 'study.series.seriesTitle' in series_mapping + + def test_parse_mapinfo_for_perImage(self): + """Test parsing perImage section from mapping.""" + tp = self.set_up_sample_data() + mapfile = os.path.join(tp, "map_full_path.json") + + mapping_dict = MapFileReader.read_mapfile(mapfile) + perImage_mapping = MapFileReader.parse_mapinfo_for_perImage(mapping_dict) + + assert isinstance(perImage_mapping, dict) + assert 'study.series.images.perImage.sampleImagePosition' in perImage_mapping + + def test_parse_mapinfo_missing_sections(self): + """Test parsing when sections are missing.""" + # Test with empty mapping + empty_mapping = {} + + study_mapping = MapFileReader.parse_mapinfo_for_study(empty_mapping) + series_mapping = MapFileReader.parse_mapinfo_for_series(empty_mapping) + perImage_mapping = MapFileReader.parse_mapinfo_for_perImage(empty_mapping) + + assert study_mapping == {} + assert series_mapping == {} + assert perImage_mapping == {} + + def test_read_mapfile_nonexistent(self): + """Test reading nonexistent mapping file.""" + tp = self.set_up_sample_data() + dummy_file = 
os.path.join(tp, "dummy.json") + + with pytest.raises(Exception): + MapFileReader.read_mapfile(dummy_file) From 1a8bac82aa54581220f21ea1e46661ddc7c6f452 Mon Sep 17 00:00:00 2001 From: gabinoumbe Date: Mon, 9 Mar 2026 10:50:00 +0100 Subject: [PATCH 9/9] add test for parser --- tests/parser_tests/__init__.py | 0 tests/parser_tests/test_mriparser.py | 70 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tests/parser_tests/__init__.py create mode 100644 tests/parser_tests/test_mriparser.py diff --git a/tests/parser_tests/__init__.py b/tests/parser_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/parser_tests/test_mriparser.py b/tests/parser_tests/test_mriparser.py new file mode 100644 index 0000000..b1a0936 --- /dev/null +++ b/tests/parser_tests/test_mriparser.py @@ -0,0 +1,70 @@ +import os +import pytest +from pprint import pprint + +from src.parser.impl.MRI_Parser import MRI_Parser +from src.util import input_to_dict + + +class TestMRIparser: + + def test_mri_parser_basic(self): + """Test MRI parser basic functionality.""" + dir_to_testscript = os.path.split(__file__)[0] + + try: + test_dicompath = os.path.join(dir_to_testscript, "../../example/dicom_files/MRIm1.dcm") + + # Test that we can create the parser + parser = MRI_Parser() + assert parser is not None + + # Test that we can read the DICOM file + input_md = parser._read_input_file(test_dicompath) + assert input_md is not None + assert 'studyDate' in input_md + assert 'studyTime' in input_md + + print("Raw DICOM data:") + pprint(input_md) + + except FileNotFoundError: + pytest.skip("Test file not included, skipping test") + + def test_mri_parser_dicom(self): + """Test MRI parser with DICOM file - just test parsing, not mapping.""" + dir_to_testscript = os.path.split(__file__)[0] + + try: + test_dicompath = os.path.join(dir_to_testscript, "../../example/dicom_files/MRIm1.dcm") + + parser = MRI_Parser() + # Just test that we can read the file without 
errors + input_md = parser._read_input_file(test_dicompath) + + print("Raw DICOM data:") + pprint(input_md) + + except FileNotFoundError: + pytest.skip("Test file not included, skipping test") + + def test_mri_parser_preprocessing(self): + """Test MRI parser preprocessing functionality.""" + dir_to_testscript = os.path.split(__file__)[0] + + try: + test_dicompath = os.path.join(dir_to_testscript, "../../example/dicom_files/MRIm1.dcm") + + parser = MRI_Parser() + input_md = parser._read_input_file(test_dicompath) + + # Test that preprocessing was applied + assert input_md is not None + assert 'studyDate' in input_md + assert 'studyTime' in input_md + + print("Raw DICOM data with preprocessing:") + pprint(input_md) + + except FileNotFoundError: + pytest.skip("Test file not included, skipping test") \ No newline at end of file