From 4b071f7c3d30343dc3858a976a1c90001848fdec Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Tue, 14 Mar 2023 17:55:51 +0100 Subject: [PATCH 01/15] feat: adds script to synchronize biobanks and collections data with fdp --- sync_directory_with_fdp.py | 159 +++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 sync_directory_with_fdp.py diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py new file mode 100644 index 0000000..7193a7a --- /dev/null +++ b/sync_directory_with_fdp.py @@ -0,0 +1,159 @@ +from molgenis import client +from molgenis.client import MolgenisRequestError + +molgenisURL = 'http://localhost:82' +directoryURL = 'http://localhost:82' + +MOLGENIS_URL = f'{molgenisURL}' # URL of the molgenis to query +DIRECTORY_URL = f'{directoryURL}' # URL of the directory + +BBMRI_BIOBANK_ENTITY = 'eu_bbmri_eric_biobanks' +BBMRI_COLLECTION_ENTITY = 'eu_bbmri_eric_collections' + +FDP_BIOBANK_ENTITY = 'fdp_Biobank' +FDP_COLLECTION_ENTITY = 'fdp_Collection' + +ORPHA_ONTOLOGY_PREFIX = 'http://www.orpha.net/ORDO/Orphanet_' +ORPHA_DIRECTORY_PREFIX = 'ORPHA:' +ICD_10_ONTOLOGY_PREFIX = 'http://purl.bioontology.org/ontology/ICD10/' +ICD_10_DIRECTORY_PREFIX = 'urn:miriam:icd:' + +BIOBANKS_ATTRIBUTES = 'id,name,acronym,description,country,juridical_person,collections,contact' +BIOBANKS_EXPAND_ATTRIBUTES = 'country,juridical_person,collections,contact' +COLLECTIONS_ATTRIBUTES = 'id,name,description,biobank,diagnosis_available,country,parent_collection' +COLLECTIONS_EXPAND_ATTRIBUTES = 'biobank,diagnosis_available,country' + +COLLECTION_TYPES_ONTOLOGIES = { + 'BIRTH_COHORT': 'http://purl.obolibrary.org/obo/OBI_0002614', + 'CASE_CONTROL': 'http://purl.obolibrary.org/obo/OBIB_0000693', + 'COHORT': 'http://purl.obolibrary.org/obo/OBIB_0000696', + 'CROSS_SECTIONAL': 'http://purl.obolibrary.org/obo/OBIB_0000694', + 'DISEASE_SPECIFIC': 'http://purl.obolibrary.org/obo/OBI_0002615', + 'HOSPITAL': None, + 'IMAGE': None, + 'LONGITUDINAL': 'http://purl.obolibrary.org/obo/OBIB_0000695', + 'NON_HUMAN': None, + 'OTHER': None, + 'POPULATION_BASED': 'http://purl.obolibrary.org/obo/OBIB_0000698', + 'PROSPECTIVE_COLLECTION': None, + 'QUALITY_CONTROL': 'http://purl.obolibrary.org/obo/OBIB_0000699', + 'RD': None, + 'SAMPLE': None, + 'TWIN_STUDY': 'http://purl.obolibrary.org/obo/OBIB_0000700' +} + + +def _get_missing_biobanks(session, **kwargs): + print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) + source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) + print("Getting ids already present") + dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK_ENTITY, attributes='identifier')] + new_records = [sr for sr in source_records if sr['id'] not in dest_records_ids] + print("Found {} new records to insert".format(len(new_records))) + return source_records + + +def _add_new_records(session, entity, records): + created_records = [] + for i in range(0, len(records), 1000): + try: + created_records.extend(session.add_all(entity, records[i:i + 1000])) + except MolgenisRequestError as ex: + print("Error adding records") + print(ex) + print("Added {} record(s) of type {}".format(len(created_records), entity)) + + +def _convert_country(country): + return 'GB' if country == 'UK' else country + + +def _get_disease_ontology_code(disease_code): + if ORPHA_DIRECTORY_PREFIX in disease_code: + return disease_code.replace(ORPHA_ONTOLOGY_PREFIX, ORPHA_ONTOLOGY_PREFIX) + if ICD_10_DIRECTORY_PREFIX in disease_code: + return disease_code.replace(ICD_10_DIRECTORY_PREFIX, ICD_10_ONTOLOGY_PREFIX) + + +def _get_collection_type_ontology_code(collection_type): + return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) + + +def get_records_to_add(biobank_data, session): + missing_iris = [] + for c in biobank_data['collections']: + for d in c['diagnosis_available']: + try: + session.get_by_id('fdp_IRI', d['id'], attributes='id') + except MolgenisRequestError: + missing_iris.append((d['id'], _get_disease_ontology_code(d['id']))) + + for t in c['type']: + try: + session.get_by_id('fdp_IRI', t['id'], attributes='id') + except MolgenisRequestError: + ontology_code = _get_collection_type_ontology_code(t['id']) + if ontology_code is not None: + missing_iris.append((t['id'], ontology_code)) + + return { + 'fdp_Biobank': { + 'IRI': f'{DIRECTORY_URL}/api/fdp/fdp_Biobank/{biobank_data["id"]}', + 'catalog': 'bbmri-directory', # TODO: get it dynamically + 'identifier': biobank_data['id'], + 'title': biobank_data['name'], + 'acronym': biobank_data['acronym'] if 'acronym' in biobank_data else None, + 'description': biobank_data['description'] if 'description' in biobank_data else None, + 'publisher': f'{biobank_data["id"]}-pub', + 'landingPage': f'{DIRECTORY_URL}/#/biobank/{biobank_data["id"]}', + 'contactPoint': f'{biobank_data["id"]}-cp' if 'contact' in biobank_data else None, + 'country': _convert_country(biobank_data['country']['id']) + }, + 'fdp_Publisher': { + 'identifier': f'{biobank_data["id"]}-pub', + 'label': biobank_data['juridical_person'] + }, + 'fdp_ContactPointIndividual': { + 'identifier': f'{biobank_data["id"]}-cp', + 'email': f'mailto:{biobank_data["contact"]["email"]}' if 'contact' in biobank_data else '', + 'address': '', + 'telephone': '', + }, + 'fdp_Collection': [{ + 'identifier': c['id'], + 'title': c['name'], + 'biobank': biobank_data["id"], + 'description': c['description'] if 'description' in c else None, + 'type': [t['id'] for t in c['type'] if _get_collection_type_ontology_code(t['id']) is not None], + 'theme': [d['id'] for d in c['diagnosis_available']] + } for c in biobank_data['collections']], + 'fdp_IRI': missing_iris + } + + +def sync_biobanks(session, **kwargs): + missing_biobanks = _get_missing_biobanks(session, attributes=BIOBANKS_ATTRIBUTES, + expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) + biobanks = [] + publishers = [] + contacts = [] + collections = [] + iris = set() + for b in missing_biobanks: + records = get_records_to_add(b, session) + biobanks.append(records['fdp_Biobank']) + publishers.append(records['fdp_Publisher']) + contacts.append(records['fdp_ContactPointIndividual']) + collections.extend(records['fdp_Collection']) + iris.update(records['fdp_IRI']) + + _add_new_records(session, 'fdp_Publisher', publishers) + _add_new_records(session, 'fdp_ContactPointIndividual', contacts) + _add_new_records(session, 'fdp_IRI', [{'id': i[0], 'IRI': i[1]} for i in iris]) + _add_new_records(session, 'fdp_Biobank', biobanks) + _add_new_records(session, 'fdp_Collection', collections) + +s = client.Session(MOLGENIS_URL) +s.login('admin', 'admin') + +sync_biobanks(s, num=1) From b2396a9a0a8274dcd387ea6ca5a31bfa41eff111 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Fri, 17 Mar 2023 16:04:39 +0100 Subject: [PATCH 02/15] feat: adds DataService support for Collection to potentially describe a record service endpoint --- sync_directory_with_fdp.py | 50 ++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 7193a7a..334e79d 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -9,6 +9,7 @@ BBMRI_BIOBANK_ENTITY = 'eu_bbmri_eric_biobanks' BBMRI_COLLECTION_ENTITY = 'eu_bbmri_eric_collections' +BBMRI_DATA_SERVICE_ENTITY = 'eu_bbmri_eric_record_service' FDP_BIOBANK_ENTITY = 'fdp_Biobank' FDP_COLLECTION_ENTITY = 'fdp_Collection' @@ -18,10 +19,10 @@ ICD_10_ONTOLOGY_PREFIX = 'http://purl.bioontology.org/ontology/ICD10/' ICD_10_DIRECTORY_PREFIX = 'urn:miriam:icd:' -BIOBANKS_ATTRIBUTES = 'id,name,acronym,description,country,juridical_person,collections,contact' -BIOBANKS_EXPAND_ATTRIBUTES = 'country,juridical_person,collections,contact' -COLLECTIONS_ATTRIBUTES = 'id,name,description,biobank,diagnosis_available,country,parent_collection' -COLLECTIONS_EXPAND_ATTRIBUTES = 'biobank,diagnosis_available,country' +COLLECTIONS_ATTRIBUTES = '*,record_service(*)' +BIOBANKS_ATTRIBUTES = f'id,name,acronym,description,country,juridical_person,contact,collections' +BIOBANKS_EXPAND_ATTRIBUTES = f'country,juridical_person,contact,collections' +COLLECTIONS_EXPAND_ATTRIBUTES = 'biobank,diagnosis_available,country,record_service' COLLECTION_TYPES_ONTOLOGIES = { 'BIRTH_COHORT': 'http://purl.obolibrary.org/obo/OBI_0002614', @@ -45,7 +46,7 @@ def _get_missing_biobanks(session, **kwargs): print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) - source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) + source_records = session.get(BBMRI_BIOBANK_ENTITY, q='id==bbmri-eric:ID:RD_ES:44001', **kwargs) print("Getting ids already present") dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK_ENTITY, attributes='identifier')] new_records = [sr for sr in source_records if sr['id'] not in dest_records_ids] @@ -81,6 +82,7 @@ def _get_collection_type_ontology_code(collection_type): def get_records_to_add(biobank_data, session): missing_iris = [] + dataServices = [] for c in biobank_data['collections']: for d in c['diagnosis_available']: try: @@ -96,7 +98,16 @@ def get_records_to_add(biobank_data, session): if ontology_code is not None: missing_iris.append((t['id'], ontology_code)) - return { + if 'record_service' in c: + rs_data = session.get_by_id(BBMRI_DATA_SERVICE_ENTITY, c['record_service']['id']) + dataServices.append({ + 'identifier': rs_data['id'], + 'endpointUrl': rs_data['url'], + 'endpointDescription': rs_data['description'] if 'description' in rs_data else None, + 'conformsTo': rs_data['conformsTo'] + }) + + res = { 'fdp_Biobank': { 'IRI': f'{DIRECTORY_URL}/api/fdp/fdp_Biobank/{biobank_data["id"]}', 'catalog': 'bbmri-directory', # TODO: get it dynamically @@ -125,20 +136,27 @@ def get_records_to_add(biobank_data, session): 'biobank': biobank_data["id"], 'description': c['description'] if 'description' in c else None, 'type': [t['id'] for t in c['type'] if _get_collection_type_ontology_code(t['id']) is not None], - 'theme': [d['id'] for d in c['diagnosis_available']] + 'theme': [d['id'] for d in c['diagnosis_available']], + 'service': c['record_service']['id'] if 'record_service' in c else None } for c in biobank_data['collections']], - 'fdp_IRI': missing_iris + 'fdp_IRI': missing_iris, } + if len(dataServices) > 0: + res.update({ + 'fdp_DataService': dataServices + }) + return res def sync_biobanks(session, **kwargs): - missing_biobanks = _get_missing_biobanks(session, attributes=BIOBANKS_ATTRIBUTES, - expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) + missing_biobanks = _get_missing_biobanks(session, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, + **kwargs) biobanks = [] publishers = [] contacts = [] collections = [] iris = set() + data_services = [] for b in missing_biobanks: records = get_records_to_add(b, session) biobanks.append(records['fdp_Biobank']) @@ -147,12 +165,24 @@ def sync_biobanks(session, **kwargs): collections.extend(records['fdp_Collection']) iris.update(records['fdp_IRI']) + if 'fdp_DataService' in records: + data_services.extend(records['fdp_DataService']) + + print(data_services) + session.delete_list('fdp_DataService', [d['identifier'] for d in data_services]) + session.delete_list('fdp_Collection', [c['identifier'] for c in collections]) + session.delete_list('fdp_Biobank', [b['identifier'] for b in biobanks]) + session.delete_list('fdp_Publisher', [p['identifier'] for p in publishers]) + session.delete_list('fdp_ContactPointIndividual', [c['identifier'] for c in contacts]) + _add_new_records(session, 'fdp_Publisher', publishers) _add_new_records(session, 'fdp_ContactPointIndividual', contacts) _add_new_records(session, 'fdp_IRI', [{'id': i[0], 'IRI': i[1]} for i in iris]) + _add_new_records(session, 'fdp_DataService', data_services) _add_new_records(session, 'fdp_Biobank', biobanks) _add_new_records(session, 'fdp_Collection', collections) + s = client.Session(MOLGENIS_URL) s.login('admin', 'admin') From 6cd2b5dcefad3a539cda5913703b91dd846df2a9 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Mon, 20 Mar 2023 16:54:23 +0100 Subject: [PATCH 03/15] feat: adds option to delete records before adding them --- sync_directory_with_fdp.py | 151 +++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 65 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 334e79d..67252c0 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -1,18 +1,23 @@ +from collections import OrderedDict + from molgenis import client from molgenis.client import MolgenisRequestError -molgenisURL = 'http://localhost:82' -directoryURL = 'http://localhost:82' +molgenis_url = 'http://localhost:82' +directory_url = 'http://localhost:82' -MOLGENIS_URL = f'{molgenisURL}' # URL of the molgenis to query -DIRECTORY_URL = f'{directoryURL}' # URL of the directory +MOLGENIS_URL = f'{molgenis_url}' # URL of the molgenis to query +DIRECTORY_URL = f'{directory_url}' # URL of the directory BBMRI_BIOBANK_ENTITY = 'eu_bbmri_eric_biobanks' -BBMRI_COLLECTION_ENTITY = 'eu_bbmri_eric_collections' BBMRI_DATA_SERVICE_ENTITY = 'eu_bbmri_eric_record_service' -FDP_BIOBANK_ENTITY = 'fdp_Biobank' -FDP_COLLECTION_ENTITY = 'fdp_Collection' +FDP_BIOBANK = 'fdp_Biobank' +FDP_COLLECTION = 'fdp_Collection' +FDP_PUBLISHER = 'fdp_Publisher' +FDP_CONTACT = 'fdp_ContactPointIndividual' +FDP_DATA_SERVICE = 'fdp_DataService' +FDP_IRI = 'fdp_IRI' ORPHA_ONTOLOGY_PREFIX = 'http://www.orpha.net/ORDO/Orphanet_' ORPHA_DIRECTORY_PREFIX = 'ORPHA:' @@ -44,17 +49,21 @@ } -def _get_missing_biobanks(session, **kwargs): +def get_missing_biobanks(session, reset, **kwargs): print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) - source_records = session.get(BBMRI_BIOBANK_ENTITY, q='id==bbmri-eric:ID:RD_ES:44001', **kwargs) + source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) + # if reset is True the missing biobanks are all + if reset: + return source_records + print("Getting ids already present") - dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK_ENTITY, attributes='identifier')] + dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK, attributes='identifier')] new_records = [sr for sr in source_records if sr['id'] not in dest_records_ids] print("Found {} new records to insert".format(len(new_records))) - return source_records + return new_records -def _add_new_records(session, entity, records): +def create_records(session, entity, records): created_records = [] for i in range(0, len(records), 1000): try: @@ -65,42 +74,54 @@ def _add_new_records(session, entity, records): print("Added {} record(s) of type {}".format(len(created_records), entity)) -def _convert_country(country): +def delete_records(session, entity, records): + removed_records = [] + for i in range(0, len(records), 1000): + try: + removed_records.extend( + session.delete_list(entity, [record['identifier'] for record in records[i:i + 1000]])) + except MolgenisRequestError as ex: + print("Error removing records") + print(ex) + print(f"Removed {len(removed_records)} of type {entity}") + + +def get_country(country): return 'GB' if country == 'UK' else country -def _get_disease_ontology_code(disease_code): +def get_disease_ontology_code(disease_code): if ORPHA_DIRECTORY_PREFIX in disease_code: - return disease_code.replace(ORPHA_ONTOLOGY_PREFIX, ORPHA_ONTOLOGY_PREFIX) + return disease_code.replace(ORPHA_DIRECTORY_PREFIX, ORPHA_ONTOLOGY_PREFIX) if ICD_10_DIRECTORY_PREFIX in disease_code: return disease_code.replace(ICD_10_DIRECTORY_PREFIX, ICD_10_ONTOLOGY_PREFIX) -def _get_collection_type_ontology_code(collection_type): +def get_collection_type_ontology_code(collection_type): return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) def get_records_to_add(biobank_data, session): missing_iris = [] - dataServices = [] + data_services = [] for c in biobank_data['collections']: for d in c['diagnosis_available']: try: session.get_by_id('fdp_IRI', d['id'], attributes='id') except MolgenisRequestError: - missing_iris.append((d['id'], _get_disease_ontology_code(d['id']))) + missing_iris.append((d['id'], get_disease_ontology_code(d['id']))) for t in c['type']: try: session.get_by_id('fdp_IRI', t['id'], attributes='id') except MolgenisRequestError: - ontology_code = _get_collection_type_ontology_code(t['id']) + ontology_code = get_collection_type_ontology_code(t['id']) if ontology_code is not None: missing_iris.append((t['id'], ontology_code)) if 'record_service' in c: rs_data = session.get_by_id(BBMRI_DATA_SERVICE_ENTITY, c['record_service']['id']) - dataServices.append({ + data_services.append({ 'identifier': rs_data['id'], 'endpointUrl': rs_data['url'], 'endpointDescription': rs_data['description'] if 'description' in rs_data else None, @@ -108,82 +129,82 @@ def get_records_to_add(biobank_data, session): }) res = { - 'fdp_Biobank': { + FDP_BIOBANK: { + 'identifier': biobank_data['id'], 'IRI': f'{DIRECTORY_URL}/api/fdp/fdp_Biobank/{biobank_data["id"]}', 'catalog': 'bbmri-directory', # TODO: get it dynamically - 'identifier': biobank_data['id'], 'title': biobank_data['name'], 'acronym': biobank_data['acronym'] if 'acronym' in biobank_data else None, 'description': biobank_data['description'] if 'description' in biobank_data else None, 'publisher': f'{biobank_data["id"]}-pub', 'landingPage': f'{DIRECTORY_URL}/#/biobank/{biobank_data["id"]}', 'contactPoint': f'{biobank_data["id"]}-cp' if 'contact' in biobank_data else None, - 'country': _convert_country(biobank_data['country']['id']) + 'country': get_country(biobank_data['country']['id']) }, - 'fdp_Publisher': { + FDP_PUBLISHER: { 'identifier': f'{biobank_data["id"]}-pub', 'label': biobank_data['juridical_person'] }, - 'fdp_ContactPointIndividual': { + FDP_CONTACT: { 'identifier': f'{biobank_data["id"]}-cp', 'email': f'mailto:{biobank_data["contact"]["email"]}' if 'contact' in biobank_data else '', 'address': '', 'telephone': '', }, - 'fdp_Collection': [{ + FDP_COLLECTION: [{ 'identifier': c['id'], 'title': c['name'], 'biobank': biobank_data["id"], 'description': c['description'] if 'description' in c else None, - 'type': [t['id'] for t in c['type'] if _get_collection_type_ontology_code(t['id']) is not None], + 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], 'theme': [d['id'] for d in c['diagnosis_available']], 'service': c['record_service']['id'] if 'record_service' in c else None } for c in biobank_data['collections']], - 'fdp_IRI': missing_iris, + FDP_IRI: missing_iris, + FDP_DATA_SERVICE: data_services } - if len(dataServices) > 0: - res.update({ - 'fdp_DataService': dataServices - }) return res -def sync_biobanks(session, **kwargs): - missing_biobanks = _get_missing_biobanks(session, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, - **kwargs) - biobanks = [] - publishers = [] - contacts = [] - collections = [] - iris = set() - data_services = [] +def sync(session, reset, **kwargs): + missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, + expand=BIOBANKS_EXPAND_ATTRIBUTES, + **kwargs) + + records = OrderedDict({ + FDP_PUBLISHER: [], + FDP_CONTACT: [], + FDP_IRI: set(), + FDP_DATA_SERVICE: [], + FDP_BIOBANK: [], + FDP_COLLECTION: [] + }) for b in missing_biobanks: - records = get_records_to_add(b, session) - biobanks.append(records['fdp_Biobank']) - publishers.append(records['fdp_Publisher']) - contacts.append(records['fdp_ContactPointIndividual']) - collections.extend(records['fdp_Collection']) - iris.update(records['fdp_IRI']) - - if 'fdp_DataService' in records: - data_services.extend(records['fdp_DataService']) - - print(data_services) - session.delete_list('fdp_DataService', [d['identifier'] for d in data_services]) - session.delete_list('fdp_Collection', [c['identifier'] for c in collections]) - session.delete_list('fdp_Biobank', [b['identifier'] for b in biobanks]) - session.delete_list('fdp_Publisher', [p['identifier'] for p in publishers]) - session.delete_list('fdp_ContactPointIndividual', [c['identifier'] for c in contacts]) - - _add_new_records(session, 'fdp_Publisher', publishers) - _add_new_records(session, 'fdp_ContactPointIndividual', contacts) - _add_new_records(session, 'fdp_IRI', [{'id': i[0], 'IRI': i[1]} for i in iris]) - _add_new_records(session, 'fdp_DataService', data_services) - _add_new_records(session, 'fdp_Biobank', biobanks) - _add_new_records(session, 'fdp_Collection', collections) + new_records = get_records_to_add(b, session) + for k, v in new_records.items(): + if type(records[k]) == list: + if type(new_records[k]) == list: + records[k].extend(new_records[k]) + else: + records[k].append(new_records[k]) + else: + records[k].update(new_records[k]) + + if reset: + for k, v in reversed(records.items()): + if k != 'fdp_IRI' and len(v) > 0: + delete_records(session, k, v) + + for k, v in records.items(): + if len(v) > 0: + if k == FDP_IRI: + create_records(session, k, [{'id': i[0], 'IRI': i[1]} for i in v]) + else: + create_records(session, k, v) s = client.Session(MOLGENIS_URL) s.login('admin', 'admin') -sync_biobanks(s, num=1) +reset = True +sync(s, reset) From 7a0c97498729f2d8c6f8609e4946ecbbb09b9f89 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Tue, 21 Mar 2023 17:16:28 +0100 Subject: [PATCH 04/15] feat: added parameters --- sync_directory_with_fdp.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 67252c0..8e6a1e6 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -1,14 +1,9 @@ +import logging from collections import OrderedDict from molgenis import client from molgenis.client import MolgenisRequestError -molgenis_url = 'http://localhost:82' -directory_url = 'http://localhost:82' - -MOLGENIS_URL = f'{molgenis_url}' # URL of the molgenis to query -DIRECTORY_URL = f'{directory_url}' # URL of the directory - BBMRI_BIOBANK_ENTITY = 'eu_bbmri_eric_biobanks' BBMRI_DATA_SERVICE_ENTITY = 'eu_bbmri_eric_record_service' @@ -24,10 +19,8 @@ ICD_10_ONTOLOGY_PREFIX = 'http://purl.bioontology.org/ontology/ICD10/' ICD_10_DIRECTORY_PREFIX = 'urn:miriam:icd:' -COLLECTIONS_ATTRIBUTES = '*,record_service(*)' BIOBANKS_ATTRIBUTES = f'id,name,acronym,description,country,juridical_person,contact,collections' BIOBANKS_EXPAND_ATTRIBUTES = f'country,juridical_person,contact,collections' -COLLECTIONS_EXPAND_ATTRIBUTES = 'biobank,diagnosis_available,country,record_service' COLLECTION_TYPES_ONTOLOGIES = { 'BIRTH_COHORT': 'http://purl.obolibrary.org/obo/OBI_0002614', @@ -101,7 +94,7 @@ def get_collection_type_ontology_code(collection_type): return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) -def get_records_to_add(biobank_data, session): +def get_records_to_add(biobank_data, session, directory_prefix): missing_iris = [] data_services = [] for c in biobank_data['collections']: @@ -131,13 +124,13 @@ def get_records_to_add(biobank_data, session): res = { FDP_BIOBANK: { 'identifier': biobank_data['id'], - 'IRI': f'{DIRECTORY_URL}/api/fdp/fdp_Biobank/{biobank_data["id"]}', + 'IRI': f'{directory_prefix}/api/fdp/fdp_Biobank/{biobank_data["id"]}', 'catalog': 'bbmri-directory', # TODO: get it dynamically 'title': biobank_data['name'], 'acronym': biobank_data['acronym'] if 'acronym' in biobank_data else None, 'description': biobank_data['description'] if 'description' in biobank_data else None, 'publisher': f'{biobank_data["id"]}-pub', - 'landingPage': f'{DIRECTORY_URL}/#/biobank/{biobank_data["id"]}', + 'landingPage': f'{directory_prefix}/#/biobank/{biobank_data["id"]}', 'contactPoint': f'{biobank_data["id"]}-cp' if 'contact' in biobank_data else None, 'country': get_country(biobank_data['country']['id']) }, @@ -166,7 +159,7 @@ def get_records_to_add(biobank_data, session): return res -def sync(session, reset, **kwargs): +def sync(session, reset, directory_prefix, **kwargs): missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) @@ -180,7 +173,7 @@ def sync(session, reset, **kwargs): FDP_COLLECTION: [] }) for b in missing_biobanks: - new_records = get_records_to_add(b, session) + new_records = get_records_to_add(b, session, directory_prefix) for k, v in new_records.items(): if type(records[k]) == list: if type(new_records[k]) == list: @@ -203,8 +196,18 @@ def sync(session, reset, **kwargs): create_records(session, k, v) -s = client.Session(MOLGENIS_URL) -s.login('admin', 'admin') +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--molgenis-url', '-U') + parser.add_argument('--molgenis-user', '-u') + parser.add_argument('--molgenis-password', '-p') + parser.add_argument('--directory-prefix', '-d', help='The main prefix of the url to be used to generate IRIs', default='https://directory.bbmri-eric.eu/') + parser.add_argument('--reset', '-r', dest='reset', action='store_true') + args = parser.parse_args() + + s = client.Session(args.molgenis_url) + s.login(args.molgenis_user, args.molgenis_password) + + sync(s, args.directory_prefix, args.reset) -reset = True -sync(s, reset) From adeb53c4bcf769cbda342586ff141851a6340590 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 22 Mar 2023 12:32:00 +0100 Subject: [PATCH 05/15] fixes order of parameters --- sync_directory_with_fdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 8e6a1e6..6da3698 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -159,7 +159,7 @@ def get_records_to_add(biobank_data, session, directory_prefix): return res -def sync(session, reset, directory_prefix, **kwargs): +def sync(session, directory_prefix, reset, **kwargs): missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) From bc7abbbc8ae50916cf6c94c483c61560251e3051 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Tue, 4 Apr 2023 17:14:38 +0200 Subject: [PATCH 06/15] feat: adds contact for collections --- sync_directory_with_fdp.py | 105 +++++++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 33 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 6da3698..74e140e 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -5,11 +5,12 @@ from molgenis.client import MolgenisRequestError BBMRI_BIOBANK_ENTITY = 'eu_bbmri_eric_biobanks' +BBMRI_CONTACT_ENTITY = 'eu_bbmri_eric_persons' BBMRI_DATA_SERVICE_ENTITY = 'eu_bbmri_eric_record_service' FDP_BIOBANK = 'fdp_Biobank' FDP_COLLECTION = 'fdp_Collection' -FDP_PUBLISHER = 'fdp_Publisher' +FDP_BIOBANK_ORGANIZATION = 'fdp_BiobankOrganization' FDP_CONTACT = 'fdp_ContactPointIndividual' FDP_DATA_SERVICE = 'fdp_DataService' FDP_IRI = 'fdp_IRI' @@ -67,12 +68,12 @@ def create_records(session, entity, records): print("Added {} record(s) of type {}".format(len(created_records), entity)) -def delete_records(session, entity, records): +def delete_records(session, entity, records_ids): removed_records = [] - for i in range(0, len(records), 1000): + for i in range(0, len(records_ids), 1000): try: removed_records.extend( - session.delete_list(entity, [record['identifier'] for record in records[i:i + 1000]])) + session.delete_list(entity, [record_id for record_id in records_ids[i:i + 1000]])) except MolgenisRequestError as ex: print("Error removing records") print(ex) @@ -94,17 +95,38 @@ def get_collection_type_ontology_code(collection_type): return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) +def get_contact_record(session, contact_id): + contact = session.get_by_id(BBMRI_CONTACT_ENTITY, contact_id) + return ( + f'{contact["id"]}', + f'mailto:{contact["email"]}', + f'tel:{contact["phone"].replace(" ", "")}' if 'phone' in contact else None, + contact['first_name'] if 'first_name' in contact else None, + contact['last_name'] if 'last_name' in contact else None, + contact['title_before_name'] if 'title_before_name' in contact else None, + contact['title_after_name'] if 'title_after_name' in contact else None + ) + + def get_records_to_add(biobank_data, session, directory_prefix): missing_iris = [] data_services = [] - for c in biobank_data['collections']: - for d in c['diagnosis_available']: + contacts = [] + print('processing biobank', biobank_data['id']) + + print("getting biobank's contact data") + if 'contact' in biobank_data: + contacts.append(get_contact_record(session, biobank_data['contact']['id'])) + + for collection in biobank_data['collections']: + print("processing collection", collection['id']) + for d in collection['diagnosis_available']: try: session.get_by_id('fdp_IRI', d['id'], attributes='id') except MolgenisRequestError: missing_iris.append((d['id'], get_disease_ontology_code(d['id']))) - for t in c['type']: + for t in collection['type']: try: session.get_by_id('fdp_IRI', t['id'], attributes='id') except MolgenisRequestError: @@ -112,45 +134,45 @@ def get_records_to_add(biobank_data, session, directory_prefix): if ontology_code is not None: missing_iris.append((t['id'], ontology_code)) - if 'record_service' in c: - rs_data = session.get_by_id(BBMRI_DATA_SERVICE_ENTITY, c['record_service']['id']) + if 'contact' in collection: + contacts.append(get_contact_record(session, collection['contact']['id'])) + + if 'record_service' in collection: + rs = session.get_by_id(BBMRI_DATA_SERVICE_ENTITY, collection['record_service']['id']) data_services.append({ - 'identifier': rs_data['id'], - 'endpointUrl': rs_data['url'], - 'endpointDescription': rs_data['description'] if 'description' in rs_data else None, - 'conformsTo': rs_data['conformsTo'] + 'identifier': rs['id'], + 'endpointUrl': rs['url'], + 'endpointDescription': rs['description'] if 'description' in rs else None, + 'conformsTo': rs['conformsTo'] }) res = { FDP_BIOBANK: { 'identifier': biobank_data['id'], - 'IRI': f'{directory_prefix}/api/fdp/fdp_Biobank/{biobank_data["id"]}', - 'catalog': 'bbmri-directory', # TODO: get it dynamically + 'IRI': f'{directory_prefix}/api/fdp/fdp_Biobank/{biobank_data["id"]}', # TODO: use the PID 'title': biobank_data['name'], 'acronym': biobank_data['acronym'] if 'acronym' in biobank_data else None, 'description': biobank_data['description'] if 'description' in biobank_data else None, 'publisher': f'{biobank_data["id"]}-pub', 'landingPage': f'{directory_prefix}/#/biobank/{biobank_data["id"]}', - 'contactPoint': f'{biobank_data["id"]}-cp' if 'contact' in biobank_data else None, + 'contactPoint': f'{biobank_data["contact"]["id"]}' if 'contact' in biobank_data else None, 'country': get_country(biobank_data['country']['id']) }, - FDP_PUBLISHER: { + FDP_BIOBANK_ORGANIZATION: { 'identifier': f'{biobank_data["id"]}-pub', - 'label': biobank_data['juridical_person'] - }, - FDP_CONTACT: { - 'identifier': f'{biobank_data["id"]}-cp', - 'email': f'mailto:{biobank_data["contact"]["email"]}' if 'contact' in biobank_data else '', - 'address': '', - 'telephone': '', + 'name': biobank_data['juridical_person'] }, + FDP_CONTACT: contacts, FDP_COLLECTION: [{ 'identifier': c['id'], + 'IRI': f'{directory_prefix}/api/fdp/fdp_Collection/{c["id"]}', + 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, + 'catalog': 'bbmri-directory', # TODO: get it dynamically 'title': c['name'], 'biobank': biobank_data["id"], 'description': c['description'] if 'description' in c else None, - 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], - 'theme': [d['id'] for d in c['diagnosis_available']], + 'type': [t['id'] for t in collection['type'] if get_collection_type_ontology_code(t['id']) is not None], + 'theme': [d['id'] for d in collection['diagnosis_available']], 'service': c['record_service']['id'] if 'record_service' in c else None } for c in biobank_data['collections']], FDP_IRI: missing_iris, @@ -165,8 +187,8 @@ def sync(session, directory_prefix, reset, **kwargs): **kwargs) records = OrderedDict({ - FDP_PUBLISHER: [], - FDP_CONTACT: [], + FDP_BIOBANK_ORGANIZATION: [], + FDP_CONTACT: set(), FDP_IRI: set(), FDP_DATA_SERVICE: [], FDP_BIOBANK: [], @@ -185,29 +207,46 @@ def sync(session, directory_prefix, reset, **kwargs): if reset: for k, v in reversed(records.items()): - if k != 'fdp_IRI' and len(v) > 0: - delete_records(session, k, v) + if k not in (FDP_IRI, FDP_CONTACT) and len(v) > 0: + delete_records(session, k, [i['identifier'] for i in v]) + if k == FDP_CONTACT and len(v) > 0: + delete_records(session, k, [i[0] for i in v]) for k, v in records.items(): if len(v) > 0: if k == FDP_IRI: create_records(session, k, [{'id': i[0], 'IRI': i[1]} for i in v]) + elif k == FDP_CONTACT: + create_records(session, k, [{ + 'identifier': i[0], + 'email': i[1], + 'telephone': i[2], + 'given_name': i[3], + 'family_name': i[4], + 'honorific_prefix': i[5], + 'honorific_suffix': i[6] + } for i in v]) else: create_records(session, k, v) if __name__ == '__main__': import argparse + parser = argparse.ArgumentParser() parser.add_argument('--molgenis-url', '-U') parser.add_argument('--molgenis-user', '-u') parser.add_argument('--molgenis-password', '-p') - parser.add_argument('--directory-prefix', '-d', help='The main prefix of the url to be used to generate IRIs', default='https://directory.bbmri-eric.eu/') + parser.add_argument('--directory-prefix', '-d', + help='The main prefix of the url to be used to generate IRIs', + default='https://directory.bbmri-eric.eu/') parser.add_argument('--reset', '-r', dest='reset', action='store_true') args = parser.parse_args() + directory_prefix = args.directory_prefix.replace('/', '', + -1) # just in case the input put the last /, it removes it + s = client.Session(args.molgenis_url) s.login(args.molgenis_user, args.molgenis_password) - sync(s, args.directory_prefix, args.reset) - + sync(s, directory_prefix, args.reset) From d11c52233b8168f8a337ea552aeb952cc02b136d Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 5 Apr 2023 16:35:49 +0200 Subject: [PATCH 07/15] fixes collection type and theme --- sync_directory_with_fdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 74e140e..71ea631 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -171,8 +171,8 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'title': c['name'], 'biobank': biobank_data["id"], 'description': c['description'] if 'description' in c else None, - 'type': [t['id'] for t in collection['type'] if get_collection_type_ontology_code(t['id']) is not None], - 'theme': [d['id'] for d in collection['diagnosis_available']], + 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], + 'theme': [d['id'] for d in c['diagnosis_available']], 'service': c['record_service']['id'] if 'record_service' in c else None } for c in biobank_data['collections']], FDP_IRI: missing_iris, From bea5998bb9a466b1da7390c259d4d88d5e184687 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 21 Jun 2023 11:59:07 +0200 Subject: [PATCH 08/15] fix(fdp): fixes to not override previous addedd collections to the catalog --- sync_directory_with_fdp.py | 88 ++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 71ea631..75a6d2e 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -1,4 +1,11 @@ -import logging +""" +Script to synchronize the BBMRI Directory data of biobanks and collections with their FAIR Data Point/DCAT representation. +It gets the data of biobanks and collections from a Molgenis instance of the Directory, converts it and upload the +converted one into another Molgenis instance. The Molgenis instance can be the same or different. The destination +instance needs to be deployed with the FDP instance already deployed. Also, the FDP must have the fdp_Catalog already +created. +""" + from collections import OrderedDict from molgenis import client @@ -8,6 +15,7 @@ BBMRI_CONTACT_ENTITY = 'eu_bbmri_eric_persons' BBMRI_DATA_SERVICE_ENTITY = 'eu_bbmri_eric_record_service' +FDP_CATALOG = 'fdp_Catalog' FDP_BIOBANK = 'fdp_Biobank' FDP_COLLECTION = 'fdp_Collection' FDP_BIOBANK_ORGANIZATION = 'fdp_BiobankOrganization' @@ -42,7 +50,10 @@ 'TWIN_STUDY': 'http://purl.obolibrary.org/obo/OBIB_0000700' } - +""" +It gets the data from the source Molgenis and filters the one that are already it the destination Molgenis. +If reset flag is True it doesn't filter the biobanks to add +""" def get_missing_biobanks(session, reset, **kwargs): print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) @@ -57,6 +68,13 @@ def get_missing_biobanks(session, reset, **kwargs): return new_records +""" +Send converted data to the destination + +:params session: Molgenis session to use +:params entity: the name of Molgenis entity type of the records to add +:params records: lists of dictionary with data of the FDP entity to add +""" def create_records(session, entity, records): created_records = [] for i in range(0, len(records), 1000): @@ -68,6 +86,13 @@ def create_records(session, entity, records): print("Added {} record(s) of type {}".format(len(created_records), entity)) +""" +Delete records in the destination Molgenis. Used when reset flag is True + +:params session: Molgenis session to use +:params entity: the name of Molgenis entity type of the records to delete +:params records_ids: the ids of the records of type :entity: to delete +""" def delete_records(session, entity, records_ids): removed_records = [] for i in range(0, len(records_ids), 1000): @@ -80,10 +105,16 @@ def delete_records(session, entity, records_ids): print(f"Removed {len(removed_records)} of type {entity}") +""" +Returns the country code correspondent to the country in input +""" def get_country(country): return 'GB' if country == 'UK' else country +""" +Returns the IRI of the disease code to use in the FDP +""" def get_disease_ontology_code(disease_code): if ORPHA_DIRECTORY_PREFIX in disease_code: return disease_code.replace(ORPHA_DIRECTORY_PREFIX, ORPHA_ONTOLOGY_PREFIX) @@ -91,10 +122,22 @@ def get_disease_ontology_code(disease_code): return disease_code.replace(ICD_10_DIRECTORY_PREFIX, ICD_10_ONTOLOGY_PREFIX) +""" +Return the IRI of the collection type + +:params collection_type: the collection type code in the directory +""" def get_collection_type_ontology_code(collection_type): return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) +""" +Gets the contact data of the contact with id :contact_id: from the source Molgenis and +returns the FDP corresponding FDP record + +:params session: Molgenis session to use +:params contact_id: the id of the contact in the Directory +""" def get_contact_record(session, contact_id): contact = session.get_by_id(BBMRI_CONTACT_ENTITY, contact_id) return ( @@ -108,6 +151,16 @@ def get_contact_record(session, contact_id): ) +""" +It generates the FDP records related to a biobank from the representation of the biobank in the Directory. +It returns a dictionary with data for entities: +fdp_Biobank: data of the Biobank as organization +fdp_BiobankOrganization: data of the Jurystic Person that manage the Biobank +fdp_Collection: list of collections of the biobank +fdp_Contacts: contact of the biobank and the collections to add +fdp_IRI: codes of diseases and collection types (if not already present in the destination) +fdp_DataService: data related to the Data service, if present +""" def get_records_to_add(biobank_data, session, directory_prefix): missing_iris = [] data_services = [] @@ -121,12 +174,14 @@ def get_records_to_add(biobank_data, session, directory_prefix): for collection in biobank_data['collections']: print("processing collection", collection['id']) for d in collection['diagnosis_available']: + # it checks if the diagnosis is already present in the destination, if not it adds it to the ones to insert try: session.get_by_id('fdp_IRI', d['id'], attributes='id') except MolgenisRequestError: missing_iris.append((d['id'], get_disease_ontology_code(d['id']))) for t in collection['type']: + # same as diagnosis for collection type try: session.get_by_id('fdp_IRI', t['id'], attributes='id') except MolgenisRequestError: @@ -135,9 +190,11 @@ def get_records_to_add(biobank_data, session, directory_prefix): missing_iris.append((t['id'], ontology_code)) if 'contact' in collection: + # it adds data about the contacts contacts.append(get_contact_record(session, collection['contact']['id'])) if 'record_service' in collection: + # if the collection has a record service it generates the corresponding DataService rs = session.get_by_id(BBMRI_DATA_SERVICE_ENTITY, collection['record_service']['id']) data_services.append({ 'identifier': rs['id'], @@ -167,7 +224,7 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'identifier': c['id'], 'IRI': f'{directory_prefix}/api/fdp/fdp_Collection/{c["id"]}', 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, - 'catalog': 'bbmri-directory', # TODO: get it dynamically + # 'catalog': 'bbmri-directory', # it 'title': c['name'], 'biobank': biobank_data["id"], 'description': c['description'] if 'description' in c else None, @@ -180,12 +237,21 @@ def get_records_to_add(biobank_data, session, directory_prefix): } return res +def update_catalog(session, new_collections): + prev_collections = session.get_by_id(FDP_CATALOG, 'bbmri-directory', attributes='collection') + + collections = set([c['identifier'] for c in prev_collections['collection'] + new_collections]) + + session.update_one(FDP_CATALOG, 'bbmri-directory', 'collection', list(collections)) +""" +Main function that gets the data of the missing biobanks, convert it and upload the new records. +""" def sync(session, directory_prefix, reset, **kwargs): + # it gets the missing biobanks missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, - expand=BIOBANKS_EXPAND_ATTRIBUTES, - **kwargs) - + expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) + # it gathers the data for all the biobanks records = OrderedDict({ FDP_BIOBANK_ORGANIZATION: [], FDP_CONTACT: set(), @@ -195,7 +261,9 @@ def sync(session, directory_prefix, reset, **kwargs): FDP_COLLECTION: [] }) for b in missing_biobanks: + # it gets the records to add for a biobank new_records = get_records_to_add(b, session, directory_prefix) + # it updates the overall records with the ones from of the processed biobank for k, v in new_records.items(): if type(records[k]) == list: if type(new_records[k]) == list: @@ -205,13 +273,16 @@ def sync(session, directory_prefix, reset, **kwargs): else: records[k].update(new_records[k]) + # if the reset flag is True, it deletes the old records if reset: + session.update_one('fdp_Catalog', 'bbmri-directory', 'collection', []) for k, v in reversed(records.items()): if k not in (FDP_IRI, FDP_CONTACT) and len(v) > 0: delete_records(session, k, [i['identifier'] for i in v]) if k == FDP_CONTACT and len(v) > 0: delete_records(session, k, [i[0] for i in v]) + # it creates all the for k, v in records.items(): if len(v) > 0: if k == FDP_IRI: @@ -229,6 +300,8 @@ def sync(session, directory_prefix, reset, **kwargs): else: create_records(session, k, v) + update_catalog(session, records[FDP_COLLECTION]) + if __name__ == '__main__': import argparse @@ -238,8 +311,7 @@ def sync(session, directory_prefix, reset, **kwargs): parser.add_argument('--molgenis-user', '-u') parser.add_argument('--molgenis-password', '-p') parser.add_argument('--directory-prefix', '-d', - help='The main prefix of the url to be used to generate IRIs', - default='https://directory.bbmri-eric.eu/') + help='The main prefix of the url to be used to generate IRIs') parser.add_argument('--reset', '-r', dest='reset', action='store_true') args = parser.parse_args() From d8791f29d2b579b74a2a581fae442aaefcf69daa Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 21 Jun 2023 12:09:10 +0200 Subject: [PATCH 09/15] docs(fdp): changes description of the fdp_sync script --- sync_directory_with_fdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 75a6d2e..34fed0f 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -2,8 +2,8 @@ Script to synchronize the BBMRI Directory data of biobanks and collections with their FAIR Data Point/DCAT representation. It gets the data of biobanks and collections from a Molgenis instance of the Directory, converts it and upload the converted one into another Molgenis instance. The Molgenis instance can be the same or different. The destination -instance needs to be deployed with the FDP instance already deployed. Also, the FDP must have the fdp_Catalog already -created. +instance needs the FDP EMX model and the "bbmri-directory" FDP_Catalog (i.e., the FDP Catalog with data of the directory) +already deployed """ from collections import OrderedDict From cf076ddc5d17bee571153f224afdcb94738c6230 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 28 Jun 2023 10:37:00 +0200 Subject: [PATCH 10/15] docs: moves docstrings --- sync_directory_with_fdp.py | 100 +++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 34fed0f..ec86dbc 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -50,11 +50,12 @@ 'TWIN_STUDY': 'http://purl.obolibrary.org/obo/OBIB_0000700' } -""" -It gets the data from the source Molgenis and filters the one that are already it the destination Molgenis. -If reset flag is True it doesn't filter the biobanks to add -""" + def get_missing_biobanks(session, reset, **kwargs): + """ + It gets the data from the source Molgenis and filters the one that are already it the destination Molgenis. + If reset flag is True it doesn't filter the biobanks to add + """ print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) # if reset is True the missing biobanks are all @@ -68,14 +69,14 @@ def get_missing_biobanks(session, reset, **kwargs): return new_records -""" -Send converted data to the destination - -:params session: Molgenis session to use -:params entity: the name of Molgenis entity type of the records to add -:params records: lists of dictionary with data of the FDP entity to add -""" def create_records(session, entity, records): + """ + Send converted data to the destination + + :params session: Molgenis session to use + :params entity: the name of Molgenis entity type of the records to add + :params records: lists of dictionary with data of the FDP entity to add + """ created_records = [] for i in range(0, len(records), 1000): try: @@ -86,14 +87,14 @@ def create_records(session, entity, records): print("Added {} record(s) of type {}".format(len(created_records), entity)) -""" -Delete records in the destination Molgenis. Used when reset flag is True - -:params session: Molgenis session to use -:params entity: the name of Molgenis entity type of the records to delete -:params records_ids: the ids of the records of type :entity: to delete -""" def delete_records(session, entity, records_ids): + """ + Delete records in the destination Molgenis. Used when reset flag is True + + :params session: Molgenis session to use + :params entity: the name of Molgenis entity type of the records to delete + :params records_ids: the ids of the records of type :entity: to delete + """ removed_records = [] for i in range(0, len(records_ids), 1000): try: @@ -105,40 +106,40 @@ def delete_records(session, entity, records_ids): print(f"Removed {len(removed_records)} of type {entity}") -""" -Returns the country code correspondent to the country in input -""" def get_country(country): + """ + Returns the country code correspondent to the country in input + """ return 'GB' if country == 'UK' else country -""" -Returns the IRI of the disease code to use in the FDP -""" def get_disease_ontology_code(disease_code): + """ + Returns the IRI of the disease code to use in the FDP + """ if ORPHA_DIRECTORY_PREFIX in disease_code: return disease_code.replace(ORPHA_DIRECTORY_PREFIX, ORPHA_ONTOLOGY_PREFIX) if ICD_10_DIRECTORY_PREFIX in disease_code: return disease_code.replace(ICD_10_DIRECTORY_PREFIX, ICD_10_ONTOLOGY_PREFIX) -""" -Return the IRI of the collection type - -:params collection_type: the collection type code in the directory -""" def get_collection_type_ontology_code(collection_type): - return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) + """ + Return the IRI of the collection type + :params collection_type: the collection type code in the directory + """ + return COLLECTION_TYPES_ONTOLOGIES.get(collection_type, None) -""" -Gets the contact data of the contact with id :contact_id: from the source Molgenis and -returns the FDP corresponding FDP record -:params session: Molgenis session to use -:params contact_id: the id of the contact in the Directory -""" def get_contact_record(session, contact_id): + """ + Gets the contact data of the contact with id :contact_id: from the source Molgenis and + returns the FDP corresponding FDP record + + :params session: Molgenis session to use + :params contact_id: the id of the contact in the Directory + """ contact = session.get_by_id(BBMRI_CONTACT_ENTITY, contact_id) return ( f'{contact["id"]}', @@ -151,17 +152,18 @@ def get_contact_record(session, contact_id): ) -""" -It generates the FDP records related to a biobank from the representation of the biobank in the Directory. -It returns a dictionary with data for entities: -fdp_Biobank: data of the Biobank as organization -fdp_BiobankOrganization: data of the Jurystic Person that manage the Biobank -fdp_Collection: list of collections of the biobank -fdp_Contacts: contact of the biobank and the collections to add -fdp_IRI: codes of diseases and collection types (if not already present in the destination) -fdp_DataService: data related to the Data service, if present -""" def get_records_to_add(biobank_data, session, directory_prefix): + """ + It generates the FDP records related to a biobank from the representation of the biobank in the Directory. + It returns a dictionary with data for entities: + fdp_Biobank: data of the Biobank as organization + fdp_BiobankOrganization: data of the Jurystic Person that manage the Biobank + fdp_Collection: list of collections of the biobank + fdp_Contacts: contact of the biobank and the collections to add + fdp_IRI: codes of diseases and collection types (if not already present in the destination) + fdp_DataService: data related to the Data service, if present + """ + missing_iris = [] data_services = [] contacts = [] @@ -244,10 +246,10 @@ def update_catalog(session, new_collections): session.update_one(FDP_CATALOG, 'bbmri-directory', 'collection', list(collections)) -""" -Main function that gets the data of the missing biobanks, convert it and upload the new records. -""" def sync(session, directory_prefix, reset, **kwargs): + """ + Main function that gets the data of the missing biobanks, convert it and upload the new records. + """ # it gets the missing biobanks missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) From f7f3c6ba640a8bd0eb35cc596fb817eb6e619a64 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Mon, 6 Nov 2023 15:33:39 +0100 Subject: [PATCH 11/15] feat: minor changes to fdp --- sync_directory_with_fdp.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index ec86dbc..a50350b 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -18,7 +18,7 @@ FDP_CATALOG = 'fdp_Catalog' FDP_BIOBANK = 'fdp_Biobank' FDP_COLLECTION = 'fdp_Collection' -FDP_BIOBANK_ORGANIZATION = 'fdp_BiobankOrganization' +FDP_BIOBANK_LEGAL_PERSON = 'fdp_BiobankLegalPerson' FDP_CONTACT = 'fdp_ContactPointIndividual' FDP_DATA_SERVICE = 'fdp_DataService' FDP_IRI = 'fdp_IRI' @@ -58,13 +58,18 @@ def get_missing_biobanks(session, reset, **kwargs): """ print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) + biobanks_with_record_service = [] + for sr in source_records: + for c in sr["collections"]: + if "record_service" in c: + biobanks_with_record_service.append(sr) # if reset is True the missing biobanks are all if reset: - return source_records + return biobanks_with_record_service print("Getting ids already present") dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK, attributes='identifier')] - new_records = [sr for sr in source_records if sr['id'] not in dest_records_ids] + new_records = [sr for sr in biobanks_with_record_service if sr['id'] not in dest_records_ids] print("Found {} new records to insert".format(len(new_records))) return new_records @@ -217,23 +222,25 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'contactPoint': f'{biobank_data["contact"]["id"]}' if 'contact' in biobank_data else None, 'country': get_country(biobank_data['country']['id']) }, - FDP_BIOBANK_ORGANIZATION: { + FDP_BIOBANK_LEGAL_PERSON: { 'identifier': f'{biobank_data["id"]}-pub', 'name': biobank_data['juridical_person'] }, FDP_CONTACT: contacts, FDP_COLLECTION: [{ - 'identifier': c['id'], 'IRI': f'{directory_prefix}/api/fdp/fdp_Collection/{c["id"]}', - 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, - # 'catalog': 'bbmri-directory', # it - 'title': c['name'], + 'identifier': c['id'], 'biobank': biobank_data["id"], + 'title': c['name'], 'description': c['description'] if 'description' in c else None, - 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], 'theme': [d['id'] for d in c['diagnosis_available']], - 'service': c['record_service']['id'] if 'record_service' in c else None - } for c in biobank_data['collections']], + 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], + 'landingPage': f'{directory_prefix}/#/biobank/{c["id"]}', + 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, + 'service': c['record_service']['id'] if 'record_service' in c else None, + 'vpConnection': 'ejprd-vp-discoverable' if 'record_service' in c else None, + 'personalData': 'true' + } for c in biobank_data['collections'] if len(f'{directory_prefix}/#/biobank/{c["id"]}') < 255] , FDP_IRI: missing_iris, FDP_DATA_SERVICE: data_services } @@ -253,9 +260,10 @@ def sync(session, directory_prefix, reset, **kwargs): # it gets the missing biobanks missing_biobanks = get_missing_biobanks(session, reset=reset, attributes=BIOBANKS_ATTRIBUTES, expand=BIOBANKS_EXPAND_ATTRIBUTES, **kwargs) + # it gathers the data for all the biobanks records = OrderedDict({ - FDP_BIOBANK_ORGANIZATION: [], + FDP_BIOBANK_LEGAL_PERSON: [], FDP_CONTACT: set(), FDP_IRI: set(), FDP_DATA_SERVICE: [], @@ -317,9 +325,7 @@ def sync(session, directory_prefix, reset, **kwargs): parser.add_argument('--reset', '-r', dest='reset', action='store_true') args = parser.parse_args() - directory_prefix = args.directory_prefix.replace('/', '', - -1) # just in case the input put the last /, it removes it - + directory_prefix = args.directory_prefix # .replace('/', '', -1) # just in case the input put the last /, it removes it s = client.Session(args.molgenis_url) s.login(args.molgenis_user, args.molgenis_password) From 58e8639dfce4c3676326668abd935f56ee640e36 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Wed, 24 Jan 2024 17:02:49 +0100 Subject: [PATCH 12/15] fix: fixes biobanks duplication --- sync_directory_with_fdp.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index a50350b..7581f21 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -5,9 +5,9 @@ instance needs the FDP EMX model and the "bbmri-directory" FDP_Catalog (i.e., the FDP Catalog with data of the directory) already deployed """ - from collections import OrderedDict +import pprint from molgenis import client from molgenis.client import MolgenisRequestError @@ -60,9 +60,12 @@ def get_missing_biobanks(session, reset, **kwargs): source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) biobanks_with_record_service = [] for sr in source_records: + add_biobank = False for c in sr["collections"]: if "record_service" in c: - biobanks_with_record_service.append(sr) + add_biobank = True + if add_biobank: + biobanks_with_record_service.append(sr) # if reset is True the missing biobanks are all if reset: return biobanks_with_record_service @@ -82,6 +85,12 @@ def create_records(session, entity, records): :params entity: the name of Molgenis entity type of the records to add :params records: lists of dictionary with data of the FDP entity to add """ + # with open(f'fdp_data/{entity}.csv', 'w' ) as csvfile: + # writer = csv.DictWriter(csvfile, fieldnames=records[0].keys(), dialect="excel", quotechar="\"") + # writer.writeheader() + # for r in records: + # writer.writerow(r) + created_records = [] for i in range(0, len(records), 1000): try: @@ -207,13 +216,14 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'identifier': rs['id'], 'endpointUrl': rs['url'], 'endpointDescription': rs['description'] if 'description' in rs else None, - 'conformsTo': rs['conformsTo'] + 'conformsTo': rs['conformsTo'], + 'type': rs['type'] }) res = { FDP_BIOBANK: { 'identifier': biobank_data['id'], - 'IRI': f'{directory_prefix}/api/fdp/fdp_Biobank/{biobank_data["id"]}', # TODO: use the PID + # 'IRI': f'{directory_prefix}/api/fdp/fdp_Biobank/{biobank_data["id"]}', # TODO: use the PID 'title': biobank_data['name'], 'acronym': biobank_data['acronym'] if 'acronym' in biobank_data else None, 'description': biobank_data['description'] if 'description' in biobank_data else None, @@ -240,12 +250,14 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'service': c['record_service']['id'] if 'record_service' in c else None, 'vpConnection': 'ejprd-vp-discoverable' if 'record_service' in c else None, 'personalData': 'true' - } for c in biobank_data['collections'] if len(f'{directory_prefix}/#/biobank/{c["id"]}') < 255] , + } for c in biobank_data['collections'] if + 'record_service' in c and len(f'{directory_prefix}/#/biobank/{c["id"]}') < 255], FDP_IRI: missing_iris, FDP_DATA_SERVICE: data_services } return res + def update_catalog(session, new_collections): prev_collections = session.get_by_id(FDP_CATALOG, 'bbmri-directory', attributes='collection') @@ -253,6 +265,7 @@ def update_catalog(session, new_collections): session.update_one(FDP_CATALOG, 'bbmri-directory', 'collection', list(collections)) + def sync(session, directory_prefix, reset, **kwargs): """ Main function that gets the data of the missing biobanks, convert it and upload the new records. @@ -291,7 +304,7 @@ def sync(session, directory_prefix, reset, **kwargs): delete_records(session, k, [i['identifier'] for i in v]) if k == FDP_CONTACT and len(v) > 0: delete_records(session, k, [i[0] for i in v]) - + pprint.pprint(records) # it creates all the for k, v in records.items(): if len(v) > 0: @@ -325,7 +338,7 @@ def sync(session, directory_prefix, reset, **kwargs): parser.add_argument('--reset', '-r', dest='reset', action='store_true') args = parser.parse_args() - directory_prefix = args.directory_prefix # .replace('/', '', -1) # just in case the input put the last /, it removes it + directory_prefix = args.directory_prefix # .replace('/', '', -1) # just in case the input put the last /, it removes it s = client.Session(args.molgenis_url) s.login(args.molgenis_user, args.molgenis_password) From 5174ad76813e4ac9f2c46996a959f5a8a5ea1fef Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Mon, 9 Sep 2024 15:06:29 +0200 Subject: [PATCH 13/15] fix: adapts to new model --- sync_directory_with_fdp.py | 82 ++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 7581f21..816a9d2 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -8,6 +8,8 @@ from collections import OrderedDict import pprint +from datetime import datetime + from molgenis import client from molgenis.client import MolgenisRequestError @@ -58,21 +60,22 @@ def get_missing_biobanks(session, reset, **kwargs): """ print("Getting source entities from {}".format(BBMRI_BIOBANK_ENTITY)) source_records = session.get(BBMRI_BIOBANK_ENTITY, **kwargs) - biobanks_with_record_service = [] + print("Found {}".format(source_records)) + missing_biobanks = [] for sr in source_records: - add_biobank = False - for c in sr["collections"]: - if "record_service" in c: - add_biobank = True + add_biobank = True + # for c in sr["collections"]: + # if "record_service" in c: + # add_biobank = True if add_biobank: - biobanks_with_record_service.append(sr) + missing_biobanks.append(sr) # if reset is True the missing biobanks are all if reset: - return biobanks_with_record_service + return missing_biobanks print("Getting ids already present") dest_records_ids = [r['identifier'] for r in session.get(FDP_BIOBANK, attributes='identifier')] - new_records = [sr for sr in biobanks_with_record_service if sr['id'] not in dest_records_ids] + new_records = [sr for sr in missing_biobanks if sr['id'] not in dest_records_ids] print("Found {} new records to insert".format(len(new_records))) return new_records @@ -85,12 +88,6 @@ def create_records(session, entity, records): :params entity: the name of Molgenis entity type of the records to add :params records: lists of dictionary with data of the FDP entity to add """ - # with open(f'fdp_data/{entity}.csv', 'w' ) as csvfile: - # writer = csv.DictWriter(csvfile, fieldnames=records[0].keys(), dialect="excel", quotechar="\"") - # writer.writeheader() - # for r in records: - # writer.writerow(r) - created_records = [] for i in range(0, len(records), 1000): try: @@ -217,7 +214,12 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'endpointUrl': rs['url'], 'endpointDescription': rs['description'] if 'description' in rs else None, 'conformsTo': rs['conformsTo'], - 'type': rs['type'] + 'type': rs['type'], + 'issued': datetime.now().isoformat(), + 'modified': datetime.now().isoformat(), + 'publisher': 'bbmri', + 'title': f'Data Service of {collection["name"]}', + 'language': ['eng-eu'] }) res = { @@ -239,29 +241,57 @@ def get_records_to_add(biobank_data, session, directory_prefix): FDP_CONTACT: contacts, FDP_COLLECTION: [{ 'IRI': f'{directory_prefix}/api/fdp/fdp_Collection/{c["id"]}', + 'additionalRDFType': "http://www.w3.org/ns/dcat#Dataset", 'identifier': c['id'], 'biobank': biobank_data["id"], + 'publisher': 'bbmri', 'title': c['name'], 'description': c['description'] if 'description' in c else None, - 'theme': [d['id'] for d in c['diagnosis_available']], - 'type': [t['id'] for t in c['type'] if get_collection_type_ontology_code(t['id']) is not None], - 'landingPage': f'{directory_prefix}/#/biobank/{c["id"]}', + 'diseases': [d['id'].replace("urn:miriam:icd:", "ICD10:") for d in c['diagnosis_available']], + 'theme': 'EU:HEALTH', + 'type': '', + 'landingPage': f'{directory_prefix}/#/collection/{c["id"]}', 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, 'service': c['record_service']['id'] if 'record_service' in c else None, 'vpConnection': 'ejprd-vp-discoverable' if 'record_service' in c else None, - 'personalData': 'true' - } for c in biobank_data['collections'] if - 'record_service' in c and len(f'{directory_prefix}/#/biobank/{c["id"]}') < 255], + 'issued': datetime.now().isoformat(), + 'modified': datetime.now().isoformat(), + 'personalData': 'true', + 'version': '', + 'language': ['eng-eu'], + 'country': 'EU', + 'rights': 'restricted', + 'policy': '', # CRC Cohort 'https://www.bbmri-eric.eu/services/access-policies/', + 'minAge': c['age_low'] if 'age_low' in c else None, + 'maxAge': c['age_high'] if 'age_high' in c else None, + 'numberOfRecords': c['size'] if 'size' in c else None, + 'numberOfUniqueIndividuals': c['number_of_donors'] + + } for c in biobank_data['collections']], FDP_IRI: missing_iris, FDP_DATA_SERVICE: data_services } return res +def get_collections_in_catalog(session): + catalog = session.get_by_id(FDP_CATALOG, 'bbmri-directory', attributes='collection') + return [c['identifier'] for c in catalog['collection']] + + +def reset_catalog(session, collections_to_update): + collections_in_catalog = get_collections_in_catalog(session) + collections_to_update_ids = list([c['IRI'].split('/')[-1] for c in collections_to_update]) + for c in collections_in_catalog[:]: + if c in collections_to_update_ids: + collections_in_catalog.remove(c) + session.update_one('fdp_Catalog', 'bbmri-directory', 'collection', collections_in_catalog) + + def update_catalog(session, new_collections): - prev_collections = session.get_by_id(FDP_CATALOG, 'bbmri-directory', attributes='collection') + prev_collections = get_collections_in_catalog(session) - collections = set([c['identifier'] for c in prev_collections['collection'] + new_collections]) + collections = set(prev_collections + [c['identifier'] for c in new_collections]) session.update_one(FDP_CATALOG, 'bbmri-directory', 'collection', list(collections)) @@ -298,14 +328,14 @@ def sync(session, directory_prefix, reset, **kwargs): # if the reset flag is True, it deletes the old records if reset: - session.update_one('fdp_Catalog', 'bbmri-directory', 'collection', []) + reset_catalog(session, new_records[FDP_COLLECTION]) + for k, v in reversed(records.items()): if k not in (FDP_IRI, FDP_CONTACT) and len(v) > 0: delete_records(session, k, [i['identifier'] for i in v]) if k == FDP_CONTACT and len(v) > 0: delete_records(session, k, [i[0] for i in v]) - pprint.pprint(records) - # it creates all the + for k, v in records.items(): if len(v) > 0: if k == FDP_IRI: From 45262a8eab0b41522e5b811cf381e5f73ec7c0a9 Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Mon, 30 Sep 2024 11:59:00 +0200 Subject: [PATCH 14/15] adds: EU:HEALTH as theme --- sync_directory_with_fdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 816a9d2..2e16082 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -248,7 +248,7 @@ def get_records_to_add(biobank_data, session, directory_prefix): 'title': c['name'], 'description': c['description'] if 'description' in c else None, 'diseases': [d['id'].replace("urn:miriam:icd:", "ICD10:") for d in c['diagnosis_available']], - 'theme': 'EU:HEALTH', + 'theme': ['EU:HEALTH'] + [d['id'] for d in c['diagnosis_available']], 'type': '', 'landingPage': f'{directory_prefix}/#/collection/{c["id"]}', 'contactPoint': f'{c["contact"]["id"]}' if 'contact' in c else None, @@ -372,4 +372,4 @@ def sync(session, directory_prefix, reset, **kwargs): s = client.Session(args.molgenis_url) s.login(args.molgenis_user, args.molgenis_password) - sync(s, directory_prefix, args.reset) + sync(s, directory_prefix, args.reset, q="id==bbmri-eric:ID:EU_BBMRI-ERIC") From 3731746d598ee0608177bd1bf66a40238a33368f Mon Sep 17 00:00:00 2001 From: Vittorio Meloni Date: Thu, 20 Feb 2025 11:21:51 +0100 Subject: [PATCH 15/15] Add authorship and acknwoledgements to sync_directory_with_fdp.py --- sync_directory_with_fdp.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sync_directory_with_fdp.py b/sync_directory_with_fdp.py index 2e16082..facfcde 100644 --- a/sync_directory_with_fdp.py +++ b/sync_directory_with_fdp.py @@ -4,6 +4,20 @@ converted one into another Molgenis instance. The Molgenis instance can be the same or different. The destination instance needs the FDP EMX model and the "bbmri-directory" FDP_Catalog (i.e., the FDP Catalog with data of the directory) already deployed + +-------------------------------------------------------------------------------------------------------------------------- + +Authors + + - Vittorio Meloni + +Acknowledgments: + +This work has been partially funded by the following sources: + + - The European Joint Programme on Rare Disease (EJPRD) project (grant agreement N. 825575); + +and has evolved within the context of the BBMRI-ERIC Common Service IT. """ from collections import OrderedDict