Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cds_migrator_kit/rdm/records/transform/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
"""CDS-RDM transform config module."""

# filters out PIDs which we don't migrate
PIDS_SCHEMES_TO_DROP = ["HAL"]
PIDS_SCHEMES_TO_DROP = []
# validates allowed schemes
PIDS_SCHEMES_ALLOWED = ["DOI"]

# stores the identifiers found in PIDs field in the alternative identifiers instead
PID_SCHEMES_TO_STORE_IN_IDENTIFIERS = [
"ARXIV",
"HDL",
"HAL"
"HANDLE",
"URN",
"INIS",
Expand All @@ -25,7 +26,6 @@

IDENTIFIERS_SCHEMES_TO_DROP = [
"SPIRES",
"HAL",
"OSTI",
"SLAC",
"PROQUEST",
Expand Down
1 change: 1 addition & 0 deletions cds_migrator_kit/rdm/records/transform/models/hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class HrModel(CdsOverdo):
"100__m",
"300__a", # number of pages
"591__b", #
"6531_9", # keyword scheme
"700__m",
"7870_r", # detailed description of record relation (2862345)
"8564_8",
Expand Down
39 changes: 23 additions & 16 deletions cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,19 +350,20 @@ def identifiers(self, key, value):
https://github.com/CERNDocumentServer/cds-migrator-kit/issues/21
"""
id_value = StringValue(value.get("a", "")).parse()
scheme = StringValue(value.get("9", "")).parse()
original_scheme = StringValue(value.get("9", "")).parse()
scheme = original_scheme.lower()
related_works = self.get("related_identifiers", [])
if scheme.upper() in IDENTIFIERS_SCHEMES_TO_DROP:
if original_scheme.upper() in IDENTIFIERS_SCHEMES_TO_DROP:
raise IgnoreKey("identifiers")
# drop oai harvest info
if id_value.startswith("oai:inspirehep.net"):
raise IgnoreKey("identifiers")
if scheme.lower() == "arxiv":
if scheme == "arxiv":
id_value = id_value.replace("oai:arXiv.org:", "arXiv:")
if scheme.lower() == "cern annual report":
if scheme == "cern annual report":
additional_descriptions = self.get("additional_descriptions", [])
new_desc = {
"description": f"{scheme} {id_value}",
"description": f"{original_scheme} {id_value}",
"type": {"id": "series-information"},
}
additional_descriptions.append(new_desc)
Expand All @@ -377,34 +378,36 @@ def identifiers(self, key, value):
self["related_identifiers"] = related_works
raise IgnoreKey("identifiers")

is_aleph_number = scheme.lower() == "cercer" or not scheme and "CERCER" in id_value
is_aleph_number = scheme == "cercer" or not scheme and "CERCER" in id_value

if is_aleph_number:
scheme = "aleph"
if scheme.lower() == "cds":
elif scheme == "cds":
scheme = "cds"
if scheme.lower() == "inspire":
elif scheme == "inspire":
validate_inspire_identifier(id_value, key)

rel_id = {"scheme": scheme.lower(), "identifier": id_value}
if scheme.lower() == "admbul":
legacy_scheme = scheme
rel_id = {"scheme": scheme, "identifier": id_value}

if scheme == "admbul":
scheme = "other"
rel_id = {"scheme": scheme, "identifier": f"{legacy_scheme}_{id_value}"}
if scheme.lower() == "agendamaker":
rel_id = {"scheme": scheme, "identifier": f"{original_scheme}_{id_value}"}
if scheme == "agendamaker":
indico_id = get_new_indico_id(id_value)
scheme = "indico"
rel_id = {"scheme": scheme, "identifier": str(indico_id)}
if scheme.lower() == "zentralblatt math":
if scheme == "zentralblatt math":
scheme = "url"
rel_id = {
"scheme": scheme,
"identifier": f"https://zbmath.org/?q=an:{id_value}",
}

if id_value:
if rel_id["scheme"] in RDM_RECORDS_RELATED_IDENTIFIERS_SCHEMES:
rel_id.update(
{
"relation_type": {"id": "isreferencedby"},
"relation_type": {"id": "isvariantformof"},
"resource_type": {"id": "publication-other"},
}
)
Expand Down Expand Up @@ -459,7 +462,7 @@ def _pids(self, key, value):
else:
new_id.update(
{
"relation_type": {"id": "isversionof"},
"relation_type": {"id": "isvariantformof"},
"resource_type": {"id": qualifier},
}
)
Expand Down Expand Up @@ -779,6 +782,10 @@ def related_identifiers_787(self, key, value):
recid = recid.replace("https://cds.cern.ch/record/", "")

relation_map = {
"periodical": {
"relation_type": {"id": "ispublishedin"},
"resource_type": {"id": "publication-periodical"},
},
"issue": {
"relation_type": {"id": "ispublishedin"},
"resource_type": {"id": "publication-periodicalissue"},
Expand Down
24 changes: 15 additions & 9 deletions cds_migrator_kit/rdm/records/transform/xml_processing/rules/hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ def additional_desc(self, key, value):
raise IgnoreKey("additional_descriptions_hr")


@model.over("subjects", "(^6931_)|(^650[12_][7_])|(^653[12_]_)|(^695__)|(^694__)")
@model.over("subjects", "(^6931_)|(^650[12_][7_])|(^653[12_]_)|(^695__)|(^694__)", override=True)
@require(["a"])
@for_each_value
def hr_subjects(self, key, value):
keyword = value.get("a")
if key == "6531_":
keyword = value.get("a")
if "," in keyword:
Expand All @@ -87,14 +88,17 @@ def hr_subjects(self, key, value):
_subjects.append({"subject": key})
self["subjects"] = _subjects
raise IgnoreKey("subjects")
else:
resource_type_map = {
"Presentation": {"id": "presentation"},
}
resource_type = resource_type_map.get(keyword)
if resource_type:
self["resource_type"] = resource_type
raise IgnoreKey("subjects")

resource_type_map = {
"Presentation": {"id": "presentation"},
"Mémos": {"id": "publication-memorandum"},
"Formulaires": {"id": "publication-form"},
"Form": {"id": "publication-form"},
"Modèles de documents": {"id": "publication-doctemplate"},
}
resource_type = resource_type_map.get(keyword)
if resource_type:
self["resource_type"] = resource_type

subjects(self, key, value)

Expand Down Expand Up @@ -153,6 +157,8 @@ def resource_type(self, key, value):
self["subjects"] = subjects
if value == "administrativenote":
raise IgnoreKey("resource_type")
if value == "cern-admin-e-guide" and self["resource_type"]:
raise IgnoreKey("resource_type")
map = {
"annualstats": {"id": "publication-report"},
"cern-admin-e-guide": {"id": "publication-other"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ def isbn(self, key, value):
new_id = {
"identifier": _isbn,
"scheme": "isbn",
"relation_type": {"id": "isversionof"},
"relation_type": {"id": "isvariantformof"},
"resource_type": {"id": "publication-book"},
}
else:
destination = "related_identifiers"
new_id = {
"identifier": _isbn,
"scheme": "isbn",
"relation_type": {"id": "isversionof"},
"relation_type": {"id": "isvariantformof"},
"resource_type": {"id": "publication-book"},
}
ids = self.get(destination, [])
Expand Down
10 changes: 10 additions & 0 deletions tests/cds-rdm/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,16 @@ def relation_type_v(app, relation_type):
},
)

vocab = vocabulary_service.create(
system_identity,
{
"id": "isvariantformof",
"props": {"datacite": "Is variant form of"},
"title": {"en": "Is variant form of"},
"type": "relationtypes",
},
)

Vocabulary.index.refresh()

return vocab
Expand Down
10 changes: 5 additions & 5 deletions tests/cds-rdm/test_thesis_migration.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 CERN.
Expand Down Expand Up @@ -96,7 +96,7 @@
{
"identifier": "978-3-030-90375-6",
"scheme": "isbn",
"relation_type": {"id": "isversionof", "title": {"en": "Is version of"}},
"relation_type": {"id": "isvariantformof", "title": {"en": "Is variant form of"}},
"resource_type": {
"id": "publication-book",
"title": {
Expand All @@ -108,7 +108,7 @@
{
"identifier": "978-3-030-90376-3",
"scheme": "isbn",
"relation_type": {"id": "isversionof", "title": {"en": "Is version of"}},
"relation_type": {"id": "isvariantformof", "title": {"en": "Is variant form of"}},
"resource_type": {
"id": "publication-book",
"title": {
Expand All @@ -121,7 +121,7 @@
{
"identifier": "10.1007/978-3-030-90376-3",
"scheme": "doi",
"relation_type": {"id": "isversionof", "title": {"en": "Is version of"}},
"relation_type": {"id": "isvariantformof", "title": {"en": "Is variant form of"}},
"resource_type": {
"id": "publication",
"title": {"en": "Publication", "de": "Publikation"},
Expand All @@ -130,8 +130,8 @@
{
"identifier": "1807850",
"relation_type": {
"id": "isreferencedby",
"title": {"de": "Wird referenziert von", "en": "Is referenced by"},
"id": "isvariantformof",
"title": {"en": "Is variant form of"},
},
"resource_type": {
"id": "publication-other",
Expand Down
Loading