From 4a622be781770605abae5d7dc251beeb15f1311b Mon Sep 17 00:00:00 2001 From: Jan Range Date: Wed, 29 May 2024 10:56:31 +0200 Subject: [PATCH 1/6] fix update not adding new mult compounds --- easyDataverse/base.py | 12 ++-------- tests/integration/test_dataset_update.py | 28 +++++++++++++++++++++++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/easyDataverse/base.py b/easyDataverse/base.py index 4d13c0c..e6bcd12 100644 --- a/easyDataverse/base.py +++ b/easyDataverse/base.py @@ -212,18 +212,10 @@ def _add_changed_multiples(self): if not self._is_multiple(field): continue - value = getattr(self, name) - has_changes = any(value._changed for value in value) - - if has_changes: - self._changed.add(name) + self._changed.add(name) def _process_multiple_compound(self, compounds) -> List[Dict]: - """Whenever a single compound has changed, return all compounds.""" - - if not any(len(compound._changed) for compound in compounds): - return [] - + """Processes multiple compounds""" return [compound.dataverse_dict() for compound in compounds] def _process_single_compound(self, compound) -> Dict: diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index 8670f7a..b2d42e3 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -13,7 +13,6 @@ def test_dataset_update( credentials, minimal_upload, ): - # Arrange base_url, api_token = credentials url = f"{base_url}/api/dataverses/root/datasets" @@ -38,6 +37,11 @@ def test_dataset_update( # Fetch the dataset and update the title dataset = dataverse.load_dataset(pid) dataset.citation.title = "Title has changed" + + # Check if multiple compound changes are tracked too + dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1") + dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2") + dataset.update() # Re-fetch the dataset @@ -59,10 +63,32 @@ def test_dataset_update( ) ) + other_id_fields = next( + filter( + lambda x: x["typeName"] == "otherId", + updated_dataset["data"]["metadataBlocks"]["citation"]["fields"], + ) + )["value"] + # Assert assert ( title_field["value"] == "Title has changed" ), "The updated dataset title does not match the expected title." + assert ( + len(other_id_fields) == 2 + ), "The updated dataset does not have the expected number of other ids." + assert ( + other_id_fields[0]["otherIdValue"]["value"] == "softwareid1" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[1]["otherIdValue"]["value"] == "softwareid2" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1" + ), "The updated dataset does not have the expected other id agency." + assert ( + other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2" + ), "The updated dataset does not have the expected other id agency." @staticmethod def sort_citation(dataset: Dict): From f7c9a5e6df39be789251806e2981a633e44e4f44 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Mon, 3 Jun 2024 13:13:59 +0200 Subject: [PATCH 2/6] add replace option --- easyDataverse/dataset.py | 9 +- easyDataverse/uploader.py | 15 +++- tests/integration/test_dataset_update.py | 109 +++++++++++++++++++++++ 3 files changed, 129 insertions(+), 4 deletions(-) diff --git a/easyDataverse/dataset.py b/easyDataverse/dataset.py index c673d62..29281d7 100644 --- a/easyDataverse/dataset.py +++ b/easyDataverse/dataset.py @@ -225,10 +225,16 @@ def upload( return self.p_id - def update(self): + def update(self, replace: bool = True): """Updates a dataset if a p_id has been given. Use this function to update a dataset that has already been uploaded to Dataverse. + + Args: + replace (bool, optional): Whether to replace the dataset or not. Defaults to True. + + Raises: + HTTPError: If the dataset could not be updated. """ if not self.p_id: @@ -238,6 +244,7 @@ def update(self): to_change=self._extract_changes(), p_id=self.p_id, # type: ignore files=self.files, + replace=replace, DATAVERSE_URL=str(self.DATAVERSE_URL), # type: ignore API_TOKEN=str(self.API_TOKEN), ) diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index 813dabc..d946b22 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -116,6 +116,7 @@ def update_dataset( p_id: str, to_change: Dict, files: List[File], + replace: bool, DATAVERSE_URL: Optional[str] = None, API_TOKEN: Optional[str] = None, ) -> bool: @@ -125,6 +126,7 @@ def update_dataset( p_id (str): Persistent ID of the dataset. to_change (Dict): Dictionary of fields to change. files (List[File]): List of files that should be uploaded. Can also include directory names. + replace (bool, optional): Whether to replace the existing files. Defaults to False. DATAVERSE_URL (Optional[str], optional): The URL of the Dataverse instance. Defaults to None. API_TOKEN (Optional[str], optional): The API token for authentication. Defaults to None. @@ -137,6 +139,7 @@ def update_dataset( _update_metadata( p_id=p_id, to_change=to_change, + replace=replace, base_url=DATAVERSE_URL, # type: ignore api_token=API_TOKEN, # type: ignore ) @@ -155,6 +158,7 @@ def _update_metadata( to_change: Dict, base_url: str, api_token: str, + replace: bool, ): """Updates the metadata of a dataset. @@ -167,9 +171,14 @@ def _update_metadata( Raises: requests.HTTPError: If the request fails. """ - EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true" - headers = {"X-Dataverse-key": api_token} + if replace: + EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true" + else: + EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}" + + headers = {"X-Dataverse-key": api_token} response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change) - response.raise_for_status() + if response.status_code != 200: + raise requests.HTTPError(f"Failed to update metadata: {response.text}") diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index b2d42e3..fa0efd8 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -2,6 +2,7 @@ import pytest import requests +from requests.models import HTTPError from easyDataverse import Dataverse @@ -90,6 +91,114 @@ def test_dataset_update( other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2" ), "The updated dataset does not have the expected other id agency." + @pytest.mark.integration + def test_dataset_update_wo_replace( + self, + credentials, + minimal_upload, + ): + # Arrange + base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = requests.post( + url=url, + json=minimal_upload, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": "application/json", + }, + ) + + response.raise_for_status() + pid = response.json()["data"]["persistentId"] + + # Act + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Fetch the dataset and update the title + dataset = dataverse.load_dataset(pid) + + # Check if multiple compound changes are tracked too + dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1") + dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2") + + dataset.update(replace=False) + + # Re-fetch the dataset + url = ( + f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}" + ) + + response = requests.get( + url, + headers={"X-Dataverse-key": api_token}, + ) + + response.raise_for_status() + updated_dataset = response.json() + other_id_fields = next( + filter( + lambda x: x["typeName"] == "otherId", + updated_dataset["data"]["metadataBlocks"]["citation"]["fields"], + ) + )["value"] + + # Assert + assert ( + len(other_id_fields) == 2 + ), "The updated dataset does not have the expected number of other ids." + assert ( + other_id_fields[0]["otherIdValue"]["value"] == "softwareid1" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[1]["otherIdValue"]["value"] == "softwareid2" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1" + ), "The updated dataset does not have the expected other id agency." + assert ( + other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2" + ), "The updated dataset does not have the expected other id agency." + + @pytest.mark.integration + def test_dataset_update_invalid( + self, + credentials, + minimal_upload, + ): + # Arrange + base_url, api_token = credentials + url = f"{base_url}/api/dataverses/root/datasets" + response = requests.post( + url=url, + json=minimal_upload, + headers={ + "X-Dataverse-key": api_token, + "Content-Type": "application/json", + }, + ) + + response.raise_for_status() + pid = response.json()["data"]["persistentId"] + + # Act + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Fetch the dataset and update the title + dataset = dataverse.load_dataset(pid) + + # Check if multiple compound changes are tracked too + dataset.citation.title = "Title has changed" + + with pytest.raises(HTTPError): + dataset.update(replace=False) + @staticmethod def sort_citation(dataset: Dict): citation = dataset["datasetVersion"]["metadataBlocks"]["citation"] From ee107a5632d57a3861b3654451afa2b13aa0be20 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Wed, 12 Jun 2024 15:33:47 +0200 Subject: [PATCH 3/6] keep track of new objects and edits --- easyDataverse/base.py | 29 +++++++++++-- easyDataverse/dataverse.py | 4 ++ tests/integration/test_dataset_update.py | 52 ++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/easyDataverse/base.py b/easyDataverse/base.py index e6bcd12..e33b326 100644 --- a/easyDataverse/base.py +++ b/easyDataverse/base.py @@ -25,6 +25,7 @@ class DataverseBase(BaseModel): ) _changed: Set = PrivateAttr(default_factory=set) + _new: bool = PrivateAttr(default=True) # ! Overloads def __setattr__(self, name: str, value: Any) -> None: @@ -184,10 +185,10 @@ def to_dataverse_json(self, indent: int = 2) -> str: def extract_changed(self) -> List[Dict]: """Extracts the changed fields from the object""" - self._add_changed_multiples() - changed_fields = [] + self._add_changed_multiples() + for name in self._changed: field = self.model_fields[name] @@ -212,10 +213,17 @@ def _add_changed_multiples(self): if not self._is_multiple(field): continue - self._changed.add(name) + has_changed = any( + compound._changed or compound._new + for compound in getattr(self, name) + ) + + if has_changed: + self._changed.add(name) def _process_multiple_compound(self, compounds) -> List[Dict]: """Processes multiple compounds""" + return [compound.dataverse_dict() for compound in compounds] def _process_single_compound(self, compound) -> Dict: @@ -246,6 +254,21 @@ def _wrap_changed(self, field: FieldInfo, value: Any): "value": value, } + def _set_new_prop(self, value: bool): + """Sets the new property of the object""" + + self._new = value + + for attr, field in self.model_fields.items(): + if not field.json_schema_extra["typeClass"] == "compound": + continue + + if field.json_schema_extra["multiple"]: + for compound in getattr(self, attr): + compound._set_new_prop(value) + else: + getattr(self, attr)._set_new_prop(value) + @staticmethod def is_empty(value): """Checks whether a given value is None or empty""" diff --git a/easyDataverse/dataverse.py b/easyDataverse/dataverse.py index 89473d4..00d7132 100644 --- a/easyDataverse/dataverse.py +++ b/easyDataverse/dataverse.py @@ -339,6 +339,10 @@ def load_dataset( n_parallel_downloads=n_parallel_downloads, ) + # Set "new" prop to False + for metadatablock in dataset.metadatablocks.values(): + metadatablock._set_new_prop(False) + return dataset def _fetch_dataset( diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index fa0efd8..04b2670 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -163,8 +163,9 @@ def test_dataset_update_wo_replace( other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2" ), "The updated dataset does not have the expected other id agency." + @pytest.mark.integration - def test_dataset_update_invalid( + def test_update_edit( self, credentials, minimal_upload, @@ -194,10 +195,53 @@ def test_dataset_update_invalid( dataset = dataverse.load_dataset(pid) # Check if multiple compound changes are tracked too - dataset.citation.title = "Title has changed" + dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1") + dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2") + + dataset.update(replace=False) + + # Fetch another time and edit the first entry + dataset = dataverse.load_dataset(pid) + dataset.citation.other_id[0].agency = "Software Heritage1 updated" + + dataset.update(replace=False) + + # Re-fetch the dataset + url = ( + f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}" + ) + + response = requests.get( + url, + headers={"X-Dataverse-key": api_token}, + ) + + response.raise_for_status() + updated_dataset = response.json() + other_id_fields = next( + filter( + lambda x: x["typeName"] == "otherId", + updated_dataset["data"]["metadataBlocks"]["citation"]["fields"], + ) + )["value"] + + # Assert + assert ( + len(other_id_fields) == 2 + ), "The updated dataset does not have the expected number of other ids." + assert ( + other_id_fields[0]["otherIdValue"]["value"] == "softwareid1" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[1]["otherIdValue"]["value"] == "softwareid2" + ), "The updated dataset does not have the expected other id." + assert ( + other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1 updated" + ), "The updated dataset does not have the expected other id agency." + assert ( + other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2" + ), "The updated dataset does not have the expected other id agency." - with pytest.raises(HTTPError): - dataset.update(replace=False) @staticmethod def sort_citation(dataset: Dict): From 07ffcfc2d6c757dcababb5e34d438a55c880a68e Mon Sep 17 00:00:00 2001 From: Jan Range Date: Wed, 12 Jun 2024 15:33:56 +0200 Subject: [PATCH 4/6] explicitly use `replace` --- easyDataverse/uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index d946b22..f5fa06a 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -175,7 +175,7 @@ def _update_metadata( if replace: EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true" else: - EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}" + EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=false" headers = {"X-Dataverse-key": api_token} response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change) From 7276fc5c53af55a95513d3998927d1e6ff89d52f Mon Sep 17 00:00:00 2001 From: Jan Range Date: Wed, 12 Jun 2024 15:34:48 +0200 Subject: [PATCH 5/6] remove unused import --- tests/integration/test_dataset_update.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py index 04b2670..a25fc42 100644 --- a/tests/integration/test_dataset_update.py +++ b/tests/integration/test_dataset_update.py @@ -2,7 +2,6 @@ import pytest import requests -from requests.models import HTTPError from easyDataverse import Dataverse From 09133a713e7b4ab21aaf228a3e9dc5d2fb574ec1 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 09:59:38 +0200 Subject: [PATCH 6/6] simplify endpoint handling --- easyDataverse/uploader.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py index f5fa06a..e1c7837 100644 --- a/easyDataverse/uploader.py +++ b/easyDataverse/uploader.py @@ -172,13 +172,10 @@ def _update_metadata( requests.HTTPError: If the request fails. """ - if replace: - EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true" - else: - EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=false" - headers = {"X-Dataverse-key": api_token} - response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change) + endpoint = f"/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace={str(replace).lower()}" + url = urljoin(base_url, endpoint) + response = requests.put(url, headers=headers, json=to_change) if response.status_code != 200: raise requests.HTTPError(f"Failed to update metadata: {response.text}")