diff --git a/easyDataverse/base.py b/easyDataverse/base.py
index 0c67ce9..8b95ef5 100644
--- a/easyDataverse/base.py
+++ b/easyDataverse/base.py
@@ -25,6 +25,7 @@ class DataverseBase(BaseModel):
     )
 
     _changed: Set = PrivateAttr(default_factory=set)
+    _new: bool = PrivateAttr(default=True)
 
     # ! Overloads
     def __setattr__(self, name: str, value: Any) -> None:
@@ -184,10 +185,10 @@ def to_dataverse_json(self, indent: int = 2) -> str:
 
     def extract_changed(self) -> List[Dict]:
         """Extracts the changed fields from the object"""
-        self._add_changed_multiples()
-
         changed_fields = []
 
+        self._add_changed_multiples()
+
         for name in self._changed:
             field = self.model_fields[name]
@@ -212,17 +213,16 @@ def _add_changed_multiples(self):
             if not self._is_multiple(field):
                 continue
 
-            value = getattr(self, name)
-            has_changes = any(value._changed for value in value)
+            has_changed = any(
+                compound._changed or compound._new
+                for compound in getattr(self, name)
+            )
 
-            if has_changes:
+            if has_changed:
                 self._changed.add(name)
 
     def _process_multiple_compound(self, compounds) -> List[Dict]:
-        """Whenever a single compound has changed, return all compounds."""
-
-        if not any(len(compound._changed) for compound in compounds):
-            return []
+        """Serializes all compounds of a multiple compound field."""
 
         return [compound.dataverse_dict() for compound in compounds]
@@ -254,6 +254,21 @@ def _wrap_changed(self, field: FieldInfo, value: Any):
             "value": value,
         }
 
+    def _set_new_prop(self, value: bool):
+        """Recursively marks this object and all nested compounds as new or existing."""
+
+        self._new = value
+
+        for attr, field in self.model_fields.items():
+            if field.json_schema_extra["typeClass"] != "compound":
+                continue
+
+            if field.json_schema_extra["multiple"]:
+                for compound in getattr(self, attr):
+                    compound._set_new_prop(value)
+            else:
+                getattr(self, attr)._set_new_prop(value)
+
     @staticmethod
     def is_empty(value):
         """Checks whether a given value is None or empty"""
diff --git a/easyDataverse/dataset.py b/easyDataverse/dataset.py
index c673d62..29281d7 100644
--- a/easyDataverse/dataset.py
+++ b/easyDataverse/dataset.py
@@ -225,10 +225,16 @@ def upload(
 
         return self.p_id
 
-    def update(self):
+    def update(self, replace: bool = True):
         """Updates a dataset if a p_id has been given.
 
         Use this function to update a dataset that has already
         been uploaded to Dataverse.
+
+        Args:
+            replace (bool, optional): Whether existing metadata values are replaced or new values are appended. Defaults to True.
+
+        Raises:
+            HTTPError: If the dataset could not be updated.
         """
 
         if not self.p_id:
@@ -238,6 +244,7 @@ def update(self):
             to_change=self._extract_changes(),
             p_id=self.p_id,  # type: ignore
             files=self.files,
+            replace=replace,
             DATAVERSE_URL=str(self.DATAVERSE_URL),  # type: ignore
             API_TOKEN=str(self.API_TOKEN),
         )
diff --git a/easyDataverse/dataverse.py b/easyDataverse/dataverse.py
index a0c9104..3d76a5b 100644
--- a/easyDataverse/dataverse.py
+++ b/easyDataverse/dataverse.py
@@ -339,6 +339,10 @@ def load_dataset(
             n_parallel_downloads=n_parallel_downloads,
         )
 
+        # Mark fetched compounds as existing so they are not re-sent as new entries
+        for metadatablock in dataset.metadatablocks.values():
+            metadatablock._set_new_prop(False)
+
        return dataset
 
     def _fetch_dataset(
diff --git a/easyDataverse/uploader.py b/easyDataverse/uploader.py
index 813dabc..e1c7837 100644
--- a/easyDataverse/uploader.py
+++ b/easyDataverse/uploader.py
@@ -116,6 +116,7 @@ def update_dataset(
     p_id: str,
     to_change: Dict,
     files: List[File],
+    replace: bool,
     DATAVERSE_URL: Optional[str] = None,
     API_TOKEN: Optional[str] = None,
 ) -> bool:
@@ -125,6 +126,7 @@
         p_id (str): Persistent ID of the dataset.
         to_change (Dict): Dictionary of fields to change.
         files (List[File]): List of files that should be uploaded. Can also include directory names.
+        replace (bool): Whether existing metadata values are replaced or new values are appended.
         DATAVERSE_URL (Optional[str], optional): The URL of the Dataverse instance. Defaults to None.
         API_TOKEN (Optional[str], optional): The API token for authentication. Defaults to None.
@@ -137,6 +139,7 @@
     _update_metadata(
         p_id=p_id,
         to_change=to_change,
+        replace=replace,
         base_url=DATAVERSE_URL,  # type: ignore
         api_token=API_TOKEN,  # type: ignore
     )
@@ -155,6 +158,7 @@ def _update_metadata(
     to_change: Dict,
     base_url: str,
     api_token: str,
+    replace: bool,
 ):
     """Updates the metadata of a dataset.
 
@@ -167,9 +171,11 @@
     Raises:
         requests.HTTPError: If the request fails.
     """
-    EDIT_ENDPOINT = f"{base_url.rstrip('/')}/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace=true"
-    headers = {"X-Dataverse-key": api_token}
-    response = requests.put(EDIT_ENDPOINT, headers=headers, json=to_change)
+    headers = {"X-Dataverse-key": api_token}
+    endpoint = f"/api/datasets/:persistentId/editMetadata?persistentId={p_id}&replace={str(replace).lower()}"
+    url = urljoin(base_url, endpoint)
+    response = requests.put(url, headers=headers, json=to_change)
 
-    response.raise_for_status()
+    if response.status_code != 200:
+        raise requests.HTTPError(f"Failed to update metadata: {response.text}")
diff --git a/tests/integration/test_dataset_update.py b/tests/integration/test_dataset_update.py
index 8670f7a..a25fc42 100644
--- a/tests/integration/test_dataset_update.py
+++ b/tests/integration/test_dataset_update.py
@@ -13,7 +13,6 @@ def test_dataset_update(
         credentials,
         minimal_upload,
     ):
-
         # Arrange
         base_url, api_token = credentials
         url = f"{base_url}/api/dataverses/root/datasets"
@@ -38,6 +37,11 @@
         # Fetch the dataset and update the title
         dataset = dataverse.load_dataset(pid)
         dataset.citation.title = "Title has changed"
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
         dataset.update()
 
         # Re-fetch the dataset
@@ -59,10 +63,184 @@
             )
         )
 
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
         # Assert
         assert (
             title_field["value"] == "Title has changed"
         ), "The updated dataset title does not match the expected title."
+
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."
+
+    @pytest.mark.integration
+    def test_dataset_update_wo_replace(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."
+
+    @pytest.mark.integration
+    def test_update_edit(
+        self,
+        credentials,
+        minimal_upload,
+    ):
+        # Arrange
+        base_url, api_token = credentials
+        url = f"{base_url}/api/dataverses/root/datasets"
+        response = requests.post(
+            url=url,
+            json=minimal_upload,
+            headers={
+                "X-Dataverse-key": api_token,
+                "Content-Type": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        pid = response.json()["data"]["persistentId"]
+
+        # Act
+        dataverse = Dataverse(
+            server_url=base_url,
+            api_token=api_token,
+        )
+
+        # Fetch the dataset
+        dataset = dataverse.load_dataset(pid)
+
+        # Check if multiple compound changes are tracked too
+        dataset.citation.add_other_id(agency="Software Heritage1", value="softwareid1")
+        dataset.citation.add_other_id(agency="Software Heritage2", value="softwareid2")
+
+        dataset.update(replace=False)
+
+        # Fetch another time and edit the first entry
+        dataset = dataverse.load_dataset(pid)
+        dataset.citation.other_id[0].agency = "Software Heritage1 updated"
+
+        dataset.update(replace=False)
+
+        # Re-fetch the dataset
+        url = (
+            f"{base_url}/api/datasets/:persistentId/versions/:draft?persistentId={pid}"
+        )
+
+        response = requests.get(
+            url,
+            headers={"X-Dataverse-key": api_token},
+        )
+
+        response.raise_for_status()
+        updated_dataset = response.json()
+        other_id_fields = next(
+            filter(
+                lambda x: x["typeName"] == "otherId",
+                updated_dataset["data"]["metadataBlocks"]["citation"]["fields"],
+            )
+        )["value"]
+
+        # Assert
+        assert (
+            len(other_id_fields) == 2
+        ), "The updated dataset does not have the expected number of other ids."
+        assert (
+            other_id_fields[0]["otherIdValue"]["value"] == "softwareid1"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[1]["otherIdValue"]["value"] == "softwareid2"
+        ), "The updated dataset does not have the expected other id."
+        assert (
+            other_id_fields[0]["otherIdAgency"]["value"] == "Software Heritage1 updated"
+        ), "The updated dataset does not have the expected other id agency."
+        assert (
+            other_id_fields[1]["otherIdAgency"]["value"] == "Software Heritage2"
+        ), "The updated dataset does not have the expected other id agency."
+
     @staticmethod
     def sort_citation(dataset: Dict):
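
A minimal usage sketch of the behavior this patch introduces, as exercised by the tests above. The server URL, API token, and DOI are placeholders, and the import path assumes `Dataverse` is exposed at the package root:

```python
from easyDataverse import Dataverse

# Placeholder instance and credentials - substitute your own.
dataverse = Dataverse(
    server_url="https://demo.dataverse.org",
    api_token="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
)

# load_dataset() now marks every fetched compound as existing (_new=False),
# so only compounds added or edited afterwards count as changes.
dataset = dataverse.load_dataset("doi:10.5072/FK2/EXAMPLE")

dataset.citation.title = "A new title"
dataset.citation.add_other_id(agency="Software Heritage", value="softwareid")

# replace=True (the default) maps to editMetadata?replace=true and overwrites
# the current values of the changed fields; replace=False appends new values
# to multi-value fields instead.
dataset.update(replace=False)
```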