diff --git a/pyproject.toml b/pyproject.toml index acbc3ed..7e5864f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "defopt>=7.0.0", "email_validator", "hdx-python-country>=3.9.6", - "hdx-python-utilities>=3.9.0", + "hdx-python-utilities>=3.9.1", "libhxl>=5.2.2", "makefun", "quantulum3", diff --git a/requirements.txt b/requirements.txt index bb24b11..b5b5547 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ click==8.2.1 # typer colorama==0.4.6 # via mkdocs-material -coverage==7.10.5 +coverage==7.10.6 # via pytest-cov defopt==7.0.0 # via hdx-python-api (pyproject.toml) @@ -67,7 +67,7 @@ gspread==6.2.1 # via hdx-python-api (pyproject.toml) hdx-python-country==3.9.6 # via hdx-python-api (pyproject.toml) -hdx-python-utilities==3.9.0 +hdx-python-utilities==3.9.1 # via # hdx-python-api (pyproject.toml) # hdx-python-country @@ -142,7 +142,7 @@ mkdocs-material==9.6.18 # via mkapi mkdocs-material-extensions==1.3.1 # via mkdocs-material -more-itertools==10.7.0 +more-itertools==10.8.0 # via inflect nodeenv==1.9.1 # via pre-commit @@ -254,7 +254,7 @@ rfc3986==2.0.0 # via frictionless rich==14.1.0 # via typer -rpds-py==0.27.0 +rpds-py==0.27.1 # via # jsonschema # referencing @@ -292,7 +292,7 @@ text-unidecode==1.3 # via python-slugify typeguard==4.4.4 # via inflect -typer==0.16.1 +typer==0.17.3 # via frictionless typing-extensions==4.15.0 # via diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index 2b71b9b..b3fd298 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -85,6 +85,7 @@ def dataset_update_filestore_resource( resource_data_to_update: "Resource", filestore_resources: Dict[int, str], resource_index: int, + **kwargs: Any, ) -> int: """Helper method to merge updated resource from dataset into HDX resource read from HDX including filestore. 
Returns status code where: @@ -101,17 +102,21 @@ def dataset_update_filestore_resource( resource_data_to_update (Resource): Updated resource from dataset filestore_resources (Dict[int, str]): List of (index of resources, file to upload) resource_index (int): Index of resource + **kwargs: Accepts force_update (bool): Force file to be updated even if size and hash are unchanged. Defaults to False. Returns: int: Status code """ file_to_upload = resource_data_to_update.get_file_to_upload() if file_to_upload: + force_update = kwargs.pop("force_update", False) file_format = resource_data_to_update.get("format", "").lower() size, hash = get_size_and_hash(file_to_upload, file_format) - if size == original_resource_data.get( - "size" - ) and hash == original_resource_data.get("hash"): + if ( + not force_update + and size == original_resource_data.get("size") + and hash == original_resource_data.get("hash") + ): logger.warning( f"Not updating filestore for resource {original_resource_data['name']} as size and hash unchanged!" ) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 1df2e42..f3d06a8 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -927,6 +927,7 @@ def _dataset_update_resources( resource_data_to_update, filestore_resources, i, + **kwargs, ) statuses[resource_name] = status resources_to_update.append(resource_data_to_update) @@ -970,6 +971,7 @@ def _dataset_update_resources( resource_data_to_update, filestore_resources, i, + **kwargs, ) statuses[updated_resource_name] = status else: @@ -1117,6 +1119,7 @@ def update_in_hdx( keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True. updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update + force_update (bool): Forces files to be updated even if they haven't changed Returns: Dict: Status codes of resources @@ -1184,6 +1187,7 @@ def create_in_hdx( keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True.
updated_by_script (str): String to identify your script. Defaults to your user agent. batch (str): A string you can specify to show which datasets are part of a single batch update + force_update (bool): Forces files to be updated even if they haven't changed Returns: Dict: Status codes of resources diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 3364509..187bca0 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -402,6 +402,7 @@ def _resource_merge_hdx_update( **kwargs: See below operation (str): Operation to perform eg. patch. Defaults to update. data_updated (bool): If True, set last_modified to now. Defaults to False. + force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False. Returns: int: Status code @@ -409,9 +410,14 @@ def _resource_merge_hdx_update( data_updated = kwargs.pop("data_updated", self._data_updated) files = {} if self._file_to_upload: + force_update = kwargs.pop("force_update", False) file_format = self._old_data.get("format", "").lower() size, hash = get_size_and_hash(self._file_to_upload, file_format) - if size == self.data.get("size") and hash == self.data.get("hash"): + if ( + not force_update + and size == self.data.get("size") + and hash == self.data.get("hash") + ): logger.warning( f"Not updating filestore for resource {self.data['name']} as size and hash unchanged!" ) @@ -474,6 +480,7 @@ def update_in_hdx(self, **kwargs: Any) -> int: operation (string): Operation to perform eg. patch. Defaults to update. data_updated (bool): If True, set last_modified to now. Defaults to False. date_data_updated (datetime): Date to use for last_modified. Default to None. + force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False. Returns: int: Status code @@ -504,6 +511,7 @@ def create_in_hdx(self, **kwargs: Any) -> int: **kwargs: See below data_updated (bool): If True, set last_modified to now. Defaults to False. 
date_data_updated (datetime): Date to use for last_modified. Default to None. + force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False. Returns: int: Status code diff --git a/tests/hdx/api/utilities/test_filestore_helper.py b/tests/hdx/api/utilities/test_filestore_helper.py index 8fae229..a173132 100644 --- a/tests/hdx/api/utilities/test_filestore_helper.py +++ b/tests/hdx/api/utilities/test_filestore_helper.py @@ -21,9 +21,10 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): orig_resource = Resource(resource_data_copy) resource = Resource(resource_data_copy) filestore_resources = {} - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( orig_resource, resource, filestore_resources, 0 ) + assert status == 1 assert resource == { "description": "My Resource", "format": "xlsx", @@ -36,9 +37,10 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): assert filestore_resources == {} resource.set_file_to_upload(test_data) - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( orig_resource, resource, filestore_resources, 0 ) + assert status == 2 assert resource == { "description": "My Resource", "format": "xlsx", @@ -53,9 +55,15 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): assert filestore_resources == {0: test_data} filestore_resources = {} - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( + resource, resource, filestore_resources, 0, force_update=True + ) + assert status == 2 + filestore_resources = {} + status = FilestoreHelper.dataset_update_filestore_resource( resource, resource, filestore_resources, 0 ) + assert status == 3 assert resource == { "description": "My Resource", "format": "xlsx", @@ -71,9 +79,10 @@ def test_dataset_update_filestore_resource(self, 
configuration, test_data): resource._file_to_upload = None resource.mark_data_updated() - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( orig_resource, resource, filestore_resources, 0 ) + assert status == 0 regex = r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d$" assert re.match(regex, resource["last_modified"]) assert filestore_resources == {} diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index 4a772a1..45688b2 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -878,6 +878,9 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data assert status == 3 resource["id"] = "74b74ae1-df0c-4716-829f-4f939a046817" resource.set_file_to_upload(test_data, guess_format_from_suffix=True) + status = resource.update_in_hdx(force_update=True) + assert status == 2 + resource.set_file_to_upload(test_data, guess_format_from_suffix=True) status = resource.update_in_hdx() assert status == 4