Skip to content

Commit a96c343

Browse files
authored
Add force_update option to write files even if hash hasn't changed (#92)
1 parent 2b802f2 commit a96c343

7 files changed

Lines changed: 43 additions & 14 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dependencies = [
3838
"defopt>=7.0.0",
3939
"email_validator",
4040
"hdx-python-country>=3.9.6",
41-
"hdx-python-utilities>=3.9.0",
41+
"hdx-python-utilities>=3.9.1",
4242
"libhxl>=5.2.2",
4343
"makefun",
4444
"quantulum3",

requirements.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ click==8.2.1
3333
# typer
3434
colorama==0.4.6
3535
# via mkdocs-material
36-
coverage==7.10.5
36+
coverage==7.10.6
3737
# via pytest-cov
3838
defopt==7.0.0
3939
# via hdx-python-api (pyproject.toml)
@@ -67,7 +67,7 @@ gspread==6.2.1
6767
# via hdx-python-api (pyproject.toml)
6868
hdx-python-country==3.9.6
6969
# via hdx-python-api (pyproject.toml)
70-
hdx-python-utilities==3.9.0
70+
hdx-python-utilities==3.9.1
7171
# via
7272
# hdx-python-api (pyproject.toml)
7373
# hdx-python-country
@@ -142,7 +142,7 @@ mkdocs-material==9.6.18
142142
# via mkapi
143143
mkdocs-material-extensions==1.3.1
144144
# via mkdocs-material
145-
more-itertools==10.7.0
145+
more-itertools==10.8.0
146146
# via inflect
147147
nodeenv==1.9.1
148148
# via pre-commit
@@ -254,7 +254,7 @@ rfc3986==2.0.0
254254
# via frictionless
255255
rich==14.1.0
256256
# via typer
257-
rpds-py==0.27.0
257+
rpds-py==0.27.1
258258
# via
259259
# jsonschema
260260
# referencing
@@ -292,7 +292,7 @@ text-unidecode==1.3
292292
# via python-slugify
293293
typeguard==4.4.4
294294
# via inflect
295-
typer==0.16.1
295+
typer==0.17.3
296296
# via frictionless
297297
typing-extensions==4.15.0
298298
# via

src/hdx/api/utilities/filestore_helper.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def dataset_update_filestore_resource(
8585
resource_data_to_update: "Resource",
8686
filestore_resources: Dict[int, str],
8787
resource_index: int,
88+
**kwargs: Any,
8889
) -> int:
8990
"""Helper method to merge updated resource from dataset into HDX resource read from HDX including filestore.
9091
Returns status code where:
@@ -101,17 +102,21 @@ def dataset_update_filestore_resource(
101102
resource_data_to_update (Resource): Updated resource from dataset
102103
filestore_resources (Dict[int, str]): Dictionary mapping index of resource to file to upload
103104
resource_index (int): Index of resource
105+
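**kwargs: force_update (bool) forces the filestore to be updated even if size and hash are unchanged. Defaults to False.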
104106
105107
Returns:
106108
int: Status code
107109
"""
108110
file_to_upload = resource_data_to_update.get_file_to_upload()
109111
if file_to_upload:
112+
force_update = kwargs.pop("force_update", False)
110113
file_format = resource_data_to_update.get("format", "").lower()
111114
size, hash = get_size_and_hash(file_to_upload, file_format)
112-
if size == original_resource_data.get(
113-
"size"
114-
) and hash == original_resource_data.get("hash"):
115+
if (
116+
not force_update
117+
and size == original_resource_data.get("size")
118+
and hash == original_resource_data.get("hash")
119+
):
115120
logger.warning(
116121
f"Not updating filestore for resource {original_resource_data['name']} as size and hash unchanged!"
117122
)

src/hdx/data/dataset.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,7 @@ def _dataset_update_resources(
927927
resource_data_to_update,
928928
filestore_resources,
929929
i,
930+
**kwargs,
930931
)
931932
statuses[resource_name] = status
932933
resources_to_update.append(resource_data_to_update)
@@ -970,6 +971,7 @@ def _dataset_update_resources(
970971
resource_data_to_update,
971972
filestore_resources,
972973
i,
974+
**kwargs,
973975
)
974976
statuses[updated_resource_name] = status
975977
else:
@@ -1117,6 +1119,7 @@ def update_in_hdx(
11171119
keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True.
11181120
updated_by_script (str): String to identify your script. Defaults to your user agent.
11191121
batch (str): A string you can specify to show which datasets are part of a single batch update
1122+
force_update (bool): Force files to be updated even if they haven't changed. Defaults to False.
11201123
11211124
Returns:
11221125
Dict: Status codes of resources
@@ -1184,6 +1187,7 @@ def create_in_hdx(
11841187
keep_crisis_tags (bool): Whether to keep existing crisis tags. Defaults to True.
11851188
updated_by_script (str): String to identify your script. Defaults to your user agent.
11861189
batch (str): A string you can specify to show which datasets are part of a single batch update
1190+
force_update (bool): Force files to be updated even if they haven't changed. Defaults to False.
11871191
11881192
Returns:
11891193
Dict: Status codes of resources

src/hdx/data/resource.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,16 +402,22 @@ def _resource_merge_hdx_update(
402402
**kwargs: See below
403403
operation (str): Operation to perform eg. patch. Defaults to update.
404404
data_updated (bool): If True, set last_modified to now. Defaults to False.
405+
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
405406
406407
Returns:
407408
int: Status code
408409
"""
409410
data_updated = kwargs.pop("data_updated", self._data_updated)
410411
files = {}
411412
if self._file_to_upload:
413+
force_update = kwargs.pop("force_update", False)
412414
file_format = self._old_data.get("format", "").lower()
413415
size, hash = get_size_and_hash(self._file_to_upload, file_format)
414-
if size == self.data.get("size") and hash == self.data.get("hash"):
416+
if (
417+
not force_update
418+
and size == self.data.get("size")
419+
and hash == self.data.get("hash")
420+
):
415421
logger.warning(
416422
f"Not updating filestore for resource {self.data['name']} as size and hash unchanged!"
417423
)
@@ -474,6 +480,7 @@ def update_in_hdx(self, **kwargs: Any) -> int:
474480
operation (str): Operation to perform eg. patch. Defaults to update.
475481
data_updated (bool): If True, set last_modified to now. Defaults to False.
476482
date_data_updated (datetime): Date to use for last_modified. Defaults to None.
483+
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
477484
478485
Returns:
479486
int: Status code
@@ -504,6 +511,7 @@ def create_in_hdx(self, **kwargs: Any) -> int:
504511
**kwargs: See below
505512
data_updated (bool): If True, set last_modified to now. Defaults to False.
506513
date_data_updated (datetime): Date to use for last_modified. Defaults to None.
514+
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
507515
508516
Returns:
509517
int: Status code

tests/hdx/api/utilities/test_filestore_helper.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ def test_dataset_update_filestore_resource(self, configuration, test_data):
2121
orig_resource = Resource(resource_data_copy)
2222
resource = Resource(resource_data_copy)
2323
filestore_resources = {}
24-
FilestoreHelper.dataset_update_filestore_resource(
24+
status = FilestoreHelper.dataset_update_filestore_resource(
2525
orig_resource, resource, filestore_resources, 0
2626
)
27+
assert status == 1
2728
assert resource == {
2829
"description": "My Resource",
2930
"format": "xlsx",
@@ -36,9 +37,10 @@ def test_dataset_update_filestore_resource(self, configuration, test_data):
3637
assert filestore_resources == {}
3738

3839
resource.set_file_to_upload(test_data)
39-
FilestoreHelper.dataset_update_filestore_resource(
40+
status = FilestoreHelper.dataset_update_filestore_resource(
4041
orig_resource, resource, filestore_resources, 0
4142
)
43+
assert status == 2
4244
assert resource == {
4345
"description": "My Resource",
4446
"format": "xlsx",
@@ -53,9 +55,15 @@ def test_dataset_update_filestore_resource(self, configuration, test_data):
5355
assert filestore_resources == {0: test_data}
5456

5557
filestore_resources = {}
56-
FilestoreHelper.dataset_update_filestore_resource(
58+
status = FilestoreHelper.dataset_update_filestore_resource(
59+
resource, resource, filestore_resources, 0, force_update=True
60+
)
61+
assert status == 2
62+
filestore_resources = {}
63+
status = FilestoreHelper.dataset_update_filestore_resource(
5764
resource, resource, filestore_resources, 0
5865
)
66+
assert status == 3
5967
assert resource == {
6068
"description": "My Resource",
6169
"format": "xlsx",
@@ -71,9 +79,10 @@ def test_dataset_update_filestore_resource(self, configuration, test_data):
7179

7280
resource._file_to_upload = None
7381
resource.mark_data_updated()
74-
FilestoreHelper.dataset_update_filestore_resource(
82+
status = FilestoreHelper.dataset_update_filestore_resource(
7583
orig_resource, resource, filestore_resources, 0
7684
)
85+
assert status == 0
7786
regex = r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d$"
7887
assert re.match(regex, resource["last_modified"])
7988
assert filestore_resources == {}

tests/hdx/data/test_resource.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,9 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data
878878
assert status == 3
879879
resource["id"] = "74b74ae1-df0c-4716-829f-4f939a046817"
880880
resource.set_file_to_upload(test_data, guess_format_from_suffix=True)
881+
status = resource.update_in_hdx(force_update=True)
882+
assert status == 2
883+
resource.set_file_to_upload(test_data, guess_format_from_suffix=True)
881884
status = resource.update_in_hdx()
882885
assert status == 4
883886

0 commit comments

Comments
 (0)