Skip to content

Commit 1a6d328

Browse files
committed
Return statuses
1 parent b6eb092 commit 1a6d328

8 files changed

Lines changed: 352 additions & 73 deletions

File tree

src/hdx/api/utilities/filestore_helper.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,24 @@ def check_filestore_resource(
4747
filestore_resources: Dict[int, str],
4848
resource_index: int,
4949
**kwargs: Any,
50-
) -> None:
50+
) -> int:
5151
"""Helper method to add new resource from dataset including filestore.
52+
Returns status code where:
53+
0 = no file to upload and last_modified set to now
54+
(resource creation or data_updated flag is True),
55+
1 = no file to upload and data_updated flag is False,
56+
2 = file uploaded to filestore (resource creation or either hash or size of file
57+
has changed),
58+
3 = file not uploaded to filestore (hash and size of file are the same),
59+
4 = file not uploaded to filestore (hash and size of file are the same) and the given last_modified is ignored
5260
5361
Args:
5462
resource_data_to_update (Resource): Updated resource from dataset
5563
filestore_resources (Dict[int, str]): Dictionary of index of resource to file to upload
5664
resource_index (int): Index of resource
5765
5866
Returns:
59-
None
67+
int: Status code
6068
"""
6169
cls.resource_check_required_fields(resource_data_to_update, **kwargs)
6270
file_to_upload = resource_data_to_update.get_file_to_upload()
@@ -67,6 +75,8 @@ def check_filestore_resource(
6775
resource_data_to_update["url"] = cls.temporary_url
6876
resource_data_to_update["size"] = size
6977
resource_data_to_update["hash"] = hash
78+
return 2
79+
return 0
7080

7181
@classmethod
7282
def dataset_update_filestore_resource(
@@ -75,8 +85,16 @@ def dataset_update_filestore_resource(
7585
resource_data_to_update: "Resource",
7686
filestore_resources: Dict[int, str],
7787
resource_index: int,
78-
) -> None:
88+
) -> int:
7989
"""Helper method to merge updated resource from dataset into HDX resource read from HDX including filestore.
90+
Returns status code where:
91+
0 = no file to upload and last_modified set to now
92+
(resource creation or data_updated flag is True),
93+
1 = no file to upload and data_updated flag is False,
94+
2 = file uploaded to filestore (resource creation or either hash or size of file
95+
has changed),
96+
3 = file not uploaded to filestore (hash and size of file are the same),
97+
4 = file not uploaded to filestore (hash and size of file are the same) and the given last_modified is ignored
8098
8199
Args:
82100
original_resource_data (Resource): Original resource from dataset
@@ -85,7 +103,7 @@ def dataset_update_filestore_resource(
85103
resource_index (int): Index of resource
86104
87105
Returns:
88-
None
106+
int: Status code
89107
"""
90108
file_to_upload = resource_data_to_update.get_file_to_upload()
91109
if file_to_upload:
@@ -97,15 +115,21 @@ def dataset_update_filestore_resource(
97115
# ensure last_modified is not updated if file hasn't changed
98116
if "last_modified" in resource_data_to_update:
99117
del resource_data_to_update["last_modified"]
118+
return 4
119+
else:
120+
return 3
100121
else:
101122
# update file if size or hash has changed
102123
filestore_resources[resource_index] = file_to_upload
103124
resource_data_to_update["url"] = cls.temporary_url
104125
resource_data_to_update["size"] = size
105126
resource_data_to_update["hash"] = hash
127+
return 2
106128
elif resource_data_to_update.is_marked_data_updated():
107129
# Should not output timezone info here
108130
resource_data_to_update["last_modified"] = now_utc_notz().isoformat(
109131
timespec="microseconds"
110132
)
111133
resource_data_to_update.data_updated = False
134+
return 0
135+
return 1

src/hdx/data/dataset.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -853,20 +853,24 @@ def _dataset_update_resources(
853853
remove_additional_resources: bool,
854854
match_resource_order: bool,
855855
**kwargs: Any,
856-
) -> Tuple[List, List, Dict, List]:
856+
) -> Tuple[List, List, Dict, List, Dict]:
857857
"""Helper method to compare new and existing dataset data returning
858858
resources to be updated, resources to be deleted, resources where files
859859
need to be uploaded to the filestore and if match_resource_order is
860860
True, then the new resource order.
861861
862+
Returns a tuple of resources information of the form:
863+
(resources_to_update, resources_to_delete, filestore_resources,
864+
new_resource_order, resource status codes)
865+
862866
Args:
863867
update_resources (bool): Whether to update resources
864868
match_resources_by_metadata (bool): Compare resource metadata rather than position in list
865869
remove_additional_resources (bool): Remove additional resources found in dataset (if updating)
866870
match_resource_order (bool): Match order of given resources by name
867871
868872
Returns:
869-
Tuple[List, List, Dict, List]: (resources_to_update, resources_to_delete, filestore_resources, new_resource_order)
873+
Tuple[List, List, Dict, List, Dict]: Tuple of resources information
870874
"""
871875
# When the user sets up a dataset, "data" contains the metadata. The
872876
# HDX dataset update process involves copying "data" to "old_data" and
@@ -877,6 +881,7 @@ def _dataset_update_resources(
877881
resources_to_update = []
878882
resources_to_delete = []
879883
filestore_resources = {}
884+
statuses = {}
880885
if update_resources and resources_metadata_to_update:
881886
if match_resources_by_metadata:
882887
(
@@ -896,26 +901,29 @@ def _dataset_update_resources(
896901
resource_data_to_update = resources_metadata_to_update[
897902
updated_resource_matches[match_index]
898903
]
904+
resource_name = resource["name"]
899905
logger.warning(f"Resource exists. Updating {resource['name']}")
900-
FilestoreHelper.dataset_update_filestore_resource(
906+
status = FilestoreHelper.dataset_update_filestore_resource(
901907
resource,
902908
resource_data_to_update,
903909
filestore_resources,
904910
i,
905911
)
912+
statuses[resource_name] = status
906913
resources_to_update.append(resource_data_to_update)
907914

908915
resource_index = len(self.resources)
909916
for updated_resource_index in updated_resource_no_matches:
910917
resource_data_to_update = resources_metadata_to_update[
911918
updated_resource_index
912919
]
913-
FilestoreHelper.check_filestore_resource(
920+
status = FilestoreHelper.check_filestore_resource(
914921
resource_data_to_update,
915922
filestore_resources,
916923
resource_index,
917924
**kwargs,
918925
)
926+
statuses[resource_data_to_update["name"]] = status
919927
resource_index = resource_index + 1
920928
resources_to_update.append(resource_data_to_update)
921929
if remove_additional_resources:
@@ -938,19 +946,21 @@ def _dataset_update_resources(
938946
logger.warning(
939947
f"Changing resource name to: {updated_resource_name}"
940948
)
941-
FilestoreHelper.dataset_update_filestore_resource(
949+
status = FilestoreHelper.dataset_update_filestore_resource(
942950
resource,
943951
resource_data_to_update,
944952
filestore_resources,
945953
i,
946954
)
955+
statuses[updated_resource_name] = status
947956
else:
948-
FilestoreHelper.check_filestore_resource(
957+
status = FilestoreHelper.check_filestore_resource(
949958
resource_data_to_update,
950959
filestore_resources,
951960
i,
952961
**kwargs,
953962
)
963+
statuses[resource_data_to_update["name"]] = status
954964
resources_to_update.append(resource_data_to_update)
955965

956966
for i, resource in enumerate(self.resources):
@@ -973,6 +983,7 @@ def _dataset_update_resources(
973983
resources_to_delete,
974984
filestore_resources,
975985
new_resource_order,
986+
statuses,
976987
)
977988

978989
def _dataset_hdx_update(
@@ -985,12 +996,15 @@ def _dataset_hdx_update(
985996
create_default_views: bool,
986997
hxl_update: bool,
987998
**kwargs: Any,
988-
) -> Dict:
999+
) -> Tuple[Dict, Dict]:
9891000
"""Helper method to compare new and existing dataset data, update
9901001
resources including those with files in the filestore, delete extra
9911002
resources if needed and update dataset data and save the dataset and
9921003
resources to HDX.
9931004
1005+
Returns a tuple of information of the form: (resource status codes,
1006+
dictionary of what gets passed to the revise call (for testing))
1007+
9941008
Args:
9951009
update_resources (bool): Whether to update resources
9961010
match_resources_by_metadata (bool): Compare resource metadata rather than position in list
@@ -1001,13 +1015,14 @@ def _dataset_hdx_update(
10011015
hxl_update (bool): Whether to call package_hxl_update.
10021016
10031017
Returns:
1004-
Dict: Dictionary of what gets passed to the revise call (for testing)
1018+
Tuple[Dict, Dict]: Tuple of (resource status codes, revise call info)
10051019
"""
10061020
(
10071021
resources_to_update,
10081022
resources_to_delete,
10091023
filestore_resources,
10101024
new_resource_order,
1025+
statuses,
10111026
) = self._dataset_update_resources(
10121027
update_resources,
10131028
match_resources_by_metadata,
@@ -1029,7 +1044,7 @@ def _dataset_hdx_update(
10291044
if not found:
10301045
self.old_data["tags"].append(tag)
10311046
self._prepare_hdx_call(self.old_data, kwargs)
1032-
return self._revise_dataset(
1047+
revise_call = self._revise_dataset(
10331048
keys_to_delete,
10341049
resources_to_update,
10351050
resources_to_delete,
@@ -1039,6 +1054,7 @@ def _dataset_hdx_update(
10391054
create_default_views=create_default_views,
10401055
**kwargs,
10411056
)
1057+
return statuses, revise_call
10421058

10431059
def update_in_hdx(
10441060
self,
@@ -1050,10 +1066,19 @@ def update_in_hdx(
10501066
create_default_views: bool = True,
10511067
hxl_update: bool = True,
10521068
**kwargs: Any,
1053-
) -> None:
1069+
) -> Dict:
10541070
"""Check if dataset exists in HDX and if so, update it. match_resources_by_metadata uses ids if they are
10551071
available, otherwise names only if names are unique or format in addition if not.
10561072
1073+
Returns a dictionary with key resource name and value status code:
1074+
0 = no file to upload and last_modified set to now
1075+
(resource creation or data_updated flag is True),
1076+
1 = no file to upload and data_updated flag is False,
1077+
2 = file uploaded to filestore (resource creation or either hash or size of file
1078+
has changed),
1079+
3 = file not uploaded to filestore (hash and size of file are the same),
1080+
4 = file not uploaded to filestore (hash and size of file are the same) and the given last_modified is ignored
1081+
10571082
Args:
10581083
update_resources (bool): Whether to update resources. Defaults to True.
10591084
match_resources_by_metadata (bool): Compare resource metadata rather than position in list. Defaults to True.
@@ -1068,7 +1093,7 @@ def update_in_hdx(
10681093
batch (str): A string you can specify to show which datasets are part of a single batch update
10691094
10701095
Returns:
1071-
None
1096+
Dict: Status codes of resources
10721097
"""
10731098
loaded = False
10741099
if "id" in self.data:
@@ -1081,7 +1106,7 @@ def update_in_hdx(
10811106
self._check_existing_object("dataset", "name")
10821107
if not self._dataset_load_from_hdx(self.data["name"]):
10831108
raise HDXError("No existing dataset to update!")
1084-
self._dataset_hdx_update(
1109+
statuses, _ = self._dataset_hdx_update(
10851110
update_resources=update_resources,
10861111
match_resources_by_metadata=match_resources_by_metadata,
10871112
keys_to_delete=keys_to_delete,
@@ -1092,6 +1117,7 @@ def update_in_hdx(
10921117
**kwargs,
10931118
)
10941119
logger.info(f"Updated {self.get_hdx_url()}")
1120+
return statuses
10951121

10961122
def create_in_hdx(
10971123
self,
@@ -1104,10 +1130,19 @@ def create_in_hdx(
11041130
create_default_views: bool = True,
11051131
hxl_update: bool = True,
11061132
**kwargs: Any,
1107-
) -> None:
1133+
) -> Dict:
11081134
"""Check if dataset exists in HDX and if so, update it, otherwise create it. match_resources_by_metadata uses
11091135
ids if they are available, otherwise names only if names are unique or format in addition if not.
11101136
1137+
Returns a dictionary with key resource name and value status code:
1138+
0 = no file to upload and last_modified set to now
1139+
(resource creation or data_updated flag is True),
1140+
1 = no file to upload and data_updated flag is False,
1141+
2 = file uploaded to filestore (resource creation or either hash or size of file
1142+
has changed),
1143+
3 = file not uploaded to filestore (hash and size of file are the same),
1144+
4 = file not uploaded to filestore (hash and size of file are the same) and the given last_modified is ignored
1145+
11111146
Args:
11121147
allow_no_resources (bool): Whether to allow no resources. Defaults to False.
11131148
update_resources (bool): Whether to update resources (if updating). Defaults to True.
@@ -1123,7 +1158,7 @@ def create_in_hdx(
11231158
batch (str): A string you can specify to show which datasets are part of a single batch update
11241159
11251160
Returns:
1126-
None
1161+
Dict: Status codes of resources
11271162
"""
11281163
if "ignore_check" not in kwargs: # allow ignoring of field checks
11291164
self.check_required_fields(allow_no_resources=allow_no_resources, **kwargs)
@@ -1139,7 +1174,7 @@ def create_in_hdx(
11391174
if self._dataset_load_from_hdx(self.data["name"]):
11401175
loadedid = self.data["name"]
11411176
if loadedid:
1142-
self._dataset_hdx_update(
1177+
statuses, _ = self._dataset_hdx_update(
11431178
update_resources=update_resources,
11441179
match_resources_by_metadata=match_resources_by_metadata,
11451180
keys_to_delete=keys_to_delete,
@@ -1150,14 +1185,16 @@ def create_in_hdx(
11501185
**kwargs,
11511186
)
11521187
logger.info(f"Updated {self.get_hdx_url()}")
1153-
return
1188+
return statuses
11541189

1190+
statuses = {}
11551191
filestore_resources = {}
11561192
if self.resources:
11571193
for i, resource in enumerate(self.resources):
1158-
FilestoreHelper.check_filestore_resource(
1194+
status = FilestoreHelper.check_filestore_resource(
11591195
resource, filestore_resources, i, **kwargs
11601196
)
1197+
statuses[resource["name"]] = status
11611198
self.unseparate_resources()
11621199
self._prepare_hdx_call(self.data, kwargs)
11631200
kwargs["operation"] = "create"
@@ -1178,6 +1215,7 @@ def create_in_hdx(
11781215
**kwargs,
11791216
)
11801217
logger.info(f"Created {self.get_hdx_url()}")
1218+
return statuses
11811219

11821220
def delete_from_hdx(self) -> None:
11831221
"""Deletes a dataset from HDX.

0 commit comments

Comments
 (0)