From 128767958b1b679619f8da5dd445b5a672a32c94 Mon Sep 17 00:00:00 2001 From: mcarans Date: Thu, 17 Jul 2025 13:38:54 +1200 Subject: [PATCH 1/6] Possible fix for double update issue --- pyproject.toml | 4 +-- requirements.txt | 38 ++++++++++++++--------------- src/hdx/data/dataset.py | 18 +++++++++----- tests/hdx/data/test_update_logic.py | 4 +-- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 22342485..ea9e4054 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,8 @@ dependencies = [ "ckanapi>=4.8", "defopt>=7.0.0", "email_validator", - "hdx-python-country>=3.9.4", - "hdx-python-utilities>=3.8.7", + "hdx-python-country>=3.9.6", + "hdx-python-utilities>=3.8.8", "libhxl>=5.2.2", "makefun", "quantulum3", diff --git a/requirements.txt b/requirements.txt index 4e1c0fcc..120b8c80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt annotated-types==0.7.0 # via pydantic -astdoc==1.2.1 +astdoc==1.3.0 # via mkapi attrs==25.3.0 # via @@ -12,11 +12,11 @@ attrs==25.3.0 # referencing babel==2.17.0 # via mkdocs-material -backrefs==5.8 +backrefs==5.9 # via mkdocs-material cachetools==5.5.2 # via google-auth -certifi==2025.6.15 +certifi==2025.7.14 # via requests cfgv==3.4.0 # via pre-commit @@ -32,7 +32,7 @@ click==8.2.1 # typer colorama==0.4.6 # via mkdocs-material -coverage==7.9.1 +coverage==7.9.2 # via pytest-cov defopt==7.0.0 # via hdx-python-api (pyproject.toml) @@ -64,9 +64,9 @@ google-auth-oauthlib==1.2.2 # via gspread gspread==6.2.1 # via hdx-python-api (pyproject.toml) -hdx-python-country==3.9.4 +hdx-python-country==3.9.6 # via hdx-python-api (pyproject.toml) -hdx-python-utilities==3.8.7 +hdx-python-utilities==3.8.8 # via # hdx-python-api (pyproject.toml) # hdx-python-country @@ -110,7 +110,7 @@ loguru==0.7.3 # via hdx-python-utilities makefun==1.16.0 # via hdx-python-api (pyproject.toml) -markdown==3.8 +markdown==3.8.2 # via # mkdocs # mkdocs-material @@ -129,7 +129,7 @@ mergedeep==1.3.4 # via # mkdocs # mkdocs-get-deps -mkapi==4.4.1 +mkapi==4.4.4 # via hdx-python-api (pyproject.toml) mkdocs==1.6.1 # via @@ -137,7 +137,7 @@ mkdocs==1.6.1 # mkdocs-material mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-material==9.6.14 +mkdocs-material==9.6.15 # via mkapi mkdocs-material-extensions==1.3.1 # via mkdocs-material @@ -147,7 +147,7 @@ nodeenv==1.9.1 # via pre-commit num2words==0.5.14 # via quantulum3 -oauthlib==3.2.2 +oauthlib==3.3.1 # via requests-oauthlib openpyxl==3.1.5 # via hdx-python-utilities @@ -159,7 +159,7 @@ paginate==0.5.7 # via mkdocs-material pathspec==0.12.1 # via mkdocs -petl==1.7.16 +petl==1.7.17 # via frictionless platformdirs==4.3.8 # via @@ -187,16 +187,16 @@ pydantic==2.11.7 # via frictionless pydantic-core==2.33.2 # via pydantic -pygments==2.19.1 +pygments==2.19.2 # via # mkdocs-material # pytest # rich -pymdown-extensions==10.15 +pymdown-extensions==10.16 # via mkdocs-material pyphonetics==0.5.3 # via hdx-python-utilities -pytest==8.4.0 +pytest==8.4.1 # via # hdx-python-api (pyproject.toml) # pytest-check @@ -253,7 +253,7 @@ rfc3986==2.0.0 # via frictionless rich==14.0.0 # via typer -rpds-py==0.25.1 +rpds-py==0.26.0 # via # jsonschema # referencing @@ -289,11 +289,11 @@ tenacity==9.1.2 # via hdx-python-country text-unidecode==1.3 # via python-slugify -typeguard==4.4.3 +typeguard==4.4.4 # via inflect typer==0.16.0 # via frictionless -typing-extensions==4.14.0 +typing-extensions==4.14.1 # via # frictionless # pydantic @@ -307,7 +307,7 @@ unidecode==1.4.0 # via # libhxl # pyphonetics -urllib3==2.4.0 +urllib3==2.5.0 # via # libhxl # requests @@ -325,7 +325,7 @@ xlrd3==1.1.0 # via libhxl xlsx2csv==0.8.4 # via hdx-python-utilities -xlsxwriter==3.2.3 +xlsxwriter==3.2.5 # via tableschema-to-template xlwt==1.3.0 # via hdx-python-utilities diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 7a46a6ee..869be282 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -652,7 +652,10 @@ def _prepare_hdx_call(self, data: Dict, kwargs: Any) -> None: kwargs["batch_mode"] = "DONT_GROUP" def _revise_filter( - self, dataset_data_to_update: Dict, keys_to_delete: ListTuple[str] + self, + dataset_data_to_update: Dict, + keys_to_delete: ListTuple[str], + resources_to_delete: ListTuple[int], ): """Returns the revise filter parameter adding to it any keys in the dataset metadata that are specified to be deleted. Also compare lists @@ -662,15 +665,18 @@ def _revise_filter( Args: dataset_data_to_update (Dict): Dataset data to be updated keys_to_delete (ListTuple[str]): List of top level metadata keys to delete + resources_to_delete (ListTuple[int]): List of indexes of resources to delete """ revise_filter = [] for key in keys_to_delete: revise_filter.append(f"-{key}") + for resource_no in resources_to_delete: + revise_filter.append(f"-resources__{resource_no}") for key, value in dataset_data_to_update.items(): - if not isinstance(value, list): - continue if key not in self.old_data: continue + if not isinstance(value, list): + continue orig_list = self.old_data[key] elements_to_remove = [] for i, orig_value in enumerate(orig_list): @@ -729,8 +735,6 @@ def _revise_files_to_upload_resource_deletions( if not self.is_requestable(): for resource_index in resources_to_delete: del resources_to_update[resource_index] - if resource_index == len(resources_to_update): - revise_filter.append(f"-resources__{resource_index}") new_fsresources = {} for index in filestore_resources: if index > resource_index: @@ -788,7 +792,9 @@ def _revise_dataset( ): # Whether or not CKAN should perform validation steps (checking fields present) dataset_data_to_update["skip_validation"] = kwargs["skip_validation"] dataset_data_to_update["state"] = "active" - revise_filter = self._revise_filter(dataset_data_to_update, keys_to_delete) + revise_filter = self._revise_filter( + dataset_data_to_update, keys_to_delete, resources_to_delete + ) files_to_upload = self._revise_files_to_upload_resource_deletions( revise_filter, resources_to_update, diff --git a/tests/hdx/data/test_update_logic.py b/tests/hdx/data/test_update_logic.py index 5a65b39b..5f107a97 100644 --- a/tests/hdx/data/test_update_logic.py +++ b/tests/hdx/data/test_update_logic.py @@ -166,7 +166,7 @@ def test_update_logic_1( hxl_update=False, test=True, ) - assert results["filter"] == [] + assert results["filter"] == ["-resources__13"] update = results["update"] del update["updated_by_script"] assert update == { @@ -1118,7 +1118,7 @@ def test_update_logic_3( hxl_update=False, test=True, ) - assert results["filter"] == [] + assert results["filter"] == ["-resources__2", "-resources__1"] update = results["update"] del update["updated_by_script"] assert update == { From f808b448037adc8c5b6b6a95b465258612eff03b Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 22 Jul 2025 13:35:32 +1200 Subject: [PATCH 2/6] Update requirements --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 120b8c80..a2799fa9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ coverage==7.9.2 # via pytest-cov defopt==7.0.0 # via hdx-python-api (pyproject.toml) -distlib==0.3.9 +distlib==0.4.0 # via virtualenv dnspython==2.7.0 # via email-validator @@ -96,7 +96,7 @@ jsonlines==4.0.0 # via hdx-python-utilities jsonpath-ng==1.7.0 # via libhxl -jsonschema==4.24.0 +jsonschema==4.25.0 # via # frictionless # tableschema-to-template @@ -313,7 +313,7 @@ urllib3==2.5.0 # requests validators==0.35.0 # via frictionless -virtualenv==20.31.2 +virtualenv==20.32.0 # via pre-commit watchdog==6.0.0 # via mkdocs From a9802e573ca05d66da5981906ed9a78e1eb6daa2 Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 22 Jul 2025 13:57:08 +1200 Subject: [PATCH 3/6] Use demo for ckan test --- tests/hdx/api/test_ckan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hdx/api/test_ckan.py b/tests/hdx/api/test_ckan.py index c765acf5..e91bf072 100644 --- a/tests/hdx/api/test_ckan.py +++ b/tests/hdx/api/test_ckan.py @@ -32,7 +32,7 @@ class TestCKAN: def configuration(self): hdx_key = getenv("HDX_KEY_TEST") Configuration._create( - hdx_site="stage", + hdx_site="demo", user_agent="test", hdx_key=hdx_key, ) From 4afc6eb3533d72fcd5b479dd29991a6a522b100d Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 22 Jul 2025 14:26:45 +1200 Subject: [PATCH 4/6] Reorder if tests --- src/hdx/data/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 869be282..576bef45 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -673,10 +673,10 @@ def _revise_filter( for resource_no in resources_to_delete: revise_filter.append(f"-resources__{resource_no}") for key, value in dataset_data_to_update.items(): - if key not in self.old_data: - continue if not isinstance(value, list): continue + if key not in self.old_data: + continue orig_list = self.old_data[key] elements_to_remove = [] for i, orig_value in enumerate(orig_list): From f79c4206e3a39a8e64bc0f7ca90d5964586ca655 Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 22 Jul 2025 14:31:37 +1200 Subject: [PATCH 5/6] Only delete resources if not requestable --- src/hdx/data/dataset.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 576bef45..c950309c 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -670,8 +670,9 @@ def _revise_filter( revise_filter = [] for key in keys_to_delete: revise_filter.append(f"-{key}") - for resource_no in resources_to_delete: - revise_filter.append(f"-resources__{resource_no}") + if not self.is_requestable(): + for resource_no in resources_to_delete: + revise_filter.append(f"-resources__{resource_no}") for key, value in dataset_data_to_update.items(): if not isinstance(value, list): continue @@ -716,7 +717,6 @@ def _revise_filter( def _revise_files_to_upload_resource_deletions( self, - revise_filter: List[str], resources_to_update: ListTuple["Resource"], resources_to_delete: ListTuple[int], filestore_resources: Dict[int, str], @@ -726,7 +726,6 @@ def _revise_files_to_upload_resource_deletions( reflect any deletions. Args: - revise_filter (List[str]): Keys and list elements to delete resources_to_update (ListTuple[Resource]): Resources to update resources_to_delete (ListTuple[int]): List of indexes of resources to delete filestore_resources (Dict[int, str]): List of (index of resources, file to upload) @@ -796,7 +795,6 @@ def _revise_dataset( dataset_data_to_update, keys_to_delete, resources_to_delete ) files_to_upload = self._revise_files_to_upload_resource_deletions( - revise_filter, resources_to_update, resources_to_delete, filestore_resources, From 04546ac05bb2638e9b3251e638695c5f14c909a2 Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 22 Jul 2025 14:38:36 +1200 Subject: [PATCH 6/6] Use consistent naming Update documentation --- src/hdx/data/dataset.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index c950309c..892d9e47 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -658,8 +658,8 @@ def _revise_filter( resources_to_delete: ListTuple[int], ): """Returns the revise filter parameter adding to it any keys in the - dataset metadata that are specified to be deleted. Also compare lists - in original and updated metadata to see if any have had elements + dataset metadata and resources that are specified to be deleted. Also compare + lists in original and updated metadata to see if any have had elements removed in which case these should be added to the filter Args: @@ -671,8 +671,8 @@ def _revise_filter( for key in keys_to_delete: revise_filter.append(f"-{key}") if not self.is_requestable(): - for resource_no in resources_to_delete: - revise_filter.append(f"-resources__{resource_no}") + for resource_index in resources_to_delete: + revise_filter.append(f"-resources__{resource_index}") for key, value in dataset_data_to_update.items(): if not isinstance(value, list): continue @@ -721,9 +721,8 @@ def _revise_files_to_upload_resource_deletions( resources_to_delete: ListTuple[int], filestore_resources: Dict[int, str], ): - """Returns the files to be uploaded and updates the revise filter with - any resources to be deleted also updating filestore_resources to - reflect any deletions. + """Returns the files to be uploaded and updates resources_to_update and + filestore_resources to reflect any deletions. Args: resources_to_update (ListTuple[Resource]): Resources to update