From fe9d576c7bceca97892ddfbd719fdca334ebad07 Mon Sep 17 00:00:00 2001
From: mcarans
Date: Tue, 12 Aug 2025 14:45:33 +1200
Subject: [PATCH 01/16] Hash files on upload

---
 requirements.txt         | 14 +++++++-------
 src/hdx/data/resource.py | 34 ++++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f869c233..f48c5249 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,7 +22,7 @@ cfgv==3.4.0
     # via pre-commit
 chardet==5.2.0
     # via frictionless
-charset-normalizer==3.4.2
+charset-normalizer==3.4.3
     # via requests
 ckanapi==4.8
     # via hdx-python-api (pyproject.toml)
@@ -32,7 +32,7 @@ click==8.2.1
     #   typer
 colorama==0.4.6
     # via mkdocs-material
-coverage==7.10.2
+coverage==7.10.3
     # via pytest-cov
 defopt==7.0.0
     # via hdx-python-api (pyproject.toml)
@@ -72,7 +72,7 @@ hdx-python-utilities==3.9.0
     #   hdx-python-country
 humanize==4.12.3
     # via frictionless
-identify==2.6.12
+identify==2.6.13
     # via pre-commit
 idna==3.10
     # via
@@ -115,9 +115,9 @@ markdown==3.8.2
     #   mkdocs
     #   mkdocs-material
     #   pymdown-extensions
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via rich
-marko==2.1.4
+marko==2.2.0
     # via frictionless
 markupsafe==3.0.2
     # via
@@ -175,7 +175,7 @@ ply==3.11
     #   libhxl
 pockets==0.9.1
     # via sphinxcontrib-napoleon
-pre-commit==4.2.0
+pre-commit==4.3.0
     # via hdx-python-api (pyproject.toml)
 pyasn1==0.6.1
     # via
@@ -253,7 +253,7 @@ rfc3986==2.0.0
     # via frictionless
 rich==14.1.0
     # via typer
-rpds-py==0.26.0
+rpds-py==0.27.0
     # via
     #   jsonschema
     #   referencing
diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py
index c1cbea73..34f6fbe1 100755
--- a/src/hdx/data/resource.py
+++ b/src/hdx/data/resource.py
@@ -1,5 +1,6 @@
 """Resource class containing all logic for creating, checking, and updating resources."""
 
+import hashlib
 import logging
 import warnings
 from datetime import datetime
@@ -358,15 +359,17 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None
         self.check_url_filetoupload()
         self._check_required_fields("resource", ignore_fields)
 
-    def _get_files(self) -> Dict:
-        """Return the files parameter for CKANAPI
+    def _get_hash(self) -> str:
+        """Return the hash of file to upload
 
         Returns:
-            Dict: files parameter for CKANAPI
+            str: Hash of file to upload
         """
-        if self.file_to_upload is None:
-            return {}
-        return {"upload": self.file_to_upload}
+        md5 = hashlib.md5()
+        f = open(self.file_to_upload, "rb")
+        while chunk := f.read(4096):
+            md5.update(chunk)
+        return md5.hexdigest()
 
     def _resource_merge_hdx_update(
         self,
@@ -384,14 +387,21 @@ def _resource_merge_hdx_update(
             None
         """
         data_updated = kwargs.pop("data_updated", self.data_updated)
-        if data_updated and not self.file_to_upload:
+        files = {}
+        if self.file_to_upload:
+            hash = self._get_hash()
+            if hash != self.data.get("hash"):  # update file if hash has changed
+                files["upload"] = self.file_to_upload
+                self.old_data["hash"] = hash
+        elif data_updated:
             # Should not output timezone info here
             self.old_data["last_modified"] = now_utc_notz().isoformat(
                 timespec="microseconds"
             )
             self.data_updated = False
-        # old_data will be merged into data in the next step
-        self._merge_hdx_update("resource", "id", self._get_files(), True, **kwargs)
+
+        # old_data will be merged into data in the next step
+        self._merge_hdx_update("resource", "id", files, True, **kwargs)
 
     def update_in_hdx(self, **kwargs: Any) -> None:
         """Check if resource exists in HDX and if so, update it. To indicate
@@ -441,7 +451,11 @@ def create_in_hdx(self, **kwargs: Any) -> None:
                 del self.data["url"]
             self._resource_merge_hdx_update(**kwargs)
         else:
-            self._save_to_hdx("create", "name", self._get_files(), True)
+            files = {}
+            if self.file_to_upload:
+                files["upload"] = self.file_to_upload
+                self.data["hash"] = self._get_hash()
+            self._save_to_hdx("create", "name", files, True)
 
     def delete_from_hdx(self) -> None:
         """Deletes a resource from HDX

From b6eb0922e55c989651b2e292c8be831a1ae72be4 Mon Sep 17 00:00:00 2001
From: mcarans
Date: Thu, 14 Aug 2025 11:20:06 +1200
Subject: [PATCH 02/16] Hash files before upload and compare hash and size

---
 requirements.txt                              |    2 +-
 src/hdx/api/utilities/filestore_helper.py     |   27 +-
 src/hdx/api/utilities/size_hash.py            |   43 +
 src/hdx/data/dataset.py                       |    5 +-
 src/hdx/data/resource.py                      |   33 +-
 ...-All-Africa-File_20170101-to-20170708.xlsx |  Bin
 .../expected_resources_to_update.json         |  172 ++-
 tests/hdx/api/test_ckan.py                    |   17 +-
 .../api/utilities/test_filestore_helper.py    |   35 +-
 tests/hdx/api/utilities/test_size_hash.py     |   24 +
 tests/hdx/conftest.py                         |    5 +
 tests/hdx/data/test_dataset_core.py           |    8 +-
 tests/hdx/data/test_dataset_noncore.py        |    8 +-
 tests/hdx/data/test_resource.py               |   22 +-
 tests/hdx/data/test_update_logic.py           | 1154 +++++++++++------
 15 files changed, 1037 insertions(+), 518 deletions(-)
 create mode 100644 src/hdx/api/utilities/size_hash.py
 rename tests/fixtures/{datastore => size_hash}/ACLED-All-Africa-File_20170101-to-20170708.xlsx (100%)
 create mode 100644 tests/hdx/api/utilities/test_size_hash.py

diff --git a/requirements.txt b/requirements.txt
index f48c5249..0a68fcbe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -313,7 +313,7 @@ urllib3==2.5.0
     #   requests
 validators==0.35.0
     # via frictionless
-virtualenv==20.33.1
+virtualenv==20.34.0
     # via pre-commit
 watchdog==6.0.0
     # via mkdocs
diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py
index 446f7acf..6428b10e 100755
--- a/src/hdx/api/utilities/filestore_helper.py
+++ b/src/hdx/api/utilities/filestore_helper.py
@@ -2,6 +2,7 @@
 
 from typing import TYPE_CHECKING, Any, Dict
 
+from hdx.api.utilities.size_hash import get_size_and_hash
 from hdx.utilities.dateparse import now_utc_notz
 
 if TYPE_CHECKING:
@@ -60,12 +61,17 @@ def check_filestore_resource(
         cls.resource_check_required_fields(resource_data_to_update, **kwargs)
         file_to_upload = resource_data_to_update.get_file_to_upload()
         if file_to_upload:
+            file_format = resource_data_to_update.get("format", "").lower()
+            size, hash = get_size_and_hash(file_to_upload, file_format)
             filestore_resources[resource_index] = file_to_upload
             resource_data_to_update["url"] = cls.temporary_url
+            resource_data_to_update["size"] = size
+            resource_data_to_update["hash"] = hash
 
     @classmethod
     def dataset_update_filestore_resource(
         cls,
+        original_resource_data: "Resource",
         resource_data_to_update: "Resource",
         filestore_resources: Dict[int, str],
         resource_index: int,
         """Helper method to merge updated resource from dataset into HDX resource read from HDX including filestore.
 
         Args:
+            original_resource_data (Resource): Original resource from dataset
             resource_data_to_update (Resource): Updated resource from dataset
             filestore_resources (Dict[int, str]): List of (index of resources, file to upload)
             resource_index (int): Index of resource
@@ -82,11 +89,21 @@
         """
         file_to_upload = resource_data_to_update.get_file_to_upload()
         if file_to_upload:
-            filestore_resources[resource_index] = file_to_upload
-            resource_data_to_update["url"] = cls.temporary_url
-
-        data_updated = resource_data_to_update.is_marked_data_updated()
-        if data_updated:
+            file_format = resource_data_to_update.get("format", "").lower()
+            size, hash = get_size_and_hash(file_to_upload, file_format)
+            if size == original_resource_data.get(
+                "size"
+            ) and hash == original_resource_data.get("hash"):
+                # ensure last_modified is not updated if file hasn't changed
+                if "last_modified" in resource_data_to_update:
+                    del resource_data_to_update["last_modified"]
+            else:
+                # update file if size or hash has changed
+                filestore_resources[resource_index] = file_to_upload
+                resource_data_to_update["url"] = cls.temporary_url
+                resource_data_to_update["size"] = size
+                resource_data_to_update["hash"] = hash
+        elif resource_data_to_update.is_marked_data_updated():
             # Should not output timezone info here
             resource_data_to_update["last_modified"] = now_utc_notz().isoformat(
                 timespec="microseconds"
             )
diff --git a/src/hdx/api/utilities/size_hash.py b/src/hdx/api/utilities/size_hash.py
new file mode 100644
index 00000000..fc5137da
--- /dev/null
+++ b/src/hdx/api/utilities/size_hash.py
@@ -0,0 +1,43 @@
+import hashlib
+from io import BytesIO
+from typing import Tuple
+
+from openpyxl import load_workbook
+
+
+def get_size_and_hash(file_to_upload: str, file_format: str) -> Tuple[int, str]:
+    """Return the size and hash of file to upload
+
+    Args:
+        file_to_upload: File to upload
+        file_format (str): File format
+
+    Returns:
+        Tuple[int, str]: Tuple (size, hash)
+    """
+    f = open(file_to_upload, "rb")
+    md5hash = hashlib.md5()
+    if file_format == "xlsx":
+        first_chunk = f.read(4096)
+        size = len(first_chunk)
+        signature = first_chunk[:4]
+        if signature == b"PK\x03\x04":  # xlsx
+            xlsxbuffer = bytearray(first_chunk)
+            while chunk := f.read(4096):
+                size += len(chunk)
+                xlsxbuffer.extend(chunk)
+            workbook = load_workbook(filename=BytesIO(xlsxbuffer), read_only=True)
+            for sheet_name in workbook.sheetnames:
+                sheet = workbook[sheet_name]
+                for cols in sheet.iter_rows(values_only=True):
+                    md5hash.update(bytes(str(cols), "utf-8"))
+            workbook.close()
+            return size, md5hash.hexdigest()
+        else:
+            md5hash.update(first_chunk)
+    else:
+        size = 0
+    while chunk := f.read(4096):
+        size += len(chunk)
+        md5hash.update(chunk)
+    return size, md5hash.hexdigest()
diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py
index cac3523e..87fd6a91 100755
--- a/src/hdx/data/dataset.py
+++ b/src/hdx/data/dataset.py
@@ -898,6 +898,7 @@
                 ]
                 logger.warning(f"Resource exists. Updating {resource['name']}")
                 FilestoreHelper.dataset_update_filestore_resource(
+                    resource,
                     resource_data_to_update,
                     filestore_resources,
                     i,
@@ -930,13 +931,15 @@
         ):
             if len(self.resources) > i:
                 updated_resource_name = resource_data_to_update["name"]
-                resource_name = self.resources[i]["name"]
+                resource = self.resources[i]
+                resource_name = resource["name"]
                 logger.warning(f"Resource exists. Updating {resource_name}")
                 if resource_name != updated_resource_name:
                     logger.warning(
                         f"Changing resource name to: {updated_resource_name}"
                     )
                 FilestoreHelper.dataset_update_filestore_resource(
+                    resource,
                     resource_data_to_update,
                     filestore_resources,
                     i,
diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py
index 34f6fbe1..c54dbea6 100755
--- a/src/hdx/data/resource.py
+++ b/src/hdx/data/resource.py
@@ -1,6 +1,5 @@
 """Resource class containing all logic for creating, checking, and updating resources."""
 
-import hashlib
 import logging
 import warnings
 from datetime import datetime
@@ -12,6 +11,7 @@
 from hdx.api.configuration import Configuration
 from hdx.api.utilities.date_helper import DateHelper
 from hdx.api.utilities.filestore_helper import FilestoreHelper
+from hdx.api.utilities.size_hash import get_size_and_hash
 from hdx.data.hdxobject import HDXError, HDXObject
 from hdx.data.resource_view import ResourceView
 from hdx.utilities.dateparse import now_utc, now_utc_notz, parse_date
@@ -359,18 +359,6 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None
         self.check_url_filetoupload()
         self._check_required_fields("resource", ignore_fields)
 
-    def _get_hash(self) -> str:
-        """Return the hash of file to upload
-
-        Returns:
-            str: Hash of file to upload
-        """
-        md5 = hashlib.md5()
-        f = open(self.file_to_upload, "rb")
-        while chunk := f.read(4096):
-            md5.update(chunk)
-        return md5.hexdigest()
-
     def _resource_merge_hdx_update(
         self,
         **kwargs: Any,
@@ -389,9 +377,16 @@ def _resource_merge_hdx_update(
         data_updated = kwargs.pop("data_updated", self.data_updated)
         files = {}
         if self.file_to_upload:
-            hash = self._get_hash()
-            if hash != self.data.get("hash"):  # update file if hash has changed
+            file_format = self.old_data.get("format", "").lower()
+            size, hash = get_size_and_hash(self.file_to_upload, file_format)
+            if size == self.data.get("size") and hash == self.data.get("hash"):
+                # ensure last_modified is not updated if file hasn't changed
+                if "last_modified" in self.data:
+                    del self.data["last_modified"]
+            else:
+                # update file if size or hash has changed
                 files["upload"] = self.file_to_upload
+                self.old_data["size"] = size
                 self.old_data["hash"] = hash
         elif data_updated:
             # Should not output timezone info here
@@ -403,7 +398,7 @@ def _resource_merge_hdx_update(
         # old_data will be merged into data in the next step
         self._merge_hdx_update("resource", "id", files, True, **kwargs)
 
-    def update_in_hdx(self, **kwargs: Any) -> None:
+    def update_in_hdx(self, **kwargs: Any) -> int:
         """Check if resource exists in HDX and if so, update it. To indicate
         that the data in an external resource (given by a URL) has been updated,
         set data_updated to True, which will result in the resource
@@ -418,7 +413,7 @@ def update_in_hdx(self, **kwargs: Any) -> None:
             date_data_updated (datetime): Date to use for last_modified. Default to None.
Returns: - None + int: Return status code """ self._check_load_existing_object("resource", "id") if self.file_to_upload and "url" in self.data: @@ -454,7 +449,9 @@ def create_in_hdx(self, **kwargs: Any) -> None: files = {} if self.file_to_upload: files["upload"] = self.file_to_upload - self.data["hash"] = self._get_hash() + self.data["size"], self.data["hash"] = get_size_and_hash( + self.file_to_upload, self.get_format() + ) self._save_to_hdx("create", "name", files, True) def delete_from_hdx(self) -> None: diff --git a/tests/fixtures/datastore/ACLED-All-Africa-File_20170101-to-20170708.xlsx b/tests/fixtures/size_hash/ACLED-All-Africa-File_20170101-to-20170708.xlsx similarity index 100% rename from tests/fixtures/datastore/ACLED-All-Africa-File_20170101-to-20170708.xlsx rename to tests/fixtures/size_hash/ACLED-All-Africa-File_20170101-to-20170708.xlsx diff --git a/tests/fixtures/update_dataset_resources/expected_resources_to_update.json b/tests/fixtures/update_dataset_resources/expected_resources_to_update.json index ba74a01b..b16f8f35 100644 --- a/tests/fixtures/update_dataset_resources/expected_resources_to_update.json +++ b/tests/fixtures/update_dataset_resources/expected_resources_to_update.json @@ -1,87 +1,85 @@ -[ - {}, - {}, - {}, - { - "dataset_preview_enabled": "False", - "description": "SDG 4 Global and Thematic data with HXL tags.\n\nIndicators: Adjusted attendance rate, Adjusted net attendance rate, Adjusted net enrolment rate, Administration of a nationally representative learning assessment in Grade 2 or 3 in mathematics, Administration of a nationally representative learning assessment in Grade 2 or 3 in reading, Administration of a nationally-representative learning assessment at the end of lower secondary education in mathematics, Administration of a nationally-representative learning assessment at the end of lower secondary education in reading, Administration of a nationally-representative learning assessment at the end of primary in mathematics, Administration of a nationally-representative learning assessment at the end of primary in reading, Adult literacy rate, Average teacher salary in lower secondary education relative to other professions requiring a comparable level of qualification, Average teacher salary in pre-primary education relative to other professions requiring a comparable level of qualification, Average teacher salary in primary education relative to other professions requiring a comparable level of qualification, Average teacher salary in upper secondary education relative to other professions requiring a comparable level of qualification, Completion rate, Educational attainment, Educational attainment rate, Elderly literacy rate, Expenditure on education as a percentage of total government expenditure, Government expenditure on education as a percentage of GDP, Gross attendance ratio for tertiary education, Gross enrolment ratio, Gross enrolment ratio for tertiary education, Gross intake ratio to the last grade of lower secondary general education, Gross intake ratio to the last grade of primary education, Initial government funding per lower secondary student, Initial government funding per lower secondary student as a percentage of GDP per capita, Initial government funding per pre-primary student, Initial government funding per pre-primary student as a percentage of GDP per capita, Initial government funding per primary student, Initial government funding per primary student as a percentage of GDP per capita, Initial government funding per 
secondary student, Initial government funding per secondary student as a percentage of GDP per capita, Initial government funding per tertiary student, Initial government funding per tertiary student as a percentage of GDP per capita, Initial government funding per upper secondary student, Initial government funding per upper secondary student as a percentage of GDP per capita, Initial household funding per primary student, Initial household funding per primary student as a percentage of GDP per capita, Initial household funding per secondary student, Initial household funding per secondary student as a percentage of GDP per capita, Initial household funding per tertiary student, Initial household funding per tertiary student as a percentage of GDP per capita, Literacy rate, Number of attacks on students, Number of years of compulsory pre-primary education guaranteed in legal frameworks, Number of years of compulsory primary and secondary education guaranteed in legal frameworks, Number of years of free pre-primary education guaranteed in legal frameworks, Number of years of free primary and secondary education guaranteed in legal frameworks, Out-of-school rate, Out-of-school rate for adolescents and youth of lower and upper secondary school age, Out-of-school rate for adolescents of lower secondary school age, Out-of-school rate for children, Out-of-school rate for children and adolescents of primary and lower secondary school age, Out-of-school rate for children of primary school age, Out-of-school rate for children one year younger than official age, Out-of-school rate for children one year younger than official primary entry age, Out-of-school rate for youth of upper secondary school age, Participants in literacy programmes as a % of the illiterate population, Participation rate of youth and adults in formal and non-formal education and training in the previous 12 months, Percentage of children under 5 years experiencing positive and stimulating home learning environments, Percentage of lower secondary schools providing life skills-based HIV and sexuality education, Percentage of primary schools providing life skills-based HIV and sexuality education, Percentage of pupils enrolled in lower secondary general education who are at least 2 years over-age for their current grade, Percentage of pupils enrolled in primary education who are at least 2 years over-age for their current grade, Percentage of qualified teachers in lower secondary education, Percentage of qualified teachers in pre-primary education, Percentage of qualified teachers in primary education, Percentage of qualified teachers in secondary education, Percentage of qualified teachers in upper secondary education, Percentage of students at the end of lower secondary education who have their first or home language as language of instruction, Percentage of students at the end of primary education who have their first or home language as language of instruction, Percentage of students experiencing bullying in the last 12 months in lower secondary education, Percentage of students experiencing bullying in the last 12 months in primary education, Percentage of students in early grades who have their first or home language as language of instruction, Percentage of students in lower secondary education who have their first or home language as language of instruction, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability, Percentage of students in lower 
secondary showing adequate understanding of issues relating to global citizenship and sustainability - Cognitive Dimension, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Freedom, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Gender equality, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Global-local thinking, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Multiculturalism, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Peace, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Social Justice, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability \u00e2\u20ac\u201c Non-cognitive Dimension \u00e2\u20ac\u201c Sustainable development, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Cognitive dimension, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Cognitive dimension \u00e2\u20ac\u201c adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Cognitive dimension \u00e2\u20ac\u201c adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Cognitive dimension \u00e2\u20ac\u201c adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension - Confidence, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension - Enjoyment, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Confidence, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Confidence \u00e2\u20ac\u201c adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Confidence \u00e2\u20ac\u201c adjusted location parity index, Percentage of students in lower secondary showing 
proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Confidence \u00e2\u20ac\u201c adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Enjoyment, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Enjoyment \u00e2\u20ac\u201c adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Enjoyment \u00e2\u20ac\u201c adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c Non-cognitive dimension \u00e2\u20ac\u201c Enjoyment \u00e2\u20ac\u201c adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience \u00e2\u20ac\u201c adjusted wealth parity index, Percentage of teachers in lower secondary education who received in-service training in the last 12 months by type of trained, Percentage of teachers in lower secondary education who received in-service training in the last 12 months by type of training, Percentage of teachers in primary education who received in-service training in the last 12 months by type of trained, Percentage of teachers in primary education who received in-service training in the last 12 months by type of training, Percentage of total aid to education allocated to least developed countries, Percentage of upper secondary schools providing life skills-based HIV and sexuality education, Percentage of youth/adults who have achieved at least a minimum level of proficiency in digital literacy skills, Proportion of 15- to 24-year-olds enrolled in vocational education, Proportion of 15-24 year-olds enrolled in vocational education, Proportion of children aged 24-59 months who are developmentally on track in health, Proportion of lower secondary schools with access to Internet for pedagogical purposes, Proportion of lower secondary schools with access to adapted infrastructure and materials for students with disabilities, Proportion of lower secondary schools with access to basic drinking water, Proportion of lower secondary schools with access to computers for pedagogical purposes, Proportion of lower secondary schools with access to electricity, Proportion of lower secondary schools with basic handwashing facilities, Proportion of lower secondary schools with single-sex basic sanitation facilities, Proportion of population achieving at least a fixed level of proficiency in functional literacy skills, Proportion of population achieving at least a fixed level of proficiency in functional numeracy skills, Proportion of primary schools with access to Internet for pedagogical purposes, Proportion of primary schools with access to adapted infrastructure and materials for students 
with disabilities, Proportion of primary schools with access to basic drinking water, Proportion of primary schools with access to computers for pedagogical purposes, Proportion of primary schools with access to electricity, Proportion of primary schools with basic handwashing facilities, Proportion of primary schools with single-sex basic sanitation facilities, Proportion of pupils enrolled in lower secondary general education who are at least 2 years over-age for their current grade, Proportion of pupils enrolled in primary education who are at least 2 years over-age for their current grade, Proportion of qualified teachers in lower secondary education, Proportion of qualified teachers in pre-primary education, Proportion of qualified teachers in primary education, Proportion of qualified teachers in secondary education, Proportion of qualified teachers in upper secondary education, Proportion of secondary schools with access to Internet for pedagogical purposes, Proportion of secondary schools with access to computers for pedagogical purposes, Proportion of students at the end of lower secondary achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of lower secondary achieving at least a minimum proficiency level in reading, Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in reading, Proportion of students at the end of primary achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of primary achieving at least a minimum proficiency level in reading, Proportion of students at the end of primary education achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of primary education achieving at least a minimum proficiency level in reading, Proportion of students in Grade 2 or 3 achieving at least a minimum proficiency level in mathematics, Proportion of students in Grade 2 or 3 achieving at least a minimum proficiency level in reading, Proportion of students\u00c2 at the end of primary education achieving at least a minimum proficiency level in reading, Proportion of teachers with the minimum required qualifications in lower secondary education, Proportion of teachers with the minimum required qualifications in pre-primary education, Proportion of teachers with the minimum required qualifications in primary education, Proportion of teachers with the minimum required qualifications in secondary education, Proportion of teachers with the minimum required qualifications in upper secondary education, Proportion of upper secondary schools with access to Internet for pedagogical purposes, Proportion of upper secondary schools with access to adapted infrastructure and materials for students with disabilities, Proportion of upper secondary schools with access to basic drinking water, Proportion of upper secondary schools with access to computers for pedagogical purposes, Proportion of upper secondary schools with access to electricity, Proportion of upper secondary schools with basic handwashing facilities, Proportion of upper secondary schools with single-sex basic sanitation facilities, Proportion of youth and adults who have Transferred files between a computer and other devices, Proportion of youth and adults who have connected and installed new devices, Proportion of youth and adults 
who have copied or moved a file or folder, Proportion of youth and adults who have created electronic presentations with presentation software, Proportion of youth and adults who have found, Proportion of youth and adults who have sent e-mails with attached files, Proportion of youth and adults who have transferred files between a computer and other devices, Proportion of youth and adults who have used basic arithmetic formulae in a spreadsheet, Proportion of youth and adults who have used copy and paste tools to duplicate or move information within a document, Proportion of youth and adults who have wrote a computer program using a specialised programming language, Pupil-qualified teacher ratio in lower secondary, Pupil-qualified teacher ratio in pre-primary education, Pupil-qualified teacher ratio in primary education, Pupil-qualified teacher ratio in secondary, Pupil-qualified teacher ratio in upper secondary, Pupil-trained teacher ratio in lower secondary education, Pupil-trained teacher ratio in pre-primary education, Pupil-trained teacher ratio in primary education, Pupil-trained teacher ratio in secondary education, Pupil-trained teacher ratio in upper secondary education, Teacher attrition rate from general secondary education, Teacher attrition rate from lower secondary education, Teacher attrition rate from pre-primary education, Teacher attrition rate from primary education, Teacher attrition rate from secondary education, Teacher attrition rate from upper secondary education, Teacher attrition rate from vocational secondary education, Volume of official development assistance flows for scholarships by sector and type of study, Youth literacy rate", - "format": "csv", - "name": "SDG 4 Global and Thematic data", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "SDG 4 Global and Thematic indicator list with HXL tags", - "format": "csv", - "name": "SDG 4 Global and Thematic indicator list", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "SDG 4 Global and Thematic metadata with HXL tags", - "format": "csv", - "name": "SDG 4 Global and Thematic metadata", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "Demographic and Socio-economic data with HXL tags.\n\nIndicators: DEC alternative conversion factor, Fertility rate, GDP, GDP at market prices, GDP deflator, GDP growth, GDP per capita, GNI, GNI per capita, General government total expenditure, Life expectancy at birth, Mortality rate, Official exchange rate, PPP conversion factor, Population aged 14 years or younger, Population aged 15-24 years, Population aged 25-64 years, Population aged 65 years or older, Population growth, Poverty headcount ratio at $3.20 a day, Prevalence of HIV, Price level ratio of PPP conversion factor, Rural population, Total debt service, Total population", - "format": "csv", - "name": "Demographic and Socio-economic data", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "Demographic and Socio-economic indicator list with HXL tags", - "format": "csv", - "name": "Demographic and Socio-economic indicator list", - "resource_type": "file.upload", - "url": 
"updated_by_file_upload_step", - "url_type": "upload" - }, - {}, - { - "dataset_preview_enabled": "False", - "description": "Other Policy Relevant Indicators data with HXL tags.\n\nIndicators: Adult illiterate population, Africa, All staff compensation as a percentage of total expenditure in lower secondary public institutions, All staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, All staff compensation as a percentage of total expenditure in pre-primary public institutions, All staff compensation as a percentage of total expenditure in primary public institutions, All staff compensation as a percentage of total expenditure in public institutions, All staff compensation as a percentage of total expenditure in secondary public institutions, All staff compensation as a percentage of total expenditure in tertiary public institutions, All staff compensation as a percentage of total expenditure in upper secondary public institutions, Asia, Capital expenditure as a percentage of total expenditure in lower secondary public institutions, Capital expenditure as a percentage of total expenditure in post-secondary non-tertiary public institutions, Capital expenditure as a percentage of total expenditure in pre-primary public institutions, Capital expenditure as a percentage of total expenditure in primary public institutions, Capital expenditure as a percentage of total expenditure in public institutions, Capital expenditure as a percentage of total expenditure in secondary public institutions, Capital expenditure as a percentage of total expenditure in tertiary public institutions, Capital expenditure as a percentage of total expenditure in upper-secondary public institutions, Caribbean and Central America, Current expenditure as a percentage of total expenditure in lower secondary public institutions, Current expenditure as a percentage of total expenditure in post-secondary non-tertiary public institutions, Current expenditure as a percentage of total expenditure in pre-primary public institutions, Current expenditure as a percentage of total expenditure in primary public institutions, Current expenditure as a percentage of total expenditure in public institutions, Current expenditure as a percentage of total expenditure in secondary public institutions, Current expenditure as a percentage of total expenditure in tertiary public institutions, Current expenditure as a percentage of total expenditure in upper-secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in lower secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in pre-primary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in primary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in tertiary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in upper secondary public institutions, Duration of compulsory education, 
End month of the academic school year, End of the academic school year, Enrolment in early childhood education, Enrolment in early childhood educational development programmes, Enrolment in lower secondary education, Enrolment in post-secondary non-tertiary education, Enrolment in pre-primary education, Enrolment in primary education, Enrolment in secondary education, Enrolment in tertiary education, Enrolment in upper secondary education, Europe, Expenditure on school books and teaching material as % of total expenditure in primary public institutions, Expenditure on school books and teaching material as % of total expenditure in secondary public institutions, Government expenditure on education, Government expenditure on education not specified by level, Government expenditure on lower secondary education, Government expenditure on lower secondary education as a percentage of GDP, Government expenditure on post-secondary non-tertiary education, Government expenditure on post-secondary non-tertiary education as a percentage of GDP, Government expenditure on pre-primary education, Government expenditure on pre-primary education as a percentage of GDP, Government expenditure on primary education, Government expenditure on primary education as a percentage of GDP, Government expenditure on secondary and post-secondary non-tertiary vocational education as a percentage of GDP, Government expenditure on secondary and post-secondary non-tertiary vocational education only, Government expenditure on secondary education, Government expenditure on secondary education as a percentage of GDP, Government expenditure on tertiary education, Government expenditure on tertiary education as a percentage of GDP, Government expenditure on upper secondary education, Government expenditure on upper secondary education as a percentage of GDP, Gross enrolment ratio, Gross graduation ratio from first degree programmes, Illiterate population, Illiterate youth population, Inbound internationally mobile students from Africa, Inbound internationally mobile students from Asia, Inbound internationally mobile students from Central Asia, Inbound internationally mobile students from Central and Eastern Europe, Inbound internationally mobile students from East Asia and the Pacific, Inbound internationally mobile students from Europe, Inbound internationally mobile students from Latin America and the Caribbean, Inbound internationally mobile students from North America, Inbound internationally mobile students from North America and Western Europe, Inbound internationally mobile students from Oceania, Inbound internationally mobile students from South America, Inbound internationally mobile students from South and West Asia, Inbound internationally mobile students from sub-Saharan Africa, Inbound internationally mobile students from the Arab States, Inbound internationally mobile students from the Caribbean and Central America, Inbound internationally mobile students from unknown continents, Inbound internationally mobile students from unknown regions, Inbound mobility rate, Mean years of schooling, Net flow of internationally mobile students, Net flow ratio of internationally mobile students, Non-teaching staff compensation as a percentage of total expenditure in lower secondary public institutions, Non-teaching staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Non-teaching staff compensation as a percentage of total expenditure in pre-primary public institutions, 
Non-teaching staff compensation as a percentage of total expenditure in primary public institutions, Non-teaching staff compensation as a percentage of total expenditure in public institutions, Non-teaching staff compensation as a percentage of total expenditure in secondary public institutions, Non-teaching staff compensation as a percentage of total expenditure in tertiary public institutions, Non-teaching staff compensation as a percentage of total expenditure in upper secondary public institutions, North America, Oceania, Official entrance age to compulsory education, Official entrance age to early childhood education, Official entrance age to early childhood educational development, Official entrance age to lower secondary education, Official entrance age to post-secondary non-tertiary education, Official entrance age to pre-primary education, Official entrance age to primary education, Official entrance age to upper secondary education, Out-of-school adolescents and youth of secondary school age, Out-of-school adolescents of lower secondary school age, Out-of-school children, Out-of-school children and adolescents of primary and lower secondary school age, Out-of-school children of primary school age, Out-of-school youth of upper secondary school age, Outbound internationally mobile tertiary students studying in Central Asia, Outbound internationally mobile tertiary students studying in Central and Eastern Europe, Outbound internationally mobile tertiary students studying in East Asia and the Pacific, Outbound internationally mobile tertiary students studying in Latin America and the Caribbean, Outbound internationally mobile tertiary students studying in North America and Western Europe, Outbound internationally mobile tertiary students studying in South and West Asia, Outbound internationally mobile tertiary students studying in sub-Saharan Africa, Outbound internationally mobile tertiary students studying in the Arab States, Outbound mobility ratio, Outbound mobility ratio to Central Asia, Outbound mobility ratio to Central and Eastern Europe, Outbound mobility ratio to East Asia and the Pacific, Outbound mobility ratio to Latin America and the Caribbean, Outbound mobility ratio to North America and Western Europe, Outbound mobility ratio to South and West Asia, Outbound mobility ratio to sub-Saharan Africa, Outbound mobility ratio to the Arab States, Percentage of enrolment in early childhood education programmes in private institutions, Percentage of enrolment in early childhood educational development programmes in private institutions, Percentage of enrolment in lower secondary education in private institutions, Percentage of enrolment in post-secondary non-tertiary education in private institutions, Percentage of enrolment in pre-primary education in private institutions, Percentage of enrolment in primary education in private institutions, Percentage of enrolment in secondary education in private institutions, Percentage of enrolment in tertiary education in private institutions, Percentage of enrolment in upper secondary education in private institutions, Percentage of graduates from Science, Percentage of graduates from programmes other than Science, Percentage of graduates from tertiary education graduating from Agriculture, Percentage of graduates from tertiary education graduating from Arts and Humanities programmes, Percentage of graduates from tertiary education graduating from Business, Percentage of graduates from tertiary education graduating from Education 
programmes, Percentage of graduates from tertiary education graduating from Engineering, Percentage of graduates from tertiary education graduating from Health and Welfare programmes, Percentage of graduates from tertiary education graduating from Information and Communication Technologies programmes, Percentage of graduates from tertiary education graduating from Natural Sciences, Percentage of graduates from tertiary education graduating from Services programmes, Percentage of graduates from tertiary education graduating from Social Sciences, Percentage of graduates from tertiary education graduating from programmes in unspecified fields, Percentage of teachers in lower secondary education who are female, Percentage of teachers in post-secondary non-tertiary education who are female, Percentage of teachers in pre-primary education who are female, Percentage of teachers in primary education who are female, Percentage of teachers in secondary education who are female, Percentage of teachers in tertiary education who are female, Percentage of teachers in upper secondary education who are female, Population of compulsory school age, Population of the official entrance age to primary education, Population of the official entrance age to secondary general education, Repeaters in Grade 1 of lower secondary general education, Repeaters in Grade 1 of primary education, Repeaters in Grade 2 of lower secondary general education, Repeaters in Grade 2 of primary education, Repeaters in Grade 3 of lower secondary general education, Repeaters in Grade 3 of primary education, Repeaters in Grade 4 of lower secondary general education, Repeaters in Grade 4 of primary education, Repeaters in Grade 5 of lower secondary general education, Repeaters in Grade 5 of primary education, Repeaters in Grade 6 of lower secondary general education, Repeaters in Grade 6 of primary education, Repeaters in Grade 7 of primary education, Repeaters in grade unknown of lower secondary general education, Repeaters in grade unknown of primary education, Repeaters in lower secondary general education, Repeaters in primary education, Repetition rate in Grade 1 of lower secondary general education, Repetition rate in Grade 1 of primary education, Repetition rate in Grade 2 of lower secondary general education, Repetition rate in Grade 2 of primary education, Repetition rate in Grade 3 of lower secondary general education, Repetition rate in Grade 3 of primary education, Repetition rate in Grade 4 of lower secondary general education, Repetition rate in Grade 4 of primary education, Repetition rate in Grade 5 of lower secondary general education, Repetition rate in Grade 5 of primary education, Repetition rate in Grade 6 of primary education, Repetition rate in Grade 7 of primary education, Repetition rate in lower secondary general education, Repetition rate in primary education, School age population, School life expectancy, Share of all students in lower secondary education enrolled in general programmes, Share of all students in lower secondary education enrolled in vocational programmes, Share of all students in post-secondary non-tertiary education enrolled in general programmes, Share of all students in post-secondary non-tertiary education enrolled in vocational programmes, Share of all students in secondary education enrolled in general programmes, Share of all students in secondary education enrolled in vocational programmes, Share of all students in upper secondary education enrolled in general programmes, Share of all 
students in upper secondary education enrolled in vocational programmes, South America, Start month of the academic school year, Start of the academic school year, Survival rate to Grade 4 of primary education, Survival rate to Grade 5 of primary education, Survival rate to the last grade of primary education, Teachers in early childhood educational development programmes, Teachers in lower secondary education, Teachers in post-secondary non-tertiary education, Teachers in pre-primary education, Teachers in primary education, Teachers in secondary education, Teachers in tertiary education ISCED 5 programmes, Teachers in tertiary education ISCED 6, Teachers in tertiary education programmes, Teachers in upper secondary education, Teaching staff compensation as a percentage of total expenditure in lower secondary public institutions, Teaching staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Teaching staff compensation as a percentage of total expenditure in pre-primary public institutions, Teaching staff compensation as a percentage of total expenditure in primary public institutions, Teaching staff compensation as a percentage of total expenditure in public institutions, Teaching staff compensation as a percentage of total expenditure in secondary public institutions, Teaching staff compensation as a percentage of total expenditure in tertiary public institutions, Teaching staff compensation as a percentage of total expenditure in upper secondary public institutions, Theoretical duration of early childhood education, Theoretical duration of early childhood educational development, Theoretical duration of lower secondary education, Theoretical duration of post-secondary non-tertiary education, Theoretical duration of pre-primary education, Theoretical duration of primary education, Theoretical duration of secondary education, Theoretical duration of upper secondary education, Total inbound internationally mobile students, Total net attendance rate, Total net enrolment rate, Total outbound internationally mobile tertiary students studying abroad, Youth illiterate population", - "format": "csv", - "name": "Other Policy Relevant Indicators data", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "Other Policy Relevant Indicators indicator list with HXL tags", - "format": "csv", - "name": "Other Policy Relevant Indicators indicator list", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "False", - "description": "Other Policy Relevant Indicators metadata with HXL tags", - "format": "csv", - "name": "Other Policy Relevant Indicators metadata", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - }, - { - "dataset_preview_enabled": "True", - "description": "Cut down data for QuickCharts", - "format": "csv", - "name": "QuickCharts-SDG 4 Global and Thematic data", - "resource_type": "file.upload", - "url": "updated_by_file_upload_step", - "url_type": "upload" - } -] +[{}, + {}, + {}, + {"dataset_preview_enabled": "False", + "description": "SDG 4 Global and Thematic data with HXL tags.\n\nIndicators: Adjusted attendance rate, Adjusted net attendance rate, Adjusted net enrolment rate, Administration of a nationally representative learning assessment in Grade 2 or 3 in mathematics, Administration of a 
nationally representative learning assessment in Grade 2 or 3 in reading, Administration of a nationally-representative learning assessment at the end of lower secondary education in mathematics, Administration of a nationally-representative learning assessment at the end of lower secondary education in reading, Administration of a nationally-representative learning assessment at the end of primary in mathematics, Administration of a nationally-representative learning assessment at the end of primary in reading, Adult literacy rate, Average teacher salary in lower secondary education relative to other professions requiring a comparable level of qualification, Average teacher salary in pre-primary education relative to other professions requiring a comparable level of qualification, Average teacher salary in primary education relative to other professions requiring a comparable level of qualification, Average teacher salary in upper secondary education relative to other professions requiring a comparable level of qualification, Completion rate, Educational attainment, Educational attainment rate, Elderly literacy rate, Expenditure on education as a percentage of total government expenditure, Government expenditure on education as a percentage of GDP, Gross attendance ratio for tertiary education, Gross enrolment ratio, Gross enrolment ratio for tertiary education, Gross intake ratio to the last grade of lower secondary general education, Gross intake ratio to the last grade of primary education, Initial government funding per lower secondary student, Initial government funding per lower secondary student as a percentage of GDP per capita, Initial government funding per pre-primary student, Initial government funding per pre-primary student as a percentage of GDP per capita, Initial government funding per primary student, Initial government funding per primary student as a percentage of GDP per capita, Initial government funding per secondary student, Initial government funding per secondary student as a percentage of GDP per capita, Initial government funding per tertiary student, Initial government funding per tertiary student as a percentage of GDP per capita, Initial government funding per upper secondary student, Initial government funding per upper secondary student as a percentage of GDP per capita, Initial household funding per primary student, Initial household funding per primary student as a percentage of GDP per capita, Initial household funding per secondary student, Initial household funding per secondary student as a percentage of GDP per capita, Initial household funding per tertiary student, Initial household funding per tertiary student as a percentage of GDP per capita, Literacy rate, Number of attacks on students, Number of years of compulsory pre-primary education guaranteed in legal frameworks, Number of years of compulsory primary and secondary education guaranteed in legal frameworks, Number of years of free pre-primary education guaranteed in legal frameworks, Number of years of free primary and secondary education guaranteed in legal frameworks, Out-of-school rate, Out-of-school rate for adolescents and youth of lower and upper secondary school age, Out-of-school rate for adolescents of lower secondary school age, Out-of-school rate for children, Out-of-school rate for children and adolescents of primary and lower secondary school age, Out-of-school rate for children of primary school age, Out-of-school rate for children one year younger than official age, 
Out-of-school rate for children one year younger than official primary entry age, Out-of-school rate for youth of upper secondary school age, Participants in literacy programmes as a % of the illiterate population, Participation rate of youth and adults in formal and non-formal education and training in the previous 12 months, Percentage of children under 5 years experiencing positive and stimulating home learning environments, Percentage of lower secondary schools providing life skills-based HIV and sexuality education, Percentage of primary schools providing life skills-based HIV and sexuality education, Percentage of pupils enrolled in lower secondary general education who are at least 2 years over-age for their current grade, Percentage of pupils enrolled in primary education who are at least 2 years over-age for their current grade, Percentage of qualified teachers in lower secondary education, Percentage of qualified teachers in pre-primary education, Percentage of qualified teachers in primary education, Percentage of qualified teachers in secondary education, Percentage of qualified teachers in upper secondary education, Percentage of students at the end of lower secondary education who have their first or home language as language of instruction, Percentage of students at the end of primary education who have their first or home language as language of instruction, Percentage of students experiencing bullying in the last 12 months in lower secondary education, Percentage of students experiencing bullying in the last 12 months in primary education, Percentage of students in early grades who have their first or home language as language of instruction, Percentage of students in lower secondary education who have their first or home language as language of instruction, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability - Cognitive Dimension, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Freedom, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Gender equality, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Global-local thinking, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Multiculturalism, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Peace, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Social Justice, Percentage of students in lower secondary showing adequate understanding of issues relating to global citizenship and sustainability – Non-cognitive Dimension – Sustainable development, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience, Percentage of students in lower secondary showing proficiency in knowledge of environmental science 
and geoscience – Cognitive dimension, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Cognitive dimension – adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Cognitive dimension – adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Cognitive dimension – adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension - Confidence, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension - Enjoyment, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Confidence, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Confidence – adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Confidence – adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Confidence – adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Enjoyment, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Enjoyment – adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Enjoyment – adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – Non-cognitive dimension – Enjoyment – adjusted wealth parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – adjusted gender parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – adjusted location parity index, Percentage of students in lower secondary showing proficiency in knowledge of environmental science and geoscience – adjusted wealth parity index, Percentage of teachers in lower secondary education who received in-service training in the last 12 months by type of trained, Percentage of teachers in lower secondary education who received in-service training in the last 12 months by type of training, Percentage of teachers in primary education who received in-service training in the last 12 months by type of trained, Percentage of teachers in primary education who received in-service training in the last 12 months by type of training, Percentage of total aid to education allocated to least developed countries, Percentage of upper secondary schools providing life skills-based HIV and sexuality education, Percentage of youth/adults who have achieved at least a minimum level of proficiency in digital literacy skills, Proportion of 15- to 24-year-olds enrolled in 
vocational education, Proportion of 15-24 year-olds enrolled in vocational education, Proportion of children aged 24-59 months who are developmentally on track in health, Proportion of lower secondary schools with access to Internet for pedagogical purposes, Proportion of lower secondary schools with access to adapted infrastructure and materials for students with disabilities, Proportion of lower secondary schools with access to basic drinking water, Proportion of lower secondary schools with access to computers for pedagogical purposes, Proportion of lower secondary schools with access to electricity, Proportion of lower secondary schools with basic handwashing facilities, Proportion of lower secondary schools with single-sex basic sanitation facilities, Proportion of population achieving at least a fixed level of proficiency in functional literacy skills, Proportion of population achieving at least a fixed level of proficiency in functional numeracy skills, Proportion of primary schools with access to Internet for pedagogical purposes, Proportion of primary schools with access to adapted infrastructure and materials for students with disabilities, Proportion of primary schools with access to basic drinking water, Proportion of primary schools with access to computers for pedagogical purposes, Proportion of primary schools with access to electricity, Proportion of primary schools with basic handwashing facilities, Proportion of primary schools with single-sex basic sanitation facilities, Proportion of pupils enrolled in lower secondary general education who are at least 2 years over-age for their current grade, Proportion of pupils enrolled in primary education who are at least 2 years over-age for their current grade, Proportion of qualified teachers in lower secondary education, Proportion of qualified teachers in pre-primary education, Proportion of qualified teachers in primary education, Proportion of qualified teachers in secondary education, Proportion of qualified teachers in upper secondary education, Proportion of secondary schools with access to Internet for pedagogical purposes, Proportion of secondary schools with access to computers for pedagogical purposes, Proportion of students at the end of lower secondary achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of lower secondary achieving at least a minimum proficiency level in reading, Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in reading, Proportion of students at the end of primary achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of primary achieving at least a minimum proficiency level in reading, Proportion of students at the end of primary education achieving at least a minimum proficiency level in mathematics, Proportion of students at the end of primary education achieving at least a minimum proficiency level in reading, Proportion of students in Grade 2 or 3 achieving at least a minimum proficiency level in mathematics, Proportion of students in Grade 2 or 3 achieving at least a minimum proficiency level in reading, Proportion of students at the end of primary education achieving at least a minimum proficiency level in reading, Proportion of teachers with the minimum required qualifications in lower secondary education, Proportion of 
teachers with the minimum required qualifications in pre-primary education, Proportion of teachers with the minimum required qualifications in primary education, Proportion of teachers with the minimum required qualifications in secondary education, Proportion of teachers with the minimum required qualifications in upper secondary education, Proportion of upper secondary schools with access to Internet for pedagogical purposes, Proportion of upper secondary schools with access to adapted infrastructure and materials for students with disabilities, Proportion of upper secondary schools with access to basic drinking water, Proportion of upper secondary schools with access to computers for pedagogical purposes, Proportion of upper secondary schools with access to electricity, Proportion of upper secondary schools with basic handwashing facilities, Proportion of upper secondary schools with single-sex basic sanitation facilities, Proportion of youth and adults who have Transferred files between a computer and other devices, Proportion of youth and adults who have connected and installed new devices, Proportion of youth and adults who have copied or moved a file or folder, Proportion of youth and adults who have created electronic presentations with presentation software, Proportion of youth and adults who have found, Proportion of youth and adults who have sent e-mails with attached files, Proportion of youth and adults who have transferred files between a computer and other devices, Proportion of youth and adults who have used basic arithmetic formulae in a spreadsheet, Proportion of youth and adults who have used copy and paste tools to duplicate or move information within a document, Proportion of youth and adults who have wrote a computer program using a specialised programming language, Pupil-qualified teacher ratio in lower secondary, Pupil-qualified teacher ratio in pre-primary education, Pupil-qualified teacher ratio in primary education, Pupil-qualified teacher ratio in secondary, Pupil-qualified teacher ratio in upper secondary, Pupil-trained teacher ratio in lower secondary education, Pupil-trained teacher ratio in pre-primary education, Pupil-trained teacher ratio in primary education, Pupil-trained teacher ratio in secondary education, Pupil-trained teacher ratio in upper secondary education, Teacher attrition rate from general secondary education, Teacher attrition rate from lower secondary education, Teacher attrition rate from pre-primary education, Teacher attrition rate from primary education, Teacher attrition rate from secondary education, Teacher attrition rate from upper secondary education, Teacher attrition rate from vocational secondary education, Volume of official development assistance flows for scholarships by sector and type of study, Youth literacy rate", + "format": "csv", + "hash": "ddb03f009656fb4974201b39f3f27203", + "name": "SDG 4 Global and Thematic data", + "resource_type": "file.upload", + "size": 230593, + "url": "updated_by_file_upload_step", + "url_type": "upload"}, + {"dataset_preview_enabled": "False", + "description": "SDG 4 Global and Thematic indicator list with HXL tags", + "format": "csv", + "hash": "72bdc3d7c710de2c28d08acbd3ad75f9", + "name": "SDG 4 Global and Thematic indicator list", + "resource_type": "file.upload", + "size": 216041, + "url": "updated_by_file_upload_step", + "url_type": "upload"}, + {"dataset_preview_enabled": "False", + "description": "SDG 4 Global and Thematic metadata with HXL tags", + "format": "csv", + "hash": 
"9aef656749f15f8a400e8388f251b760", + "name": "SDG 4 Global and Thematic metadata", + "resource_type": "file.upload", + "size": 842237, + "url": "updated_by_file_upload_step", + "url_type": "upload"}, + {"dataset_preview_enabled": "False", + "description": "Demographic and Socio-economic data with HXL tags.\n\nIndicators: DEC alternative conversion factor, Fertility rate, GDP, GDP at market prices, GDP deflator, GDP growth, GDP per capita, GNI, GNI per capita, General government total expenditure, Life expectancy at birth, Mortality rate, Official exchange rate, PPP conversion factor, Population aged 14 years or younger, Population aged 15-24 years, Population aged 25-64 years, Population aged 65 years or older, Population growth, Poverty headcount ratio at $3.20 a day, Prevalence of HIV, Price level ratio of PPP conversion factor, Rural population, Total debt service, Total population", + "format": "csv", + "hash": "47c998b649181ca10bdd7d93b508f97d", + "name": "Demographic and Socio-economic data", + "resource_type": "file.upload", + "size": 50900, + "url": "updated_by_file_upload_step", + "url_type": "upload"}, + {"dataset_preview_enabled": "False", + "description": "Demographic and Socio-economic indicator list with HXL tags", + "format": "csv", + "hash": "d3bf9d79aa8af9e2b611d35ca4f69ef1", + "name": "Demographic and Socio-economic indicator list", + "resource_type": "file.upload", + "size": 2002, + "url": "updated_by_file_upload_step", + "url_type": "upload"}, + {}, + {"dataset_preview_enabled": "False", + "description": "Other Policy Relevant Indicators data with HXL tags.\n\nIndicators: Adult illiterate population, Africa, All staff compensation as a percentage of total expenditure in lower secondary public institutions, All staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, All staff compensation as a percentage of total expenditure in pre-primary public institutions, All staff compensation as a percentage of total expenditure in primary public institutions, All staff compensation as a percentage of total expenditure in public institutions, All staff compensation as a percentage of total expenditure in secondary public institutions, All staff compensation as a percentage of total expenditure in tertiary public institutions, All staff compensation as a percentage of total expenditure in upper secondary public institutions, Asia, Capital expenditure as a percentage of total expenditure in lower secondary public institutions, Capital expenditure as a percentage of total expenditure in post-secondary non-tertiary public institutions, Capital expenditure as a percentage of total expenditure in pre-primary public institutions, Capital expenditure as a percentage of total expenditure in primary public institutions, Capital expenditure as a percentage of total expenditure in public institutions, Capital expenditure as a percentage of total expenditure in secondary public institutions, Capital expenditure as a percentage of total expenditure in tertiary public institutions, Capital expenditure as a percentage of total expenditure in upper-secondary public institutions, Caribbean and Central America, Current expenditure as a percentage of total expenditure in lower secondary public institutions, Current expenditure as a percentage of total expenditure in post-secondary non-tertiary public institutions, Current expenditure as a percentage of total expenditure in pre-primary public institutions, Current expenditure as a percentage of 
total expenditure in primary public institutions, Current expenditure as a percentage of total expenditure in public institutions, Current expenditure as a percentage of total expenditure in secondary public institutions, Current expenditure as a percentage of total expenditure in tertiary public institutions, Current expenditure as a percentage of total expenditure in upper-secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in lower secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in pre-primary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in primary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in secondary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in tertiary public institutions, Current expenditure other than staff compensation as a percentage of total expenditure in upper secondary public institutions, Duration of compulsory education, End month of the academic school year, End of the academic school year, Enrolment in early childhood education, Enrolment in early childhood educational development programmes, Enrolment in lower secondary education, Enrolment in post-secondary non-tertiary education, Enrolment in pre-primary education, Enrolment in primary education, Enrolment in secondary education, Enrolment in tertiary education, Enrolment in upper secondary education, Europe, Expenditure on school books and teaching material as % of total expenditure in primary public institutions, Expenditure on school books and teaching material as % of total expenditure in secondary public institutions, Government expenditure on education, Government expenditure on education not specified by level, Government expenditure on lower secondary education, Government expenditure on lower secondary education as a percentage of GDP, Government expenditure on post-secondary non-tertiary education, Government expenditure on post-secondary non-tertiary education as a percentage of GDP, Government expenditure on pre-primary education, Government expenditure on pre-primary education as a percentage of GDP, Government expenditure on primary education, Government expenditure on primary education as a percentage of GDP, Government expenditure on secondary and post-secondary non-tertiary vocational education as a percentage of GDP, Government expenditure on secondary and post-secondary non-tertiary vocational education only, Government expenditure on secondary education, Government expenditure on secondary education as a percentage of GDP, Government expenditure on tertiary education, Government expenditure on tertiary education as a percentage of GDP, Government expenditure on upper secondary education, Government expenditure on upper secondary education as a percentage of GDP, Gross enrolment ratio, Gross graduation ratio from first degree programmes, Illiterate population, Illiterate youth population, Inbound internationally mobile students from Africa, Inbound internationally mobile students from Asia, Inbound internationally mobile students 
from Central Asia, Inbound internationally mobile students from Central and Eastern Europe, Inbound internationally mobile students from East Asia and the Pacific, Inbound internationally mobile students from Europe, Inbound internationally mobile students from Latin America and the Caribbean, Inbound internationally mobile students from North America, Inbound internationally mobile students from North America and Western Europe, Inbound internationally mobile students from Oceania, Inbound internationally mobile students from South America, Inbound internationally mobile students from South and West Asia, Inbound internationally mobile students from sub-Saharan Africa, Inbound internationally mobile students from the Arab States, Inbound internationally mobile students from the Caribbean and Central America, Inbound internationally mobile students from unknown continents, Inbound internationally mobile students from unknown regions, Inbound mobility rate, Mean years of schooling, Net flow of internationally mobile students, Net flow ratio of internationally mobile students, Non-teaching staff compensation as a percentage of total expenditure in lower secondary public institutions, Non-teaching staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Non-teaching staff compensation as a percentage of total expenditure in pre-primary public institutions, Non-teaching staff compensation as a percentage of total expenditure in primary public institutions, Non-teaching staff compensation as a percentage of total expenditure in public institutions, Non-teaching staff compensation as a percentage of total expenditure in secondary public institutions, Non-teaching staff compensation as a percentage of total expenditure in tertiary public institutions, Non-teaching staff compensation as a percentage of total expenditure in upper secondary public institutions, North America, Oceania, Official entrance age to compulsory education, Official entrance age to early childhood education, Official entrance age to early childhood educational development, Official entrance age to lower secondary education, Official entrance age to post-secondary non-tertiary education, Official entrance age to pre-primary education, Official entrance age to primary education, Official entrance age to upper secondary education, Out-of-school adolescents and youth of secondary school age, Out-of-school adolescents of lower secondary school age, Out-of-school children, Out-of-school children and adolescents of primary and lower secondary school age, Out-of-school children of primary school age, Out-of-school youth of upper secondary school age, Outbound internationally mobile tertiary students studying in Central Asia, Outbound internationally mobile tertiary students studying in Central and Eastern Europe, Outbound internationally mobile tertiary students studying in East Asia and the Pacific, Outbound internationally mobile tertiary students studying in Latin America and the Caribbean, Outbound internationally mobile tertiary students studying in North America and Western Europe, Outbound internationally mobile tertiary students studying in South and West Asia, Outbound internationally mobile tertiary students studying in sub-Saharan Africa, Outbound internationally mobile tertiary students studying in the Arab States, Outbound mobility ratio, Outbound mobility ratio to Central Asia, Outbound mobility ratio to Central and Eastern Europe, Outbound mobility ratio to East Asia 
and the Pacific, Outbound mobility ratio to Latin America and the Caribbean, Outbound mobility ratio to North America and Western Europe, Outbound mobility ratio to South and West Asia, Outbound mobility ratio to sub-Saharan Africa, Outbound mobility ratio to the Arab States, Percentage of enrolment in early childhood education programmes in private institutions, Percentage of enrolment in early childhood educational development programmes in private institutions, Percentage of enrolment in lower secondary education in private institutions, Percentage of enrolment in post-secondary non-tertiary education in private institutions, Percentage of enrolment in pre-primary education in private institutions, Percentage of enrolment in primary education in private institutions, Percentage of enrolment in secondary education in private institutions, Percentage of enrolment in tertiary education in private institutions, Percentage of enrolment in upper secondary education in private institutions, Percentage of graduates from Science, Percentage of graduates from programmes other than Science, Percentage of graduates from tertiary education graduating from Agriculture, Percentage of graduates from tertiary education graduating from Arts and Humanities programmes, Percentage of graduates from tertiary education graduating from Business, Percentage of graduates from tertiary education graduating from Education programmes, Percentage of graduates from tertiary education graduating from Engineering, Percentage of graduates from tertiary education graduating from Health and Welfare programmes, Percentage of graduates from tertiary education graduating from Information and Communication Technologies programmes, Percentage of graduates from tertiary education graduating from Natural Sciences, Percentage of graduates from tertiary education graduating from Services programmes, Percentage of graduates from tertiary education graduating from Social Sciences, Percentage of graduates from tertiary education graduating from programmes in unspecified fields, Percentage of teachers in lower secondary education who are female, Percentage of teachers in post-secondary non-tertiary education who are female, Percentage of teachers in pre-primary education who are female, Percentage of teachers in primary education who are female, Percentage of teachers in secondary education who are female, Percentage of teachers in tertiary education who are female, Percentage of teachers in upper secondary education who are female, Population of compulsory school age, Population of the official entrance age to primary education, Population of the official entrance age to secondary general education, Repeaters in Grade 1 of lower secondary general education, Repeaters in Grade 1 of primary education, Repeaters in Grade 2 of lower secondary general education, Repeaters in Grade 2 of primary education, Repeaters in Grade 3 of lower secondary general education, Repeaters in Grade 3 of primary education, Repeaters in Grade 4 of lower secondary general education, Repeaters in Grade 4 of primary education, Repeaters in Grade 5 of lower secondary general education, Repeaters in Grade 5 of primary education, Repeaters in Grade 6 of lower secondary general education, Repeaters in Grade 6 of primary education, Repeaters in Grade 7 of primary education, Repeaters in grade unknown of lower secondary general education, Repeaters in grade unknown of primary education, Repeaters in lower secondary general education, Repeaters in primary education, 
Repetition rate in Grade 1 of lower secondary general education, Repetition rate in Grade 1 of primary education, Repetition rate in Grade 2 of lower secondary general education, Repetition rate in Grade 2 of primary education, Repetition rate in Grade 3 of lower secondary general education, Repetition rate in Grade 3 of primary education, Repetition rate in Grade 4 of lower secondary general education, Repetition rate in Grade 4 of primary education, Repetition rate in Grade 5 of lower secondary general education, Repetition rate in Grade 5 of primary education, Repetition rate in Grade 6 of primary education, Repetition rate in Grade 7 of primary education, Repetition rate in lower secondary general education, Repetition rate in primary education, School age population, School life expectancy, Share of all students in lower secondary education enrolled in general programmes, Share of all students in lower secondary education enrolled in vocational programmes, Share of all students in post-secondary non-tertiary education enrolled in general programmes, Share of all students in post-secondary non-tertiary education enrolled in vocational programmes, Share of all students in secondary education enrolled in general programmes, Share of all students in secondary education enrolled in vocational programmes, Share of all students in upper secondary education enrolled in general programmes, Share of all students in upper secondary education enrolled in vocational programmes, South America, Start month of the academic school year, Start of the academic school year, Survival rate to Grade 4 of primary education, Survival rate to Grade 5 of primary education, Survival rate to the last grade of primary education, Teachers in early childhood educational development programmes, Teachers in lower secondary education, Teachers in post-secondary non-tertiary education, Teachers in pre-primary education, Teachers in primary education, Teachers in secondary education, Teachers in tertiary education ISCED 5 programmes, Teachers in tertiary education ISCED 6, Teachers in tertiary education programmes, Teachers in upper secondary education, Teaching staff compensation as a percentage of total expenditure in lower secondary public institutions, Teaching staff compensation as a percentage of total expenditure in post-secondary non-tertiary public institutions, Teaching staff compensation as a percentage of total expenditure in pre-primary public institutions, Teaching staff compensation as a percentage of total expenditure in primary public institutions, Teaching staff compensation as a percentage of total expenditure in public institutions, Teaching staff compensation as a percentage of total expenditure in secondary public institutions, Teaching staff compensation as a percentage of total expenditure in tertiary public institutions, Teaching staff compensation as a percentage of total expenditure in upper secondary public institutions, Theoretical duration of early childhood education, Theoretical duration of early childhood educational development, Theoretical duration of lower secondary education, Theoretical duration of post-secondary non-tertiary education, Theoretical duration of pre-primary education, Theoretical duration of primary education, Theoretical duration of secondary education, Theoretical duration of upper secondary education, Total inbound internationally mobile students, Total net attendance rate, Total net enrolment rate, Total outbound internationally mobile tertiary students studying 
abroad, Youth illiterate population",
+ "format": "csv",
+ "hash": "13f979b415ff4797fb6039b70aaf1f9c",
+ "name": "Other Policy Relevant Indicators data",
+ "resource_type": "file.upload",
+ "size": 319924,
+ "url": "updated_by_file_upload_step",
+ "url_type": "upload"},
+ {"dataset_preview_enabled": "False",
+ "description": "Other Policy Relevant Indicators indicator list with HXL tags",
+ "format": "csv",
+ "hash": "a0dfe4211d1c61c399f37cef91754bdc",
+ "name": "Other Policy Relevant Indicators indicator list",
+ "resource_type": "file.upload",
+ "size": 92805,
+ "url": "updated_by_file_upload_step",
+ "url_type": "upload"},
+ {"dataset_preview_enabled": "False",
+ "description": "Other Policy Relevant Indicators metadata with HXL tags",
+ "format": "csv",
+ "hash": "a697e87f21e61327c380c434a4beb95b",
+ "name": "Other Policy Relevant Indicators metadata",
+ "resource_type": "file.upload",
+ "size": 290790,
+ "url": "updated_by_file_upload_step",
+ "url_type": "upload"},
+ {"dataset_preview_enabled": "True",
+ "description": "Cut down data for QuickCharts",
+ "format": "csv",
+ "hash": "b637c24a0bb7d91ab7ad7df682064776",
+ "name": "QuickCharts-SDG 4 Global and Thematic data",
+ "resource_type": "file.upload",
+ "size": 2387,
+ "url": "updated_by_file_upload_step",
+ "url_type": "upload"}]
diff --git a/tests/hdx/api/test_ckan.py b/tests/hdx/api/test_ckan.py
index e91bf072..f0a0f9fa 100644
--- a/tests/hdx/api/test_ckan.py
+++ b/tests/hdx/api/test_ckan.py
@@ -46,10 +46,6 @@ def configuration(self):
     def datasetmetadata(self):
         return join("tests", "fixtures", "CKAN", "hdx_dataset_static.yaml")
 
-    @pytest.fixture(scope="function")
-    def testdata(self):
-        return join("tests", "fixtures", "test_data.csv")
-
     @pytest.fixture(scope="class")
     def params(self):
         return {
@@ -101,7 +97,7 @@ def setup_teardown_folder(self, configuration, gclient, params):
     def test_create_dataset(
         self,
         datasetmetadata,
-        testdata,
+        test_data,
         setup_teardown_folder,
         params,
     ):
@@ -156,7 +152,7 @@ def create_resource():
             filestore = resource_no % 2 == 0
             if filestore:
                 resource.set_format("csv")
-                resource.set_file_to_upload(testdata)
+                resource.set_file_to_upload(test_data)
             else:
                 wks, url = create_gsheet(
                     "resource1",
@@ -217,7 +213,7 @@ def create_resource():
         resources.pop()
         gsheet_resource = resources[5]
         gsheet_resource.set_format("csv")
-        gsheet_resource.set_file_to_upload(testdata)
+        gsheet_resource.set_file_to_upload(test_data)
         for resource in resources:
             del resource["package_id"]
         dataset.add_update_resources(resources)
@@ -249,6 +245,8 @@ def create_resource():
                 assert "humdata" not in updated_resource["url"]
             else:
                 assert "humdata" in updated_resource["url"]
+            assert updated_resource.get("size") == resource.get("size")
+            assert updated_resource.get("hash") == resource.get("hash")
 
         # modify dataset again starting with existing dataset
         title = "HDX Python API test changed again"
@@ -260,7 +258,7 @@ def create_resource():
         countryiso3s.append("YEM")
         dataset.add_country_location("YEM")
         dataset.delete_resource(updated_resources[5])
-        updated_resources[0].set_file_to_upload(testdata)
+        updated_resources[0].set_file_to_upload(test_data)
         create_resource()
 
         resources = dataset.get_resources()
@@ -294,6 +292,9 @@ def create_resource():
                 assert "humdata" not in updated_resource["url"]
             else:
                 assert "humdata" in updated_resource["url"]
+            if i != 7:
+                assert updated_resource.get("size") == resource.get("size")
+                assert updated_resource.get("hash") == resource.get("hash")
 
         # tear down
         dataset.delete_from_hdx()
diff --git a/tests/hdx/api/utilities/test_filestore_helper.py b/tests/hdx/api/utilities/test_filestore_helper.py
index 821f427a..aa5a6122 100644
--- a/tests/hdx/api/utilities/test_filestore_helper.py
+++ b/tests/hdx/api/utilities/test_filestore_helper.py
@@ -16,12 +16,13 @@ class TestFilestoreHelper:
-    def test_dataset_update_filestore_resource(self, configuration):
+    def test_dataset_update_filestore_resource(self, configuration, test_data):
         resource_data_copy = copy.deepcopy(resource_data)
+        orig_resource = Resource(resource_data_copy)
         resource = Resource(resource_data_copy)
         filestore_resources = {}
         FilestoreHelper.dataset_update_filestore_resource(
-            resource, filestore_resources, 0
+            orig_resource, resource, filestore_resources, 0
         )
         assert resource == {
             "api_type": "api",
@@ -34,25 +35,45 @@ def test_dataset_update_filestore_resource(self, configuration):
         }
         assert filestore_resources == {}
 
-        resource.set_file_to_upload("test")
+        resource.set_file_to_upload(test_data)
         FilestoreHelper.dataset_update_filestore_resource(
-            resource, filestore_resources, 0
+            orig_resource, resource, filestore_resources, 0
         )
         assert resource == {
             "api_type": "api",
             "description": "My Resource",
             "format": "xlsx",
             "name": "MyResource1",
+            "hash": "3790da698479326339fa99a074cbc1f7",
+            "size": 1548,
             "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
             "resource_type": "api",
             "url": "updated_by_file_upload_step",
         }
-        assert filestore_resources == {0: "test"}
+        assert filestore_resources == {0: test_data}
+        filestore_resources = {}
+        FilestoreHelper.dataset_update_filestore_resource(
+            resource, resource, filestore_resources, 0
+        )
+        assert resource == {
+            "api_type": "api",
+            "description": "My Resource",
+            "format": "xlsx",
+            "name": "MyResource1",
+            "hash": "3790da698479326339fa99a074cbc1f7",
+            "size": 1548,
+            "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
+            "resource_type": "api",
+            "url": "updated_by_file_upload_step",
+        }
+        assert filestore_resources == {}
+
+        resource.file_to_upload = None
         resource.mark_data_updated()
         FilestoreHelper.dataset_update_filestore_resource(
-            resource, filestore_resources, 0
+            orig_resource, resource, filestore_resources, 0
         )
         regex = r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d$"
         assert re.match(regex, resource["last_modified"])
-        assert filestore_resources == {0: "test"}
+        assert filestore_resources == {}
diff --git a/tests/hdx/api/utilities/test_size_hash.py b/tests/hdx/api/utilities/test_size_hash.py
new file mode 100644
index 00000000..78c25189
--- /dev/null
+++ b/tests/hdx/api/utilities/test_size_hash.py
@@ -0,0 +1,24 @@
+from os.path import join
+
+import pytest
+
+from hdx.api.utilities.size_hash import get_size_and_hash
+
+
+class TestSizeHash:
+    @pytest.fixture(scope="class")
+    def test_xlsx(self, fixturesfolder):
+        return join(
+            fixturesfolder,
+            "size_hash",
+            "ACLED-All-Africa-File_20170101-to-20170708.xlsx",
+        )
+
+    def test_get_size_and_hash(self, test_data, test_xlsx):
+        size, hash = get_size_and_hash(test_data, "csv")
+        assert size == 1548
+        assert hash == "3790da698479326339fa99a074cbc1f7"
+
+        size, hash = get_size_and_hash(test_xlsx, "xlsx")
+        assert size == 23724
+        assert hash == "6b8acf7e28d62685a1e829e7fa220d17"
diff --git a/tests/hdx/conftest.py b/tests/hdx/conftest.py
index 23a9f99e..36480815 100755
--- a/tests/hdx/conftest.py
+++ b/tests/hdx/conftest.py
@@ -35,6 +35,11 @@ def project_config_yaml():
     return join("tests", "fixtures", "config", "project_configuration.yaml")
 
 
+@pytest.fixture(scope="session")
+def test_data(fixturesfolder):
+    return join(fixturesfolder, "test_data.csv")
+
+
 @pytest.fixture(scope="session")
 def locations():
     Locations.set_validlocations(
diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py
index 8e58e4f2..eb80f670 100755
--- a/tests/hdx/data/test_dataset_core.py
+++ b/tests/hdx/data/test_dataset_core.py
@@ -185,10 +185,6 @@ def mockhxlupdate(url, datadict):
 
 
 class TestDatasetCore:
-    @pytest.fixture(scope="class")
-    def test_file(self):
-        return join("tests", "fixtures", "test_data.csv")
-
     @pytest.fixture(scope="class")
     def static_yaml(self):
         return join("tests", "fixtures", "config", "hdx_dataset_static.yaml")
@@ -512,12 +508,12 @@ def test_read_from_hdx(self, configuration, read):
         dataset = Dataset.read_from_hdx("TEST3")
         assert dataset is None
 
-    def test_revise(self, configuration, test_file, post_revise):
+    def test_revise(self, configuration, test_data, post_revise):
         dataset = Dataset.revise(
             {"name": "MyDataset1"},
             filter=["-resources__0__description"],
             update={"resources": [{"description": "haha"}]},
-            files_to_upload={"update__resources__0__upload": test_file},
+            files_to_upload={"update__resources__0__upload": test_data},
         )
         assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d"
         assert dataset["name"] == "MyDataset1"
diff --git a/tests/hdx/data/test_dataset_noncore.py b/tests/hdx/data/test_dataset_noncore.py
index 3d481ee6..ed9ee3c7 100755
--- a/tests/hdx/data/test_dataset_noncore.py
+++ b/tests/hdx/data/test_dataset_noncore.py
@@ -206,7 +206,7 @@ def test_get_api_url(self, configuration, hdx_config_yaml, project_config_yaml):
             == "https://feature.data-humdata-org.ahconu.org/api/3/action/package_show?id=MyDataset1"
         )
 
-    def test_get_set_date_of_dataset(self):
+    def test_get_set_date_of_dataset(self, configuration):
         dataset = Dataset({"dataset_date": "[2020-01-07T00:00:00 TO *]"})
         result = dataset.get_time_period(today=datetime(2020, 11, 17))
         assert result == {
@@ -259,7 +259,7 @@ def test_set_dataset_year_range(self, configuration):
         retval = dataset.set_time_period_year_range((2005, 2002, 2003))
         assert retval == [2002, 2003, 2005]
 
-    def test_is_set_subnational(self):
+    def test_is_set_subnational(self, configuration):
         datasetdata = copy.deepcopy(dataset_data)
         dataset = Dataset(datasetdata)
         assert dataset["subnational"] == "1"
@@ -963,7 +963,7 @@ def test_remove_dates_from_title(self):
         assert dataset["title"] == "Mon_State_Village_Tract_Boundaries 9999 99"
         assert dataset["dataset_date"] == "[2001-01-01T00:00:00 TO 2001-12-31T23:59:59]"
 
-    def test_load_save_to_json(self, vocabulary_read):
+    def test_load_save_to_json(self, configuration, vocabulary_read):
         with temp_dir(
             "LoadSaveDatasetJSON",
             delete_on_success=True,
@@ -1011,7 +1011,7 @@ def test_load_save_to_json(self, vocabulary_read):
         dataset = Dataset.load_from_json(path)
         assert dataset is None
 
-    def test_custom_viz(self):
+    def test_custom_viz(self, configuration):
         dataset = Dataset({"name": "lala", "title": "title", "notes": "description"})
         url = "http://lala"
         dataset.set_custom_viz(url)
diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py
index 7096f812..96658c55 100755
--- a/tests/hdx/data/test_resource.py
+++ b/tests/hdx/data/test_resource.py
@@ -382,6 +382,8 @@ def post(url, data, headers, files, allow_redirects, auth=None):
             resultdictcopy["url"] = (
                 f"http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/{filename}"
             )
+            resultdictcopy["size"] = datadict["size"]
+            resultdictcopy["hash"] = datadict["hash"]
             resultdictcopy["state"] = datadict["state"]
 
             result = json.dumps(resultdictcopy)
@@ -701,7 +703,7 @@ def test_check_required_fields(self, configuration):
             resource.check_url_filetoupload()
             resource.check_required_fields()
 
-    def test_create_in_hdx(self, configuration, date_pattern, post_create):
+    def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data):
         resource = Resource()
         with pytest.raises(HDXError):
             resource.create_in_hdx()
@@ -723,9 +725,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create):
 
         resource_data_copy = copy.deepcopy(resource_data)
         resource = Resource(resource_data_copy)
-        filetoupload = join("tests", "fixtures", "test_data.csv")
-        resource.set_file_to_upload(filetoupload)
-        assert resource.get_file_to_upload() == filetoupload
+        resource.set_file_to_upload(test_data)
+        assert resource.get_file_to_upload() == test_data
         resource.create_in_hdx()
         assert resource["url_type"] == "upload"
         assert resource["resource_type"] == "file.upload"
@@ -733,13 +734,17 @@
             resource["url"]
             == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv"
         )
+        assert resource["size"] == "1548"
+        assert resource["hash"] == "3790da698479326339fa99a074cbc1f7"
         assert resource["state"] == "active"
         resource.set_file_to_upload(test_data)
         resource.create_in_hdx()
         assert (
             resource["url"]
             == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv"
         )
+        assert resource["size"] == "1548"
+        assert resource["hash"] == "3790da698479326339fa99a074cbc1f7"
         resource_data_copy["name"] = "MyResource2"
         resource = Resource(resource_data_copy)
         with pytest.raises(HDXError):
             resource.create_in_hdx()
@@ -750,7 +755,7 @@
         with pytest.raises(HDXError):
             resource.create_in_hdx()
 
-    def test_update_in_hdx(self, configuration, date_pattern, post_update):
+    def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data):
         resource = Resource()
         resource["id"] = "NOTEXIST"
         with pytest.raises(HDXError):
@@ -783,8 +788,7 @@
         match = date_pattern.search(resource["last_modified"])
         assert match
 
-        filetoupload = join("tests", "fixtures", "test_data.csv")
-        resource.set_file_to_upload(filetoupload, guess_format_from_suffix=True)
+        resource.set_file_to_upload(test_data, guess_format_from_suffix=True)
         assert resource["format"] == "csv"
         resource.update_in_hdx()
         assert resource["url_type"] == "upload"
@@ -793,6 +797,8 @@
             resource["url"]
             == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv"
         )
+        assert resource["size"] == "1548"
+        assert resource["hash"] == "3790da698479326339fa99a074cbc1f7"
         assert resource["state"] == "active"
 
         resource["id"] = "NOTEXIST"
diff --git a/tests/hdx/data/test_update_logic.py b/tests/hdx/data/test_update_logic.py
index 5f107a97..67bea805 100644
--- a/tests/hdx/data/test_update_logic.py
+++ b/tests/hdx/data/test_update_logic.py
@@ -1,3 +1,4 @@
+import string
 from os.path import join
 
 import pytest
@@ -11,9 +12,12 @@
 from hdx.location.country import Country
 from hdx.utilities.loader
import load_yaml from hdx.utilities.matching import multiple_replace +from hdx.utilities.path import get_temp_dir class TestUpdateLogic: + characters = string.ascii_letters + string.digits + string.punctuation + @pytest.fixture(scope="function") def configuration(self): Configuration._create( @@ -42,6 +46,10 @@ def configuration(self): def fixture_path(self): return join("tests", "fixtures", "update_logic") + @pytest.fixture(scope="class") + def temp_dir(self): + return get_temp_dir("WHO", delete_if_exists=True) + @pytest.fixture(scope="class") def new_resources_yaml(self, fixture_path): return join(fixture_path, "update_logic_resources_new.yaml") @@ -102,8 +110,17 @@ def add_dataset_resources(dataset, yaml_path, include=None, exclude=None): continue dataset.add_update_resource(resource) - @staticmethod - def add_new_dataset_resources(new_dataset, yaml_path, include=None, exclude=None): + @classmethod + # Function to generate file with number + def generate_content(cls, path, number): + # Write the content to a file + with open(path, "w") as file: + file.write(f"{number:02d}") + + @classmethod + def add_new_dataset_resources( + cls, temp_dir, new_dataset, yaml_path, include=None, exclude=None + ): resources_uploads = load_yaml(yaml_path) if include is None: include = range(len(resources_uploads)) @@ -115,8 +132,9 @@ def add_new_dataset_resources(new_dataset, yaml_path, include=None, exclude=None if i in exclude: continue resource = resource_upload["resource"] - file_to_upload = resource_upload["file_to_upload"] resource = Resource(resource) + file_to_upload = resource_upload["file_to_upload"] + cls.generate_content(file_to_upload, i) resource.set_file_to_upload(file_to_upload) new_dataset.add_update_resource(resource) @@ -151,8 +169,11 @@ def test_update_logic_1( dataset, new_resources_yaml, resources_yaml, + temp_dir, ): - self.add_new_dataset_resources(new_dataset, new_resources_yaml, exclude=[13]) + self.add_new_dataset_resources( + temp_dir, new_dataset, new_resources_yaml, exclude=[13] + ) self.add_dataset_resources(dataset, resources_yaml) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources @@ -170,453 +191,642 @@ def test_update_logic_1( update = results["update"] del update["updated_by_script"] assert update == { - "name": "who-data-for-zambia", - "title": "Zambia - Health Indicators", - "private": False, - "notes": "Contains data from World Health Organization...", + "data_update_frequency": "30", + "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", "dataset_source": "World Health Organization", + "groups": [{"name": "zmb"}], "license_id": "hdx-other", "license_other": "CC BY-NC-SA 3.0 IGO", - "methodology": "Registry", "maintainer": "35f7bb2c-4ab6-4796-8334-525b30a94c89", + "methodology": "Registry", + "name": "who-data-for-zambia", + "notes": "Contains data from World Health Organization...", "owner_org": "c021f6be-3598-418e-8f7f-c7a799194dba", - "data_update_frequency": "30", - "subnational": "0", - "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", - "groups": [{"name": "zmb"}], - "tags": [ - { - "name": "hxl", - "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87", - } - ], + "private": False, "resources": [ { - "name": "All Health Indicators for Zambia", - "description": "See resource descriptions below for links to indicator metadata", + "description": "See resource descriptions below for links to " + "indicator metadata", "format": "csv", - "url_type": "upload", + "hash": "b4b147bc522828731f1a016bfa72c073", 
+ "name": "All Health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Mortality and global health estimates Indicators for Zambia", - "description": "*Mortality and global health estimates:*\n[Infant mortality rate (proba", + "description": "*Mortality and global health estimates:*\n" + "[Infant mortality rate (proba", "format": "csv", - "url_type": "upload", + "hash": "96a3be3cf272e017046d1b2674a52bd3", + "name": "Mortality and global health estimates Indicators for " + "Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Sustainable development goals Indicators for Zambia", - "description": "*Sustainable development goals:*\n[Adolescent birth rate (per 1000 wome", + "description": "*Sustainable development goals:*\n" + "[Adolescent birth rate (per 1000 wome", "format": "csv", - "url_type": "upload", + "hash": "a2ef406e2c2351e0b9e80029c909242d", + "name": "Sustainable development goals Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Millennium Development Goals (MDGs) Indicators for Zambia", - "description": "*Millennium Development Goals (MDGs):*\n[Contraceptive prevalence (%)](", + "description": "*Millennium Development Goals (MDGs):*\n" + "[Contraceptive prevalence (%)](", "format": "csv", - "url_type": "upload", + "hash": "e45ee7ce7e88149af8dd32b27f9512ce", + "name": "Millennium Development Goals (MDGs) Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health systems Indicators for Zambia", - "description": "*Health systems:*\n[Median availability of selected generic medicines (", + "description": "*Health systems:*\n" + "[Median availability of selected generic " + "medicines (", "format": "csv", - "url_type": "upload", + "hash": "7d0665438e81d8eceb98c1e31fca80c1", + "name": "Health systems Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Malaria Indicators for Zambia", - "description": "*Malaria:*\n[Children aged <5 years sleeping under insecticide-treated ", + "description": "*Malaria:*\n" + "[Children aged <5 years sleeping under " + "insecticide-treated ", "format": "csv", - "url_type": "upload", + "hash": "751d31dd6b56b26b29dac2c0e1839e34", + "name": "Malaria Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Tuberculosis Indicators for Zambia", - "description": "*Tuberculosis:*\n[Deaths due to tuberculosis among HIV-negative people ", + "description": "*Tuberculosis:*\n" + "[Deaths due to tuberculosis among HIV-negative " + "people ", "format": "csv", - "url_type": "upload", + "hash": "faeac4e1eef307c2ab7b0a3821e6c667", + "name": "Tuberculosis Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Child health Indicators for Zambia", - "description": "*Child health:*\n[Children aged <5 years stunted (%)](https://www.who.i", + "description": "*Child health:*\n" + "[Children aged <5 years stunted " + "(%)](https://www.who.i", "format": "csv", - "url_type": "upload", + "hash": "d72d187df41e10ea7d9fcdc7f5909205", + "name": "Child 
health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Infectious diseases Indicators for Zambia", - "description": "*Infectious diseases:*\n[Cholera - number of reported cases](https://ww", + "description": "*Infectious diseases:*\n" + "[Cholera - number of reported " + "cases](https://ww", "format": "csv", - "url_type": "upload", + "hash": "fad6f4e614a212e80c67249a666d2b09", + "name": "Infectious diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "World Health Statistics Indicators for Zambia", - "description": "*World Health Statistics:*\n[Literacy rate among adults aged >= 15 year", + "description": "*World Health Statistics:*\n" + "[Literacy rate among adults aged >= 15 year", "format": "csv", - "url_type": "upload", + "hash": "d3d9446802a44259755d38e6d163e820", + "name": "World Health Statistics Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health financing Indicators for Zambia", - "description": "*Health financing:*\n[Private prepaid plans as a percentage of private ", + "description": "*Health financing:*\n" + "[Private prepaid plans as a percentage of " + "private ", "format": "csv", - "url_type": "upload", + "hash": "6512bd43d9caa6e02c990b0a82652dca", + "name": "Health financing Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Public health and environment Indicators for Zambia", - "description": "*Public health and environment:*\n[Population using solid fuels (%)](ht", + "description": "*Public health and environment:*\n" + "[Population using solid fuels (%)](ht", "format": "csv", - "url_type": "upload", + "hash": "c74d97b01eae257e44aa9d5bade97baf", + "name": "Public health and environment Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Substance use and mental health Indicators for Zambia", - "description": "*Substance use and mental health:*\n[Fines for violations](https://www.", + "description": "*Substance use and mental health:*\n" + "[Fines for violations](https://www.", "format": "csv", - "url_type": "upload", + "hash": "c20ad4d76fe97759aa27a0c99bff6710", + "name": "Substance use and mental health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Injuries and violence Indicators for Zambia", - "description": "*Injuries and violence:*\n[Income level](https://www.who.int/data/gho/i", + "description": "*Injuries and violence:*\n" + "[Income level](https://www.who.int/data/gho/i", "format": "csv", - "url_type": "upload", + "hash": "aab3238922bcc25a6f606eb525ffdc56", + "name": "Injuries and violence Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "HIV/AIDS and other STIs Indicators for Zambia", - "description": "*HIV/AIDS and other STIs:*\n[Prevalence of HIV among adults aged 15 to ", + "description": "*HIV/AIDS and other STIs:*\n" + "[Prevalence of HIV among adults aged 15 to ", "format": "csv", - "url_type": "upload", + "hash": "9bf31c7ff062936a96d3c8bd1f8f2ff3", + "name": "HIV/AIDS and 
other STIs Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Nutrition Indicators for Zambia", - "description": "*Nutrition:*\n[Early initiation of breastfeeding (%)](https://www.who.i", + "description": "*Nutrition:*\n" + "[Early initiation of breastfeeding " + "(%)](https://www.who.i", "format": "csv", - "url_type": "upload", + "hash": "70efdf2ec9b086079795c442636b55fb", + "name": "Nutrition Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Urban health Indicators for Zambia", - "description": "*Urban health:*\n[Percentage of the total population living in cities >", + "description": "*Urban health:*\n" + "[Percentage of the total population living in " + "cities >", "format": "csv", - "url_type": "upload", + "hash": "6f4922f45568161a8cdf4ad2299f6d23", + "name": "Urban health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases Indicators for Zambia", - "description": "*Noncommunicable diseases:*\n[Prevalence of overweight among adults, BM", + "description": "*Noncommunicable diseases:*\n" + "[Prevalence of overweight among adults, BM", "format": "csv", - "url_type": "upload", + "hash": "1f0e3dad99908345f7439f8ffabdffc4", + "name": "Noncommunicable diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases CCS Indicators for Zambia", - "description": "*Noncommunicable diseases CCS:*\n[Existence of operational policy/strat", + "description": "*Noncommunicable diseases CCS:*\n" + "[Existence of operational policy/strat", "format": "csv", - "url_type": "upload", + "hash": "98f13708210194c475687be6106a3b84", + "name": "Noncommunicable diseases CCS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Negelected tropical diseases Indicators for Zambia", - "description": "*Negelected tropical diseases:*\n[Number of new reported cases of Burul", + "description": "*Negelected tropical diseases:*\n" + "[Number of new reported cases of Burul", "format": "csv", - "url_type": "upload", + "hash": "3c59dc048e8850243be8079a5c74d079", + "name": "Negelected tropical diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health Equity Monitor Indicators for Zambia", - "description": "*Health Equity Monitor:*\n[Antenatal care coverage - at least one visit", + "description": "*Health Equity Monitor:*\n" + "[Antenatal care coverage - at least one visit", "format": "csv", - "url_type": "upload", + "hash": "4e732ced3463d06de0ca9a15b6153677", + "name": "Health Equity Monitor Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Infrastructure Indicators for Zambia", - "description": "*Infrastructure:*\n[Total density per 100 000 population: Health posts]", + "description": "*Infrastructure:*\n" + "[Total density per 100 000 population: Health " + "posts]", "format": "csv", - "url_type": "upload", + "hash": "b6d767d2f8ed5d21a44b0e5886680cb9", + "name": "Infrastructure Indicators for Zambia", 
"resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Essential health technologies Indicators for Zambia", - "description": "*Essential health technologies:*\n[Availability of national standards o", + "description": "*Essential health technologies:*\n" + "[Availability of national standards o", "format": "csv", - "url_type": "upload", + "hash": "37693cfc748049e45d87b8c7d8b9aacd", + "name": "Essential health technologies Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Medical equipment Indicators for Zambia", - "description": "*Medical equipment:*\n[Total density per million population: Magnetic R", + "description": "*Medical equipment:*\n" + "[Total density per million population: " + "Magnetic R", "format": "csv", - "url_type": "upload", + "hash": "1ff1de774005f8da13f42943881c655f", + "name": "Medical equipment Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Demographic and socioeconomic statistics Indicators for Zambia", - "description": "*Demographic and socioeconomic statistics:*\n[Cellular subscribers (per", + "description": "*Demographic and socioeconomic statistics:*\n" + "[Cellular subscribers (per", "format": "csv", - "url_type": "upload", + "hash": "8e296a067a37563370ded05f5a3bf3ec", + "name": "Demographic and socioeconomic statistics Indicators " + "for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Neglected tropical diseases Indicators for Zambia", - "description": "*Neglected tropical diseases:*\n[Status of yaws endemicity](https://www", + "description": "*Neglected tropical diseases:*\n" + "[Status of yaws endemicity](https://www", "format": "csv", - "url_type": "upload", + "hash": "33e75ff09dd601bbe69f351039152189", + "name": "Neglected tropical diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "International Health Regulations (2005) monitoring framework Indicators for Zambia", - "description": "*International Health Regulations (2005) monitoring framework:*\n[Legis", + "description": "*International Health Regulations (2005) " + "monitoring framework:*\n" + "[Legis", "format": "csv", - "url_type": "upload", + "hash": "6ea9ab1baa0efb9e19094440c317e21b", + "name": "International Health Regulations (2005) monitoring " + "framework Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Insecticide resistance Indicators for Zambia", - "description": "*Insecticide resistance:*\n[Number of insecticide classes to which resi", + "description": "*Insecticide resistance:*\n" + "[Number of insecticide classes to which resi", "format": "csv", - "url_type": "upload", + "hash": "34173cb38f07f89ddbebc2ac9128303f", + "name": "Insecticide resistance Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Universal Health Coverage Indicators for Zambia", - "description": "*Universal Health Coverage:*\n[Cataract surgical coverage of adults age", + "description": "*Universal Health Coverage:*\n" + "[Cataract surgical coverage of adults age", "format": "csv", - 
"url_type": "upload", + "hash": "c16a5320fa475530d9583c34fd356ef5", + "name": "Universal Health Coverage Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Global Observatory for eHealth (GOe) Indicators for Zambia", - "description": "*Global Observatory for eHealth (GOe):*\n[National universal health cov", + "description": "*Global Observatory for eHealth (GOe):*\n" + "[National universal health cov", "format": "csv", - "url_type": "upload", + "hash": "182be0c5cdcd5072bb1864cdee4d3d6e", + "name": "Global Observatory for eHealth (GOe) Indicators for " + "Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION:*\n[Government uni", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING : " + "PREVENTION:*\n" + "[Government uni", "format": "csv", - "url_type": "upload", + "hash": "e369853df766fa44e1ed0ff613f563bd", + "name": "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT:*\n[Government unit/", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: " + "TREATMENT:*\n" + "[Government unit/", "format": "csv", - "url_type": "upload", + "hash": "1c383cd30b7c298ab50293adfecb7b18", + "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING:*\n[Five-year change", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: " + "FINANCING:*\n" + "[Five-year change", "format": "csv", - "url_type": "upload", + "hash": "19ca14e7ea6328a42e0eb13d585e4c22", + "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVIDERS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVID", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "TREATMENT SECTORS AND PROVID", "format": "csv", - "url_type": "upload", + "hash": "a5bfc9e07964f8dddeb95fc584cd965d", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT " + "SECTORS AND PROVIDERS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREATMENT COVERAGE Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREAT", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "TREATMENT CAPACITY AND TREAT", "format": "csv", - "url_type": "upload", + "hash": "a5771bce93e200c36f7cd9dfd0e5deaa", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT " + 
"CAPACITY AND TREATMENT COVERAGE Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT:*\n", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PHARMACOLOGICAL TREATMENT:*\n", "format": "csv", - "url_type": "upload", + "hash": "d67d8ab4f4c10bf22aa353e27879133c", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PHARMACOLOGICAL TREATMENT Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVENTIONS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVEN", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "SCREENING AND BRIEF INTERVEN", "format": "csv", - "url_type": "upload", + "hash": "d645920e395fedad7bbbed0eca3fe2e0", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING " + "AND BRIEF INTERVENTIONS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROVIDERS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROV", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PREVENTION PROGRAMS AND PROV", "format": "csv", - "url_type": "upload", + "hash": "3416a75f4cea9109507cacd8e2f2aefc", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION " + "PROGRAMS AND PROVIDERS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVICES Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVI", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "SPECIAL PROGRAMMES AND SERVI", "format": "csv", - "url_type": "upload", + "hash": "a1d0c6e83f027327d8461063f4ac58a6", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL " + "PROGRAMMES AND SERVICES Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: HUMAN RESOURCES Indicators for Zambia", - "description": "*RSUD: HUMAN RESOURCES:*\n[Health professionals providing treatment for", + "description": "*RSUD: HUMAN RESOURCES:*\n" + "[Health professionals providing treatment for", "format": "csv", - "url_type": "upload", + "hash": "17e62166fc8586dfa4d1bc0e1742c08b", + "name": "RSUD: HUMAN RESOURCES Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: INFORMATION SYSTEMS Indicators for Zambia", - "description": "*RSUD: INFORMATION SYSTEMS:*\n[Epidemiological data collection for subs", + "description": "*RSUD: INFORMATION SYSTEMS:*\n" + "[Epidemiological data collection for subs", "format": "csv", - "url_type": "upload", + "hash": "f7177163c833dff4b38fc8d2872f1ec6", + "name": "RSUD: INFORMATION SYSTEMS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": 
"updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: YOUTH Indicators for Zambia", - "description": "*RSUD: YOUTH:*\n[Epidemiological data collection system for substance u", + "description": "*RSUD: YOUTH:*\n" + "[Epidemiological data collection system for " + "substance u", "format": "csv", - "url_type": "upload", + "hash": "6c8349cc7260ae62e3b1396831a8398f", + "name": "RSUD: YOUTH Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "FINANCIAL PROTECTION Indicators for Zambia", - "description": "*FINANCIAL PROTECTION:*\n[Population with household expenditures on hea", + "description": "*FINANCIAL PROTECTION:*\n" + "[Population with household expenditures on hea", "format": "csv", - "url_type": "upload", + "hash": "d9d4f495e875a2e075a1a4a6e1b9770f", + "name": "FINANCIAL PROTECTION Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases and mental health Indicators for Zambia", - "description": "*Noncommunicable diseases and mental health:*\n[Number of deaths attrib", + "description": "*Noncommunicable diseases and mental health:*\n" + "[Number of deaths attrib", "format": "csv", - "url_type": "upload", + "hash": "67c6a1e7ce56d3d6fa748ab6d9af3fd7", + "name": "Noncommunicable diseases and mental health Indicators " + "for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health workforce Indicators for Zambia", - "description": "*Health workforce:*\n[Medical doctors (per 10 000 population)](https://", + "description": "*Health workforce:*\n" + "[Medical doctors (per 10 000 " + "population)](https://", "format": "csv", - "url_type": "upload", + "hash": "642e92efb79421734881b53e1e1b18b6", + "name": "Health workforce Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Neglected Tropical Diseases Indicators for Zambia", - "description": "*Neglected Tropical Diseases:*\n[Number of new leprosy cases](https://w", + "description": "*Neglected Tropical Diseases:*\n" + "[Number of new leprosy cases](https://w", "format": "csv", - "url_type": "upload", + "hash": "33e75ff09dd601bbe69f351039152189", + "name": "Neglected Tropical Diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "QuickCharts Indicators for Zambia", "description": "Cut down data for QuickCharts", "format": "csv", - "url_type": "upload", + "hash": "d82c8d1619ad8176d665453cfb2e55f0", + "name": "QuickCharts Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "TOBACCO Indicators for Zambia", - "description": "*TOBACCO:*\n[Monitor](https://www.who.int/data/gho/indicator-metadata-r", + "description": "*TOBACCO:*\n" + "[Monitor](https://www.who.int/data/gho/indicator-metadata-r", "format": "csv", - "url_type": "upload", + "hash": "02e74f10e0327ad868d138f2b4fdd6f0", + "name": "TOBACCO Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "UHC Indicators for Zambia", - "description": "*UHC:*\n[Population in malaria-endemic areas who slept under an insecti", + 
"description": "*UHC:*\n" + "[Population in malaria-endemic areas who slept " + "under an insecti", "format": "csv", - "url_type": "upload", + "hash": "6364d3f0f495b6ab9dcf8d3b5c6e0b01", + "name": "UHC Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "ICD Indicators for Zambia", - "description": "*ICD:*\n[ICD-11 implementation progress level](https://web-prod.who.int", + "description": "*ICD:*\n" + "[ICD-11 implementation progress " + "level](https://web-prod.who.int", "format": "csv", - "url_type": "upload", + "hash": "f457c545a9ded88f18ecee47145a72c0", + "name": "ICD Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia", - "description": "*SEXUAL AND REPRODUCTIVE HEALTH:*\n[Institutional Births (birth taken p", + "description": "*SEXUAL AND REPRODUCTIVE HEALTH:*\n" + "[Institutional Births (birth taken p", "format": "csv", - "url_type": "upload", + "hash": "c0c7c76d30bd3dcaefc96f40275bdc0a", + "name": "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Immunization Indicators for Zambia", - "description": "*Immunization:*\n[Proportion of vaccination cards seen (%)](https://www", + "description": "*Immunization:*\n" + "[Proportion of vaccination cards seen " + "(%)](https://www", "format": "csv", - "url_type": "upload", + "hash": "2838023a778dfaecdc212708f721b788", + "name": "Immunization Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "NLIS Indicators for Zambia", - "description": "*NLIS:*\n[Subclinical vitamin A deficiency in preschool-age children (s", + "description": "*NLIS:*\n" + "[Subclinical vitamin A deficiency in " + "preschool-age children (s", "format": "csv", - "url_type": "upload", + "hash": "9a1158154dfa42caddbd0694a4e9bdc8", + "name": "NLIS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, ], "state": "active", + "subnational": "0", + "tags": [ + {"name": "hxl", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"} + ], + "title": "Zambia - Health Indicators", } self.check_resources(update, results) @@ -627,8 +837,11 @@ def test_update_logic_2( dataset, new_resources_yaml, resources_yaml, + temp_dir, ): - self.add_new_dataset_resources(new_dataset, new_resources_yaml, exclude=[52]) + self.add_new_dataset_resources( + temp_dir, new_dataset, new_resources_yaml, exclude=[52] + ) self.add_dataset_resources(dataset, resources_yaml) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources @@ -646,453 +859,642 @@ def test_update_logic_2( update = results["update"] del update["updated_by_script"] assert update == { - "name": "who-data-for-zambia", - "title": "Zambia - Health Indicators", - "private": False, - "notes": "Contains data from World Health Organization...", + "data_update_frequency": "30", + "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", "dataset_source": "World Health Organization", + "groups": [{"name": "zmb"}], "license_id": "hdx-other", "license_other": "CC BY-NC-SA 3.0 IGO", - "methodology": "Registry", "maintainer": "35f7bb2c-4ab6-4796-8334-525b30a94c89", + 
"methodology": "Registry", + "name": "who-data-for-zambia", + "notes": "Contains data from World Health Organization...", "owner_org": "c021f6be-3598-418e-8f7f-c7a799194dba", - "data_update_frequency": "30", - "subnational": "0", - "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", - "groups": [{"name": "zmb"}], - "tags": [ - { - "name": "hxl", - "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87", - } - ], + "private": False, "resources": [ { - "name": "All Health Indicators for Zambia", - "description": "See resource descriptions below for links to indicator metadata", + "description": "See resource descriptions below for links to " + "indicator metadata", "format": "csv", - "url_type": "upload", + "hash": "b4b147bc522828731f1a016bfa72c073", + "name": "All Health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Mortality and global health estimates Indicators for Zambia", - "description": "*Mortality and global health estimates:*\n[Infant mortality rate (proba", + "description": "*Mortality and global health estimates:*\n" + "[Infant mortality rate (proba", "format": "csv", - "url_type": "upload", + "hash": "96a3be3cf272e017046d1b2674a52bd3", + "name": "Mortality and global health estimates Indicators for " + "Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Sustainable development goals Indicators for Zambia", - "description": "*Sustainable development goals:*\n[Adolescent birth rate (per 1000 wome", + "description": "*Sustainable development goals:*\n" + "[Adolescent birth rate (per 1000 wome", "format": "csv", - "url_type": "upload", + "hash": "a2ef406e2c2351e0b9e80029c909242d", + "name": "Sustainable development goals Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Millennium Development Goals (MDGs) Indicators for Zambia", - "description": "*Millennium Development Goals (MDGs):*\n[Contraceptive prevalence (%)](", + "description": "*Millennium Development Goals (MDGs):*\n" + "[Contraceptive prevalence (%)](", "format": "csv", - "url_type": "upload", + "hash": "e45ee7ce7e88149af8dd32b27f9512ce", + "name": "Millennium Development Goals (MDGs) Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health systems Indicators for Zambia", - "description": "*Health systems:*\n[Median availability of selected generic medicines (", + "description": "*Health systems:*\n" + "[Median availability of selected generic " + "medicines (", "format": "csv", - "url_type": "upload", + "hash": "7d0665438e81d8eceb98c1e31fca80c1", + "name": "Health systems Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Malaria Indicators for Zambia", - "description": "*Malaria:*\n[Children aged <5 years sleeping under insecticide-treated ", + "description": "*Malaria:*\n" + "[Children aged <5 years sleeping under " + "insecticide-treated ", "format": "csv", - "url_type": "upload", + "hash": "751d31dd6b56b26b29dac2c0e1839e34", + "name": "Malaria Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Tuberculosis Indicators for Zambia", - "description": 
"*Tuberculosis:*\n[Deaths due to tuberculosis among HIV-negative people ", + "description": "*Tuberculosis:*\n" + "[Deaths due to tuberculosis among HIV-negative " + "people ", "format": "csv", - "url_type": "upload", + "hash": "faeac4e1eef307c2ab7b0a3821e6c667", + "name": "Tuberculosis Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Child health Indicators for Zambia", - "description": "*Child health:*\n[Children aged <5 years stunted (%)](https://www.who.i", + "description": "*Child health:*\n" + "[Children aged <5 years stunted " + "(%)](https://www.who.i", "format": "csv", - "url_type": "upload", + "hash": "d72d187df41e10ea7d9fcdc7f5909205", + "name": "Child health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Infectious diseases Indicators for Zambia", - "description": "*Infectious diseases:*\n[Cholera - number of reported cases](https://ww", + "description": "*Infectious diseases:*\n" + "[Cholera - number of reported " + "cases](https://ww", "format": "csv", - "url_type": "upload", + "hash": "fad6f4e614a212e80c67249a666d2b09", + "name": "Infectious diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "World Health Statistics Indicators for Zambia", - "description": "*World Health Statistics:*\n[Literacy rate among adults aged >= 15 year", + "description": "*World Health Statistics:*\n" + "[Literacy rate among adults aged >= 15 year", "format": "csv", - "url_type": "upload", + "hash": "d3d9446802a44259755d38e6d163e820", + "name": "World Health Statistics Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health financing Indicators for Zambia", - "description": "*Health financing:*\n[Private prepaid plans as a percentage of private ", + "description": "*Health financing:*\n" + "[Private prepaid plans as a percentage of " + "private ", "format": "csv", - "url_type": "upload", + "hash": "6512bd43d9caa6e02c990b0a82652dca", + "name": "Health financing Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Public health and environment Indicators for Zambia", - "description": "*Public health and environment:*\n[Population using solid fuels (%)](ht", + "description": "*Public health and environment:*\n" + "[Population using solid fuels (%)](ht", "format": "csv", - "url_type": "upload", + "hash": "c74d97b01eae257e44aa9d5bade97baf", + "name": "Public health and environment Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Substance use and mental health Indicators for Zambia", - "description": "*Substance use and mental health:*\n[Fines for violations](https://www.", + "description": "*Substance use and mental health:*\n" + "[Fines for violations](https://www.", "format": "csv", - "url_type": "upload", + "hash": "c20ad4d76fe97759aa27a0c99bff6710", + "name": "Substance use and mental health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Tobacco Indicators for Zambia", - "description": "*Tobacco:*\n[Prevalence of smoking 
any tobacco product among persons ag", + "description": "*Tobacco:*\n" + "[Prevalence of smoking any tobacco product " + "among persons ag", "format": "csv", - "url_type": "upload", + "hash": "02e74f10e0327ad868d138f2b4fdd6f0", + "name": "Tobacco Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Injuries and violence Indicators for Zambia", - "description": "*Injuries and violence:*\n[Income level](https://www.who.int/data/gho/i", + "description": "*Injuries and violence:*\n" + "[Income level](https://www.who.int/data/gho/i", "format": "csv", - "url_type": "upload", + "hash": "aab3238922bcc25a6f606eb525ffdc56", + "name": "Injuries and violence Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "HIV/AIDS and other STIs Indicators for Zambia", - "description": "*HIV/AIDS and other STIs:*\n[Prevalence of HIV among adults aged 15 to ", + "description": "*HIV/AIDS and other STIs:*\n" + "[Prevalence of HIV among adults aged 15 to ", "format": "csv", - "url_type": "upload", + "hash": "9bf31c7ff062936a96d3c8bd1f8f2ff3", + "name": "HIV/AIDS and other STIs Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Nutrition Indicators for Zambia", - "description": "*Nutrition:*\n[Early initiation of breastfeeding (%)](https://www.who.i", + "description": "*Nutrition:*\n" + "[Early initiation of breastfeeding " + "(%)](https://www.who.i", "format": "csv", - "url_type": "upload", + "hash": "70efdf2ec9b086079795c442636b55fb", + "name": "Nutrition Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Urban health Indicators for Zambia", - "description": "*Urban health:*\n[Percentage of the total population living in cities >", + "description": "*Urban health:*\n" + "[Percentage of the total population living in " + "cities >", "format": "csv", - "url_type": "upload", + "hash": "6f4922f45568161a8cdf4ad2299f6d23", + "name": "Urban health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases Indicators for Zambia", - "description": "*Noncommunicable diseases:*\n[Prevalence of overweight among adults, BM", + "description": "*Noncommunicable diseases:*\n" + "[Prevalence of overweight among adults, BM", "format": "csv", - "url_type": "upload", + "hash": "1f0e3dad99908345f7439f8ffabdffc4", + "name": "Noncommunicable diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases CCS Indicators for Zambia", - "description": "*Noncommunicable diseases CCS:*\n[Existence of operational policy/strat", + "description": "*Noncommunicable diseases CCS:*\n" + "[Existence of operational policy/strat", "format": "csv", - "url_type": "upload", + "hash": "98f13708210194c475687be6106a3b84", + "name": "Noncommunicable diseases CCS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Negelected tropical diseases Indicators for Zambia", - "description": "*Negelected tropical diseases:*\n[Number of new reported cases of Burul", + 
"description": "*Negelected tropical diseases:*\n" + "[Number of new reported cases of Burul", "format": "csv", - "url_type": "upload", + "hash": "3c59dc048e8850243be8079a5c74d079", + "name": "Negelected tropical diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health Equity Monitor Indicators for Zambia", - "description": "*Health Equity Monitor:*\n[Antenatal care coverage - at least one visit", + "description": "*Health Equity Monitor:*\n" + "[Antenatal care coverage - at least one visit", "format": "csv", - "url_type": "upload", + "hash": "4e732ced3463d06de0ca9a15b6153677", + "name": "Health Equity Monitor Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Infrastructure Indicators for Zambia", - "description": "*Infrastructure:*\n[Total density per 100 000 population: Health posts]", + "description": "*Infrastructure:*\n" + "[Total density per 100 000 population: Health " + "posts]", "format": "csv", - "url_type": "upload", + "hash": "b6d767d2f8ed5d21a44b0e5886680cb9", + "name": "Infrastructure Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Essential health technologies Indicators for Zambia", - "description": "*Essential health technologies:*\n[Availability of national standards o", + "description": "*Essential health technologies:*\n" + "[Availability of national standards o", "format": "csv", - "url_type": "upload", + "hash": "37693cfc748049e45d87b8c7d8b9aacd", + "name": "Essential health technologies Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Medical equipment Indicators for Zambia", - "description": "*Medical equipment:*\n[Total density per million population: Magnetic R", + "description": "*Medical equipment:*\n" + "[Total density per million population: " + "Magnetic R", "format": "csv", - "url_type": "upload", + "hash": "1ff1de774005f8da13f42943881c655f", + "name": "Medical equipment Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Demographic and socioeconomic statistics Indicators for Zambia", - "description": "*Demographic and socioeconomic statistics:*\n[Cellular subscribers (per", + "description": "*Demographic and socioeconomic statistics:*\n" + "[Cellular subscribers (per", "format": "csv", - "url_type": "upload", + "hash": "8e296a067a37563370ded05f5a3bf3ec", + "name": "Demographic and socioeconomic statistics Indicators " + "for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Neglected tropical diseases Indicators for Zambia", - "description": "*Neglected tropical diseases:*\n[Status of yaws endemicity](https://www", + "description": "*Neglected tropical diseases:*\n" + "[Status of yaws endemicity](https://www", "format": "csv", - "url_type": "upload", + "hash": "33e75ff09dd601bbe69f351039152189", + "name": "Neglected tropical diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "International Health Regulations (2005) monitoring framework Indicators for Zambia", - "description": 
"*International Health Regulations (2005) monitoring framework:*\n[Legis", + "description": "*International Health Regulations (2005) " + "monitoring framework:*\n" + "[Legis", "format": "csv", - "url_type": "upload", + "hash": "6ea9ab1baa0efb9e19094440c317e21b", + "name": "International Health Regulations (2005) monitoring " + "framework Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Insecticide resistance Indicators for Zambia", - "description": "*Insecticide resistance:*\n[Number of insecticide classes to which resi", + "description": "*Insecticide resistance:*\n" + "[Number of insecticide classes to which resi", "format": "csv", - "url_type": "upload", + "hash": "34173cb38f07f89ddbebc2ac9128303f", + "name": "Insecticide resistance Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Universal Health Coverage Indicators for Zambia", - "description": "*Universal Health Coverage:*\n[Cataract surgical coverage of adults age", + "description": "*Universal Health Coverage:*\n" + "[Cataract surgical coverage of adults age", "format": "csv", - "url_type": "upload", + "hash": "c16a5320fa475530d9583c34fd356ef5", + "name": "Universal Health Coverage Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Global Observatory for eHealth (GOe) Indicators for Zambia", - "description": "*Global Observatory for eHealth (GOe):*\n[National universal health cov", + "description": "*Global Observatory for eHealth (GOe):*\n" + "[National universal health cov", "format": "csv", - "url_type": "upload", + "hash": "182be0c5cdcd5072bb1864cdee4d3d6e", + "name": "Global Observatory for eHealth (GOe) Indicators for " + "Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION:*\n[Government uni", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING : " + "PREVENTION:*\n" + "[Government uni", "format": "csv", - "url_type": "upload", + "hash": "e369853df766fa44e1ed0ff613f563bd", + "name": "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT:*\n[Government unit/", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: " + "TREATMENT:*\n" + "[Government unit/", "format": "csv", - "url_type": "upload", + "hash": "1c383cd30b7c298ab50293adfecb7b18", + "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING Indicators for Zambia", - "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING:*\n[Five-year change", + "description": "*RSUD: GOVERNANCE, POLICY AND FINANCING: " + "FINANCING:*\n" + "[Five-year change", "format": "csv", - "url_type": "upload", + "hash": "19ca14e7ea6328a42e0eb13d585e4c22", + "name": "RSUD: 
GOVERNANCE, POLICY AND FINANCING: FINANCING " + "Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVIDERS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVID", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "TREATMENT SECTORS AND PROVID", "format": "csv", - "url_type": "upload", + "hash": "a5bfc9e07964f8dddeb95fc584cd965d", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT " + "SECTORS AND PROVIDERS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREATMENT COVERAGE Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREAT", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "TREATMENT CAPACITY AND TREAT", "format": "csv", - "url_type": "upload", + "hash": "a5771bce93e200c36f7cd9dfd0e5deaa", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT " + "CAPACITY AND TREATMENT COVERAGE Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT:*\n", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PHARMACOLOGICAL TREATMENT:*\n", "format": "csv", - "url_type": "upload", + "hash": "d67d8ab4f4c10bf22aa353e27879133c", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PHARMACOLOGICAL TREATMENT Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVENTIONS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVEN", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "SCREENING AND BRIEF INTERVEN", "format": "csv", - "url_type": "upload", + "hash": "d645920e395fedad7bbbed0eca3fe2e0", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING " + "AND BRIEF INTERVENTIONS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROVIDERS Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROV", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "PREVENTION PROGRAMS AND PROV", "format": "csv", - "url_type": "upload", + "hash": "3416a75f4cea9109507cacd8e2f2aefc", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION " + "PROGRAMS AND PROVIDERS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVICES Indicators for Zambia", - "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVI", + "description": "*RSUD: SERVICE ORGANIZATION AND DELIVERY: " + "SPECIAL PROGRAMMES AND SERVI", "format": "csv", - 
"url_type": "upload", + "hash": "a1d0c6e83f027327d8461063f4ac58a6", + "name": "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL " + "PROGRAMMES AND SERVICES Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: HUMAN RESOURCES Indicators for Zambia", - "description": "*RSUD: HUMAN RESOURCES:*\n[Health professionals providing treatment for", + "description": "*RSUD: HUMAN RESOURCES:*\n" + "[Health professionals providing treatment for", "format": "csv", - "url_type": "upload", + "hash": "17e62166fc8586dfa4d1bc0e1742c08b", + "name": "RSUD: HUMAN RESOURCES Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: INFORMATION SYSTEMS Indicators for Zambia", - "description": "*RSUD: INFORMATION SYSTEMS:*\n[Epidemiological data collection for subs", + "description": "*RSUD: INFORMATION SYSTEMS:*\n" + "[Epidemiological data collection for subs", "format": "csv", - "url_type": "upload", + "hash": "f7177163c833dff4b38fc8d2872f1ec6", + "name": "RSUD: INFORMATION SYSTEMS Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "RSUD: YOUTH Indicators for Zambia", - "description": "*RSUD: YOUTH:*\n[Epidemiological data collection system for substance u", + "description": "*RSUD: YOUTH:*\n" + "[Epidemiological data collection system for " + "substance u", "format": "csv", - "url_type": "upload", + "hash": "6c8349cc7260ae62e3b1396831a8398f", + "name": "RSUD: YOUTH Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "FINANCIAL PROTECTION Indicators for Zambia", - "description": "*FINANCIAL PROTECTION:*\n[Population with household expenditures on hea", + "description": "*FINANCIAL PROTECTION:*\n" + "[Population with household expenditures on hea", "format": "csv", - "url_type": "upload", + "hash": "d9d4f495e875a2e075a1a4a6e1b9770f", + "name": "FINANCIAL PROTECTION Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Noncommunicable diseases and mental health Indicators for Zambia", - "description": "*Noncommunicable diseases and mental health:*\n[Number of deaths attrib", + "description": "*Noncommunicable diseases and mental health:*\n" + "[Number of deaths attrib", "format": "csv", - "url_type": "upload", + "hash": "67c6a1e7ce56d3d6fa748ab6d9af3fd7", + "name": "Noncommunicable diseases and mental health Indicators " + "for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Health workforce Indicators for Zambia", - "description": "*Health workforce:*\n[Medical doctors (per 10 000 population)](https://", + "description": "*Health workforce:*\n" + "[Medical doctors (per 10 000 " + "population)](https://", "format": "csv", - "url_type": "upload", + "hash": "642e92efb79421734881b53e1e1b18b6", + "name": "Health workforce Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Neglected Tropical Diseases Indicators for Zambia", - "description": "*Neglected Tropical Diseases:*\n[Number of new leprosy cases](https://w", + "description": "*Neglected Tropical Diseases:*\n" + 
"[Number of new leprosy cases](https://w", "format": "csv", - "url_type": "upload", + "hash": "33e75ff09dd601bbe69f351039152189", + "name": "Neglected Tropical Diseases Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "QuickCharts Indicators for Zambia", "description": "Cut down data for QuickCharts", "format": "csv", - "url_type": "upload", + "hash": "d82c8d1619ad8176d665453cfb2e55f0", + "name": "QuickCharts Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "TOBACCO Indicators for Zambia", - "description": "*TOBACCO:*\n[Monitor](https://www.who.int/data/gho/indicator-metadata-r", + "description": "*TOBACCO:*\n" + "[Monitor](https://www.who.int/data/gho/indicator-metadata-r", "format": "csv", - "url_type": "upload", + "hash": "02e74f10e0327ad868d138f2b4fdd6f0", + "name": "TOBACCO Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "UHC Indicators for Zambia", - "description": "*UHC:*\n[Population in malaria-endemic areas who slept under an insecti", + "description": "*UHC:*\n" + "[Population in malaria-endemic areas who slept " + "under an insecti", "format": "csv", - "url_type": "upload", + "hash": "6364d3f0f495b6ab9dcf8d3b5c6e0b01", + "name": "UHC Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "ICD Indicators for Zambia", - "description": "*ICD:*\n[ICD-11 implementation progress level](https://web-prod.who.int", + "description": "*ICD:*\n" + "[ICD-11 implementation progress " + "level](https://web-prod.who.int", "format": "csv", - "url_type": "upload", + "hash": "f457c545a9ded88f18ecee47145a72c0", + "name": "ICD Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia", - "description": "*SEXUAL AND REPRODUCTIVE HEALTH:*\n[Institutional Births (birth taken p", + "description": "*SEXUAL AND REPRODUCTIVE HEALTH:*\n" + "[Institutional Births (birth taken p", "format": "csv", - "url_type": "upload", + "hash": "c0c7c76d30bd3dcaefc96f40275bdc0a", + "name": "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Immunization Indicators for Zambia", - "description": "*Immunization:*\n[Proportion of vaccination cards seen (%)](https://www", + "description": "*Immunization:*\n" + "[Proportion of vaccination cards seen " + "(%)](https://www", "format": "csv", - "url_type": "upload", + "hash": "2838023a778dfaecdc212708f721b788", + "name": "Immunization Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, ], "state": "active", + "subnational": "0", + "tags": [ + {"name": "hxl", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"} + ], + "title": "Zambia - Health Indicators", } self.check_resources(update, results) @@ -1103,8 +1505,11 @@ def test_update_logic_3( dataset, new_resources_yaml, resources_yaml, + temp_dir, ): - self.add_new_dataset_resources(new_dataset, new_resources_yaml, include=[0, 3]) + self.add_new_dataset_resources( + temp_dir, new_dataset, new_resources_yaml, 
include=[0, 3] + ) self.add_dataset_resources(dataset, resources_yaml, include=[0, 1, 2]) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources @@ -1122,44 +1527,47 @@ def test_update_logic_3( update = results["update"] del update["updated_by_script"] assert update == { - "name": "who-data-for-zambia", - "title": "Zambia - Health Indicators", - "private": False, - "notes": "Contains data from World Health Organization...", + "data_update_frequency": "30", + "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", "dataset_source": "World Health Organization", + "groups": [{"name": "zmb"}], "license_id": "hdx-other", "license_other": "CC BY-NC-SA 3.0 IGO", - "methodology": "Registry", "maintainer": "35f7bb2c-4ab6-4796-8334-525b30a94c89", + "methodology": "Registry", + "name": "who-data-for-zambia", + "notes": "Contains data from World Health Organization...", "owner_org": "c021f6be-3598-418e-8f7f-c7a799194dba", - "data_update_frequency": "30", - "subnational": "0", - "dataset_date": "[1961-01-01T00:00:00 TO 2019-12-31T23:59:59]", - "groups": [{"name": "zmb"}], - "tags": [ - { - "name": "hxl", - "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87", - } - ], + "private": False, "resources": [ { - "name": "All Health Indicators for Zambia", - "description": "See resource descriptions below for links to indicator metadata", + "description": "See resource descriptions below for links to " + "indicator metadata", "format": "csv", - "url_type": "upload", + "hash": "b4b147bc522828731f1a016bfa72c073", + "name": "All Health Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, { - "name": "Millennium Development Goals (MDGs) Indicators for Zambia", - "description": "*Millennium Development Goals (MDGs):*\n[Contraceptive prevalence (%)](", + "description": "*Millennium Development Goals (MDGs):*\n" + "[Contraceptive prevalence (%)](", "format": "csv", - "url_type": "upload", + "hash": "e45ee7ce7e88149af8dd32b27f9512ce", + "name": "Millennium Development Goals (MDGs) Indicators for Zambia", "resource_type": "file.upload", + "size": 2, "url": "updated_by_file_upload_step", + "url_type": "upload", }, ], "state": "active", + "subnational": "0", + "tags": [ + {"name": "hxl", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"} + ], + "title": "Zambia - Health Indicators", } self.check_resources(update, results) From 1a6d3284e43225aebc7b3b9de25d79ec92984e98 Mon Sep 17 00:00:00 2001 From: mcarans Date: Thu, 14 Aug 2025 16:31:57 +1200 Subject: [PATCH 03/16] Return statuses --- src/hdx/api/utilities/filestore_helper.py | 32 ++++- src/hdx/data/dataset.py | 72 ++++++++--- src/hdx/data/resource.py | 50 +++++++- tests/hdx/__init__.py | 4 +- tests/hdx/data/test_dataset_core.py | 114 ++++++++++++----- tests/hdx/data/test_resource.py | 21 ++- .../hdx/data/test_update_dataset_resources.py | 12 ++ tests/hdx/data/test_update_logic.py | 120 +++++++++++++++++- 8 files changed, 352 insertions(+), 73 deletions(-) diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index 6428b10e..b30c6cfd 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -47,8 +47,16 @@ def check_filestore_resource( filestore_resources: Dict[int, str], resource_index: int, **kwargs: Any, - ) -> None: + ) -> int: """Helper method to add new resource from dataset including filestore. 
+ Returns status code where: + 0 = no file to upload and last_modified set to now + (resource creation or data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (resource creation or either hash or size of file + has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored Args: resource_data_to_update (Resource): Updated resource from dataset @@ -56,7 +64,7 @@ def check_filestore_resource( resource_index (int): Index of resource Returns: - None + int: Status code """ cls.resource_check_required_fields(resource_data_to_update, **kwargs) file_to_upload = resource_data_to_update.get_file_to_upload() @@ -67,6 +75,8 @@ def check_filestore_resource( resource_data_to_update["url"] = cls.temporary_url resource_data_to_update["size"] = size resource_data_to_update["hash"] = hash + return 2 + return 0 @classmethod def dataset_update_filestore_resource( @@ -75,8 +85,16 @@ def dataset_update_filestore_resource( resource_data_to_update: "Resource", filestore_resources: Dict[int, str], resource_index: int, - ) -> None: + ) -> int: """Helper method to merge updated resource from dataset into HDX resource read from HDX including filestore. + Returns status code where: + 0 = no file to upload and last_modified set to now + (resource creation or data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (resource creation or either hash or size of file + has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored Args: original_resource_data (Resource): Original resource from dataset @@ -85,7 +103,7 @@ def dataset_update_filestore_resource( resource_index (int): Index of resource Returns: - None + int: Status code """ file_to_upload = resource_data_to_update.get_file_to_upload() if file_to_upload: @@ -97,15 +115,21 @@ def dataset_update_filestore_resource( # ensure last_modified is not updated if file hasn't changed if "last_modified" in resource_data_to_update: del resource_data_to_update["last_modified"] + return 4 + else: + return 3 else: # update file if size or hash has changed filestore_resources[resource_index] = file_to_upload resource_data_to_update["url"] = cls.temporary_url resource_data_to_update["size"] = size resource_data_to_update["hash"] = hash + return 2 elif resource_data_to_update.is_marked_data_updated(): # Should not output timezone info here resource_data_to_update["last_modified"] = now_utc_notz().isoformat( timespec="microseconds" ) resource_data_to_update.data_updated = False + return 0 + return 1 diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 87fd6a91..19b1ae3b 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -853,12 +853,16 @@ def _dataset_update_resources( remove_additional_resources: bool, match_resource_order: bool, **kwargs: Any, - ) -> Tuple[List, List, Dict, List]: + ) -> Tuple[List, List, Dict, List, Dict]: """Helper method to compare new and existing dataset data returning resources to be updated, resources to be deleted, resources where files need to be uploaded to the filestore and if match_resource_order is True, then the new resource order. 
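The 0-4 status values enumerated in the docstrings above are shared across the filestore helper, Resource and Dataset methods. As a caller-side illustration only (a sketch, not part of this patch), they could be mapped to log messages like so:

    # Hypothetical mapping of the new status codes to human-readable messages,
    # based purely on the meanings documented in the docstrings above.
    STATUS_MESSAGES = {
        0: "no file to upload, last_modified set to now",
        1: "no file to upload, data not marked updated",
        2: "file uploaded to filestore (hash or size changed)",
        3: "file unchanged, filestore upload skipped",
        4: "file unchanged, upload skipped and supplied last_modified ignored",
    }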
+ Returns a tuple of resources information of the form: + (resources_to_update, resources_to_delete, filestore_resources, + new_resource_order, resource status codes) + Args: update_resources (bool): Whether to update resources match_resources_by_metadata (bool): Compare resource metadata rather than position in list @@ -866,7 +870,7 @@ def _dataset_update_resources( match_resource_order (bool): Match order of given resources by name Returns: - Tuple[List, List, Dict, List]: (resources_to_update, resources_to_delete, filestore_resources, new_resource_order) + Tuple[List, List, Dict, List, Dict]: Tuple of resources information """ # When the user sets up a dataset, "data" contains the metadata. The # HDX dataset update process involves copying "data" to "old_data" and @@ -877,6 +881,7 @@ def _dataset_update_resources( resources_to_update = [] resources_to_delete = [] filestore_resources = {} + statuses = {} if update_resources and resources_metadata_to_update: if match_resources_by_metadata: ( @@ -896,13 +901,15 @@ def _dataset_update_resources( resource_data_to_update = resources_metadata_to_update[ updated_resource_matches[match_index] ] + resource_name = resource["name"] logger.warning(f"Resource exists. Updating {resource['name']}") - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( resource, resource_data_to_update, filestore_resources, i, ) + statuses[resource_name] = status resources_to_update.append(resource_data_to_update) resource_index = len(self.resources) @@ -910,12 +917,13 @@ def _dataset_update_resources( resource_data_to_update = resources_metadata_to_update[ updated_resource_index ] - FilestoreHelper.check_filestore_resource( + status = FilestoreHelper.check_filestore_resource( resource_data_to_update, filestore_resources, resource_index, **kwargs, ) + statuses[resource_data_to_update["name"]] = status resource_index = resource_index + 1 resources_to_update.append(resource_data_to_update) if remove_additional_resources: @@ -938,19 +946,21 @@ def _dataset_update_resources( logger.warning( f"Changing resource name to: {updated_resource_name}" ) - FilestoreHelper.dataset_update_filestore_resource( + status = FilestoreHelper.dataset_update_filestore_resource( resource, resource_data_to_update, filestore_resources, i, ) + statuses[updated_resource_name] = status else: - FilestoreHelper.check_filestore_resource( + status = FilestoreHelper.check_filestore_resource( resource_data_to_update, filestore_resources, i, **kwargs, ) + statuses[resource_data_to_update["name"]] = status resources_to_update.append(resource_data_to_update) for i, resource in enumerate(self.resources): @@ -973,6 +983,7 @@ def _dataset_update_resources( resources_to_delete, filestore_resources, new_resource_order, + statuses, ) def _dataset_hdx_update( @@ -985,12 +996,15 @@ def _dataset_hdx_update( create_default_views: bool, hxl_update: bool, **kwargs: Any, - ) -> Dict: + ) -> Tuple[Dict, Dict]: """Helper method to compare new and existing dataset data, update resources including those with files in the filestore, delete extra resources if needed and update dataset data and save the dataset and resources to HDX. 
+ Returns a tuple of information of the form: (resource status codes, + dictionary of what gets passed to the revise call (for testing)) + Args: update_resources (bool): Whether to update resources match_resources_by_metadata (bool): Compare resource metadata rather than position in list @@ -1001,13 +1015,14 @@ def _dataset_hdx_update( hxl_update (bool): Whether to call package_hxl_update. Returns: - Dict: Dictionary of what gets passed to the revise call (for testing) + Tuple[Dict, Dict]: Tuple of (resource status codes, revise call info) """ ( resources_to_update, resources_to_delete, filestore_resources, new_resource_order, + statuses, ) = self._dataset_update_resources( update_resources, match_resources_by_metadata, @@ -1029,7 +1044,7 @@ def _dataset_hdx_update( if not found: self.old_data["tags"].append(tag) self._prepare_hdx_call(self.old_data, kwargs) - return self._revise_dataset( + revise_call = self._revise_dataset( keys_to_delete, resources_to_update, resources_to_delete, @@ -1039,6 +1054,7 @@ def _dataset_hdx_update( create_default_views=create_default_views, **kwargs, ) + return statuses, revise_call def update_in_hdx( self, @@ -1050,10 +1066,19 @@ def update_in_hdx( create_default_views: bool = True, hxl_update: bool = True, **kwargs: Any, - ) -> None: + ) -> Dict: """Check if dataset exists in HDX and if so, update it. match_resources_by_metadata uses ids if they are available, otherwise names only if names are unique or format in addition if not. + Returns a dictionary with key resource name and value status code: + 0 = no file to upload and last_modified set to now + (resource creation or data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (resource creation or either hash or size of file + has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored + Args: update_resources (bool): Whether to update resources. Defaults to True. match_resources_by_metadata (bool): Compare resource metadata rather than position in list. Defaults to True. @@ -1068,7 +1093,7 @@ def update_in_hdx( batch (str): A string you can specify to show which datasets are part of a single batch update Returns: - None + Dict: Status codes of resources """ loaded = False if "id" in self.data: @@ -1081,7 +1106,7 @@ def update_in_hdx( self._check_existing_object("dataset", "name") if not self._dataset_load_from_hdx(self.data["name"]): raise HDXError("No existing dataset to update!") - self._dataset_hdx_update( + statuses, _ = self._dataset_hdx_update( update_resources=update_resources, match_resources_by_metadata=match_resources_by_metadata, keys_to_delete=keys_to_delete, @@ -1092,6 +1117,7 @@ def update_in_hdx( **kwargs, ) logger.info(f"Updated {self.get_hdx_url()}") + return statuses def create_in_hdx( self, @@ -1104,10 +1130,19 @@ def create_in_hdx( create_default_views: bool = True, hxl_update: bool = True, **kwargs: Any, - ) -> None: + ) -> Dict: """Check if dataset exists in HDX and if so, update it, otherwise create it. match_resources_by_metadata uses ids if they are available, otherwise names only if names are unique or format in addition if not. 
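Because update_in_hdx and create_in_hdx now return the per-resource status codes, a caller can report what actually happened to each file. A minimal usage sketch (assuming a dataset whose resources were added with file_to_upload set; not part of this patch):

    # Hypothetical caller-side reporting of the statuses returned by
    # Dataset.update_in_hdx() / Dataset.create_in_hdx().
    statuses = dataset.update_in_hdx()
    for resource_name, status in statuses.items():
        if status == 2:
            logger.info(f"{resource_name}: file changed, uploaded to filestore")
        elif status in (3, 4):
            logger.info(f"{resource_name}: file unchanged, upload skipped")
        else:
            logger.info(f"{resource_name}: no file to upload (status {status})")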
+ Returns a dictionary with key resource name and value status code: + 0 = no file to upload and last_modified set to now + (resource creation or data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (resource creation or either hash or size of file + has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored + Args: allow_no_resources (bool): Whether to allow no resources. Defaults to False. update_resources (bool): Whether to update resources (if updating). Defaults to True. @@ -1123,7 +1158,7 @@ def create_in_hdx( batch (str): A string you can specify to show which datasets are part of a single batch update Returns: - None + Dict: Status codes of resources """ if "ignore_check" not in kwargs: # allow ignoring of field checks self.check_required_fields(allow_no_resources=allow_no_resources, **kwargs) @@ -1139,7 +1174,7 @@ def create_in_hdx( if self._dataset_load_from_hdx(self.data["name"]): loadedid = self.data["name"] if loadedid: - self._dataset_hdx_update( + statuses, _ = self._dataset_hdx_update( update_resources=update_resources, match_resources_by_metadata=match_resources_by_metadata, keys_to_delete=keys_to_delete, @@ -1150,14 +1185,16 @@ def create_in_hdx( **kwargs, ) logger.info(f"Updated {self.get_hdx_url()}") - return + return statuses + statuses = {} filestore_resources = {} if self.resources: for i, resource in enumerate(self.resources): - FilestoreHelper.check_filestore_resource( + status = FilestoreHelper.check_filestore_resource( resource, filestore_resources, i, **kwargs ) + statuses[resource["name"]] = status self.unseparate_resources() self._prepare_hdx_call(self.data, kwargs) kwargs["operation"] = "create" @@ -1178,6 +1215,7 @@ def create_in_hdx( **kwargs, ) logger.info(f"Created {self.get_hdx_url()}") + return statuses def delete_from_hdx(self) -> None: """Deletes a dataset from HDX. diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index c54dbea6..7fcfb6d3 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -362,9 +362,16 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None def _resource_merge_hdx_update( self, **kwargs: Any, - ) -> None: + ) -> int: """Helper method to update last_modified for external resources if data_updated is True, then check if HDX object exists and update it. + Returns status code where: + 0 = no file to upload and last_modified set to now + (data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (either hash or size of file has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), last_modified field ignored Args: **kwargs: See below @@ -372,7 +379,7 @@ def _resource_merge_hdx_update( data_updated (bool): If True, set last_modified to now. Defaults to False. 
Returns: - None + int: Status code """ data_updated = kwargs.pop("data_updated", self.data_updated) files = {} @@ -383,20 +390,28 @@ def _resource_merge_hdx_update( # ensure last_modified is not updated if file hasn't changed if "last_modified" in self.data: del self.data["last_modified"] + status = 4 + else: + status = 3 else: # update file if size or hash has changed files["upload"] = self.file_to_upload self.old_data["size"] = size self.old_data["hash"] = hash + status = 2 elif data_updated: # Should not output timezone info here self.old_data["last_modified"] = now_utc_notz().isoformat( timespec="microseconds" ) self.data_updated = False + status = 0 + else: + status = 1 # old_data will be merged into data in the next step self._merge_hdx_update("resource", "id", files, True, **kwargs) + return status def update_in_hdx(self, **kwargs: Any) -> int: """Check if resource exists in HDX and if so, update it. To indicate @@ -406,6 +421,14 @@ def update_in_hdx(self, **kwargs: Any) -> int: is used to supply a file, the resource last_modified field is set to now automatically regardless of the value of data_updated. + Returns status code where: + 0 = no file to upload and last_modified set to now + (data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (either hash or size of file has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored + Args: **kwargs: See below operation (string): Operation to perform eg. patch. Defaults to update. @@ -413,14 +436,14 @@ def update_in_hdx(self, **kwargs: Any) -> int: date_data_updated (datetime): Date to use for last_modified. Default to None. Returns: - int: Return status code + int: Status code """ self._check_load_existing_object("resource", "id") if self.file_to_upload and "url" in self.data: del self.data["url"] - self._resource_merge_hdx_update(**kwargs) + return self._resource_merge_hdx_update(**kwargs) - def create_in_hdx(self, **kwargs: Any) -> None: + def create_in_hdx(self, **kwargs: Any) -> int: """Check if resource exists in HDX and if so, update it, otherwise create it. To indicate that the data in an external resource (given by a URL) has been updated, set data_updated to True, which will result in @@ -429,13 +452,22 @@ def create_in_hdx(self, **kwargs: Any) -> None: field is set to now automatically regardless of the value of data_updated. + Returns status code where: + 0 = no file to upload and last_modified set to now + (resource creation or data_updated flag is True), + 1 = no file to upload and data_updated flag is False, + 2 = file uploaded to filestore (resource creation or either hash or size of file + has changed), + 3 = file not uploaded to filestore (hash and size of file are the same), + 4 = file not uploaded (hash, size unchanged), given last_modified ignored + Args: **kwargs: See below data_updated (bool): If True, set last_modified to now. Defaults to False. date_data_updated (datetime): Date to use for last_modified. Default to None. Returns: - None + int: Status code """ if "ignore_check" not in kwargs: # allow ignoring of field checks self.check_required_fields() @@ -444,7 +476,7 @@ def create_in_hdx(self, **kwargs: Any) -> None: logger.warning(f"{'resource'} exists. 
Updating {id}") if self.file_to_upload and "url" in self.data: del self.data["url"] - self._resource_merge_hdx_update(**kwargs) + return self._resource_merge_hdx_update(**kwargs) else: files = {} if self.file_to_upload: @@ -452,7 +484,11 @@ def create_in_hdx(self, **kwargs: Any) -> None: self.data["size"], self.data["hash"] = get_size_and_hash( self.file_to_upload, self.get_format() ) + status = 2 + else: + status = 0 self._save_to_hdx("create", "name", files, True) + return status def delete_from_hdx(self) -> None: """Deletes a resource from HDX diff --git a/tests/hdx/__init__.py b/tests/hdx/__init__.py index 78bf8982..e04422e5 100755 --- a/tests/hdx/__init__.py +++ b/tests/hdx/__init__.py @@ -193,11 +193,11 @@ def json(self): "mimetype": None, "state": "active", "created": "2016-06-07T08:57:27.367959", - "description": "Resource2", + "description": "Resource3", "position": 2, "hash": "", "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", - "name": "Resource2", + "name": "Resource3", "url": "http://resource2_csv.zip", "resource_type": "api", "url_type": "api", diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py index eb80f670..d56738d2 100755 --- a/tests/hdx/data/test_dataset_core.py +++ b/tests/hdx/data/test_dataset_core.py @@ -547,22 +547,27 @@ def test_create_in_hdx(self, configuration, post_create): dataset = Dataset(datasetdata) with pytest.raises(HDXError): dataset.create_in_hdx() - dataset.create_in_hdx(allow_no_resources=True) + statuses = dataset.create_in_hdx(allow_no_resources=True) + assert statuses == {} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) + resource2 = Resource(resourcesdata[1]) del resource["description"] del dataset["tags"] - dataset.add_update_resources([resource, resource]) + dataset.add_update_resources([resource, resource2]) with pytest.raises(HDXError): dataset.create_in_hdx() - dataset.create_in_hdx(ignore_fields=["tags", "resource:description"]) + statuses = dataset.create_in_hdx(ignore_fields=["tags", "resource:description"]) + assert statuses == {"Resource1": 0, "Resource2": 0} dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) - dataset.add_update_resources([resource, resource]) - dataset.create_in_hdx() + resource2 = Resource(resourcesdata[1]) + dataset.add_update_resources([resource, resource2]) + statuses = dataset.create_in_hdx() + assert statuses == {"Resource1": 0, "Resource2": 0} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" assert dataset["state"] == "active" assert len(dataset.resources) == 3 @@ -583,7 +588,8 @@ def test_create_in_hdx(self, configuration, post_create): del dataset["tags"] with pytest.raises(HDXError): dataset.create_in_hdx() - dataset.create_in_hdx(ignore_check=True) + statuses = dataset.create_in_hdx(ignore_check=True) + assert statuses == {"Resource1": 0} config = Configuration(user_agent="test", hdx_read_only=True) config.setup_session_remoteckan() @@ -603,7 +609,8 @@ def test_create_in_hdx(self, configuration, post_create): del resourcesdata[0]["id"] del resourcesdata[1]["id"] dataset.add_update_resources(resourcesdata) - dataset.create_in_hdx() + statuses = dataset.create_in_hdx() + assert statuses == {"Resource1": 0, "Resource2": 0} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" assert len(dataset.resources) == 3 assert 
dataset.resources[0].configuration.unique == uniqueval @@ -615,7 +622,8 @@ def test_create_in_hdx(self, configuration, post_create): file = tempfile.NamedTemporaryFile(delete=False) resource.set_file_to_upload(file.name) dataset.add_update_resource(resource) - dataset.create_in_hdx() + statuses = dataset.create_in_hdx() + assert statuses == {"Resource1": 2} file.close() assert dataset["state"] == "active" assert len(dataset.resources) == 3 @@ -640,7 +648,8 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): dataset.remove_tag("crisis-somewhere") dataset["id"] = "TEST1" dataset["name"] = "MyDataset1" - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset["id"] == "TEST1" assert dataset["dataset_date"] == "02/26/2016" assert dataset.get_tags() == [ @@ -661,19 +670,23 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "view_type": "hdx_hxl_preview", } dataset.remove_tag("crisis-somewhere") - dataset.update_in_hdx(keep_crisis_tags=False) + statuses = dataset.update_in_hdx(keep_crisis_tags=False) + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset.get_tags() == ["conflict", "political violence"] dataset.preview_resourceview = ResourceView(resourceviewdata) - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset.preview_resourceview is None - dataset.update_in_hdx(updated_by_script="hi") + statuses = dataset.update_in_hdx(updated_by_script="hi") + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} pattern = r"hi \([12]\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d\)" match = re.search(pattern, dataset["updated_by_script"]) assert match - dataset.update_in_hdx( + statuses = dataset.update_in_hdx( updated_by_script="hi", batch="6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", ) + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} match = re.search(pattern, dataset["updated_by_script"]) assert match assert dataset["batch"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" @@ -693,23 +706,27 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): dataset["groups"] = [] with pytest.raises(HDXError): dataset.update_in_hdx() - dataset.update_in_hdx(ignore_check=True) + statuses = dataset.update_in_hdx(ignore_check=True) + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} # These dataset creates actually do updates datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) dataset["id"] = "NOTEXIST" dataset["name"] = "DatasetExist" - dataset.create_in_hdx(allow_no_resources=True) + statuses = dataset.create_in_hdx(allow_no_resources=True) + assert statuses == {} assert dataset["name"] == "DatasetExist" datasetdata["name"] = "MyDataset1" datasetdata["id"] = "TEST1" dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) + resource2 = Resource(resourcesdata[1]) resource.mark_data_updated() - dataset.add_update_resources([resource, resource]) - dataset.create_in_hdx() + dataset.add_update_resources([resource, resource2]) + statuses = dataset.create_in_hdx() + assert statuses == {"Resource1": 0, "Resource2": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 3 resource = dataset.get_resource() @@ -720,38 +737,52 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): dataset = 
Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) - dataset.add_update_resources([resource, resource]) - dataset.create_in_hdx(match_resources_by_metadata=False) + resource2 = Resource(resourcesdata[1]) + dataset.add_update_resources([resource, resource2]) + statuses = dataset.create_in_hdx(match_resources_by_metadata=False) + assert statuses == {"Resource1": 1, "Resource2": 1} assert dataset["id"] == "TEST1" assert dataset["dataset_date"] == "06/04/2016" assert dataset["state"] == "active" assert len(dataset.resources) == 3 - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 3 dataset = Dataset.read_from_hdx("TEST4") dataset["id"] = "TEST4" assert dataset["state"] == "active" - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert len(dataset.resources) == 3 dataset = Dataset.read_from_hdx("TEST4") try: file = tempfile.NamedTemporaryFile(delete=False) resource.set_file_to_upload(file.name) dataset.add_update_resource(resource) - dataset.update_in_hdx(batch="6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d") + statuses = dataset.update_in_hdx( + batch="6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" + ) assert len(dataset.resources) == 3 + assert statuses == {"Resource1": 2, "Resource2": 1, "Resource3": 1} resource["name"] = "123" resource.set_file_to_upload(None) resource["url"] = "http://lala" dataset.add_update_resource(resource) - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 3 del resource["id"] - resource["name"] = "Resource1" + resource["name"] = "Resource4" dataset.add_update_resource(resource) - dataset.update_in_hdx() + statuses = dataset.update_in_hdx() + assert statuses == { + "Resource1": 1, + "Resource2": 1, + "Resource3": 1, + "Resource4": 0, + } assert dataset["state"] == "active" assert len(dataset.resources) == 4 @@ -770,9 +801,16 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): resource["name"] = "changed name" resource["id"] = "456" resource.set_file_to_upload(file.name) - dataset.update_in_hdx(match_resources_by_metadata=True) + dataset.add_update_resource(resource) + statuses = dataset.update_in_hdx(match_resources_by_metadata=True) + assert statuses == { + "Resource1": 1, + "Resource2": 1, + "ResourcePosition": 2, + "changed name": 2, + } assert dataset["state"] == "active" - assert len(dataset.resources) == 4 + assert len(dataset.resources) == 5 # existing resources dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) @@ -789,7 +827,12 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): resource["name"] = "changed name" resource["id"] = "456" resource.set_file_to_upload(file.name) - dataset.update_in_hdx(match_resources_by_metadata=False) + statuses = dataset.update_in_hdx(match_resources_by_metadata=False) + assert statuses == { + "Resource2": 1, + "ResourcePosition": 2, + "changed name": 2, + } assert dataset["state"] == "active" assert len(dataset.resources) == 3 file.close() @@ -797,24 +840,27 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) 
dataset.add_update_resource(resource) - dataset.update_in_hdx(remove_additional_resources=False) + statuses = dataset.update_in_hdx(remove_additional_resources=False) + assert statuses == {"Resource1": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 3 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) dataset.add_update_resource(resource) - dataset.update_in_hdx(remove_additional_resources=True) + statuses = dataset.update_in_hdx(remove_additional_resources=True) + assert statuses == {"Resource1": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 1 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) dataset.add_update_resource(resource) - dataset.update_in_hdx( + statuses = dataset.update_in_hdx( match_resources_by_metadata=False, remove_additional_resources=True, ) + assert statuses == {"Resource1": 1} assert dataset["state"] == "active" assert len(dataset.resources) == 1 @@ -829,7 +875,8 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): dataset["id"] = "TEST1" dataset["name"] = "MyDataset1" dataset.resources = [resources[2], resources[0], resources[1]] - dataset.update_in_hdx(match_resource_order=True) + statuses = dataset.update_in_hdx(match_resource_order=True) + assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} resource_ids = [x["id"] for x in dataset.get_resources()] assert resource_ids == [ "3d777226-96aa-4239-860a-703389d16d1g", @@ -1269,7 +1316,8 @@ def test_hdxconnect(self, configuration, post_create): with pytest.raises(HDXError): dataset.create_in_hdx() dataset["num_of_rows"] = 100 - dataset.create_in_hdx() + statuses = dataset.create_in_hdx() + assert statuses == {} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index 96658c55..d59889e3 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -714,7 +714,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data resource_data_copy = copy.deepcopy(resource_data) resource = Resource(resource_data_copy) - resource.create_in_hdx() + status = resource.create_in_hdx() + assert status == 0 assert resource["id"] == "de6549d8-268b-4dfe-adaf-a4ae5c8510d5" assert resource["url_type"] == "api" assert resource["resource_type"] == "api" @@ -727,7 +728,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data resource = Resource(resource_data_copy) resource.set_file_to_upload(test_data) assert resource.get_file_to_upload() == test_data - resource.create_in_hdx() + status = resource.create_in_hdx() + assert status == 2 assert resource["url_type"] == "upload" assert resource["resource_type"] == "file.upload" assert ( @@ -738,7 +740,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data assert resource["hash"] == "3790da698479326339fa99a074cbc1f7" assert resource["state"] == "active" resource.set_file_to_upload(test_data) - resource.create_in_hdx() + status = resource.create_in_hdx() + assert status == 2 assert ( resource["url"] == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv" @@ -771,7 +774,8 @@ def test_update_in_hdx(self, configuration, date_pattern, 
post_update, test_data resource.set_format("XLSX") resource["id"] = "74b74ae1-df0c-4716-829f-4f939a046811" resource["name"] = "MyResource1" - resource.update_in_hdx(data_updated=True) + status = resource.update_in_hdx(data_updated=True) + assert status == 0 assert resource["id"] == "74b74ae1-df0c-4716-829f-4f939a046811" assert resource["format"] == "xlsx" resource.set_format(".xsl") @@ -790,7 +794,8 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data resource.set_file_to_upload(test_data, guess_format_from_suffix=True) assert resource["format"] == "csv" - resource.update_in_hdx() + status = resource.update_in_hdx() + assert status == 2 assert resource["url_type"] == "upload" assert resource["resource_type"] == "file.upload" assert ( @@ -820,7 +825,8 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data resource.mark_data_updated() assert resource.data_updated is True assert resource.is_marked_data_updated() is True - resource.create_in_hdx() + status = resource.create_in_hdx() + assert status == 0 assert resource.is_marked_data_updated() is False assert resource["id"] == "74b74ae1-df0c-4716-829f-4f939a046811" assert resource.get_format() == "xlsx" @@ -828,7 +834,8 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data match = date_pattern.search(resource["last_modified"]) assert match resource["format"] = "Geoservice" - resource.update_in_hdx() + status = resource.update_in_hdx() + assert status == 1 assert resource.get_format() == "geoservice" resource["format"] = "NOTEXIST" with pytest.raises(HDXError): diff --git a/tests/hdx/data/test_update_dataset_resources.py b/tests/hdx/data/test_update_dataset_resources.py index 438d45f0..72a1928d 100644 --- a/tests/hdx/data/test_update_dataset_resources.py +++ b/tests/hdx/data/test_update_dataset_resources.py @@ -104,6 +104,7 @@ def test_dataset_update_resources( resources_to_delete, filestore_resources, new_resource_order, + statuses, ) = dataset._dataset_update_resources(True, True, True, True) assert resources_to_update == expected_resources_to_update assert resources_to_delete == [8, 2, 1, 0] @@ -129,6 +130,17 @@ def test_dataset_update_resources( ("Demographic and Socio-economic indicator list", "csv"), ("QuickCharts-SDG 4 Global and Thematic data", "csv"), ] + assert statuses == { + "Demographic and Socio-economic data": 2, + "Demographic and Socio-economic indicator list": 2, + "Other Policy Relevant Indicators data": 2, + "Other Policy Relevant Indicators indicator list": 2, + "Other Policy Relevant Indicators metadata": 2, + "QuickCharts-SDG 4 Global and Thematic data": 2, + "SDG 4 Global and Thematic data": 2, + "SDG 4 Global and Thematic indicator list": 2, + "SDG 4 Global and Thematic metadata": 2, + } dataset._prepare_hdx_call(dataset.old_data, {}) assert ( dataset["updated_by_script"] diff --git a/tests/hdx/data/test_update_logic.py b/tests/hdx/data/test_update_logic.py index 67bea805..2c0e6db0 100644 --- a/tests/hdx/data/test_update_logic.py +++ b/tests/hdx/data/test_update_logic.py @@ -177,7 +177,7 @@ def test_update_logic_1( self.add_dataset_resources(dataset, resources_yaml) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources - results = dataset._dataset_hdx_update( + statuses, results = dataset._dataset_hdx_update( update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], @@ -187,6 +187,61 @@ def test_update_logic_1( hxl_update=False, test=True, ) + assert statuses == { + "All Health 
Indicators for Zambia": 2, + "Child health Indicators for Zambia": 2, + "Demographic and socioeconomic statistics Indicators for Zambia": 2, + "Essential health technologies Indicators for Zambia": 2, + "FINANCIAL PROTECTION Indicators for Zambia": 2, + "Global Observatory for eHealth (GOe) Indicators for Zambia": 2, + "HIV/AIDS and other STIs Indicators for Zambia": 2, + "Health Equity Monitor Indicators for Zambia": 2, + "Health financing Indicators for Zambia": 2, + "Health systems Indicators for Zambia": 2, + "Health workforce Indicators for Zambia": 2, + "ICD Indicators for Zambia": 2, + "Immunization Indicators for Zambia": 2, + "Infectious diseases Indicators for Zambia": 2, + "Infrastructure Indicators for Zambia": 2, + "Injuries and violence Indicators for Zambia": 2, + "Insecticide resistance Indicators for Zambia": 2, + "International Health Regulations (2005) monitoring framework Indicators for Zambia": 2, + "Malaria Indicators for Zambia": 2, + "Medical equipment Indicators for Zambia": 2, + "Millennium Development Goals (MDGs) Indicators for Zambia": 2, + "Mortality and global health estimates Indicators for Zambia": 2, + "NLIS Indicators for Zambia": 2, + "Negelected tropical diseases Indicators for Zambia": 2, + "Neglected Tropical Diseases Indicators for Zambia": 2, + "Neglected tropical diseases Indicators for Zambia": 2, + "Noncommunicable diseases CCS Indicators for Zambia": 2, + "Noncommunicable diseases Indicators for Zambia": 2, + "Noncommunicable diseases and mental health Indicators for Zambia": 2, + "Nutrition Indicators for Zambia": 2, + "Public health and environment Indicators for Zambia": 2, + "QuickCharts Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT Indicators for Zambia": 2, + "RSUD: HUMAN RESOURCES Indicators for Zambia": 2, + "RSUD: INFORMATION SYSTEMS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROVIDERS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVENTIONS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVICES Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREATMENT COVERAGE Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVIDERS Indicators for Zambia": 2, + "RSUD: YOUTH Indicators for Zambia": 2, + "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia": 2, + "Substance use and mental health Indicators for Zambia": 2, + "Sustainable development goals Indicators for Zambia": 2, + "TOBACCO Indicators for Zambia": 2, + "Tuberculosis Indicators for Zambia": 2, + "UHC Indicators for Zambia": 2, + "Universal Health Coverage Indicators for Zambia": 2, + "Urban health Indicators for Zambia": 2, + "World Health Statistics Indicators for Zambia": 2, + } assert results["filter"] == ["-resources__13"] update = results["update"] del update["updated_by_script"] @@ -845,7 +900,7 @@ def test_update_logic_2( self.add_dataset_resources(dataset, resources_yaml) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources - results = dataset._dataset_hdx_update( + statuses, results = dataset._dataset_hdx_update( 
update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], @@ -855,6 +910,61 @@ def test_update_logic_2( hxl_update=False, test=True, ) + assert statuses == { + "All Health Indicators for Zambia": 2, + "Child health Indicators for Zambia": 2, + "Demographic and socioeconomic statistics Indicators for Zambia": 2, + "Essential health technologies Indicators for Zambia": 2, + "FINANCIAL PROTECTION Indicators for Zambia": 2, + "Global Observatory for eHealth (GOe) Indicators for Zambia": 2, + "HIV/AIDS and other STIs Indicators for Zambia": 2, + "Health Equity Monitor Indicators for Zambia": 2, + "Health financing Indicators for Zambia": 2, + "Health systems Indicators for Zambia": 2, + "Health workforce Indicators for Zambia": 2, + "ICD Indicators for Zambia": 2, + "Immunization Indicators for Zambia": 2, + "Infectious diseases Indicators for Zambia": 2, + "Infrastructure Indicators for Zambia": 2, + "Injuries and violence Indicators for Zambia": 2, + "Insecticide resistance Indicators for Zambia": 2, + "International Health Regulations (2005) monitoring framework Indicators for Zambia": 2, + "Malaria Indicators for Zambia": 2, + "Medical equipment Indicators for Zambia": 2, + "Millennium Development Goals (MDGs) Indicators for Zambia": 2, + "Mortality and global health estimates Indicators for Zambia": 2, + "Negelected tropical diseases Indicators for Zambia": 2, + "Neglected Tropical Diseases Indicators for Zambia": 2, + "Neglected tropical diseases Indicators for Zambia": 2, + "Noncommunicable diseases CCS Indicators for Zambia": 2, + "Noncommunicable diseases Indicators for Zambia": 2, + "Noncommunicable diseases and mental health Indicators for Zambia": 2, + "Nutrition Indicators for Zambia": 2, + "Public health and environment Indicators for Zambia": 2, + "QuickCharts Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING : PREVENTION Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING: FINANCING Indicators for Zambia": 2, + "RSUD: GOVERNANCE, POLICY AND FINANCING: TREATMENT Indicators for Zambia": 2, + "RSUD: HUMAN RESOURCES Indicators for Zambia": 2, + "RSUD: INFORMATION SYSTEMS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: PHARMACOLOGICAL TREATMENT Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: PREVENTION PROGRAMS AND PROVIDERS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: SCREENING AND BRIEF INTERVENTIONS Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: SPECIAL PROGRAMMES AND SERVICES Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT CAPACITY AND TREATMENT COVERAGE Indicators for Zambia": 2, + "RSUD: SERVICE ORGANIZATION AND DELIVERY: TREATMENT SECTORS AND PROVIDERS Indicators for Zambia": 2, + "RSUD: YOUTH Indicators for Zambia": 2, + "SEXUAL AND REPRODUCTIVE HEALTH Indicators for Zambia": 2, + "Substance use and mental health Indicators for Zambia": 2, + "Sustainable development goals Indicators for Zambia": 2, + "TOBACCO Indicators for Zambia": 2, + "Tobacco Indicators for Zambia": 2, + "Tuberculosis Indicators for Zambia": 2, + "UHC Indicators for Zambia": 2, + "Universal Health Coverage Indicators for Zambia": 2, + "Urban health Indicators for Zambia": 2, + "World Health Statistics Indicators for Zambia": 2, + } assert results["filter"] == ["-resources__53"] update = results["update"] del update["updated_by_script"] @@ -1513,7 +1623,7 @@ def test_update_logic_3( self.add_dataset_resources(dataset, 
resources_yaml, include=[0, 1, 2]) dataset.old_data = new_dataset.data dataset.old_data["resources"] = new_dataset.resources - results = dataset._dataset_hdx_update( + statuses, results = dataset._dataset_hdx_update( update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], @@ -1523,6 +1633,10 @@ def test_update_logic_3( hxl_update=False, test=True, ) + assert statuses == { + "All Health Indicators for Zambia": 2, + "Millennium Development Goals (MDGs) Indicators for Zambia": 2, + } assert results["filter"] == ["-resources__2", "-resources__1"] update = results["update"] del update["updated_by_script"] From 60a6a76f51f9287efa59f257ae46e57e37d83cc2 Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 19 Aug 2025 09:42:22 +1200 Subject: [PATCH 04/16] Move file upload and url checks to appropriate places Code cleanup Fix tests --- requirements.txt | 13 +- src/hdx/api/configuration.py | 2 +- src/hdx/api/utilities/filestore_helper.py | 29 ++- src/hdx/data/dataset.py | 228 ++++++++++-------- src/hdx/data/hdxobject.py | 54 +++-- src/hdx/data/resource.py | 140 ++++++----- src/hdx/data/resource_view.py | 2 +- src/hdx/data/showcase.py | 6 +- tests/hdx/__init__.py | 16 +- .../utilities/test_dataset_title_helper.py | 8 +- .../api/utilities/test_filestore_helper.py | 10 +- tests/hdx/api/utilities/test_size_hash.py | 12 - tests/hdx/conftest.py | 9 + tests/hdx/data/test_dataset_core.py | 110 +++++---- tests/hdx/data/test_dataset_noncore.py | 4 +- .../data/test_dataset_resource_generation.py | 14 -- tests/hdx/data/test_resource.py | 15 +- .../hdx/data/test_update_dataset_resources.py | 11 +- tests/hdx/data/test_update_logic.py | 15 +- tests/hdx/data/test_vocabulary.py | 4 +- 20 files changed, 387 insertions(+), 315 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0a68fcbe..2ae77d2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,10 +29,11 @@ ckanapi==4.8 click==8.2.1 # via # mkdocs + # mkdocs-material # typer colorama==0.4.6 # via mkdocs-material -coverage==7.10.3 +coverage==7.10.4 # via pytest-cov defopt==7.0.0 # via hdx-python-api (pyproject.toml) @@ -50,7 +51,7 @@ email-validator==2.2.0 # via hdx-python-api (pyproject.toml) et-xmlfile==2.0.0 # via openpyxl -filelock==3.18.0 +filelock==3.19.1 # via virtualenv frictionless==5.18.1 # via hdx-python-utilities @@ -96,7 +97,7 @@ jsonlines==4.0.0 # via hdx-python-utilities jsonpath-ng==1.7.0 # via libhxl -jsonschema==4.25.0 +jsonschema==4.25.1 # via # frictionless # tableschema-to-template @@ -137,7 +138,7 @@ mkdocs==1.6.1 # mkdocs-material mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-material==9.6.16 +mkdocs-material==9.6.17 # via mkapi mkdocs-material-extensions==1.3.1 # via mkdocs-material @@ -236,7 +237,7 @@ referencing==0.36.2 # via # jsonschema # jsonschema-specifications -requests==2.32.4 +requests==2.32.5 # via # hdx-python-api (pyproject.toml) # ckanapi @@ -291,7 +292,7 @@ text-unidecode==1.3 # via python-slugify typeguard==4.4.4 # via inflect -typer==0.16.0 +typer==0.16.1 # via frictionless typing-extensions==4.14.1 # via diff --git a/src/hdx/api/configuration.py b/src/hdx/api/configuration.py index f0d704c1..059f8e63 100755 --- a/src/hdx/api/configuration.py +++ b/src/hdx/api/configuration.py @@ -350,7 +350,7 @@ def call_remoteckan(self, *args: Any, **kwargs: Any) -> Dict: Dict: The response from the remote CKAN call_action method """ - requests_kwargs = kwargs.get("requests_kwargs", dict()) + requests_kwargs = kwargs.get("requests_kwargs", {}) credentials = self._get_credentials() if credentials: 
requests_kwargs["auth"] = credentials diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index b30c6cfd..b66eea33 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -13,14 +13,11 @@ class FilestoreHelper: temporary_url = "updated_by_file_upload_step" @staticmethod - def resource_check_required_fields( - resource: "Resource", check_upload: bool = False, **kwargs: Any - ) -> None: + def resource_check_required_fields(resource: "Resource", **kwargs: Any) -> None: """Helper method to get ignore_fields from kwargs if it exists and add package_id Args: resource (Resource): Resource to check - check_upload (bool): Whether to check for file upload. Defaults to False. **kwargs: Keyword arguments Returns: @@ -28,9 +25,7 @@ def resource_check_required_fields( """ if "ignore_check" in kwargs: # allow ignoring of field checks return - if check_upload and resource.get_file_to_upload() and "url" in resource.data: - del resource.data["url"] - ignore_fields = kwargs.get("ignore_fields", list()) + ignore_fields = kwargs.get("ignore_fields", []) resource_ignore_fields = [] for ignore_field in ignore_fields: if ignore_field.startswith("resource:"): @@ -66,6 +61,7 @@ def check_filestore_resource( Returns: int: Status code """ + resource_data_to_update.set_types() cls.resource_check_required_fields(resource_data_to_update, **kwargs) file_to_upload = resource_data_to_update.get_file_to_upload() if file_to_upload: @@ -112,24 +108,31 @@ def dataset_update_filestore_resource( if size == original_resource_data.get( "size" ) and hash == original_resource_data.get("hash"): + if resource_data_to_update._url_backup: + resource_data_to_update["url"] = resource_data_to_update._url_backup + resource_data_to_update._url_backup = None # ensure last_modified is not updated if file hasn't changed if "last_modified" in resource_data_to_update: del resource_data_to_update["last_modified"] - return 4 + status = 4 else: - return 3 + status = 3 else: # update file if size or hash has changed filestore_resources[resource_index] = file_to_upload resource_data_to_update["url"] = cls.temporary_url resource_data_to_update["size"] = size resource_data_to_update["hash"] = hash - return 2 + resource_data_to_update._url_backup = None + status = 2 elif resource_data_to_update.is_marked_data_updated(): # Should not output timezone info here resource_data_to_update["last_modified"] = now_utc_notz().isoformat( timespec="microseconds" ) - resource_data_to_update.data_updated = False - return 0 - return 1 + resource_data_to_update._data_updated = False + status = 0 + else: + status = 1 + resource_data_to_update.set_types() + return status diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 19b1ae3b..21df9c92 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -137,7 +137,7 @@ def __init__( initial_data = {} self.init_resources() super().__init__(initial_data, configuration=configuration) - self.preview_resourceview = None + self._preview_resourceview = None @staticmethod def actions() -> Dict[str, str]: @@ -148,9 +148,7 @@ def actions() -> Dict[str, str]: """ return { "show": "package_show", - "update": "package_update", "create": "package_create", - "patch": "package_patch", "revise": "package_revise", "delete": "hdx_dataset_purge", "search": "package_search", @@ -183,7 +181,7 @@ def separate_resources(self) -> None: None """ self._separate_hdxobjects( - self.resources, "resources", "name", res_module.Resource 
+ self._resources, "resources", "name", res_module.Resource ) def unseparate_resources(self) -> None: @@ -192,8 +190,8 @@ def unseparate_resources(self) -> None: Returns: None """ - if self.resources: - self.data["resources"] = self._convert_hdxobjects(self.resources) + if self._resources: + self.data["resources"] = self._convert_hdxobjects(self._resources) def get_dataset_dict(self) -> Dict: """Move self.resources into resources key in internal dictionary @@ -202,8 +200,8 @@ def get_dataset_dict(self) -> Dict: Dict: Dataset dictionary """ package = deepcopy(self.data) - if self.resources: - package["resources"] = self._convert_hdxobjects(self.resources) + if self._resources: + package["resources"] = self._convert_hdxobjects(self._resources) return package def save_to_json(self, path: str, follow_urls: bool = False): @@ -249,7 +247,7 @@ def init_resources(self) -> None: Returns: None """ - self.resources: List[res_module.Resource] = [] + self._resources: List[res_module.Resource] = [] def _get_resource_from_obj( self, resource: Union["Resource", Dict, str] @@ -296,13 +294,13 @@ def add_update_resource( raise HDXError( f"Resource {resource['name']} being added already has a dataset id!" ) - resource.check_url_filetoupload() - resource_index = ResourceMatcher.match_resource_list(self.resources, resource) + resource.check_both_url_filetoupload() + resource_index = ResourceMatcher.match_resource_list(self._resources, resource) if resource_index is None: - self.resources.append(resource) + self._resources.append(resource) else: updated_resource = merge_two_dictionaries( - self.resources[resource_index], resource + self._resources[resource_index], resource ) if resource.get_file_to_upload(): updated_resource.set_file_to_upload(resource.get_file_to_upload()) @@ -331,18 +329,18 @@ def add_update_resources( raise HDXError( f"Resource {resource['name']} being added already has a dataset id!" 
) + resource.check_both_url_filetoupload() resource_objects.append(resource) ( resource_matches, updated_resource_matches, _, updated_resource_no_matches, - ) = ResourceMatcher.match_resource_lists(self.resources, resource_objects) + ) = ResourceMatcher.match_resource_lists(self._resources, resource_objects) for i, resource_index in enumerate(resource_matches): resource = resource_objects[updated_resource_matches[i]] - resource.check_url_filetoupload() updated_resource = merge_two_dictionaries( - self.resources[resource_index], resource + self._resources[resource_index], resource ) if resource.get_file_to_upload(): updated_resource.set_file_to_upload(resource.get_file_to_upload()) @@ -350,8 +348,7 @@ def add_update_resources( updated_resource.mark_data_updated() for resource_index in updated_resource_no_matches: resource = resource_objects[resource_index] - resource.check_url_filetoupload() - self.resources.append(resource) + self._resources.append(resource) def delete_resource( self, @@ -370,7 +367,7 @@ def delete_resource( if isinstance(resource, str): if is_valid_uuid(resource) is False: raise HDXError(f"{resource} is not a valid resource id!") - return self._remove_hdxobject(self.resources, resource, delete=delete) + return self._remove_hdxobject(self._resources, resource, delete=delete) def get_resources(self) -> List["Resource"]: """Get dataset's resources @@ -378,7 +375,7 @@ def get_resources(self) -> List["Resource"]: Returns: List[Resource]: List of Resource objects """ - return self.resources + return self._resources def get_resource(self, index: int = 0) -> "Resource": """Get one resource from dataset by index @@ -389,7 +386,7 @@ def get_resource(self, index: int = 0) -> "Resource": Returns: Resource: Resource object """ - return self.resources[index] + return self._resources[index] def number_of_resources(self) -> int: """Get number of dataset's resources @@ -397,7 +394,7 @@ def number_of_resources(self) -> int: Returns: int: Number of Resource objects """ - return len(self.resources) + return len(self._resources) def reorder_resources( self, resource_ids: ListTuple[str], hxl_update: bool = True @@ -425,9 +422,9 @@ def reorder_resources( ordered_ids = results["order"] reordered_resources = [] for resource_id in ordered_ids: - resource = next(x for x in self.resources if x["id"] == resource_id) + resource = next(x for x in self._resources if x["id"] == resource_id) reordered_resources.append(resource) - self.resources = reordered_resources + self._resources = reordered_resources if hxl_update: self.hxl_update() @@ -448,7 +445,7 @@ def move_resource( """ from_index = None to_index = None - for i, resource in enumerate(self.resources): + for i, resource in enumerate(self._resources): res_name = resource["name"] if res_name == resource_name: from_index = i @@ -457,11 +454,11 @@ def move_resource( if to_index is None: # insert at the start if resource cannot be found to_index = 0 - resource = self.resources.pop(from_index) + resource = self._resources.pop(from_index) if from_index < to_index: # to index was calculated while element was in front to_index -= 1 - self.resources.insert(to_index, resource) + self._resources.insert(to_index, resource) return resource def update_from_yaml( @@ -516,10 +513,10 @@ def _dataset_create_resources(self) -> None: """Creates resource objects in dataset""" if "resources" in self.data: - self.old_data["resources"] = self._copy_hdxobjects( - self.resources, + self._old_data["resources"] = self._copy_hdxobjects( + self._resources, res_module.Resource, 
- ("file_to_upload", "data_updated"), + ("_file_to_upload", "_data_updated", "_url_backup"), ) self.init_resources() self.separate_resources() @@ -539,15 +536,46 @@ def _dataset_load_from_hdx(self, id_or_name: str) -> bool: self._dataset_create_resources() return True + def check_resources_url_filetoupload(self) -> None: + """Check that metadata for dataset and its resources is complete. The parameter ignore_fields + should be set if required to any fields that should be ignored for the particular operation. + Prepend "resource:" for resource fields. + + Args: + ignore_fields (ListTuple[str]): Fields to ignore. Default is tuple(). + allow_no_resources (bool): Whether to allow no resources. Defaults to False. + + Returns: + None + """ + for resource in self._resources: + resource.check_both_url_filetoupload() + + def check_resources_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None: + """Check that metadata for resources is complete. The parameter ignore_fields + should be set if required to any fields that should be ignored for the + particular operation. + + Args: + ignore_fields (ListTuple[str]): Fields to ignore. Default is tuple(). + + Returns: + None + """ + for resource in self._resources: + FilestoreHelper.resource_check_required_fields( + resource, ignore_fields=ignore_fields + ) + def check_required_fields( self, ignore_fields: ListTuple[str] = tuple(), allow_no_resources: bool = False, **kwargs: Any, ) -> None: - """Check that metadata for dataset and its resources is complete. The parameter ignore_fields - should be set if required to any fields that should be ignored for the particular operation. - Prepend "resource:" for resource fields. + """Check that metadata for dataset is complete. The parameter ignore_fields + should be set if required to any fields that should be ignored for the + particular operation. Args: ignore_fields (ListTuple[str]): Fields to ignore. Default is tuple(). @@ -556,22 +584,14 @@ def check_required_fields( Returns: None """ - dataset_ignore_fields = [] - for ignore_field in ignore_fields: - if not ignore_field.startswith("resource:"): - dataset_ignore_fields.append(ignore_field) if self.is_requestable(): - self._check_required_fields("dataset-requestable", dataset_ignore_fields) + self._check_required_fields("dataset-requestable", ignore_fields) else: - self._check_required_fields("dataset", dataset_ignore_fields) - if len(self.resources) == 0 and not allow_no_resources: + self._check_required_fields("dataset", ignore_fields) + if len(self._resources) == 0 and not allow_no_resources: raise HDXError( "There are no resources! Please add at least one resource!" 
) - for resource in self.resources: - FilestoreHelper.resource_check_required_fields( - resource, ignore_fields=ignore_fields - ) @staticmethod def revise( @@ -676,9 +696,9 @@ def _revise_filter( for key, value in dataset_data_to_update.items(): if not isinstance(value, list): continue - if key not in self.old_data: + if key not in self._old_data: continue - orig_list = self.old_data[key] + orig_list = self._old_data[key] elements_to_remove = [] for i, orig_value in enumerate(orig_list): if isinstance(orig_value, dict) and any( @@ -751,6 +771,7 @@ def _revise_files_to_upload_resource_deletions( def _revise_dataset( self, + allow_no_resources: bool, keys_to_delete: ListTuple[str], resources_to_update: ListTuple["Resource"], resources_to_delete: ListTuple[int], @@ -764,6 +785,7 @@ def _revise_dataset( """Helper method to save the modified dataset and add any filestore resources Args: + allow_no_resources (bool): Whether to allow no resources keys_to_delete (ListTuple[str]): List of top level metadata keys to delete resources_to_update (ListTuple[Resource]): Resources to update resources_to_delete (ListTuple[int]): List of indexes of resources to delete @@ -779,8 +801,8 @@ def _revise_dataset( Dict: Dictionary of what gets passed to the revise call (for testing) """ results = {} - dataset_data_to_update = self.old_data - self.old_data = self.data + dataset_data_to_update = self._old_data + self._old_data = self.data if ( "batch_mode" in kwargs ): # Whether or not CKAN should change groupings of datasets on /datasets page @@ -813,25 +835,28 @@ def _revise_dataset( files_to_upload=files_to_upload, ) self.data = new_dataset.data - self.resources = new_dataset.resources + self._resources = new_dataset._resources # We do field check after call so that we have the changed data if "ignore_check" not in kwargs or not kwargs.get( "ignore_check" ): # allow ignoring of field checks - ignore_fields = kwargs.get("ignore_fields", list()) + ignore_fields = kwargs.get("ignore_fields", []) ignore_field = self.configuration["dataset"].get("ignore_on_update") if ignore_field and ignore_field not in ignore_fields: ignore_fields.append(ignore_field) ignore_field = kwargs.get("ignore_field") if ignore_field and ignore_field not in ignore_fields: ignore_fields.append(ignore_field) - self.check_required_fields(ignore_fields=ignore_fields) + self.check_required_fields( + ignore_fields=ignore_fields, allow_no_resources=allow_no_resources + ) + self.check_resources_fields(ignore_fields=ignore_fields) if new_resource_order: - existing_order = [(x["name"], x["format"].lower()) for x in self.resources] + existing_order = [(x["name"], x["format"].lower()) for x in self._resources] if existing_order != new_resource_order: sorted_resources = sorted( - self.resources, + self._resources, key=lambda x: new_resource_order.index( (x["name"], x["format"].lower()) ), @@ -877,7 +902,7 @@ def _dataset_update_resources( # then reading the existing dataset on HDX into "data". 
Hence, # "old_data" below contains the user-supplied data we want to use for # updating while "data" contains the data read from HDX - resources_metadata_to_update = self.old_data.get("resources", None) + resources_metadata_to_update = self._old_data.get("resources", None) resources_to_update = [] resources_to_delete = [] filestore_resources = {} @@ -890,9 +915,9 @@ def _dataset_update_resources( resource_no_matches, updated_resource_no_matches, ) = ResourceMatcher.match_resource_lists( - self.resources, resources_metadata_to_update + self._resources, resources_metadata_to_update ) - for i, resource in enumerate(self.resources): + for i, resource in enumerate(self._resources): try: match_index = resource_matches.index(i) except ValueError: @@ -912,7 +937,7 @@ def _dataset_update_resources( statuses[resource_name] = status resources_to_update.append(resource_data_to_update) - resource_index = len(self.resources) + resource_index = len(self._resources) for updated_resource_index in updated_resource_no_matches: resource_data_to_update = resources_metadata_to_update[ updated_resource_index @@ -928,7 +953,7 @@ def _dataset_update_resources( resources_to_update.append(resource_data_to_update) if remove_additional_resources: for resource_index in resource_no_matches: - resource = self.resources[resource_index] + resource = self._resources[resource_index] logger.warning( f"Removing additional resource {resource['name']}!" ) @@ -937,9 +962,9 @@ def _dataset_update_resources( for i, resource_data_to_update in enumerate( resources_metadata_to_update ): - if len(self.resources) > i: + if len(self._resources) > i: updated_resource_name = resource_data_to_update["name"] - resource = self.resources[i] + resource = self._resources[i] resource_name = resource["name"] logger.warning(f"Resource exists. 
Updating {resource_name}") if resource_name != updated_resource_name: @@ -963,7 +988,7 @@ def _dataset_update_resources( statuses[resource_data_to_update["name"]] = status resources_to_update.append(resource_data_to_update) - for i, resource in enumerate(self.resources): + for i, resource in enumerate(self._resources): if len(resources_metadata_to_update) <= i: resources_to_update.append(resource) if remove_additional_resources: @@ -988,6 +1013,7 @@ def _dataset_update_resources( def _dataset_hdx_update( self, + allow_no_resources: bool, update_resources: bool, match_resources_by_metadata: bool, keys_to_delete: ListTuple[str], @@ -1006,6 +1032,7 @@ def _dataset_hdx_update( dictionary of what gets passed to the revise call (for testing)) Args: + allow_no_resources (bool): Whether to allow no resources update_resources (bool): Whether to update resources match_resources_by_metadata (bool): Compare resource metadata rather than position in list keys_to_delete (ListTuple[str]): List of top level metadata keys to delete @@ -1032,19 +1059,22 @@ def _dataset_hdx_update( ) keep_crisis_tags = kwargs.get("keep_crisis_tags", True) if keep_crisis_tags: - for tag in self.data["tags"]: + old_tags = self._old_data.get("tags", []) + for tag in self.data.get("tags", []): tag_name = tag["name"] if tag_name[:7] != "crisis-": continue found = False - for old_tag in self.old_data["tags"]: + for old_tag in old_tags: if old_tag["name"] == tag_name: found = True break if not found: - self.old_data["tags"].append(tag) - self._prepare_hdx_call(self.old_data, kwargs) + old_tags.append(tag) + self._old_data["tags"] = old_tags + self._prepare_hdx_call(self._old_data, kwargs) revise_call = self._revise_dataset( + allow_no_resources, keys_to_delete, resources_to_update, resources_to_delete, @@ -1058,6 +1088,7 @@ def _dataset_hdx_update( def update_in_hdx( self, + allow_no_resources: bool = False, update_resources: bool = True, match_resources_by_metadata: bool = True, keys_to_delete: ListTuple[str] = tuple(), @@ -1080,6 +1111,7 @@ def update_in_hdx( 4 = file not uploaded (hash, size unchanged), given last_modified ignored Args: + allow_no_resources (bool): Whether to allow no resources. Defaults to False. update_resources (bool): Whether to update resources. Defaults to True. match_resources_by_metadata (bool): Compare resource metadata rather than position in list. Defaults to True. keys_to_delete (ListTuple[str]): List of top level metadata keys to delete. Defaults to tuple(). 
@@ -1107,6 +1139,7 @@ def update_in_hdx( if not self._dataset_load_from_hdx(self.data["name"]): raise HDXError("No existing dataset to update!") statuses, _ = self._dataset_hdx_update( + allow_no_resources=allow_no_resources, update_resources=update_resources, match_resources_by_metadata=match_resources_by_metadata, keys_to_delete=keys_to_delete, @@ -1160,21 +1193,19 @@ def create_in_hdx( Returns: Dict: Status codes of resources """ - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields(allow_no_resources=allow_no_resources, **kwargs) - # No need to check again after revising dataset - kwargs["ignore_check"] = True - loadedid = None - if "id" in self.data: - if self._dataset_load_from_hdx(self.data["id"]): - loadedid = self.data["id"] - else: - logger.warning(f"Failed to load dataset with id {self.data['id']}") + loadedid = self.data.get("id") + if loadedid is not None: + if not self._dataset_load_from_hdx(loadedid): + logger.warning(f"Failed to load dataset with id {loadedid}") + loadedid = None if not loadedid: - if self._dataset_load_from_hdx(self.data["name"]): - loadedid = self.data["name"] + loadedid = self.data.get("name") + if loadedid is not None: + if not self._dataset_load_from_hdx(loadedid): + loadedid = None if loadedid: statuses, _ = self._dataset_hdx_update( + allow_no_resources=allow_no_resources, update_resources=update_resources, match_resources_by_metadata=match_resources_by_metadata, keys_to_delete=keys_to_delete, @@ -1187,10 +1218,14 @@ def create_in_hdx( logger.info(f"Updated {self.get_hdx_url()}") return statuses + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields(allow_no_resources=allow_no_resources, **kwargs) + # No need to check again after revising dataset + kwargs["ignore_check"] = True statuses = {} filestore_resources = {} - if self.resources: - for i, resource in enumerate(self.resources): + if self._resources: + for i, resource in enumerate(self._resources): status = FilestoreHelper.check_filestore_resource( resource, filestore_resources, i, **kwargs ) @@ -1206,8 +1241,9 @@ def create_in_hdx( self.separate_resources() else: self._revise_dataset( + allow_no_resources, keys_to_delete, - self.resources, + self._resources, [], filestore_resources, None, @@ -1315,7 +1351,7 @@ def search_in_hdx( no_results = len(result["results"]) for datasetdict in result["results"]: dataset = Dataset(configuration=configuration) - dataset.old_data = {} + dataset._old_data = {} dataset.data = datasetdict dataset._dataset_create_resources() datasets.append(dataset) @@ -2134,7 +2170,7 @@ def get_filetypes(self) -> List[str]: List[str]: List of filetypes """ if not self.is_requestable(): - return [resource.get_format() for resource in self.resources] + return [resource.get_format() for resource in self._resources] return self._get_stringlist_from_commastring("file_types") def add_filetype(self, filetype: str) -> bool: @@ -2211,7 +2247,7 @@ def preview_off(self) -> None: None """ self.data["dataset_preview"] = "no_preview" - for resource in self.resources: + for resource in self._resources: resource.disable_dataset_preview() def preview_resource(self) -> None: @@ -2234,7 +2270,7 @@ def set_quickchart_resource( Resource: Resource that is used for preview or None if no preview set """ if isinstance(resource, int) and not isinstance(resource, bool): - resource = self.resources[resource] + resource = self._resources[resource] if isinstance(resource, res_module.Resource) or isinstance(resource, dict): 
res = resource.get("id") if res is None: @@ -2250,7 +2286,7 @@ def set_quickchart_resource( else: search = "name" preview_resource = None - for dataset_resource in self.resources: + for dataset_resource in self._resources: if preview_resource is None and dataset_resource[search] == resource: dataset_resource.enable_dataset_preview() self.preview_resource() @@ -2265,9 +2301,9 @@ def quickcharts_resource_last(self) -> bool: Returns: bool: True if QuickCharts resource found, False if not """ - for i, resource in enumerate(self.resources): + for i, resource in enumerate(self._resources): if resource["name"][:12] == "QuickCharts-": - self.resources.append(self.resources.pop(i)) + self._resources.append(self._resources.pop(i)) return True return False @@ -2292,13 +2328,13 @@ def _create_preview_resourceview(self) -> None: Returns: None """ - if self.preview_resourceview: - for resource in self.resources: - if resource["name"] == self.preview_resourceview["resource_name"]: - del self.preview_resourceview["resource_name"] - self.preview_resourceview["resource_id"] = resource["id"] - self.preview_resourceview.create_in_hdx() - self.preview_resourceview = None + if self._preview_resourceview: + for resource in self._resources: + if resource["name"] == self._preview_resourceview["resource_name"]: + del self._preview_resourceview["resource_name"] + self._preview_resourceview["resource_id"] = resource["id"] + self._preview_resourceview.create_in_hdx() + self._preview_resourceview = None break def _generate_resource_view( @@ -2454,9 +2490,9 @@ def replace_indicator(qc_config, index): if "resource_id" in resourceview: resourceview.create_in_hdx() - self.preview_resourceview = None + self._preview_resourceview = None else: - self.preview_resourceview = resourceview + self._preview_resourceview = resourceview return resourceview def generate_quickcharts( diff --git a/src/hdx/data/hdxobject.py b/src/hdx/data/hdxobject.py index d4e057b2..a4dce245 100755 --- a/src/hdx/data/hdxobject.py +++ b/src/hdx/data/hdxobject.py @@ -47,7 +47,7 @@ def actions() -> Dict[str, str]: def __init__( self, initial_data: Dict, configuration: Optional[Configuration] = None ) -> None: - self.old_data = None + self._old_data = None if configuration is None: self.configuration: Configuration = Configuration.read() else: @@ -60,7 +60,7 @@ def get_old_data_dict(self) -> Dict: Returns: Dict: Previous internal dictionary """ - return self.old_data + return self._old_data def update_from_yaml(self, path: str) -> None: """Update metadata with static metadata from YAML file @@ -134,7 +134,7 @@ def _load_from_hdx(self, object_type: str, id_field: str) -> bool: """ success, result = self._read_from_hdx(object_type, id_field) if success: - self.old_data = self.data + self._old_data = self.data self.data = result return True logger.debug(result) @@ -231,9 +231,8 @@ def _check_required_fields( if not self.data[field] and not isinstance(self.data[field], bool): raise HDXError(f"Field {field} is empty in {object_type}!") - def _check_kwargs_fields(self, object_type: str, **kwargs: Any) -> None: - """Helper method to check kwargs and set fields appropriately and to check metadata fields unless it is - specified not to do so. + def _check_fields(self, object_type: str, **kwargs: Any) -> None: + """Helper method to check metadata fields unless it is specified not to do so. 
Args: object_type (str): Description of HDX object type (for messages) @@ -243,18 +242,10 @@ def _check_kwargs_fields(self, object_type: str, **kwargs: Any) -> None: Returns: None """ - if ( - "batch_mode" in kwargs - ): # Whether or not CKAN should change groupings of datasets on /datasets page - self.data["batch_mode"] = kwargs["batch_mode"] - if ( - "skip_validation" in kwargs - ): # Whether or not CKAN should perform validation steps (checking fields present) - self.data["skip_validation"] = kwargs["skip_validation"] if "ignore_check" not in kwargs or not kwargs.get( "ignore_check" ): # allow ignoring of field checks - ignore_fields = kwargs.get("ignore_fields", list()) + ignore_fields = kwargs.get("ignore_fields", []) ignore_field = self.configuration[object_type].get("ignore_on_update") if ignore_field and ignore_field not in ignore_fields: ignore_fields.append(ignore_field) @@ -263,6 +254,25 @@ def _check_kwargs_fields(self, object_type: str, **kwargs: Any) -> None: ignore_fields.append(ignore_field) self.check_required_fields(ignore_fields=ignore_fields) + def _check_kwargs(self, **kwargs: Any) -> None: + """Helper method to check kwargs and set fields appropriately + + Args: + **kwargs: See below + ignore_field (str): Any field to ignore when checking dataset metadata. Defaults to None. + + Returns: + None + """ + if ( + "batch_mode" in kwargs + ): # Whether or not CKAN should change groupings of datasets on /datasets page + self.data["batch_mode"] = kwargs["batch_mode"] + if ( + "skip_validation" in kwargs + ): # Whether or not CKAN should perform validation steps (checking fields present) + self.data["skip_validation"] = kwargs["skip_validation"] + def _hdx_update( self, object_type: str, @@ -285,9 +295,11 @@ def _hdx_update( Returns: None """ - self._check_kwargs_fields(object_type, **kwargs) + self._check_kwargs(**kwargs) operation = kwargs.pop("operation", "update") self._save_to_hdx(operation, id_field_name, files_to_upload, force_active) + # We do field check after call so that we have the changed data + self._check_fields(object_type, **kwargs) def _merge_hdx_update( self, @@ -311,7 +323,7 @@ def _merge_hdx_update( Returns: None """ - merge_two_dictionaries(self.data, self.old_data) + merge_two_dictionaries(self.data, self._old_data) self._hdx_update( object_type, id_field_name, @@ -421,7 +433,7 @@ def _save_to_hdx( if force_active: self.data["state"] = "active" result = self._write_to_hdx(action, self.data, id_field_name, files_to_upload) - self.old_data = self.data + self._old_data = self.data self.data = result @abstractmethod @@ -454,8 +466,6 @@ def _create_in_hdx( Returns: None """ - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() if id_field_name in self.data and self._load_from_hdx( object_type, self.data[id_field_name] ): @@ -468,6 +478,8 @@ def _create_in_hdx( **kwargs, ) else: + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() self._save_to_hdx("create", name_field_name, files_to_upload, force_active) @abstractmethod @@ -640,7 +652,7 @@ def _separate_hdxobjects( Returns: None """ - new_hdxobjects = self.data.get(hdxobjects_name, list()) + new_hdxobjects = self.data.get(hdxobjects_name, []) """:type : List["HDXObject"]""" if new_hdxobjects: hdxobject_names = set() diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 7fcfb6d3..2b6fec29 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -10,7 +10,6 @@ import hdx.data.dataset from 
hdx.api.configuration import Configuration from hdx.api.utilities.date_helper import DateHelper -from hdx.api.utilities.filestore_helper import FilestoreHelper from hdx.api.utilities.size_hash import get_size_and_hash from hdx.data.hdxobject import HDXError, HDXObject from hdx.data.resource_view import ResourceView @@ -41,8 +40,9 @@ def __init__( if not initial_data: initial_data = {} super().__init__(initial_data, configuration=configuration) - self.file_to_upload = None - self.data_updated = False + self._file_to_upload = None + self._data_updated = False + self._url_backup = None @staticmethod def actions() -> Dict[str, str]: @@ -294,7 +294,7 @@ def get_file_to_upload(self) -> Optional[str]: Returns: Optional[str]: The file that will be or has been uploaded or None if there isn't one """ - return self.file_to_upload + return self._file_to_upload def set_file_to_upload( self, file_to_upload: str, guess_format_from_suffix: bool = False @@ -309,41 +309,64 @@ def set_file_to_upload( Optional[str]: The format that was guessed or None if no format was set """ if "url" in self.data: + self._url_backup = self.data["url"] del self.data["url"] - self.file_to_upload = file_to_upload + self._file_to_upload = file_to_upload file_format = None if guess_format_from_suffix: file_format = self.set_format(Path(file_to_upload).suffix) return file_format - def check_url_filetoupload(self) -> None: - """Check if url or file to upload provided for resource and add resource_type - and url_type if not supplied. Correct the file type. + def check_both_url_filetoupload(self) -> None: + """Check for error where both url or file to upload are provided for resource. Returns: None """ - if self.file_to_upload is None: - if "url" in self.data: - if "resource_type" not in self.data: - self.data["resource_type"] = "api" - if "url_type" not in self.data: - self.data["url_type"] = "api" - else: - raise HDXError("Either a url or a file to upload must be supplied!") + if self._file_to_upload is not None and "url" in self.data: + raise HDXError( + "Either a url or a file to upload must be supplied not both!" + ) + + def check_neither_url_filetoupload(self) -> None: + if self._file_to_upload is None and "url" not in self.data: + raise HDXError("Either a url or a file to upload must be supplied!") + + def set_types(self, data: Optional[Dict] = None) -> None: + """Add resource_type and url_type if not supplied based on url or file to + upload. Correct the file type. + + Args: + data (Dict): Resource data. Defaults to None (self.data) + + Returns: + None + """ + if data is None: + data = self.data + if self._file_to_upload is None: + if "url" in data: + if "resource_type" not in data: + data["resource_type"] = "api" + if "url_type" not in data: + data["url_type"] = "api" else: - if "url" in self.data: - if self.data["url"] != FilestoreHelper.temporary_url: - raise HDXError( - "Either a url or a file to upload must be supplied not both!" 
- ) - if "resource_type" not in self.data: - self.data["resource_type"] = "file.upload" - if "url_type" not in self.data: - self.data["url_type"] = "upload" - if "tracking_summary" in self.data: - del self.data["tracking_summary"] - self.clean_format() + if "resource_type" not in data: + data["resource_type"] = "file.upload" + if "url_type" not in data: + data["url_type"] = "upload" + if "tracking_summary" in data: + del data["tracking_summary"] + file_format = data.get("format") + if file_format is not None: + file_format = self.get_mapped_format( + file_format, configuration=self.configuration + ) + if not file_format: + raise HDXError( + f"Supplied file type {file_format} is invalid and could not be mapped to a known type!" + ) + data["format"] = file_format def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None: """Check that metadata for resource is complete. The parameter ignore_fields @@ -356,8 +379,8 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None Returns: None """ - self.check_url_filetoupload() self._check_required_fields("resource", ignore_fields) + self.check_neither_url_filetoupload() def _resource_merge_hdx_update( self, @@ -381,12 +404,14 @@ def _resource_merge_hdx_update( Returns: int: Status code """ - data_updated = kwargs.pop("data_updated", self.data_updated) + data_updated = kwargs.pop("data_updated", self._data_updated) files = {} - if self.file_to_upload: - file_format = self.old_data.get("format", "").lower() - size, hash = get_size_and_hash(self.file_to_upload, file_format) + if self._file_to_upload: + file_format = self._old_data.get("format", "").lower() + size, hash = get_size_and_hash(self._file_to_upload, file_format) if size == self.data.get("size") and hash == self.data.get("hash"): + if self._url_backup: + self._old_data["url"] = self._url_backup # ensure last_modified is not updated if file hasn't changed if "last_modified" in self.data: del self.data["last_modified"] @@ -395,20 +420,22 @@ def _resource_merge_hdx_update( status = 3 else: # update file if size or hash has changed - files["upload"] = self.file_to_upload - self.old_data["size"] = size - self.old_data["hash"] = hash + files["upload"] = self._file_to_upload + self._old_data["size"] = size + self._old_data["hash"] = hash status = 2 + self._url_backup = None elif data_updated: # Should not output timezone info here - self.old_data["last_modified"] = now_utc_notz().isoformat( + self._old_data["last_modified"] = now_utc_notz().isoformat( timespec="microseconds" ) - self.data_updated = False + self._data_updated = False status = 0 else: status = 1 + self.set_types(self._old_data) # old_data will be merged into data in the next step self._merge_hdx_update("resource", "id", files, True, **kwargs) return status @@ -438,9 +465,8 @@ def update_in_hdx(self, **kwargs: Any) -> int: Returns: int: Status code """ + self.check_both_url_filetoupload() self._check_load_existing_object("resource", "id") - if self.file_to_upload and "url" in self.data: - del self.data["url"] return self._resource_merge_hdx_update(**kwargs) def create_in_hdx(self, **kwargs: Any) -> int: @@ -469,26 +495,26 @@ def create_in_hdx(self, **kwargs: Any) -> int: Returns: int: Status code """ - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() + self.check_both_url_filetoupload() id = self.data.get("id") if id and self._load_from_hdx("resource", id): logger.warning(f"{'resource'} exists. 
Updating {id}") - if self.file_to_upload and "url" in self.data: - del self.data["url"] return self._resource_merge_hdx_update(**kwargs) + + self.set_types() + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() + files = {} + if self._file_to_upload: + files["upload"] = self._file_to_upload + self.data["size"], self.data["hash"] = get_size_and_hash( + self._file_to_upload, self.get_format() + ) + status = 2 else: - files = {} - if self.file_to_upload: - files["upload"] = self.file_to_upload - self.data["size"], self.data["hash"] = get_size_and_hash( - self.file_to_upload, self.get_format() - ) - status = 2 - else: - status = 0 - self._save_to_hdx("create", "name", files, True) - return status + status = 0 + self._save_to_hdx("create", "name", files, True) + return status def delete_from_hdx(self) -> None: """Deletes a resource from HDX @@ -810,7 +836,7 @@ def is_marked_data_updated(self) -> bool: Returns: bool: Whether resource's data is marked to be updated """ - return self.data_updated + return self._data_updated def mark_data_updated(self) -> None: """Mark resource data as updated @@ -818,7 +844,7 @@ def mark_data_updated(self) -> None: Returns: None """ - self.data_updated = True + self._data_updated = True def get_date_data_updated(self) -> datetime: """Get date resource data was updated diff --git a/src/hdx/data/resource_view.py b/src/hdx/data/resource_view.py index 61d78c4c..7bbf3163 100755 --- a/src/hdx/data/resource_view.py +++ b/src/hdx/data/resource_view.py @@ -139,7 +139,7 @@ def _update_resource_view(self, log: bool = False, **kwargs: Any) -> bool: resource_views = self.get_all_for_resource(self.data["resource_id"]) for resource_view in resource_views: if self.data["title"] == resource_view["title"]: - self.old_data = self.data + self._old_data = self.data self.data = resource_view.data update = True break diff --git a/src/hdx/data/showcase.py b/src/hdx/data/showcase.py index 597ae97e..4c20c53e 100755 --- a/src/hdx/data/showcase.py +++ b/src/hdx/data/showcase.py @@ -118,7 +118,7 @@ def update_in_hdx(self, **kwargs: Any) -> None: # We load an existing object even though it may well have been loaded already # to prevent an admittedly unlikely race condition where someone has updated # the object in the intervening time - merge_two_dictionaries(self.data, self.old_data) + merge_two_dictionaries(self.data, self._old_data) self.clean_tags() self._hdx_update("showcase", "name", force_active=True, **kwargs) self._update_in_hdx("showcase", "name", **kwargs) @@ -133,7 +133,7 @@ def create_in_hdx(self, **kwargs: Any) -> None: self.check_required_fields() if "name" in self.data and self._load_from_hdx("showcase", self.data["name"]): logger.warning(f"{'showcase'} exists. 
Updating {self.data['name']}") - merge_two_dictionaries(self.data, self.old_data) + merge_two_dictionaries(self.data, self._old_data) self.clean_tags() self._hdx_update("showcase", "name", force_active=True, **kwargs) else: @@ -364,7 +364,7 @@ def search_in_hdx( for dataset in datasets: showcase = Showcase(configuration=configuration) showcase.data = dataset.data - showcase.old_data = dataset.old_data + showcase._old_data = dataset._old_data showcases.append(showcase) return showcases diff --git a/tests/hdx/__init__.py b/tests/hdx/__init__.py index e04422e5..18078d85 100755 --- a/tests/hdx/__init__.py +++ b/tests/hdx/__init__.py @@ -275,14 +275,7 @@ def dataset_mockshow(url, datadict): % result, ) else: - if datadict["id"] == "TEST1": - result = json.dumps(dataset_resultdict) - return MockResponse( - 200, - '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}' - % result, - ) - if datadict["id"] == "DatasetExist": + if datadict["id"] in ("TEST1", "DatasetExist"): result = json.dumps(dataset_resultdict) return MockResponse( 200, @@ -299,9 +292,14 @@ def dataset_mockshow(url, datadict): 200, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}', ) - if datadict["id"] == "TEST4": + if datadict["id"] in ("TEST4", "TEST5"): resultdictcopy = copy.deepcopy(dataset_resultdict) resultdictcopy["id"] = "TEST4" + if datadict["id"] == "TEST5": + # test existing size and hash same + resource = resultdictcopy["resources"][0] + resource["size"] = 23724 + resource["hash"] = "6b8acf7e28d62685a1e829e7fa220d17" result = json.dumps(resultdictcopy) return MockResponse( 200, diff --git a/tests/hdx/api/utilities/test_dataset_title_helper.py b/tests/hdx/api/utilities/test_dataset_title_helper.py index ee3fa73f..d056873a 100755 --- a/tests/hdx/api/utilities/test_dataset_title_helper.py +++ b/tests/hdx/api/utilities/test_dataset_title_helper.py @@ -134,10 +134,10 @@ def test_get_date_from_title(self, expected_ranges_2019): ) assert DatasetTitleHelper.get_dates_from_title( "Ward boundaries Yangon City_mimu_v8_1" - ) == ("Ward boundaries Yangon City_mimu_v8_1", list()) + ) == ("Ward boundaries Yangon City_mimu_v8_1", []) assert DatasetTitleHelper.get_dates_from_title( "Mon_State_Village_Tract_Boundaries" - ) == ("Mon_State_Village_Tract_Boundaries", list()) + ) == ("Mon_State_Village_Tract_Boundaries", []) assert DatasetTitleHelper.get_dates_from_title( "ICA Afghanistan, 2019 - Landslide hazard, 2013" ) == ( @@ -181,10 +181,10 @@ def test_get_date_from_title(self, expected_ranges_2019): ) assert DatasetTitleHelper.get_dates_from_title( "Mon_State_Village_Tract_Boundaries 9999" - ) == ("Mon_State_Village_Tract_Boundaries 9999", list()) + ) == ("Mon_State_Village_Tract_Boundaries 9999", []) assert DatasetTitleHelper.get_dates_from_title( "Mon_State_Village_Tract_Boundaries 10/12/01 lala" - ) == ("Mon_State_Village_Tract_Boundaries 10/12/01 lala", list()) + ) == ("Mon_State_Village_Tract_Boundaries 10/12/01 lala", []) # It's the "Mon" that makes an extra date component that causes it to ignore the date (correctly) assert DatasetTitleHelper.get_dates_from_title( "State_Village_Tract_Boundaries 10/12/01 lala" diff --git a/tests/hdx/api/utilities/test_filestore_helper.py b/tests/hdx/api/utilities/test_filestore_helper.py index aa5a6122..b02829b4 100644 --- a/tests/hdx/api/utilities/test_filestore_helper.py +++ 
b/tests/hdx/api/utilities/test_filestore_helper.py @@ -10,7 +10,7 @@ "format": "xlsx", "url": "http://test/spreadsheet.xlsx", "description": "My Resource", - "api_type": "api", + "url_type": "api", "resource_type": "api", } @@ -25,12 +25,12 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): orig_resource, resource, filestore_resources, 0 ) assert resource == { - "api_type": "api", "description": "My Resource", "format": "xlsx", "name": "MyResource1", "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "resource_type": "api", + "url_type": "api", "url": "http://test/spreadsheet.xlsx", } assert filestore_resources == {} @@ -40,7 +40,6 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): orig_resource, resource, filestore_resources, 0 ) assert resource == { - "api_type": "api", "description": "My Resource", "format": "xlsx", "name": "MyResource1", @@ -48,6 +47,7 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): "size": 1548, "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "resource_type": "api", + "url_type": "api", "url": "updated_by_file_upload_step", } assert filestore_resources == {0: test_data} @@ -57,7 +57,6 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): resource, resource, filestore_resources, 0 ) assert resource == { - "api_type": "api", "description": "My Resource", "format": "xlsx", "name": "MyResource1", @@ -65,11 +64,12 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): "size": 1548, "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "resource_type": "api", + "url_type": "api", "url": "updated_by_file_upload_step", } assert filestore_resources == {} - resource.file_to_upload = None + resource._file_to_upload = None resource.mark_data_updated() FilestoreHelper.dataset_update_filestore_resource( orig_resource, resource, filestore_resources, 0 diff --git a/tests/hdx/api/utilities/test_size_hash.py b/tests/hdx/api/utilities/test_size_hash.py index 78c25189..ef938ad7 100644 --- a/tests/hdx/api/utilities/test_size_hash.py +++ b/tests/hdx/api/utilities/test_size_hash.py @@ -1,19 +1,7 @@ -from os.path import join - -import pytest - from hdx.api.utilities.size_hash import get_size_and_hash class TestSizeHash: - @pytest.fixture(scope="class") - def test_xlsx(self, fixturesfolder): - return join( - fixturesfolder, - "size_hash", - "ACLED-All-Africa-File_20170101-to-20170708.xlsx", - ) - def test_get_size_and_hash(self, test_data, test_xlsx): size, hash = get_size_and_hash(test_data, "csv") assert size == 1548 diff --git a/tests/hdx/conftest.py b/tests/hdx/conftest.py index 36480815..cd24acdf 100755 --- a/tests/hdx/conftest.py +++ b/tests/hdx/conftest.py @@ -40,6 +40,15 @@ def test_data(fixturesfolder): return join(fixturesfolder, "test_data.csv") +@pytest.fixture(scope="session") +def test_xlsx(fixturesfolder): + return join( + fixturesfolder, + "size_hash", + "ACLED-All-Africa-File_20170101-to-20170708.xlsx", + ) + + @pytest.fixture(scope="session") def locations(): Locations.set_validlocations( diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py index d56738d2..29e7e157 100755 --- a/tests/hdx/data/test_dataset_core.py +++ b/tests/hdx/data/test_dataset_core.py @@ -4,6 +4,7 @@ import json import os import re +import shutil import tempfile from os.path import join @@ -32,6 +33,7 @@ from hdx.data.resource_view import ResourceView from hdx.utilities.dictandlist import merge_two_dictionaries from 
hdx.utilities.loader import load_yaml +from hdx.utilities.path import temp_dir searchdict = load_yaml(join("tests", "fixtures", "dataset_search_results.yaml")) dataset_list = [ @@ -356,14 +358,13 @@ def post(url, data, headers, files, allow_redirects, auth=None): '{"success": false, "error": {"message": "TEST ERROR: Not update", "__type": "TEST ERROR: Not Update Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=dataset_update"}', ) datadict = json.loads(datadict["update"]) - if datadict["name"] in ["MyDataset1", "DatasetExist"]: + if datadict["name"] in ("MyDataset1", "DatasetExist"): resultdictcopy = copy.deepcopy(dataset_resultdict) for i, resource in enumerate(datadict["resources"]): if not resource: datadict["resources"][i] = resultdictcopy["resources"][ i ] - merge_two_dictionaries(resultdictcopy, datadict) for i, resource in enumerate(resultdictcopy["resources"]): resource["package_id"] = resultdictcopy["id"] @@ -570,7 +571,7 @@ def test_create_in_hdx(self, configuration, post_create): assert statuses == {"Resource1": 0, "Resource2": 0} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 datasetdata["name"] = "MyDataset2" dataset = Dataset(datasetdata) @@ -612,8 +613,8 @@ def test_create_in_hdx(self, configuration, post_create): statuses = dataset.create_in_hdx() assert statuses == {"Resource1": 0, "Resource2": 0} assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" - assert len(dataset.resources) == 3 - assert dataset.resources[0].configuration.unique == uniqueval + assert len(dataset._resources) == 3 + assert dataset._resources[0].configuration.unique == uniqueval assert dataset["state"] == "active" datasetdata = copy.deepcopy(dataset_data) dataset = Dataset(datasetdata) @@ -626,12 +627,12 @@ def test_create_in_hdx(self, configuration, post_create): assert statuses == {"Resource1": 2} file.close() assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 finally: os.remove(file.name) # Dataset creates that end up updating are in the test below - def test_update_in_hdx(self, configuration, post_update, date_pattern): + def test_update_in_hdx(self, configuration, post_update, date_pattern, test_xlsx): dataset = Dataset() dataset["id"] = "NOTEXIST" with pytest.raises(HDXError): @@ -673,10 +674,10 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): statuses = dataset.update_in_hdx(keep_crisis_tags=False) assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset.get_tags() == ["conflict", "political violence"] - dataset.preview_resourceview = ResourceView(resourceviewdata) + dataset._preview_resourceview = ResourceView(resourceviewdata) statuses = dataset.update_in_hdx() assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} - assert dataset.preview_resourceview is None + assert dataset._preview_resourceview is None statuses = dataset.update_in_hdx(updated_by_script="hi") assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} pattern = r"hi \([12]\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d\)" @@ -728,7 +729,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): statuses = dataset.create_in_hdx() assert statuses == {"Resource1": 0, "Resource2": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 resource = 
dataset.get_resource() assert resource.is_marked_data_updated() is False match = date_pattern.search(resource["last_modified"]) @@ -744,26 +745,30 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): assert dataset["id"] == "TEST1" assert dataset["dataset_date"] == "06/04/2016" assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 statuses = dataset.update_in_hdx() assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset = Dataset.read_from_hdx("TEST4") dataset["id"] = "TEST4" assert dataset["state"] == "active" statuses = dataset.update_in_hdx() assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset = Dataset.read_from_hdx("TEST4") - try: - file = tempfile.NamedTemporaryFile(delete=False) - resource.set_file_to_upload(file.name) + with temp_dir("test_update_in_hdx") as tempdir: + path = join(tempdir, "test_update_in_hdx.xlsx") + shutil.copyfile(test_xlsx, path) + resource.set_file_to_upload(path) dataset.add_update_resource(resource) statuses = dataset.update_in_hdx( batch="6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" ) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 + result = dataset.get_resource() + assert result["size"] == 23724 + assert result["hash"] == "6b8acf7e28d62685a1e829e7fa220d17" assert statuses == {"Resource1": 2, "Resource2": 1, "Resource3": 1} resource["name"] = "123" resource.set_file_to_upload(None) @@ -772,7 +777,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): statuses = dataset.update_in_hdx() assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 del resource["id"] resource["name"] = "Resource4" dataset.add_update_resource(resource) @@ -784,7 +789,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "Resource4": 0, } assert dataset["state"] == "active" - assert len(dataset.resources) == 4 + assert len(dataset._resources) == 4 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) @@ -795,12 +800,12 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): resource = Resource(resourcesdata[0]) resource["name"] = "ResourcePosition" resource["id"] = "123" - resource.set_file_to_upload(file.name) + resource.set_file_to_upload(path) dataset.add_update_resource(resource) resource = Resource(resourcesdata[0]) resource["name"] = "changed name" resource["id"] = "456" - resource.set_file_to_upload(file.name) + resource.set_file_to_upload(path) dataset.add_update_resource(resource) statuses = dataset.update_in_hdx(match_resources_by_metadata=True) assert statuses == { @@ -810,9 +815,10 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "changed name": 2, } assert dataset["state"] == "active" - assert len(dataset.resources) == 5 # existing resources + assert len(dataset._resources) == 5 # existing resources dataset = Dataset(datasetdata) + dataset["id"] = "TEST5" resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) dataset.add_update_resource(resource) @@ -821,21 +827,24 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): resource = Resource(resourcesdata[0]) 
resource["name"] = "ResourcePosition" resource["id"] = "123" - resource.set_file_to_upload(file.name) + resource.set_file_to_upload(path) dataset.add_update_resource(resource) resource = dataset.get_resources()[0] resource["name"] = "changed name" resource["id"] = "456" - resource.set_file_to_upload(file.name) + resource["last_modified"] = "2025-05-08T13:09:42.842208" + resource.set_file_to_upload(path) statuses = dataset.update_in_hdx(match_resources_by_metadata=False) assert statuses == { "Resource2": 1, "ResourcePosition": 2, - "changed name": 2, + "changed name": 4, } + result = dataset.get_resource(2) + assert result["size"] == 23724 + assert result["hash"] == "6b8acf7e28d62685a1e829e7fa220d17" assert dataset["state"] == "active" - assert len(dataset.resources) == 3 - file.close() + assert len(dataset._resources) == 3 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) @@ -843,7 +852,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): statuses = dataset.update_in_hdx(remove_additional_resources=False) assert statuses == {"Resource1": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) @@ -851,7 +860,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): statuses = dataset.update_in_hdx(remove_additional_resources=True) assert statuses == {"Resource1": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 1 + assert len(dataset._resources) == 1 dataset = Dataset(datasetdata) resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) @@ -862,7 +871,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): ) assert statuses == {"Resource1": 1} assert dataset["state"] == "active" - assert len(dataset.resources) == 1 + assert len(dataset._resources) == 1 dataset = Dataset.read_from_hdx("TEST1") resources = dataset.get_resources() @@ -874,7 +883,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): ] dataset["id"] = "TEST1" dataset["name"] = "MyDataset1" - dataset.resources = [resources[2], resources[0], resources[1]] + dataset._resources = [resources[2], resources[0], resources[1]] statuses = dataset.update_in_hdx(match_resource_order=True) assert statuses == {"Resource1": 1, "Resource2": 1, "Resource3": 1} resource_ids = [x["id"] for x in dataset.get_resources()] @@ -883,8 +892,6 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern): "de6549d8-268b-4dfe-adaf-a4ae5c8510d5", "3d777226-96aa-4239-860a-703389d16d1f", ] - finally: - os.remove(file.name) def test_delete_from_hdx(self, configuration, post_delete): dataset = Dataset.read_from_hdx("TEST1") @@ -999,18 +1006,18 @@ def test_add_update_delete_resources(self, configuration, post_delete): dataset = Dataset(datasetdata) dataset.add_update_resources(resourcesdata) dataset.add_update_resources(resourcesdata) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset.delete_resource("de6549d8-268b-4dfe-adaf-a4ae5c8510d6") - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset.delete_resource("de6549d8-268b-4dfe-adaf-a4ae5c8510d5") - assert len(dataset.resources) == 2 + assert len(dataset._resources) == 2 resourcesdata = copy.deepcopy(resources_data) resource = Resource(resourcesdata[0]) 
resource.set_file_to_upload("lala") dataset.add_update_resource(resource) - assert dataset.resources[2].get_file_to_upload() == "lala" + assert dataset._resources[2].get_file_to_upload() == "lala" dataset.add_update_resource("de6549d8-268b-4dfe-adaf-a4ae5c8510d5") - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 with pytest.raises(HDXError): dataset.add_update_resource(123) with pytest.raises(HDXError): @@ -1022,30 +1029,31 @@ def test_add_update_delete_resources(self, configuration, post_delete): dataset.delete_resource("NOTEXIST") datasetdata["resources"] = resourcesdata dataset = Dataset(datasetdata) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 resource = copy.deepcopy(resources_data[0]) del resource["id"] dataset.add_update_resource(resource) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset.add_update_resources([resource]) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 resource = copy.deepcopy(resources_data[2]) del resource["id"] dataset.add_update_resource(resource) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 dataset.add_update_resources([resource]) - assert len(dataset.resources) == 3 + assert len(dataset._resources) == 3 resource["format"] = "mdb" dataset.add_update_resource(resource) - assert len(dataset.resources) == 4 + assert len(dataset._resources) == 4 resource = copy.deepcopy(resources_data[2]) del resource["id"] resource["format"] = "doc" dataset.add_update_resources([resource]) - assert len(dataset.resources) == 5 + assert len(dataset._resources) == 5 resource["format"] = "NOTEXIST" - with pytest.raises(HDXError): - dataset.add_update_resource(resource) + # Check for format is no longer in add update but occurs on create/update + dataset.add_update_resource(resource) + assert len(dataset._resources) == 6 def test_reorder_resources(self, configuration, post_reorder): dataset = Dataset.read_from_hdx("TEST1") @@ -1117,7 +1125,7 @@ def test_move_resources(self, configuration): "afg_hpc_needs_api_2024.csv", ] resources = [Resource({"name": x}) for x in input_resource_names] - dataset.resources = resources + dataset._resources = resources resource_name = "afg_hpc_needs_api_2024.csv" insert_before = "afg_hpc_needs_2024" resource = dataset.move_resource(resource_name, insert_before) @@ -1155,7 +1163,7 @@ def test_move_resources(self, configuration): "afg_hpc_needs_api_2021.csv", ] resources = [Resource({"name": x}) for x in input_resource_names] - dataset.resources = resources + dataset._resources = resources resource_name = "afg_hpc_needs_api_2021.csv" insert_before = "afg_hpc_needs_2021" _ = dataset.move_resource(resource_name, insert_before) @@ -1190,7 +1198,7 @@ def test_move_resources(self, configuration): "afg_hpc_needs_api_2024.csv", ] resources = [Resource({"name": x}) for x in input_resource_names] - dataset.resources = resources + dataset._resources = resources resource_name = "afg_hpc_needs_api_2024.csv" insert_before = "afg_hpc_needs_2024" _ = dataset.move_resource(resource_name, insert_before) @@ -1223,7 +1231,7 @@ def test_move_resources(self, configuration): "afg_hpc_needs_2021.xlsx", ] resources = [Resource({"name": x}) for x in input_resource_names] - dataset.resources = resources + dataset._resources = resources _ = dataset.move_resource(resource_name, insert_before) check.equal(resource["name"], "afg_hpc_needs_api_2024.csv") resource_names = [x["name"] for x in resources] diff --git 
a/tests/hdx/data/test_dataset_noncore.py b/tests/hdx/data/test_dataset_noncore.py index ed9ee3c7..6a5d5b95 100755 --- a/tests/hdx/data/test_dataset_noncore.py +++ b/tests/hdx/data/test_dataset_noncore.py @@ -776,9 +776,9 @@ def test_quickcharts_resource_last(self): dataset = Dataset(datasetdata) assert dataset.quickcharts_resource_last() is False resource = {"name": "QuickCharts-resource"} - dataset.resources.insert(1, resource) + dataset._resources.insert(1, resource) assert dataset.quickcharts_resource_last() is True - assert dataset.resources[3]["name"] == resource["name"] + assert dataset._resources[3]["name"] == resource["name"] assert dataset.quickcharts_resource_last() is True def test_generate_resource_view( diff --git a/tests/hdx/data/test_dataset_resource_generation.py b/tests/hdx/data/test_dataset_resource_generation.py index 87e2f7e2..87e126f8 100644 --- a/tests/hdx/data/test_dataset_resource_generation.py +++ b/tests/hdx/data/test_dataset_resource_generation.py @@ -68,8 +68,6 @@ def test_generate_qc_resource_from_rows(self, configuration): "name": "Conflict Data for Algeria", "description": "Conflict data with HXL tags", "format": "csv", - "resource_type": "file.upload", - "url_type": "upload", } assert_files_same( join("tests", "fixtures", "qc_from_rows", qc_filename), @@ -146,8 +144,6 @@ def process_row(headers, row): "description": "Conflict data with HXL tags", "format": "csv", "name": "Conflict Data for Algeria", - "resource_type": "file.upload", - "url_type": "upload", }, "headers": [ "lala", @@ -323,8 +319,6 @@ def process_row(headers, row): "description": "Cut down data for QuickCharts", "format": "csv", "name": "QuickCharts-Conflict Data for Algeria", - "resource_type": "file.upload", - "url_type": "upload", }, "qcheaders": [ "EVENT_ID_CNTY", @@ -412,15 +406,11 @@ def process_row(headers, row): "name": "Conflict Data for Algeria", "description": "Conflict data with HXL tags", "format": "csv", - "resource_type": "file.upload", - "url_type": "upload", }, { "name": "QuickCharts-Conflict Data for Algeria", "description": "Cut down data for QuickCharts", "format": "csv", - "resource_type": "file.upload", - "url_type": "upload", }, ] assert_files_same( @@ -479,8 +469,6 @@ def process_row(headers, row): "description": "Conflict data with HXL tags", "format": "csv", "name": "Conflict Data for Algeria", - "resource_type": "file.upload", - "url_type": "upload", }, "headers": [ "lala", @@ -656,8 +644,6 @@ def process_row(headers, row): "description": "Cut down data for QuickCharts", "format": "csv", "name": "QuickCharts-Conflict Data for Algeria", - "resource_type": "file.upload", - "url_type": "upload", }, "qcheaders": ["EVENT_ID_CNTY", "FATALITIES"], "qcrows": [ diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index d59889e3..3ebc4039 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -661,21 +661,20 @@ def test_read_from_hdx(self, configuration, read): def test_check_url_filetoupload(self, configuration): resource_data_copy = copy.deepcopy(resource_data) resource = Resource(resource_data_copy) - resource.check_url_filetoupload() + resource.set_types() resource.set_file_to_upload("abc") - resource.check_url_filetoupload() + resource.set_types() resource["url"] = "lala" with pytest.raises(HDXError): - resource.check_url_filetoupload() + resource.check_both_url_filetoupload() resource = Resource(resource_data_copy) resource["format"] = "NOTEXIST" with pytest.raises(HDXError): - resource.check_url_filetoupload() + 
resource.set_types() with pytest.raises(HDXError): resource.set_format("NOTEXIST") del resource["format"] - with pytest.raises(HDXError): - resource.check_url_filetoupload() + resource.set_types() def test_get_set_date_of_resource(self, configuration): resource = Resource({"daterange_for_data": "[2020-01-07T00:00:00 TO *]"}) @@ -700,7 +699,7 @@ def test_get_set_date_of_resource(self, configuration): def test_check_required_fields(self, configuration): resource_data_copy = copy.deepcopy(resource_data) resource = Resource(resource_data_copy) - resource.check_url_filetoupload() + resource.set_types() resource.check_required_fields() def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data): @@ -823,7 +822,7 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data resource_data_copy["id"] = "74b74ae1-df0c-4716-829f-4f939a046811" resource = Resource(resource_data_copy) resource.mark_data_updated() - assert resource.data_updated is True + assert resource._data_updated is True assert resource.is_marked_data_updated() is True status = resource.create_in_hdx() assert status == 0 diff --git a/tests/hdx/data/test_update_dataset_resources.py b/tests/hdx/data/test_update_dataset_resources.py index 72a1928d..6ef38ae6 100644 --- a/tests/hdx/data/test_update_dataset_resources.py +++ b/tests/hdx/data/test_update_dataset_resources.py @@ -95,9 +95,11 @@ def test_dataset_update_resources( new_dataset, expected_resources_to_update, ): - dataset.old_data = new_dataset.data - dataset.old_data["resources"] = new_dataset._copy_hdxobjects( - new_dataset.resources, Resource, ("file_to_upload", "data_updated") + dataset._old_data = new_dataset.data + dataset._old_data["resources"] = new_dataset._copy_hdxobjects( + new_dataset._resources, + Resource, + ("_file_to_upload", "_data_updated", "_url_backup"), ) ( resources_to_update, @@ -141,12 +143,13 @@ def test_dataset_update_resources( "SDG 4 Global and Thematic indicator list": 2, "SDG 4 Global and Thematic metadata": 2, } - dataset._prepare_hdx_call(dataset.old_data, {}) + dataset._prepare_hdx_call(dataset._old_data, {}) assert ( dataset["updated_by_script"] == "HDX Scraper: UNESCO (2022-12-19T12:51:30.579185)" ) results = dataset._revise_dataset( + False, tuple(), resources_to_update, resources_to_delete, diff --git a/tests/hdx/data/test_update_logic.py b/tests/hdx/data/test_update_logic.py index 2c0e6db0..d542bddb 100644 --- a/tests/hdx/data/test_update_logic.py +++ b/tests/hdx/data/test_update_logic.py @@ -175,9 +175,10 @@ def test_update_logic_1( temp_dir, new_dataset, new_resources_yaml, exclude=[13] ) self.add_dataset_resources(dataset, resources_yaml) - dataset.old_data = new_dataset.data - dataset.old_data["resources"] = new_dataset.resources + dataset._old_data = new_dataset.data + dataset._old_data["resources"] = new_dataset._resources statuses, results = dataset._dataset_hdx_update( + allow_no_resources=False, update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], @@ -898,9 +899,10 @@ def test_update_logic_2( temp_dir, new_dataset, new_resources_yaml, exclude=[52] ) self.add_dataset_resources(dataset, resources_yaml) - dataset.old_data = new_dataset.data - dataset.old_data["resources"] = new_dataset.resources + dataset._old_data = new_dataset.data + dataset._old_data["resources"] = new_dataset._resources statuses, results = dataset._dataset_hdx_update( + allow_no_resources=False, update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], @@ -1621,9 +1623,10 @@ def 
test_update_logic_3( temp_dir, new_dataset, new_resources_yaml, include=[0, 3] ) self.add_dataset_resources(dataset, resources_yaml, include=[0, 1, 2]) - dataset.old_data = new_dataset.data - dataset.old_data["resources"] = new_dataset.resources + dataset._old_data = new_dataset.data + dataset._old_data["resources"] = new_dataset._resources statuses, results = dataset._dataset_hdx_update( + allow_no_resources=False, update_resources=True, match_resources_by_metadata=True, keys_to_delete=[], diff --git a/tests/hdx/data/test_vocabulary.py b/tests/hdx/data/test_vocabulary.py index 12591da6..0232bdeb 100755 --- a/tests/hdx/data/test_vocabulary.py +++ b/tests/hdx/data/test_vocabulary.py @@ -1483,13 +1483,13 @@ def test_tag_mappings(self, configuration, read): "Action to Take": "merge", "New Tag(s)": "refugees", } - assert Vocabulary.get_mapped_tag("refugee") == (["refugees"], list()) + assert Vocabulary.get_mapped_tag("refugee") == (["refugees"], []) assert Vocabulary.get_mapped_tag("monitoring") == ( [], ["monitoring"], ) tags_dict["refugee"]["Action to Take"] = "ERROR" - assert Vocabulary.get_mapped_tag("refugee") == ([], list()) + assert Vocabulary.get_mapped_tag("refugee") == ([], []) refugeesdict = copy.deepcopy(tags_dict["refugees"]) del tags_dict["refugees"] assert Vocabulary.get_mapped_tag("refugees") == ( From b5cf8f34b7b4845512fb40854fbf49a44d33de6b Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 19 Aug 2025 10:14:04 +1200 Subject: [PATCH 05/16] Fix CKAN test --- tests/hdx/api/test_ckan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hdx/api/test_ckan.py b/tests/hdx/api/test_ckan.py index f0a0f9fa..c653679a 100644 --- a/tests/hdx/api/test_ckan.py +++ b/tests/hdx/api/test_ckan.py @@ -283,7 +283,6 @@ def create_resource(): resource = resources[i] assert updated_resource["name"] == resource["name"] assert updated_resource.get_format() == resource.get_format() - assert updated_resource["url_type"].lower() == resource["url_type"] url = resource.get("url") if url: if "humdata" in url: @@ -293,6 +292,7 @@ def create_resource(): else: assert "humdata" in updated_resource["url"] if i != 7: + assert updated_resource["url_type"].lower() == resource["url_type"] assert updated_resource.get("size") == resource.get("size") assert updated_resource.get("hash") == resource.get("hash") From 0ad9170f4e95b9aa43a821c8038ee91cd0d22070 Mon Sep 17 00:00:00 2001 From: mcarans Date: Tue, 19 Aug 2025 14:10:10 +1200 Subject: [PATCH 06/16] Ensure filestore resources are checked when creating datasets Increase coverage --- src/hdx/data/dataset.py | 4 +- src/hdx/data/resource.py | 12 ++--- .../api/utilities/test_filestore_helper.py | 8 +-- tests/hdx/data/test_resource.py | 54 +++++++++++++++---- 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 21df9c92..1c35074c 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -1220,8 +1220,6 @@ def create_in_hdx( if "ignore_check" not in kwargs: # allow ignoring of field checks self.check_required_fields(allow_no_resources=allow_no_resources, **kwargs) - # No need to check again after revising dataset - kwargs["ignore_check"] = True statuses = {} filestore_resources = {} if self._resources: @@ -1230,6 +1228,8 @@ def create_in_hdx( resource, filestore_resources, i, **kwargs ) statuses[resource["name"]] = status + # No need to check again after revising dataset + kwargs["ignore_check"] = True self.unseparate_resources() self._prepare_hdx_call(self.data, kwargs) 
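For orientation, a minimal caller-side sketch (not part of the patch) of the behaviour this series is building towards: the file set on a resource is sized and hashed before upload, and the filestore upload is skipped when both values already match what HDX holds. The dataset id and file path below are hypothetical; the status codes are those asserted in the tests in this series (2 when a file was re-uploaded, 1 when an existing resource was updated without a new file).

    from hdx.data.dataset import Dataset

    dataset = Dataset.read_from_hdx("my-dataset-id")   # hypothetical id
    resource = dataset.get_resource(0)
    resource.set_file_to_upload("data/latest.xlsx")    # sized and hashed via get_size_and_hash
    statuses = dataset.update_in_hdx()
    # e.g. {"Resource1": 2, "Resource2": 1, ...}; a file whose size and hash
    # are unchanged is not pushed to the filestore again.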
kwargs["operation"] = "create" diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 2b6fec29..25b05719 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -346,15 +346,11 @@ def set_types(self, data: Optional[Dict] = None) -> None: data = self.data if self._file_to_upload is None: if "url" in data: - if "resource_type" not in data: - data["resource_type"] = "api" - if "url_type" not in data: - data["url_type"] = "api" + data["resource_type"] = "api" + data["url_type"] = "api" else: - if "resource_type" not in data: - data["resource_type"] = "file.upload" - if "url_type" not in data: - data["url_type"] = "upload" + data["resource_type"] = "file.upload" + data["url_type"] = "upload" if "tracking_summary" in data: del data["tracking_summary"] file_format = data.get("format") diff --git a/tests/hdx/api/utilities/test_filestore_helper.py b/tests/hdx/api/utilities/test_filestore_helper.py index b02829b4..8fae229e 100644 --- a/tests/hdx/api/utilities/test_filestore_helper.py +++ b/tests/hdx/api/utilities/test_filestore_helper.py @@ -46,8 +46,8 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): "hash": "3790da698479326339fa99a074cbc1f7", "size": 1548, "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", - "resource_type": "api", - "url_type": "api", + "resource_type": "file.upload", + "url_type": "upload", "url": "updated_by_file_upload_step", } assert filestore_resources == {0: test_data} @@ -63,8 +63,8 @@ def test_dataset_update_filestore_resource(self, configuration, test_data): "hash": "3790da698479326339fa99a074cbc1f7", "size": 1548, "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", - "resource_type": "api", - "url_type": "api", + "resource_type": "file.upload", + "url_type": "upload", "url": "updated_by_file_upload_step", } assert filestore_resources == {} diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index 3ebc4039..b39cf528 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -152,23 +152,24 @@ def mockshow(url, datadict): '{"success": false, "error": {"message": "TEST ERROR: Not show", "__type": "TEST ERROR: Not Show Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}', ) result = json.dumps(resultdict) - if datadict["id"] == "74b74ae1-df0c-4716-829f-4f939a046811": + datadict_id = datadict["id"] + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046811": return MockResponse( 200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}' % result, ) - if datadict["id"] == "74b74ae1-df0c-4716-829f-4f939a046812": + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046812": return MockResponse( 404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}', ) - if datadict["id"] == "74b74ae1-df0c-4716-829f-4f939a046813": + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046813": return MockResponse( 200, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}', ) - if datadict["id"] == "74b74ae1-df0c-4716-829f-4f939a046814": + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046814": resdictcopy = copy.deepcopy(resultdict) resdictcopy["url"] = "lalalala" result = json.dumps(resdictcopy) @@ -177,7 +178,7 @@ def mockshow(url, datadict): '{"success": 
true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}' % result, ) - if datadict["id"] == "74b74ae1-df0c-4716-829f-4f939a046815": + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046815": resdictcopy = copy.deepcopy(resultdict) resdictcopy["id"] = "datastore_unknown_resource" result = json.dumps(resdictcopy) @@ -186,6 +187,25 @@ def mockshow(url, datadict): '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}' % result, ) + if datadict_id in ( + "74b74ae1-df0c-4716-829f-4f939a046816", + "74b74ae1-df0c-4716-829f-4f939a046817", + ): + resdictcopy = copy.deepcopy(resultdict) + resdictcopy["url_type"] = "upload" + resdictcopy["resource_type"] = "file.upload" + resdictcopy["size"] = 1548 + resdictcopy["hash"] = "3790da698479326339fa99a074cbc1f7" + if datadict_id == "74b74ae1-df0c-4716-829f-4f939a046817": + resdictcopy["last_modified"] = "2023-05-08T13:09:42.209741" + else: + del resdictcopy["last_modified"] + result = json.dumps(resdictcopy) + return MockResponse( + 200, + '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}' + % result, + ) return MockResponse( 404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_show"}', @@ -382,7 +402,7 @@ def post(url, data, headers, files, allow_redirects, auth=None): resultdictcopy["url"] = ( f"http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/{filename}" ) - resultdictcopy["size"] = datadict["size"] + resultdictcopy["size"] = int(datadict["size"]) resultdictcopy["hash"] = datadict["hash"] resultdictcopy["state"] = datadict["state"] @@ -658,9 +678,12 @@ def test_read_from_hdx(self, configuration, read): with pytest.raises(HDXError): Resource.read_from_hdx("ABC") - def test_check_url_filetoupload(self, configuration): + def test_check_types_url_filetoupload(self, configuration): resource_data_copy = copy.deepcopy(resource_data) + del resource_data_copy["url"] resource = Resource(resource_data_copy) + with pytest.raises(HDXError): + resource.check_neither_url_filetoupload() resource.set_types() resource.set_file_to_upload("abc") resource.set_types() @@ -735,7 +758,7 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data resource["url"] == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv" ) - assert resource["size"] == "1548" + assert resource["size"] == 1548 assert resource["hash"] == "3790da698479326339fa99a074cbc1f7" assert resource["state"] == "active" resource.set_file_to_upload(test_data) @@ -745,7 +768,7 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create, test_data resource["url"] == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv" ) - assert resource["size"] == "1548" + assert resource["size"] == 1548 assert resource["hash"] == "3790da698479326339fa99a074cbc1f7" resource_data_copy["name"] = "MyResource2" resource = Resource(resource_data_copy) @@ -804,7 +827,6 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data assert resource["size"] == "1548" assert resource["hash"] == "3790da698479326339fa99a074cbc1f7" assert 
resource["state"] == "active" - resource["id"] = "NOTEXIST" with pytest.raises(HDXError): resource.update_in_hdx() @@ -836,6 +858,18 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update, test_data status = resource.update_in_hdx() assert status == 1 assert resource.get_format() == "geoservice" + + # These resource ids are set up so that the test datasets read have the same + # hash and size as the file being uploaded + resource["id"] = "74b74ae1-df0c-4716-829f-4f939a046816" + resource.set_file_to_upload(test_data, guess_format_from_suffix=True) + status = resource.update_in_hdx() + assert status == 3 + resource["id"] = "74b74ae1-df0c-4716-829f-4f939a046817" + resource.set_file_to_upload(test_data, guess_format_from_suffix=True) + status = resource.update_in_hdx() + assert status == 4 + resource["format"] = "NOTEXIST" with pytest.raises(HDXError): resource.update_in_hdx() From 01e9611e0bdfc3d28d3a4abb53eb5906d4beff86 Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 09:04:42 +1200 Subject: [PATCH 07/16] Split format correction into separate method with tests Only set url_type to api when there's a url if it is not already set to upload (alternatively could check if url looks like a filestore one but I think there are some datasets set up linking to other datasets' resources) --- requirements.txt | 2 +- src/hdx/api/utilities/filestore_helper.py | 30 ++++++--- src/hdx/data/resource.py | 79 ++++++++++++++--------- tests/hdx/__init__.py | 8 +-- tests/hdx/data/test_dataset_core.py | 6 ++ tests/hdx/data/test_resource.py | 33 ++++++---- 6 files changed, 101 insertions(+), 57 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2ae77d2c..adb7bf3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -260,7 +260,7 @@ rpds-py==0.27.0 # referencing rsa==4.9.1 # via google-auth -ruamel-yaml==0.18.14 +ruamel-yaml==0.18.15 # via hdx-python-utilities ruamel-yaml-clib==0.2.12 # via ruamel-yaml diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index b66eea33..c588480d 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -62,6 +62,7 @@ def check_filestore_resource( int: Status code """ resource_data_to_update.set_types() + resource_data_to_update.correct_format(resource_data_to_update.data) cls.resource_check_required_fields(resource_data_to_update, **kwargs) file_to_upload = resource_data_to_update.get_file_to_upload() if file_to_upload: @@ -120,19 +121,30 @@ def dataset_update_filestore_resource( else: # update file if size or hash has changed filestore_resources[resource_index] = file_to_upload + resource_data_to_update["resource_type"] = "file.upload" + resource_data_to_update["url_type"] = "upload" + if "tracking_summary" in resource_data_to_update: + del resource_data_to_update["tracking_summary"] resource_data_to_update["url"] = cls.temporary_url resource_data_to_update["size"] = size resource_data_to_update["hash"] = hash resource_data_to_update._url_backup = None status = 2 - elif resource_data_to_update.is_marked_data_updated(): - # Should not output timezone info here - resource_data_to_update["last_modified"] = now_utc_notz().isoformat( - timespec="microseconds" - ) - resource_data_to_update._data_updated = False - status = 0 else: - status = 1 - resource_data_to_update.set_types() + if ( + "url" in resource_data_to_update + and resource_data_to_update.get("url_type") != "upload" + ): + resource_data_to_update["resource_type"] = "api" + 
resource_data_to_update["url_type"] = "api" + if resource_data_to_update.is_marked_data_updated(): + # Should not output timezone info here + resource_data_to_update["last_modified"] = now_utc_notz().isoformat( + timespec="microseconds" + ) + resource_data_to_update._data_updated = False + status = 0 + else: + status = 1 + resource_data_to_update.correct_format(resource_data_to_update.data) return status diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 25b05719..87a09d11 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -332,37 +332,43 @@ def check_neither_url_filetoupload(self) -> None: if self._file_to_upload is None and "url" not in self.data: raise HDXError("Either a url or a file to upload must be supplied!") - def set_types(self, data: Optional[Dict] = None) -> None: - """Add resource_type and url_type if not supplied based on url or file to - upload. Correct the file type. + def correct_format(self, data: Dict = None) -> None: + """Correct the format of the file Args: - data (Dict): Resource data. Defaults to None (self.data) + data (Dict): Resource data. Returns: None """ - if data is None: - data = self.data - if self._file_to_upload is None: - if "url" in data: - data["resource_type"] = "api" - data["url_type"] = "api" - else: - data["resource_type"] = "file.upload" - data["url_type"] = "upload" - if "tracking_summary" in data: - del data["tracking_summary"] file_format = data.get("format") - if file_format is not None: - file_format = self.get_mapped_format( - file_format, configuration=self.configuration + if file_format is None: + return + file_format = self.get_mapped_format( + file_format, configuration=self.configuration + ) + if not file_format: + raise HDXError( + f"Supplied file format {file_format} is invalid and could not be mapped to a known type!" ) - if not file_format: - raise HDXError( - f"Supplied file type {file_format} is invalid and could not be mapped to a known type!" - ) - data["format"] = file_format + data["format"] = file_format + + def set_types(self) -> None: + """Add resource_type and url_type if not supplied based on url or file to + upload. + + Returns: + None + """ + if self._file_to_upload is None: + if "url" in self.data: + self.data["resource_type"] = "api" + self.data["url_type"] = "api" + else: + self.data["resource_type"] = "file.upload" + self.data["url_type"] = "upload" + if "tracking_summary" in self.data: + del self.data["tracking_summary"] def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None: """Check that metadata for resource is complete. 
The parameter ignore_fields @@ -417,21 +423,29 @@ def _resource_merge_hdx_update( else: # update file if size or hash has changed files["upload"] = self._file_to_upload + self._old_data["resource_type"] = "file.upload" + self._old_data["url_type"] = "upload" + if "tracking_summary" in self._old_data: + del self._old_data["tracking_summary"] self._old_data["size"] = size self._old_data["hash"] = hash status = 2 self._url_backup = None - elif data_updated: - # Should not output timezone info here - self._old_data["last_modified"] = now_utc_notz().isoformat( - timespec="microseconds" - ) - self._data_updated = False - status = 0 else: - status = 1 + if "url" in self._old_data: + self._old_data["resource_type"] = "api" + self._old_data["url_type"] = "api" + if data_updated: + # Should not output timezone info here + self._old_data["last_modified"] = now_utc_notz().isoformat( + timespec="microseconds" + ) + self._data_updated = False + status = 0 + else: + status = 1 - self.set_types(self._old_data) + self.correct_format(self._old_data) # old_data will be merged into data in the next step self._merge_hdx_update("resource", "id", files, True, **kwargs) return status @@ -498,6 +512,7 @@ def create_in_hdx(self, **kwargs: Any) -> int: return self._resource_merge_hdx_update(**kwargs) self.set_types() + self.correct_format(self.data) if "ignore_check" not in kwargs: # allow ignoring of field checks self.check_required_fields() files = {} diff --git a/tests/hdx/__init__.py b/tests/hdx/__init__.py index 18078d85..f4d475ae 100755 --- a/tests/hdx/__init__.py +++ b/tests/hdx/__init__.py @@ -84,7 +84,7 @@ def json(self): "id": "de6549d8-268b-4dfe-adaf-a4ae5c8510d5", "description": "Resource1", "name": "Resource1", - "url": "http://resource1.xlsx", + "url": "http://data.humdata.org/dataset/resource/resource1.xlsx", "format": "xlsx", }, { @@ -148,9 +148,9 @@ def json(self): "hash": "", "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "name": "Resource1", - "url": "http://resource1.xlsx", - "resource_type": "api", - "url_type": "api", + "url": "http://data.humdata.org/dataset/resource/resource1.xlsx", + "resource_type": "file.upload", + "url_type": "upload", "size": None, "mimetype_inner": None, "cache_last_updated": None, diff --git a/tests/hdx/data/test_dataset_core.py b/tests/hdx/data/test_dataset_core.py index 29e7e157..66ceb3d2 100755 --- a/tests/hdx/data/test_dataset_core.py +++ b/tests/hdx/data/test_dataset_core.py @@ -365,6 +365,12 @@ def post(url, data, headers, files, allow_redirects, auth=None): datadict["resources"][i] = resultdictcopy["resources"][ i ] + if datadict["id"] == "TEST5": + resultdictcopy["resources"][i].update( + datadict["resources"][i] + ) + if datadict["id"] == "TEST5": + del datadict["resources"] merge_two_dictionaries(resultdictcopy, datadict) for i, resource in enumerate(resultdictcopy["resources"]): resource["package_id"] = resultdictcopy["id"] diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index b39cf528..4a772a1c 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -678,26 +678,37 @@ def test_read_from_hdx(self, configuration, read): with pytest.raises(HDXError): Resource.read_from_hdx("ABC") - def test_check_types_url_filetoupload(self, configuration): - resource_data_copy = copy.deepcopy(resource_data) - del resource_data_copy["url"] - resource = Resource(resource_data_copy) + def test_url_filetoupload(self, configuration): + resource = Resource(resource_data) + del resource["url"] with 
pytest.raises(HDXError): resource.check_neither_url_filetoupload() - resource.set_types() resource.set_file_to_upload("abc") - resource.set_types() resource["url"] = "lala" with pytest.raises(HDXError): resource.check_both_url_filetoupload() - resource = Resource(resource_data_copy) + + def test_check_types(self, configuration): + resource = Resource(resource_data) + resource.set_types() + assert resource["resource_type"] == "api" + assert resource["url_type"] == "api" + resource.set_file_to_upload("abc") + resource.set_types() + assert resource["resource_type"] == "file.upload" + assert resource["url_type"] == "upload" + + def test_correct_format(self, configuration): + resource = Resource(resource_data) + resource["format"] = "XLSX" + resource.correct_format(resource.data) + assert resource.get_format() == "xlsx" resource["format"] = "NOTEXIST" with pytest.raises(HDXError): - resource.set_types() - with pytest.raises(HDXError): - resource.set_format("NOTEXIST") + resource.correct_format(resource.data) del resource["format"] - resource.set_types() + resource.correct_format(resource.data) + assert resource.get_format() is None def test_get_set_date_of_resource(self, configuration): resource = Resource({"daterange_for_data": "[2020-01-07T00:00:00 TO *]"}) From 16e3baaa69c0648e85b74be317077103f5baf794 Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 09:31:38 +1200 Subject: [PATCH 08/16] Fix resource_view and showcase to check required fields on create path only in create_in_hdx --- src/hdx/data/resource_view.py | 4 ++-- src/hdx/data/showcase.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/hdx/data/resource_view.py b/src/hdx/data/resource_view.py index 7bbf3163..6a6bd153 100755 --- a/src/hdx/data/resource_view.py +++ b/src/hdx/data/resource_view.py @@ -164,9 +164,9 @@ def create_in_hdx(self, **kwargs: Any) -> None: Returns: None """ - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() if not self._update_resource_view(log=True, **kwargs): + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() self._save_to_hdx("create", "title") def delete_from_hdx(self) -> None: diff --git a/src/hdx/data/showcase.py b/src/hdx/data/showcase.py index 4c20c53e..4e8cb56c 100755 --- a/src/hdx/data/showcase.py +++ b/src/hdx/data/showcase.py @@ -129,19 +129,17 @@ def create_in_hdx(self, **kwargs: Any) -> None: Returns: None """ - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() if "name" in self.data and self._load_from_hdx("showcase", self.data["name"]): logger.warning(f"{'showcase'} exists. Updating {self.data['name']}") merge_two_dictionaries(self.data, self._old_data) self.clean_tags() self._hdx_update("showcase", "name", force_active=True, **kwargs) else: + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() self.clean_tags() self._save_to_hdx("create", "title", force_active=True) - self._create_in_hdx("showcase", "name", "title", **kwargs) - def delete_from_hdx(self) -> None: """Deletes a showcase from HDX. 
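
The behaviour these patches converge on is that a file is only pushed to the HDX filestore when its size or MD5 hash differs from what HDX already records for the resource; otherwise only the metadata is merged and last_modified is left alone. A minimal sketch of that gating logic follows, under stated assumptions: get_size_and_md5 and needs_filestore_upload are illustrative names, not the library API — the real helper is get_size_and_hash in src/hdx/api/utilities/size_hash.py and it also receives the file format.

    import hashlib
    import os
    from typing import Dict, Tuple


    def get_size_and_md5(path: str, chunk_size: int = 4096) -> Tuple[int, str]:
        """Stream the file in chunks so large uploads need not fit in memory."""
        md5 = hashlib.md5()
        with open(path, "rb") as f:
            while chunk := f.read(chunk_size):
                md5.update(chunk)
        return os.path.getsize(path), md5.hexdigest()


    def needs_filestore_upload(path: str, resource: Dict) -> bool:
        """Return True only if the local file differs from what HDX has recorded."""
        size, md5 = get_size_and_md5(path)
        return size != resource.get("size") or md5 != resource.get("hash")


    # Illustrative use (size and hash values taken from the tests in this series):
    # resource = {"size": 1548, "hash": "3790da698479326339fa99a074cbc1f7"}
    # if needs_filestore_upload("tests/fixtures/test_data.csv", resource):
    #     ...  # point url at the temporary placeholder and attach the file
    # else:
    #     ...  # skip the upload; update metadata only and do not touch last_modified

The same comparison drives both paths in the patches: FilestoreHelper.dataset_update_filestore_resource for dataset-level updates and Resource._resource_merge_hdx_update for a resource updated on its own, which is why both set size and hash on the outgoing data before the merge step.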
From 5928b584114b9b7c1be186b2c3ce1a67ac863a46 Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 09:33:16 +1200 Subject: [PATCH 09/16] Fix resource_view and showcase to check required fields on create path only in create_in_hdx --- src/hdx/data/showcase.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hdx/data/showcase.py b/src/hdx/data/showcase.py index 4e8cb56c..6fbedfc6 100755 --- a/src/hdx/data/showcase.py +++ b/src/hdx/data/showcase.py @@ -134,11 +134,11 @@ def create_in_hdx(self, **kwargs: Any) -> None: merge_two_dictionaries(self.data, self._old_data) self.clean_tags() self._hdx_update("showcase", "name", force_active=True, **kwargs) - else: - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() - self.clean_tags() - self._save_to_hdx("create", "title", force_active=True) + return + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() + self.clean_tags() + self._save_to_hdx("create", "title", force_active=True) def delete_from_hdx(self) -> None: """Deletes a showcase from HDX. From 2427ba49a69768ef3abe2b9b504c925b2379b7ee Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 10:31:29 +1200 Subject: [PATCH 10/16] Add slogging for when we don't update filestore when hash and size same --- src/hdx/api/utilities/filestore_helper.py | 6 ++++++ src/hdx/data/resource.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index c588480d..0d3298a8 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -1,5 +1,6 @@ """Helper to the Dataset class for handling resources with filestores.""" +import logging from typing import TYPE_CHECKING, Any, Dict from hdx.api.utilities.size_hash import get_size_and_hash @@ -8,6 +9,8 @@ if TYPE_CHECKING: from hdx.data.resource import Resource +logger = logging.getLogger(__name__) + class FilestoreHelper: temporary_url = "updated_by_file_upload_step" @@ -109,6 +112,9 @@ def dataset_update_filestore_resource( if size == original_resource_data.get( "size" ) and hash == original_resource_data.get("hash"): + logger.info( + f"Updating metadata not filestore for resource {original_resource_data['name']} as size and hash are unchanged!" + ) if resource_data_to_update._url_backup: resource_data_to_update["url"] = resource_data_to_update._url_backup resource_data_to_update._url_backup = None diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 87a09d11..0f21967e 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -412,6 +412,9 @@ def _resource_merge_hdx_update( file_format = self._old_data.get("format", "").lower() size, hash = get_size_and_hash(self._file_to_upload, file_format) if size == self.data.get("size") and hash == self.data.get("hash"): + logger.info( + f"Updating metadata not filestore for resource {self.data['name']} as size and hash are unchanged!" 
+ ) if self._url_backup: self._old_data["url"] = self._url_backup # ensure last_modified is not updated if file hasn't changed From ad0e559b525bb868bfcab2f1b9ff39e14857d05e Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 10:36:09 +1200 Subject: [PATCH 11/16] Add warning for when we don't update filestore when hash and size same --- src/hdx/api/utilities/filestore_helper.py | 2 +- src/hdx/data/resource.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index 0d3298a8..24106369 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -112,7 +112,7 @@ def dataset_update_filestore_resource( if size == original_resource_data.get( "size" ) and hash == original_resource_data.get("hash"): - logger.info( + logger.warning( f"Updating metadata not filestore for resource {original_resource_data['name']} as size and hash are unchanged!" ) if resource_data_to_update._url_backup: diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 0f21967e..ac4211a2 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -412,7 +412,7 @@ def _resource_merge_hdx_update( file_format = self._old_data.get("format", "").lower() size, hash = get_size_and_hash(self._file_to_upload, file_format) if size == self.data.get("size") and hash == self.data.get("hash"): - logger.info( + logger.warning( f"Updating metadata not filestore for resource {self.data['name']} as size and hash are unchanged!" ) if self._url_backup: From c99ba7da7085681c1910d382ffcdb907fe233cca Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 10:38:26 +1200 Subject: [PATCH 12/16] Add warning for when we don't update filestore when hash and size same --- src/hdx/api/utilities/filestore_helper.py | 2 +- src/hdx/data/resource.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hdx/api/utilities/filestore_helper.py b/src/hdx/api/utilities/filestore_helper.py index 24106369..2b71b9bb 100755 --- a/src/hdx/api/utilities/filestore_helper.py +++ b/src/hdx/api/utilities/filestore_helper.py @@ -113,7 +113,7 @@ def dataset_update_filestore_resource( "size" ) and hash == original_resource_data.get("hash"): logger.warning( - f"Updating metadata not filestore for resource {original_resource_data['name']} as size and hash are unchanged!" + f"Not updating filestore for resource {original_resource_data['name']} as size and hash unchanged!" ) if resource_data_to_update._url_backup: resource_data_to_update["url"] = resource_data_to_update._url_backup diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index ac4211a2..33645091 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -413,7 +413,7 @@ def _resource_merge_hdx_update( size, hash = get_size_and_hash(self._file_to_upload, file_format) if size == self.data.get("size") and hash == self.data.get("hash"): logger.warning( - f"Updating metadata not filestore for resource {self.data['name']} as size and hash are unchanged!" + f"Not updating filestore for resource {self.data['name']} as size and hash unchanged!" 
) if self._url_backup: self._old_data["url"] = self._url_backup From 93ec4f70d54520e3444c79026d936afdf5351127 Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 11:47:48 +1200 Subject: [PATCH 13/16] Updating resource view - CKAN returns None so must check fields before update not after --- src/hdx/data/resource_view.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/hdx/data/resource_view.py b/src/hdx/data/resource_view.py index 6a6bd153..53ecec64 100755 --- a/src/hdx/data/resource_view.py +++ b/src/hdx/data/resource_view.py @@ -131,9 +131,9 @@ def _update_resource_view(self, log: bool = False, **kwargs: Any) -> bool: Returns: bool: True if updated and False if not """ - update = False + updated = False if "id" in self.data and self._load_from_hdx("resource view", self.data["id"]): - update = True + updated = True else: if "resource_id" in self.data: resource_views = self.get_all_for_resource(self.data["resource_id"]) @@ -141,13 +141,13 @@ def _update_resource_view(self, log: bool = False, **kwargs: Any) -> bool: if self.data["title"] == resource_view["title"]: self._old_data = self.data self.data = resource_view.data - update = True + updated = True break - if update: + if updated: if log: logger.warning(f"resource view exists. Updating {self.data['id']}") self._merge_hdx_update("resource view", "id", **kwargs) - return update + return updated def update_in_hdx(self, **kwargs: Any) -> None: """Check if resource view exists in HDX and if so, update resource view @@ -164,9 +164,11 @@ def create_in_hdx(self, **kwargs: Any) -> None: Returns: None """ + # Updating resource view returns None so we must check fields up front + if "ignore_check" not in kwargs: # allow ignoring of field checks + self.check_required_fields() + kwargs["ignore_check"] = True if not self._update_resource_view(log=True, **kwargs): - if "ignore_check" not in kwargs: # allow ignoring of field checks - self.check_required_fields() self._save_to_hdx("create", "title") def delete_from_hdx(self) -> None: From ae336a79face496394bd6539fc8c7b10a818d53f Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 13:46:00 +1200 Subject: [PATCH 14/16] Check that new or updated resources do not have both a url and a file to upload --- src/hdx/data/dataset.py | 10 +-- .../hdx/data/test_update_dataset_resources.py | 76 +++++++++++++++++++ 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 1c35074c..1df2e424 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -537,13 +537,7 @@ def _dataset_load_from_hdx(self, id_or_name: str) -> bool: return True def check_resources_url_filetoupload(self) -> None: - """Check that metadata for dataset and its resources is complete. The parameter ignore_fields - should be set if required to any fields that should be ignored for the particular operation. - Prepend "resource:" for resource fields. - - Args: - ignore_fields (ListTuple[str]): Fields to ignore. Default is tuple(). - allow_no_resources (bool): Whether to allow no resources. Defaults to False. 
+ """Check for error where both url or file to upload are provided for resources Returns: None @@ -1127,6 +1121,7 @@ def update_in_hdx( Returns: Dict: Status codes of resources """ + self.check_resources_url_filetoupload() loaded = False if "id" in self.data: self._check_existing_object("dataset", "id") @@ -1193,6 +1188,7 @@ def create_in_hdx( Returns: Dict: Status codes of resources """ + self.check_resources_url_filetoupload() loadedid = self.data.get("id") if loadedid is not None: if not self._dataset_load_from_hdx(loadedid): diff --git a/tests/hdx/data/test_update_dataset_resources.py b/tests/hdx/data/test_update_dataset_resources.py index 6ef38ae6..20a36ed4 100644 --- a/tests/hdx/data/test_update_dataset_resources.py +++ b/tests/hdx/data/test_update_dataset_resources.py @@ -265,3 +265,79 @@ def test_dataset_update_resources( "url_type": "upload", }, ] + + def test_dataset_update_resources_position( + self, fixture_path, configuration, test_data, test_xlsx + ): + dataset = Dataset({"name": "test"}) + resource = Resource( + { + "name": "test1", + "format": "csv", + } + ) + resource.set_file_to_upload(test_data) + dataset.add_update_resource(resource) + resource2 = Resource( + { + "name": "test2", + "description": "test2", + "format": "xlsx", + } + ) + resource2.set_file_to_upload(test_xlsx) + dataset.add_update_resource(resource2) + + dataset._old_data = dataset.data + dataset._old_data["resources"] = dataset._copy_hdxobjects( + dataset._resources, + Resource, + ("_file_to_upload", "_data_updated", "_url_backup"), + ) + + dataset._resources = [] + resource = Resource( + { + "name": "test1", + "url": "https://data.humdata.org/dataset/resource/file1.csv", + "format": "csv", + "resource_type": "file.upload", + "url_type": "upload", + } + ) + dataset.add_update_resource(resource) + ( + resources_to_update, + resources_to_delete, + filestore_resources, + new_resource_order, + statuses, + ) = dataset._dataset_update_resources(True, False, True, True) + assert resources_to_update == [ + { + "format": "csv", + "hash": "3790da698479326339fa99a074cbc1f7", + "name": "test1", + "resource_type": "file.upload", + "size": 1548, + "url": "updated_by_file_upload_step", + "url_type": "upload", + }, + { + "description": "test2", + "format": "xlsx", + "hash": "6b8acf7e28d62685a1e829e7fa220d17", + "name": "test2", + "resource_type": "file.upload", + "size": 23724, + "url": "updated_by_file_upload_step", + "url_type": "upload", + }, + ] + assert resources_to_delete == [] + assert filestore_resources == { + 0: "tests/fixtures/test_data.csv", + 1: "tests/fixtures/size_hash/ACLED-All-Africa-File_20170101-to-20170708.xlsx", + } + assert new_resource_order == [("test1", "csv"), ("test2", "xlsx")] + assert statuses == {"test1": 2, "test2": 2} From 98af96b0e2c14c8c4e57014eaae54e327ecfb790 Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 20 Aug 2025 14:22:11 +1200 Subject: [PATCH 15/16] Add breaking changes to doc (it's sort of breaking in that behaviour is changing) --- documentation/index.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/documentation/index.md b/documentation/index.md index 0bc5b338..f5b0fb7a 100755 --- a/documentation/index.md +++ b/documentation/index.md @@ -54,6 +54,10 @@ The library has detailed API documentation which can be found in the menu at the ## Breaking Changes +From 6.5.0, files will not be uploaded to the HDX filestore if the hash and size have +not changed, but if there are any resource metadata changes, except for last_modified, +they will still take place). 
+ From 6.4.5, fix for changes in dependency defopt 7.0.0 From 6.2.8, fix mark_data_updated which was broken due to an error in From d9ce86ff1a129a2fcd707d5b8245b95f6d958ed2 Mon Sep 17 00:00:00 2001 From: mcarans Date: Thu, 21 Aug 2025 14:05:39 +1200 Subject: [PATCH 16/16] Add blue and green demo to server list --- documentation/index.md | 4 +-- src/hdx/api/hdx_base_configuration.yaml | 8 ++++-- tests/hdx/api/test_configuration.py | 36 ++++++++++++++++++------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/documentation/index.md b/documentation/index.md index f5b0fb7a..843051dc 100755 --- a/documentation/index.md +++ b/documentation/index.md @@ -54,8 +54,8 @@ The library has detailed API documentation which can be found in the menu at the ## Breaking Changes -From 6.5.0, files will not be uploaded to the HDX filestore if the hash and size have -not changed, but if there are any resource metadata changes, except for last_modified, +From 6.5.0, files will not be uploaded to the HDX filestore if the hash and size have +not changed, but if there are any resource metadata changes, except for last_modified, they will still take place). From 6.4.5, fix for changes in dependency defopt 7.0.0 diff --git a/src/hdx/api/hdx_base_configuration.yaml b/src/hdx/api/hdx_base_configuration.yaml index 9f5f7431..0dce103e 100755 --- a/src/hdx/api/hdx_base_configuration.yaml +++ b/src/hdx/api/hdx_base_configuration.yaml @@ -1,14 +1,18 @@ # HDX configuration hdx_prod_site: url: "https://data.humdata.org" -hdx_demo_site: - url: "https://demo.data-humdata-org.ahconu.org" hdx_stage_site: url: "https://stage.data-humdata-org.ahconu.org" hdx_feature_site: url: "https://feature.data-humdata-org.ahconu.org" hdx_dev_site: url: "https://dev.data-humdata-org.ahconu.org" +hdx_demo_site: + url: "https://demo.data-humdata-org.ahconu.org" +hdx_bluedemo_site: + url: "https://blue.demo.data-humdata-org.ahconu.org" +hdx_greendemo_site: + url: "https://green.demo.data-humdata-org.ahconu.org" dataset: required_fields: - name diff --git a/tests/hdx/api/test_configuration.py b/tests/hdx/api/test_configuration.py index d355b148..2ccc2f91 100755 --- a/tests/hdx/api/test_configuration.py +++ b/tests/hdx/api/test_configuration.py @@ -400,9 +400,6 @@ def test_project_configuration_dict(self, hdx_config_yaml): "hdx_prod_site": { "url": "https://data.humdata.org", }, - "hdx_demo_site": { - "url": "https://demo.data-humdata-org.ahconu.org", - }, "hdx_stage_site": { "url": "https://stage.data-humdata-org.ahconu.org", }, @@ -412,6 +409,15 @@ def test_project_configuration_dict(self, hdx_config_yaml): "hdx_dev_site": { "url": "https://dev.data-humdata-org.ahconu.org", }, + "hdx_demo_site": { + "url": "https://demo.data-humdata-org.ahconu.org", + }, + "hdx_bluedemo_site": { + "url": "https://blue.demo.data-humdata-org.ahconu.org", + }, + "hdx_greendemo_site": { + "url": "https://green.demo.data-humdata-org.ahconu.org", + }, "dataset": { "required_fields": [ "name", @@ -523,9 +529,6 @@ def test_project_configuration_json(self, hdx_config_yaml, project_config_json): "hdx_prod_site": { "url": "https://data.humdata.org", }, - "hdx_demo_site": { - "url": "https://demo.data-humdata-org.ahconu.org", - }, "hdx_stage_site": { "url": "https://stage.data-humdata-org.ahconu.org", }, @@ -535,6 +538,15 @@ def test_project_configuration_json(self, hdx_config_yaml, project_config_json): "hdx_dev_site": { "url": "https://dev.data-humdata-org.ahconu.org", }, + "hdx_demo_site": { + "url": "https://demo.data-humdata-org.ahconu.org", + }, + 
"hdx_bluedemo_site": { + "url": "https://blue.demo.data-humdata-org.ahconu.org", + }, + "hdx_greendemo_site": { + "url": "https://green.demo.data-humdata-org.ahconu.org", + }, "my_param": "abc", "dataset": { "required_fields": [ @@ -640,9 +652,6 @@ def test_project_configuration_yaml(self, hdx_config_yaml, project_config_yaml): "hdx_prod_site": { "url": "https://data.humdata.org", }, - "hdx_demo_site": { - "url": "https://demo.data-humdata-org.ahconu.org", - }, "hdx_stage_site": { "url": "https://stage.data-humdata-org.ahconu.org", }, @@ -652,6 +661,15 @@ def test_project_configuration_yaml(self, hdx_config_yaml, project_config_yaml): "hdx_dev_site": { "url": "https://dev.data-humdata-org.ahconu.org", }, + "hdx_demo_site": { + "url": "https://demo.data-humdata-org.ahconu.org", + }, + "hdx_bluedemo_site": { + "url": "https://blue.demo.data-humdata-org.ahconu.org", + }, + "hdx_greendemo_site": { + "url": "https://green.demo.data-humdata-org.ahconu.org", + }, "dataset": { "required_fields": [ "name",