12 changes: 7 additions & 5 deletions documentation/index.md
@@ -54,6 +54,8 @@ The library has detailed API documentation which can be found in the menu at the


## Breaking Changes
From 6.5.7, `get_size_and_hash` has moved to HDX Python Utilities.
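
A minimal sketch of the new import (the file path and format below are hypothetical; the import location and call signature follow this PR's source changes):

    from hdx.utilities.file_hashing import get_size_and_hash

    size, hash = get_size_and_hash("data.xlsx", "xlsx")  # hypothetical file and format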

From 6.5.2, remove unused `generate_qc_resource_from_rows` method.
`generate_resource_from_rows`, `generate_resource_from_iterable` and
`download_and_generate_resource` are deprecated. They are replaced by
@@ -906,11 +908,11 @@ corresponding id and that resource on HDX will be overwritten.

resource.create_in_hdx(dataset=DATASET)

Alternatively, if a resource doesn't have an id, but contains a package_id, the create
and update methods will use it to load the corresponding dataset, the resource will be
assigned to that dataset and it will be compared to resources in that dataset. If a
match is found, then the resource will be given the corresponding id and that resource
on HDX will be overwritten.
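
For example, a resource built with only a package_id and no id will be matched against that dataset's existing resources when created (a minimal sketch; the dataset id and resource fields are hypothetical):

    resource = Resource({"package_id": "dataset-id-on-hdx", "name": "My Resource",
                         "format": "csv", "url": "https://example.com/data.csv"})
    resource.create_in_hdx()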

You can download a resource using the **download** function, e.g.
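
A minimal sketch of typical usage (the folder path is hypothetical, and the returned URL/path pair is an assumption about the download helper):

    url, path = resource.download(folder="/path/to/folder")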

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -38,7 +38,7 @@ dependencies = [
"defopt>=7.0.0",
"email_validator",
"hdx-python-country>=3.9.8",
"hdx-python-utilities>=3.9.6",
"hdx-python-utilities>=3.9.9",
"libhxl>=5.2.2",
"makefun",
"quantulum3",
28 changes: 13 additions & 15 deletions requirements.txt
@@ -16,7 +16,7 @@ backrefs==6.1
# via mkdocs-material
cachetools==6.2.4
# via google-auth
certifi==2025.11.12
certifi==2026.1.4
# via requests
cfgv==3.5.0
# via pre-commit
@@ -32,7 +32,7 @@ click==8.3.1
# typer
colorama==0.4.6
# via mkdocs-material
coverage==7.13.0
coverage==7.13.1
# via pytest-cov
defopt==7.0.0
# via hdx-python-api (pyproject.toml)
@@ -44,19 +44,19 @@ docopt==0.6.2
# via
# ckanapi
# num2words
docutils==0.22.3
docutils==0.22.4
# via defopt
email-validator==2.3.0
# via hdx-python-api (pyproject.toml)
et-xmlfile==2.0.0
# via openpyxl
filelock==3.20.1
filelock==3.20.2
# via virtualenv
frictionless==5.18.1
# via hdx-python-utilities
ghp-import==2.1.0
# via mkdocs
google-auth==2.45.0
google-auth==2.46.0
# via
# google-auth-oauthlib
# gspread
@@ -66,11 +66,11 @@ gspread==6.2.1
# via hdx-python-api (pyproject.toml)
hdx-python-country==3.9.8
# via hdx-python-api (pyproject.toml)
hdx-python-utilities==3.9.6
hdx-python-utilities==3.9.9
# via
# hdx-python-api (pyproject.toml)
# hdx-python-country
humanize==4.14.0
humanize==4.15.0
# via frictionless
identify==2.6.15
# via pre-commit
@@ -117,7 +117,7 @@ markdown==3.10
# pymdown-extensions
markdown-it-py==4.0.0
# via rich
marko==2.2.1
marko==2.2.2
# via frictionless
markupsafe==3.0.3
# via
@@ -137,13 +137,13 @@ mkdocs==1.6.1
# mkdocs-material
mkdocs-get-deps==0.2.0
# via mkdocs
mkdocs-material==9.7.0
mkdocs-material==9.7.1
# via mkapi
mkdocs-material-extensions==1.3.1
# via mkdocs-material
more-itertools==10.8.0
# via inflect
nodeenv==1.9.1
nodeenv==1.10.0
# via pre-commit
num2words==0.5.14
# via quantulum3
@@ -192,7 +192,7 @@ pygments==2.19.2
# mkdocs-material
# pytest
# rich
pymdown-extensions==10.19.1
pymdown-extensions==10.20
# via mkdocs-material
pyphonetics==0.5.3
# via hdx-python-utilities
@@ -259,10 +259,8 @@ rpds-py==0.30.0
# referencing
rsa==4.9.1
# via google-auth
ruamel-yaml==0.18.17
ruamel-yaml==0.19.1
# via hdx-python-utilities
ruamel-yaml-clib==0.2.15
# via ruamel-yaml
setuptools==80.9.0
# via ckanapi
shellingham==1.5.4
@@ -291,7 +289,7 @@ text-unidecode==1.3
# via python-slugify
typeguard==4.4.4
# via inflect
typer==0.20.0
typer==0.21.0
# via frictionless
typing-extensions==4.15.0
# via
10 changes: 3 additions & 7 deletions src/hdx/api/utilities/filestore_helper.py
@@ -3,8 +3,8 @@
import logging
from typing import TYPE_CHECKING, Any, Dict

from hdx.api.utilities.size_hash import get_size_and_hash
from hdx.utilities.dateparse import now_utc_notz
from hdx.utilities.file_hashing import get_size_and_hash

if TYPE_CHECKING:
from hdx.data.resource import Resource
@@ -112,13 +112,9 @@ def dataset_update_filestore_resource(
force_update = kwargs.pop("force_update", False)
file_format = resource_data_to_update.get("format", "").lower()
size, hash = get_size_and_hash(file_to_upload, file_format)
if (
not force_update
and size == original_resource_data.get("size")
and hash == original_resource_data.get("hash")
):
if not force_update and hash == original_resource_data.get("hash"):
logger.warning(
f"Not updating filestore for resource {original_resource_data['name']} as size and hash unchanged!"
f"Not updating filestore for resource {original_resource_data['name']} as hash unchanged!"
)
if resource_data_to_update._url_backup:
resource_data_to_update["url"] = resource_data_to_update._url_backup
43 changes: 0 additions & 43 deletions src/hdx/api/utilities/size_hash.py

This file was deleted.

10 changes: 3 additions & 7 deletions src/hdx/data/resource.py
@@ -11,11 +11,11 @@
import hdx.data.resource_matcher
from hdx.api.configuration import Configuration
from hdx.api.utilities.date_helper import DateHelper
from hdx.api.utilities.size_hash import get_size_and_hash
from hdx.data.hdxobject import HDXError, HDXObject
from hdx.data.resource_view import ResourceView
from hdx.utilities.dateparse import now_utc, now_utc_notz, parse_date
from hdx.utilities.downloader import Download
from hdx.utilities.file_hashing import get_size_and_hash
from hdx.utilities.retriever import Retrieve
from hdx.utilities.typehint import ListTuple
from hdx.utilities.uuid import is_valid_uuid
@@ -415,13 +415,9 @@ def _resource_merge_hdx_update(
force_update = kwargs.pop("force_update", False)
file_format = self._old_data.get("format", "").lower()
size, hash = get_size_and_hash(self._file_to_upload, file_format)
if (
not force_update
and size == self.data.get("size")
and hash == self.data.get("hash")
):
if not force_update and hash == self.data.get("hash"):
logger.warning(
f"Not updating filestore for resource {self.data['name']} as size and hash unchanged!"
f"Not updating filestore for resource {self.data['name']} as hash unchanged!"
)
if self._url_backup:
self._old_data["url"] = self._url_backup
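
The changes to filestore_helper.py and resource.py above drop the size comparison, so the filestore upload is now skipped on a hash match alone. A caller that needs to re-upload an unchanged file can still request it via force_update; a hedged sketch, assuming the keyword is passed through update_in_hdx's keyword arguments to the code above (the dataset id and filename are hypothetical):

    from hdx.data.dataset import Dataset

    dataset = Dataset.read_from_hdx("dataset-id-on-hdx")  # hypothetical dataset id
    resource = dataset.get_resource()
    resource.set_file_to_upload("data.xlsx")  # hypothetical file
    dataset.update_in_hdx(force_update=True)  # bypass the hash short-circuit
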
2 changes: 1 addition & 1 deletion tests/hdx/__init__.py
@@ -299,7 +299,7 @@ def dataset_mockshow(url, datadict):
# test existing size and hash same
resource = resultdictcopy["resources"][0]
resource["size"] = 23724
resource["hash"] = "6b8acf7e28d62685a1e829e7fa220d17"
resource["hash"] = "b2f92ef4b1c895568421cb887859a13d"
result = json.dumps(resultdictcopy)
return MockResponse(
200,
12 changes: 0 additions & 12 deletions tests/hdx/api/utilities/test_size_hash.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/hdx/data/test_dataset_core.py
@@ -774,7 +774,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern, test_xlsx
assert len(dataset._resources) == 3
result = dataset.get_resource()
assert result["size"] == 23724
assert result["hash"] == "6b8acf7e28d62685a1e829e7fa220d17"
assert result["hash"] == "b2f92ef4b1c895568421cb887859a13d"
assert statuses == {"Resource1": 2, "Resource2": 1, "Resource3": 1}
resource["name"] = "123"
resource.set_file_to_upload(None)
@@ -848,7 +848,7 @@ def test_update_in_hdx(self, configuration, post_update, date_pattern, test_xlsx
}
result = dataset.get_resource(2)
assert result["size"] == 23724
assert result["hash"] == "6b8acf7e28d62685a1e829e7fa220d17"
assert result["hash"] == "b2f92ef4b1c895568421cb887859a13d"
assert dataset["state"] == "active"
assert len(dataset._resources) == 3
dataset = Dataset(datasetdata)
2 changes: 1 addition & 1 deletion tests/hdx/data/test_update_dataset_resources.py
@@ -326,7 +326,7 @@ def test_dataset_update_resources_position(
{
"description": "test2",
"format": "xlsx",
"hash": "6b8acf7e28d62685a1e829e7fa220d17",
"hash": "b2f92ef4b1c895568421cb887859a13d",
"name": "test2",
"resource_type": "file.upload",
"size": 23724,