Skip to content

Commit 3eab990

Browse files
committed
Add download_generate_resource
1 parent 986f6a6 commit 3eab990

3 files changed

Lines changed: 39 additions & 108 deletions

File tree

requirements.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ babel==2.17.0
1414
# via mkdocs-material
1515
backrefs==5.9
1616
# via mkdocs-material
17-
cachetools==6.2.0
17+
cachetools==6.2.1
1818
# via google-auth
1919
certifi==2025.10.5
2020
# via requests
@@ -66,19 +66,19 @@ gspread==6.2.1
6666
# via hdx-python-api (pyproject.toml)
6767
hdx-python-country==3.9.8
6868
# via hdx-python-api (pyproject.toml)
69-
hdx-python-utilities==3.9.4
69+
hdx-python-utilities==3.9.5
7070
# via
7171
# hdx-python-api (pyproject.toml)
7272
# hdx-python-country
7373
humanize==4.13.0
7474
# via frictionless
7575
identify==2.6.15
7676
# via pre-commit
77-
idna==3.10
77+
idna==3.11
7878
# via
7979
# email-validator
8080
# requests
81-
ijson==3.4.0
81+
ijson==3.4.0.post0
8282
# via hdx-python-utilities
8383
inflect==7.5.0
8484
# via quantulum3
@@ -129,7 +129,7 @@ mergedeep==1.3.4
129129
# via
130130
# mkdocs
131131
# mkdocs-get-deps
132-
mkapi==4.4.5
132+
mkapi==4.5.0
133133
# via hdx-python-api (pyproject.toml)
134134
mkdocs==1.6.1
135135
# via
@@ -313,7 +313,7 @@ urllib3==2.5.0
313313
# requests
314314
validators==0.35.0
315315
# via frictionless
316-
virtualenv==20.34.0
316+
virtualenv==20.35.3
317317
# via pre-commit
318318
watchdog==6.0.0
319319
# via mkdocs

src/hdx/data/dataset.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3030,8 +3030,11 @@ def download_generate_resource(
30303030
resourcedata: Dict,
30313031
header_insertions: Optional[ListTuple[Tuple[int, str]]] = None,
30323032
row_function: Optional[Callable[[List[str], Dict], Dict]] = None,
3033-
datecol: Optional[str] = None,
3034-
yearcol: Optional[str] = None,
3033+
columns: Union[ListTuple[int], ListTuple[str], None] = None,
3034+
format: str = "csv",
3035+
encoding: Optional[str] = None,
3036+
datecol: Optional[Union[int, str]] = None,
3037+
yearcol: Optional[Union[int, str]] = None,
30353038
date_function: Optional[Callable[[Dict], Optional[Dict]]] = None,
30363039
**kwargs: Any,
30373040
) -> Tuple[bool, Dict]:
@@ -3087,10 +3090,12 @@ def download_generate_resource(
30873090
resourcedata (Dict): Resource data
30883091
header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None.
30893092
row_function (Optional[Callable[[List[str],Dict],Dict]]): Function to call for each row. Defaults to None.
3090-
datecol (Optional[str]): Date column for setting time period. Defaults to None (don't set).
3091-
yearcol (Optional[str]): Year column for setting dataset year range. Defaults to None (don't set).
3092-
date_function (Optional[Callable[[Dict],Optional[Dict]]]): Date function to call for each row. Defaults to None.
3093-
quickcharts (Optional[Dict]): Dictionary containing optional keys: hashtag, values, cutdown and/or cutdownhashtags
3093+
columns (Union[ListTuple[int], ListTuple[str], None]): Columns to write. Defaults to all.
3094+
format (str): Format to write. Defaults to csv.
3095+
encoding (Optional[str]): Encoding to use. Defaults to None (infer encoding).
3096+
datecol (Optional[Union[int, str]]): Date column for setting time period. Defaults to None (don't set).
3097+
yearcol (Optional[Union[int, str]]): Year column for setting dataset year range. Defaults to None (don't set).
3098+
date_function (Optional[Callable[[Dict],Optional[Dict]]]): Date function to call for each row. Defaults to None.
30943099
**kwargs: Any additional args to pass to downloader.get_tabular_rows
30953100
30963101
Returns:
@@ -3104,18 +3109,18 @@ def download_generate_resource(
31043109
format="csv",
31053110
**kwargs,
31063111
)
3107-
return self.generate_resource_from_iterable(
3108-
headers,
3109-
iterator,
3110-
hxltags,
3112+
return self.generate_resource(
31113113
folder,
31123114
filename,
3115+
iterator,
31133116
resourcedata,
3117+
headers,
3118+
columns=columns,
3119+
format=format,
3120+
encoding=encoding,
31143121
datecol=datecol,
31153122
yearcol=yearcol,
31163123
date_function=date_function,
3117-
quickcharts=quickcharts,
3118-
encoding=kwargs.get("encoding", None),
31193124
)
31203125

31213126
def download_and_generate_resource(
@@ -3195,6 +3200,10 @@ def download_and_generate_resource(
31953200
Returns:
31963201
Tuple[bool, Dict]: (True if resource added, dictionary of results)
31973202
"""
3203+
warnings.warn(
3204+
"download_and_generate_resource() is deprecated, use download_generate_resource() instead",
3205+
DeprecationWarning,
3206+
)
31983207
headers, iterator = downloader.get_tabular_rows(
31993208
url,
32003209
dict_form=True,

tests/hdx/data/test_dataset_resource_generation.py

Lines changed: 12 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class TestDatasetResourceGeneration:
3838
"ISO3": "#country+code",
3939
}
4040

41-
def test_generate_resource(self, configuration):
41+
def test_download_generate_resource(self, configuration):
4242
with temp_dir("test") as folder:
4343
filename = "conflict_data_alg.csv"
4444
resourcedata = {
@@ -56,20 +56,14 @@ def process_row(headers, row):
5656

5757
dataset = Dataset()
5858
with Download(user_agent="test") as downloader:
59-
headers, iterator = downloader.get_tabular_rows(
59+
success, results = dataset.download_generate_resource(
60+
downloader,
6061
TestDatasetResourceGeneration.url,
61-
dict_form=True,
62-
header_insertions=[(0, "lala")],
63-
row_function=process_row,
64-
format="csv",
65-
)
66-
67-
success, results = dataset.generate_resource(
6862
folder,
6963
filename,
70-
iterator,
7164
resourcedata,
72-
headers,
65+
header_insertions=[(0, "lala")],
66+
row_function=process_row,
7367
yearcol="YEAR",
7468
)
7569
assert success is True
@@ -242,48 +236,36 @@ def process_row(headers, row):
242236
join(folder, filename),
243237
)
244238

245-
success, results = dataset.download_and_generate_resource(
239+
success, results = dataset.download_generate_resource(
246240
downloader,
247241
TestDatasetResourceGeneration.url,
248-
TestDatasetResourceGeneration.hxltags,
249242
folder,
250243
filename,
251244
resourcedata,
252245
header_insertions=[(0, "lala")],
253246
row_function=process_row,
254247
datecol="EVENT_DATE",
255-
quickcharts=quickcharts,
256248
)
257249
assert success is True
258250
assert (
259251
dataset["dataset_date"]
260252
== "[2001-04-18T00:00:00 TO 2001-04-21T23:59:59]"
261253
)
262254

263-
quickcharts = {
264-
"hashtag": "#event+code",
265-
"values": ["1416RTA", "2230RTA", "2231RTA"],
266-
"numeric_hashtag": "#affected+killed",
267-
"cutdown": 2,
268-
"cutdownhashtags": ["#event+code"],
269-
}
270-
success, results = dataset.download_and_generate_resource(
255+
success, results = dataset.download_generate_resource(
271256
downloader,
272257
TestDatasetResourceGeneration.url,
273-
TestDatasetResourceGeneration.hxltags,
274258
folder,
275259
filename,
276260
resourcedata,
277261
header_insertions=[(0, "lala")],
278262
row_function=process_row,
279263
yearcol="YEAR",
280-
quickcharts=quickcharts,
281264
)
282265
assert success is True
283266
assert results == {
284267
"startdate": datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc),
285268
"enddate": datetime(2002, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
286-
"bites_disabled": [False, True, False],
287269
"resource": {
288270
"description": "Conflict data with HXL tags",
289271
"format": "csv",
@@ -318,34 +300,6 @@ def process_row(headers, row):
318300
"FATALITIES",
319301
],
320302
"rows": [
321-
{
322-
"lala": "",
323-
"GWNO": "",
324-
"EVENT_ID_CNTY": "#event+code",
325-
"EVENT_ID_NO_CNTY": "",
326-
"EVENT_DATE": "#date+occurred",
327-
"YEAR": "#date+year",
328-
"TIME_PRECISION": "",
329-
"EVENT_TYPE": "#event+type",
330-
"ACTOR1": "#group+name+first",
331-
"ALLY_ACTOR_1": "",
332-
"INTER1": "",
333-
"ACTOR2": "#group+name+second",
334-
"ALLY_ACTOR_2": "",
335-
"INTER2": "",
336-
"INTERACTION": "",
337-
"COUNTRY": "#country+name",
338-
"ADMIN1": "#adm1+name",
339-
"ADMIN2": "#adm2+name",
340-
"ADMIN3": "#adm3+name",
341-
"LOCATION": "#loc+name",
342-
"LATITUDE": "#geo+lat",
343-
"LONGITUDE": "#geo+lon",
344-
"GEO_PRECISION": "",
345-
"SOURCE": "#meta+source",
346-
"NOTES": "#description",
347-
"FATALITIES": "#affected+killed",
348-
},
349303
{
350304
"GWNO": "615",
351305
"EVENT_ID_CNTY": "1416RTA",
@@ -459,20 +413,6 @@ def process_row(headers, row):
459413
"lala": "lala",
460414
},
461415
],
462-
"qc_resource": {
463-
"description": "Cut down data for QuickCharts",
464-
"format": "csv",
465-
"name": "QuickCharts-Conflict Data for Algeria",
466-
},
467-
"qcheaders": ["EVENT_ID_CNTY", "FATALITIES"],
468-
"qcrows": [
469-
{
470-
"EVENT_ID_CNTY": "#event+code",
471-
"FATALITIES": "#affected+killed",
472-
},
473-
{"EVENT_ID_CNTY": "1416RTA", "FATALITIES": "1"},
474-
{"EVENT_ID_CNTY": "2231RTA", "FATALITIES": "0"},
475-
],
476416
}
477417

478418
def process_year(row):
@@ -484,19 +424,15 @@ def process_year(row):
484424
)
485425
return {"startdate": startdate, "enddate": enddate}
486426

487-
del quickcharts["hashtag"]
488-
del quickcharts["numeric_hashtag"]
489-
success, results = dataset.download_and_generate_resource(
427+
success, results = dataset.download_generate_resource(
490428
downloader,
491429
TestDatasetResourceGeneration.url,
492-
TestDatasetResourceGeneration.hxltags,
493430
folder,
494431
filename,
495432
resourcedata,
496433
header_insertions=[(0, "lala")],
497434
row_function=process_row,
498435
date_function=process_year,
499-
quickcharts=quickcharts,
500436
)
501437
assert success is True
502438
assert results["startdate"] == datetime(
@@ -509,15 +445,6 @@ def process_year(row):
509445
dataset["dataset_date"]
510446
== "[2001-01-01T00:00:00 TO 2001-12-31T23:59:59]"
511447
)
512-
assert_files_same(
513-
join(
514-
"tests",
515-
"fixtures",
516-
"gen_resource",
517-
f"min_{qc_filename}",
518-
),
519-
join(folder, qc_filename),
520-
)
521448

522449
with pytest.raises(HDXError):
523450
dataset.download_and_generate_resource(
@@ -530,10 +457,9 @@ def process_year(row):
530457
yearcol="YEAR",
531458
date_function=process_year,
532459
)
533-
success, results = dataset.download_and_generate_resource(
460+
success, results = dataset.download_generate_resource(
534461
downloader,
535462
TestDatasetResourceGeneration.url,
536-
TestDatasetResourceGeneration.hxltags,
537463
folder,
538464
filename,
539465
resourcedata,
@@ -542,10 +468,9 @@ def process_year(row):
542468
)
543469
assert success is True
544470
url = "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/main/tests/fixtures/empty.csv"
545-
success, results = dataset.download_and_generate_resource(
471+
success, results = dataset.download_generate_resource(
546472
downloader,
547473
url,
548-
TestDatasetResourceGeneration.hxltags,
549474
folder,
550475
filename,
551476
resourcedata,
@@ -555,23 +480,20 @@ def process_year(row):
555480
)
556481
assert success is False
557482
url = "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/main/tests/fixtures/gen_resource/test_data_no_data.csv"
558-
success, results = dataset.download_and_generate_resource(
483+
success, results = dataset.download_generate_resource(
559484
downloader,
560485
url,
561-
TestDatasetResourceGeneration.hxltags,
562486
folder,
563487
filename,
564488
resourcedata,
565489
header_insertions=[(0, "lala")],
566490
row_function=process_row,
567-
quickcharts=quickcharts,
568491
)
569492
assert success is False
570493
url = "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/main/tests/fixtures/gen_resource/test_data_no_years.csv"
571-
success, results = dataset.download_and_generate_resource(
494+
success, results = dataset.download_generate_resource(
572495
downloader,
573496
url,
574-
TestDatasetResourceGeneration.hxltags,
575497
folder,
576498
filename,
577499
resourcedata,

0 commit comments

Comments
 (0)