Skip to content

Commit 6a5489e

Browse files
fneum and lkstrp authored
update and process eurostat energy balances via API (PyPSA#1987)
* Reapply "process eurostat energy balances via API" This reverts commit 46c01e8. * get_energy_ratio: fix query * transformation_output_coke: handling * new rule build_eurostat_balances (performance) * get_energy_ratio: fix unit conversion * fix typo * fix typo * pre-commit fixes * schema fix * add to archive * change to build * Revert "change to build" This reverts commit 6dd2cdc. --------- Co-authored-by: lkstrp <lkstrp@pm.me>
1 parent b46858e commit 6a5489e

13 files changed

Lines changed: 277 additions & 466 deletions

data/versions.csv

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,10 @@ eu_nuts2013,2015-12-03,primary,latest supported,2025-12-02, ,https://gisco-servi
4646
eu_nuts2013,2015-12-03,archive,latest supported,2026-01-13,,https://data.pypsa.org/workflows/eur/eu_nuts2013/2015-12-03/ref-nuts-2013-03m.geojson.zip
4747
eu_nuts2021,2021-01-01,primary,latest supported,2025-12-02,,https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2021-01m.geojson.zip
4848
eu_nuts2021,2021-01-01,archive,latest supported,2026-01-13,,https://data.pypsa.org/workflows/eur/eu_nuts2021/2021-01-01/ref-nuts-2021-01m.geojson.zip
49-
eurostat_balances,2023-04,primary,latest supported broken-link,2025-12-02,"The link is broken, use the archived versions instead.",https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-April2023.zip
50-
eurostat_balances,2023-04,archive,latest supported,2026-01-13,,https://data.pypsa.org/workflows/eur/eurostat_balances/2023-04/balances.zip
49+
eurostat_balances,unknown,primary,latest supported,2026-02-04,,https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/nrg_bal_c?format=TSV&compressed=true
50+
eurostat_balances,2026-02,archive,latest supported,2026-02-09,,https://data.pypsa.org/workflows/eur/eurostat_balances/2026-02/nrg_bal_c
51+
eurostat_balances,2023-04,primary,not-supported,2025-12-02,"The link is broken, use the archived versions instead.",https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-April2023.zip
52+
eurostat_balances,2023-04,archive,not-supported,2026-01-13,,https://data.pypsa.org/workflows/eur/eurostat_balances/2023-04/balances.zip
5153
eurostat_household_balances,unknown,primary,latest supported,2025-12-02,"URL limits the period to 2013-2022, but the data is updated regularly.",https://ec.europa.eu/eurostat/databrowser-backend/api/extraction/1.0/LIVE/false/sdmx/csv/nrg_d_hhq__custom_11480365?startPeriod=2013&endPeriod=2022
5254
eurostat_household_balances,2025-07-09,archive,latest supported,2026-01-13,,https://data.pypsa.org/workflows/eur/eurostat_household_balances/2025-07-09/nrg_d_hhq.csv
5355
gdp_per_capita,2018-02-06,primary,latest supported,2025-12-02,"Primary link is a direct download, earlier part of zenodo bundle",https://zenodo.org/records/16556029/files/GDP_per_capita_PPP_1990_2015_v2.nc

doc/data_inventory.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"nitrogen_statistics","Nitrogen Statistics and Information","Statistics and information on the worldwide supply of, demand for, and flow of the mineral commodity nitrogen.","United States Geological Survey (USGS)","https://www.usgs.gov/centers/nmic/nitrogen-statistics-and-information","Public Domain"
99
"eu_nuts2013","Nomenclature of Territorial Units for Statistics (NUTS) 2013 - shapefiles","Shapefiles of EU's Nomenclature of Territorial Units for Statistics (NUTS) 2013, which is a hierarchical system for dividing up the economic territory of the European Union.","eurostat","https://ec.europa.eu/eurostat/web/nuts/overview","Reuse policy following 2011/833/EU"
1010
"eu_nuts2021","Nomenclature of Territorial Units for Statistics (NUTS) 2021 - shapefiles","Shapefiles of EU's Nomenclature of Territorial Units for Statistics (NUTS) 2021, which is a hierarchical system for dividing up the economic territory of the European Union.","eurostat","https://ec.europa.eu/eurostat/web/nuts/overview","Reuse policy following 2011/833/EU"
11-
"eurostat_balances","Energy Balances","European energy balances by country and fuel, as reported by Eurostat.","eurostat","https://ec.europa.eu/eurostat/data/database","Reuse policy following 2011/833/EU ; newer versions of the same data are available as CC-BY-4.0 through the eurostat API"
11+
"eurostat_balances","Energy Balances","European energy balances by country and fuel, as reported by Eurostat.","eurostat","https://ec.europa.eu/eurostat/data/database","CC-BY-4.0"
1212
"eurostat_household_balances","Eurostat Household Energy Balances","Disaggregated final energy consumption in household - quantities (nrg_d_hhq)","eurostat","https://ec.europa.eu/eurostat/databrowser/product/page/NRG_D_HHQ","CC-BY-4.0"
1313
"luisa_land_cover","The LUISA base map 2018","The LUISA Base Map 2018 is a high-resolution land use/land cover map developed and produced by the Joint Research Centre of the European Commission","European Commission Joint Research Centre","https://data.jrc.ec.europa.eu/dataset/51858b51-8f27-4006-bf82-53eba35a142c","CC-BY-4.0"
1414
"jrc_idees","JRC-IDEES-2021","The JRC-IDEES-2021 release contains a consistent set of disaggregated energy-economy-emissions data for each Member State of the European Union, covering all sectors of the energy system for the 2000-2021 period: industry, buildings, transport, and power generation.", "European Commission Joint Research Centre", "https://data.jrc.ec.europa.eu/dataset/82322924-506a-4c9a-8532-2bdd30d69bf5", "CC-BY-4.0"

doc/release_notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ Release Notes
99
Upcoming Release
1010
================
1111

12+
* Download and process Eurostat energy balances via the Eurostat API. This replaces the discontinued ZIP bulk download.
13+
14+
* The function ``rescale_idees_from_eurostat`` was removed.
15+
16+
* New rule ``build_eurostat_balances`` to build energy balances from Eurostat data. This step was previously part of ``build_energy_totals``.
1217
* Updated CO2 emission allowance prices data source to Instrat API for real-time pricing data.
1318
* Improved OSM network building process (https://github.com/PyPSA/pypsa-eur/pull/2030): Introducing support for temporal attributes (start_date, construction tags) and pure DC buses (switching stations). The interactive network map has been completely rebuilt using PyDeck/deck.gl with GPU acceleration and includes custom JS controls, offering fuzzy search, clickable OSM references, URL-based view sharing (#theme/zoom/lat/lon), and substantially improved performance. Additionally, a generalised plot from https://www.nature.com/articles/s41597-025-04550-7 enables systematic comparison of network topology changes over time.
1419

doc/sector.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ Rule ``build_tes_capacity_profiles``
117117

118118
.. automodule:: build_tes_capacity_profiles
119119

120+
Rule ``build_eurostat_balances``
121+
==============================================================================
122+
123+
.. automodule:: build_eurostat_balances
124+
120125
Rule ``build_energy_totals``
121126
==============================================================================
122127

rules/build_sector.smk

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,24 @@ rule build_solar_thermal_profiles:
780780
scripts("build_solar_thermal_profiles.py")
781781

782782

783+
rule build_eurostat_balances:
784+
message:
785+
"Building Eurostat energy balances"
786+
input:
787+
tsv_gz=rules.retrieve_eurostat_balances.output["tsv_gz"],
788+
output:
789+
csv=resources("eurostat_energy_balances.csv"),
790+
threads: 1
791+
resources:
792+
mem_mb=4000,
793+
log:
794+
logs("build_eurostat_balances.log"),
795+
benchmark:
796+
benchmarks("build_eurostat_balances")
797+
script:
798+
"../scripts/build_eurostat_balances.py"
799+
800+
783801
rule build_energy_totals:
784802
message:
785803
"Building energy totals"
@@ -793,7 +811,7 @@ rule build_energy_totals:
793811
swiss_transport=f"{BFS_ROAD_VEHICLE_STOCK_DATASET['folder']}/vehicle_stock.csv",
794812
idees=rules.retrieve_jrc_idees.output["directory"],
795813
district_heat_share="data/district_heat_share.csv",
796-
eurostat=rules.retrieve_eurostat_balances.output["directory"],
814+
eurostat=resources("eurostat_energy_balances.csv"),
797815
eurostat_households=rules.retrieve_eurostat_household_balances.output["csv"],
798816
output:
799817
transformation_output_coke=resources("transformation_output_coke.csv"),
@@ -859,7 +877,7 @@ rule build_biomass_potentials:
859877
biomass=config_provider("biomass"),
860878
input:
861879
enspreso_biomass=rules.retrieve_enspreso_biomass.output["xlsx"],
862-
eurostat=rules.retrieve_eurostat_balances.output["directory"],
880+
eurostat=resources("eurostat_energy_balances.csv"),
863881
nuts2=rules.retrieve_eu_nuts_2013.output["shapes_level_2"],
864882
regions_onshore=resources("regions_onshore_base_s_{clusters}.geojson"),
865883
nuts3_population=ancient(rules.retrieve_nuts3_population.output["gz"]),
@@ -1049,7 +1067,7 @@ rule build_industrial_production_per_country:
10491067
input:
10501068
ch_industrial_production="data/ch_industrial_production_per_subsector.csv",
10511069
ammonia_production=resources("ammonia_production.csv"),
1052-
eurostat=rules.retrieve_eurostat_balances.output["directory"],
1070+
eurostat=resources("eurostat_energy_balances.csv"),
10531071
jrc=rules.retrieve_jrc_idees.output["directory"],
10541072
output:
10551073
industrial_production_per_country=resources(
@@ -1594,7 +1612,7 @@ rule prepare_sector_network:
15941612
else []
15951613
),
15961614
network=resources("networks/base_s_{clusters}_elec_{opts}.nc"),
1597-
eurostat=rules.retrieve_eurostat_balances.output["directory"],
1615+
eurostat=resources("eurostat_energy_balances.csv"),
15981616
pop_weighted_energy_totals=resources(
15991617
"pop_weighted_energy_totals_s_{clusters}.csv"
16001618
),

rules/postprocess.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ rule plot_summary:
447447
costs=RESULTS + "csvs/costs.csv",
448448
energy=RESULTS + "csvs/energy.csv",
449449
balances=RESULTS + "csvs/energy_balance.csv",
450-
eurostat=rules.retrieve_eurostat_balances.output["directory"],
450+
eurostat=resources("eurostat_energy_balances.csv"),
451451
co2=rules.retrieve_ghg_emissions.output["csv"],
452452
output:
453453
costs=RESULTS + "graphs/costs.svg",

rules/retrieve.smk

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,11 @@ if (EUROSTAT_BALANCES_DATASET := dataset_version("eurostat_balances"))["source"]
3131
message:
3232
"Retrieving Eurostat balances data"
3333
input:
34-
zip_file=storage(EUROSTAT_BALANCES_DATASET["url"]),
34+
tsv_gz=storage(EUROSTAT_BALANCES_DATASET["url"]),
3535
output:
36-
zip_file=f"{EUROSTAT_BALANCES_DATASET['folder']}/balances.zip",
37-
directory=directory(EUROSTAT_BALANCES_DATASET["folder"]),
36+
tsv_gz=f"{EUROSTAT_BALANCES_DATASET['folder']}/estat_nrg_bal_c.tsv.gz",
3837
run:
39-
copy2(input["zip_file"], output["zip_file"])
40-
unpack_archive(output["zip_file"], output["directory"])
38+
copy2(input["tsv_gz"], output["tsv_gz"])
4139

4240

4341
if (

scripts/build_biomass_potentials.py

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import pandas as pd
1414

1515
from scripts._helpers import configure_logging, set_scenario_config
16-
from scripts.build_energy_totals import build_eurostat
1716

1817
logger = logging.getLogger(__name__)
1918
AVAILABLE_BIOMASS_YEARS = [2010, 2020, 2030, 2040, 2050]
@@ -246,7 +245,7 @@ def convert_nuts2_to_regions(bio_nuts2, regions):
246245
return bio_regions
247246

248247

249-
def add_unsustainable_potentials(df):
248+
def add_unsustainable_potentials(df, input_eurostat):
250249
"""
251250
Add unsustainable biomass potentials to the given dataframe. The difference
252251
between the data of JRC and Eurostat is assumed to be unsustainable
@@ -269,36 +268,29 @@ def add_unsustainable_potentials(df):
269268
else:
270269
latest_year = 2021
271270
idees_rename = {"GR": "EL", "GB": "UK"}
271+
year = max(min(latest_year, int(snakemake.wildcards.planning_horizons)), 1990) # noqa: F841
272272
df_unsustainable = (
273-
build_eurostat(
274-
countries=snakemake.config["countries"],
275-
input_eurostat=snakemake.input.eurostat,
276-
nprocesses=int(snakemake.threads),
277-
)
278-
.xs(
279-
max(min(latest_year, int(snakemake.wildcards.planning_horizons)), 1990),
280-
level=1,
281-
)
282-
.xs("Primary production", level=2)
283-
.droplevel([1, 2, 3])
273+
pd.read_csv(input_eurostat)
274+
.query("year == @year and nrg_bal == 'PPRD'") # Primary production
275+
.set_index(["country", "siec"])
276+
.value.unstack("siec")
284277
)
285278

286-
df_unsustainable.index = df_unsustainable.index.str.strip()
287279
df_unsustainable = df_unsustainable.rename(
288280
{v: k for k, v in idees_rename.items()}, axis=0
289281
)
290282

291283
bio_carriers = [
292-
"Primary solid biofuels",
293-
"Biogases",
294-
"Renewable municipal waste",
295-
"Pure biogasoline",
296-
"Blended biogasoline",
297-
"Pure biodiesels",
298-
"Blended biodiesels",
299-
"Pure bio jet kerosene",
300-
"Blended bio jet kerosene",
301-
"Other liquid biofuels",
284+
"R5110-5150_W6000RI", # Primary solid biofuels
285+
"R5300", # Biogases
286+
"W6210", # Renewable municipal waste
287+
"R5210P", # Pure biogasoline
288+
"R5210B", # Blended biogasoline
289+
"R5220P", # Pure biodiesels
290+
"R5220B", # Blended biodiesels
291+
"R5230P", # Pure bio jet kerosene
292+
"R5230B", # Blended bio jet kerosene
293+
"R5290", # Other liquid biofuels
302294
]
303295

304296
df_unsustainable = df_unsustainable[bio_carriers]
@@ -310,20 +302,26 @@ def add_unsustainable_potentials(df):
310302

311303
# Calculate unsustainable solid biomass
312304
df_wo_ch["unsustainable solid biomass"] = _calc_unsustainable_potential(
313-
df_wo_ch, df_unsustainable, share_unsus, "Primary solid biofuels"
305+
df_wo_ch,
306+
df_unsustainable,
307+
share_unsus,
308+
"R5110-5150_W6000RI", # Primary solid biofuels
314309
)
315310

316311
# Calculate unsustainable biogas
317312
df_wo_ch["unsustainable biogas"] = _calc_unsustainable_potential(
318-
df_wo_ch, df_unsustainable, share_unsus, "Biogases"
313+
df_wo_ch,
314+
df_unsustainable,
315+
share_unsus,
316+
"R5300", # Biogases
319317
)
320318

321319
# Calculate unsustainable bioliquids
322320
df_wo_ch["unsustainable bioliquids"] = _calc_unsustainable_potential(
323321
df_wo_ch,
324322
df_unsustainable,
325323
share_unsus,
326-
resource_type="gasoline|diesel|kerosene|liquid",
324+
resource_type="R5210|R5220|R5230|R5290", # gasoline, diesel, kerosene, liquids
327325
)
328326

329327
share_sus = params.get("share_sustainable_potential_available").get(investment_year)
@@ -391,7 +389,8 @@ def add_unsustainable_potentials(df):
391389
grouper = {v: k for k, vv in params["classes"].items() for v in vv}
392390
df = df.T.groupby(grouper).sum().T
393391

394-
df = add_unsustainable_potentials(df)
392+
input_eurostat = snakemake.input.eurostat
393+
df = add_unsustainable_potentials(df, input_eurostat)
395394

396395
df *= 1e6 # TWh/a to MWh/a
397396
df.index.name = "MWh/a"

0 commit comments

Comments
 (0)