From ad75a1d49aaa015182052e37245894f66bc736f3 Mon Sep 17 00:00:00 2001 From: Koen van Greevenbroek Date: Tue, 30 Jun 2026 15:31:02 -0700 Subject: [PATCH 1/2] feat: mirror land cover on Zenodo, drop Copernicus API key from builds The only CDS/ECMWF dependency was the satellite-land-cover download. The 2016+ C3S land-cover maps (baseline_year 2020) are CC-BY-4.0, so we now mirror the extracted lccs_class to Zenodo and fetch it with curl, removing the per-user Copernicus credential from all builds. - download_land_cover becomes a curl rule pulling the lccs_class file from the Zenodo record pinned by data.land_cover.zenodo_record (10.5281/zenodo.21085632); extract_land_cover_class and the 2.2GB zip step are gone. - credentials gate now requires only usda (schema, secrets.py, secrets.yaml.example, solve_namespace validation stub, CI stubs). - add tools/zenodo_publish.py (reusable Zenodo REST publisher) and tools/mirror_land_cover.py (CDS download -> extract -> publish, with --no-publish review drafts and --publish-record finalize). The former download/extract scripts are kept as the library functions these import. - docs: data_sources.rst (CC-BY-4.0, Zenodo retrieval, redistributing-datasets section), introduction.rst, development.rst. 
--- .github/workflows/test.yml | 2 - config/default.yaml | 8 +- config/schemas/config.schema.yaml | 24 +- config/secrets.yaml.example | 21 +- docs/data_sources.rst | 73 +++-- docs/development.rst | 2 +- docs/introduction.rst | 25 +- tools/mirror_land_cover.py | 273 +++++++++++++++++++ tools/zenodo_publish.py | 166 +++++++++++ workflow/rules/retrieve.smk | 42 ++- workflow/scripts/download_land_cover.py | 21 +- workflow/scripts/extract_land_cover_class.py | 13 +- workflow/scripts/solve_namespace.py | 1 - workflow/validation/secrets.py | 33 +-- 14 files changed, 573 insertions(+), 131 deletions(-) create mode 100644 tools/mirror_land_cover.py create mode 100644 tools/zenodo_publish.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f9216241..72f624a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -67,6 +67,4 @@ jobs: - name: Snakemake dryrun test env: USDA_API_KEY: dummy - ECMWF_DATASTORES_URL: https://cds.climate.copernicus.eu/api - ECMWF_DATASTORES_KEY: dummy run: pixi run --environment dev pytest -m integration -k dryrun -v diff --git a/config/default.yaml b/config/default.yaml index f9808b00..b350b248 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -1565,9 +1565,13 @@ data: fat: "Total lipid (fat)" cal: "Energy" land_cover: - # ECMWF credentials: configure in config/secrets.yaml or via environment variables - # See config/secrets.yaml.example for setup instructions + # Copernicus ESA CCI land cover. The map for `baseline_year` is mirrored on + # Zenodo (CC-BY-4.0) so normal builds need no Copernicus CDS API key. The + # `zenodo_record` is the numeric Zenodo record id hosting the mirrored + # `lccs_class` file; refresh it with tools/mirror_land_cover.py (see + # docs/data_sources.rst). 
version: "v2_1_1" + zenodo_record: "21085632" # https://doi.org/10.5281/zenodo.21085632 faostat: qcl_production_element_code: 5510 # "Production" in tonnes (QCL dataset, covers crops and livestock) fbs_food_supply_element_code: 645 # "Food supply quantity (kg/capita/yr)" in FBS dataset diff --git a/config/schemas/config.schema.yaml b/config/schemas/config.schema.yaml index 2770bb47..fb86c0b5 100644 --- a/config/schemas/config.schema.yaml +++ b/config/schemas/config.schema.yaml @@ -1361,12 +1361,16 @@ properties: type: string land_cover: type: object - required: [version] + required: [version, zenodo_record] additionalProperties: false properties: version: type: string description: "Version of land cover dataset" + zenodo_record: + type: string + minLength: 1 + description: "Zenodo record id hosting the mirrored lccs_class file (refresh with tools/mirror_land_cover.py)" faostat: type: object required: [qcl_production_element_code, fbs_food_supply_element_code, fbs_other_uses_element_code, fbs_production_element_code] @@ -2162,9 +2166,9 @@ properties: credentials: type: object - required: [usda, ecmwf] + required: [usda] additionalProperties: false - description: "API credentials for external data sources (configure via config/secrets.yaml or environment variables)" + description: "API credentials required by the build (configure via config/secrets.yaml or environment variables). Copernicus CDS credentials are NOT needed for builds; they are only used by tools/mirror_land_cover.py to refresh the mirrored land-cover data." 
properties: usda: type: object @@ -2175,20 +2179,6 @@ properties: type: string minLength: 1 description: "USDA FoodData Central API key (or set USDA_API_KEY env var)" - ecmwf: - type: object - required: [url, key] - additionalProperties: false - properties: - url: - type: string - minLength: 1 - format: uri - description: "ECMWF datastores API URL (or set ECMWF_DATASTORES_URL env var)" - key: - type: string - minLength: 1 - description: "ECMWF datastores API key (or set ECMWF_DATASTORES_KEY env var)" sensitivity: type: object diff --git a/config/secrets.yaml.example b/config/secrets.yaml.example index e6fa00a4..d87f073b 100644 --- a/config/secrets.yaml.example +++ b/config/secrets.yaml.example @@ -9,8 +9,12 @@ # # Alternatively, you can set environment variables instead: # export USDA_API_KEY="your-key" -# export ECMWF_DATASTORES_URL="https://cds.climate.copernicus.eu/api" -# export ECMWF_DATASTORES_KEY="your-key" +# +# Only the `usda` credential is required to build and solve the model. The +# `ecmwf` and `zenodo` credentials below are MAINTAINER-ONLY: they are used +# exclusively by tools/mirror_land_cover.py to refresh the Copernicus +# land-cover data mirrored on Zenodo. Ordinary builds download that data from +# Zenodo and need neither. credentials: usda: @@ -19,12 +23,21 @@ credentials: # For testing, you can use "DEMO_KEY" but it has very limited rate limits api_key: "DEMO_KEY" # Replace with your actual key + # MAINTAINER-ONLY (tools/mirror_land_cover.py). Safe to omit for builds. ecmwf: # ECMWF Climate Data Store credentials # Get your credentials from: https://cds.climate.copernicus.eu/api-how-to # You need to: # 1. Register at https://cds.climate.copernicus.eu/user/register # 2. Accept the dataset license at https://cds.climate.copernicus.eu/datasets/satellite-land-cover - # 3. Get your UID and API key from your profile page + # 3. 
Get your personal access token from your profile page + # Or set ECMWF_DATASTORES_URL / ECMWF_DATASTORES_KEY env vars instead. url: "https://cds.climate.copernicus.eu/api" - key: "UID:API_KEY" # Replace with your actual credentials + key: "API_KEY" # Replace with your actual token + + # MAINTAINER-ONLY (tools/mirror_land_cover.py). Safe to omit for builds. + zenodo: + # Zenodo personal access token with the "deposit:write" and + # "deposit:actions" scopes. Create one at https://zenodo.org/account/settings/applications/tokens/new/ + # Or set ZENODO_TOKEN env var instead. + token: "ZENODO_TOKEN" # Replace with your actual token diff --git a/docs/data_sources.rst b/docs/data_sources.rst index 176f944a..9e1e161c 100644 --- a/docs/data_sources.rst +++ b/docs/data_sources.rst @@ -24,9 +24,7 @@ Several licensed datasets cannot be fetched automatically. While their use is fr 3. Download the IHME 2023 dietary risk exposure estimates (two archives, ``IHME_GBD_2023_RISK_EXPOSURE_DIET_1`` and ``_2``) (:ref:`ihme-diet-risk-exposure`). 4. Obtain the **GDD-IA** intake CSVs by personal request to the Global Dietary Database team and place them as ``data/manually_downloaded/GDD-IA-intake_grams_{year}.csv`` and ``data/manually_downloaded/GDD-IA-intake_kcals_{year}.csv`` (:ref:`gdd-ia-dietary-intake`). -**Required API key setup:** - -5. Register for a Copernicus Climate Data Store account and configure your API key to enable automatic retrieval of land cover data (:ref:`copernicus-land-cover`). +No Copernicus/ECMWF API key is required: the land cover data is fetched from a Zenodo mirror (:ref:`copernicus-land-cover`). The only API credential needed for an automated build is the USDA FoodData Central key (see :doc:`introduction`). .. _weight-bases: @@ -560,28 +558,23 @@ Copernicus Satellite Land Cover **Description**: Global land cover classification gridded maps from 1992 to present derived from satellite observations. 
The dataset describes the land surface into 22 classes including various vegetation types, water bodies, built-up areas, and bare land. -**Version**: v2.1.1 (2016 onwards); NetCDF format via the Copernicus Climate Data Store API +**Version**: v2.1.1 (2016 onwards); NetCDF format **Coverage**: * Spatial: Global (Plate Carree projection), 300 m resolution * Temporal: Annual (with approximately one-year publication delay) -**Access**: https://cds.climate.copernicus.eu/datasets/satellite-land-cover (`API documentation `__) - -**License**: Multiple licenses apply including ESA CCI licence, CC-BY licence, and VITO licence. Users must also cite the Climate Data Store entry and provide attribution to the Copernicus program. (`Terms of use `__) +**Access**: Original source: https://cds.climate.copernicus.eu/datasets/satellite-land-cover. For builds, GLADE downloads a mirror of the single year/version it needs from Zenodo (see *Retrieval* below), so no Copernicus account or API key is required. -**Citation**: Copernicus Climate Change Service, Climate Data Store, (2019): Land cover classification gridded maps from 1992 to present derived from satellite observation. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). https://doi.org/10.24381/cds.006f2c9a +**License**: CC-BY-4.0. The 2016-onwards C3S maps (which is what GLADE uses, since ``baseline_year`` is 2020) are released under the Creative Commons Attribution 4.0 International licence, as stated in the authoritative C3S/Copernicus metadata. This permits redistribution provided the Copernicus attribution and source DOI are retained; both are embedded in the Zenodo deposition. (The CDS download page also bundles the ESA CCI licence -- which governs the pre-2016 v2.0.7 maps that GLADE does not use -- and the VITO licence, which restricts only near-real-time PROBA-V products, not historical annual maps.) -**Retrieval**: Automatic via the ``download_land_cover`` and ``extract_land_cover_class`` Snakemake rules. 
The full dataset (~2.2GB) contains multiple variables but only the land cover classification (``lccs_class``) is needed. The extraction rule outputs ``data/downloads/land_cover_lccs_class.nc`` (~440MB) and deletes the full download. +**Required attribution**: "Generated using Copernicus Climate Change Service information 2020. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains." -**Manual setup required**: +**Citation**: Copernicus Climate Change Service, Climate Data Store, (2019): Land cover classification gridded maps from 1992 to present derived from satellite observation. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). https://doi.org/10.24381/cds.006f2c9a -1. Register for a free CDS account at https://cds.climate.copernicus.eu/user/register -2. Accept the required dataset licenses at https://cds.climate.copernicus.eu/datasets/satellite-land-cover?tab=download#manage-licences -3. Obtain an API key from your account settings -4. Configure the API key in ``~/.ecmwfdatastoresrc`` or via environment variables (see API documentation for setup instructions) +**Retrieval**: Automatic via the ``download_land_cover`` Snakemake rule, which uses ``curl`` to fetch the pre-extracted land cover classification (``lccs_class`` only, ~320 MB NetCDF) from our Zenodo mirror -- no API key needed. The rule writes ``data/downloads/land_cover_lccs_class.nc``. The mirror itself is produced from the upstream CDS dataset by the maintainer tool ``tools/mirror_land_cover.py`` (see :ref:`redistributing-datasets`). -**Configuration**: The land cover year is derived from the top-level ``baseline_year`` parameter. The version can be configured via ``config['data']['land_cover']['version']`` (default: v2_1_1). +**Configuration**: The land cover year is derived from the top-level ``baseline_year`` parameter, and the version from ``config['data']['land_cover']['version']`` (default: v2_1_1). 
The mirror to download from is pinned by ``config['data']['land_cover']['zenodo_record']`` (the numeric Zenodo record id); the download URL and file name are derived from these three values. **Usage**: Spatial analysis of agricultural land availability and land use constraints. @@ -1065,7 +1058,7 @@ Most datasets used in this project require attribution. Some disallow redistribu **Open licenses (attribution required, redistribution allowed)**: * **CC0 1.0 / Public domain** (USDA FoodData Central, IFA FUBC, BLS CPI-U): No restrictions; attribution requested -* **CC BY 4.0** (GAEZ, FAOSTAT, GLEAM 3.0 Feed Intake, SoilGrids, Cook-Patton, LUIcube, LAMASUS, ISIMIP2a / LPJmL grassland yield): Requires attribution +* **CC BY 4.0** (GAEZ, FAOSTAT, GLEAM 3.0 Feed Intake, SoilGrids, Cook-Patton, LUIcube, LAMASUS, ISIMIP2a / LPJmL grassland yield, Copernicus Land Cover 2016+): Requires attribution * **CC BY 3.0 IGO** (UN WPP): Requires attribution to UN * **CC BY** (USDA Costs, USDA Livestock Costs): Requires attribution * **Eurostat copyright** (Eurostat apro_cpsh1): Free reuse with attribution @@ -1077,4 +1070,50 @@ Most datasets used in this project require attribution. Some disallow redistribu * **Pending publication — CC-BY-NC on release** (GDD-IA): Available upon personal request from Marco Springmann; will be re-licensed under CC-BY-NC when published * **Non-commercial with attribution** (GADM, FADN): Free for academic/non-commercial use; GADM prohibits redistribution, FADN requires EU attribution * **FAO terms** (GLEAM 3.0 Supplement, FAO Nutrient Conversion): Non-commercial reuse with FAO acknowledgement; commercial use requires prior permission -* **Custom terms** (ESA Biomass CCI, Copernicus Land Cover, Water Footprint Network): Various provider-specific terms; see individual entries above +* **Custom terms** (ESA Biomass CCI, Water Footprint Network): Various provider-specific terms; see individual entries above + +.. 
_redistributing-datasets: + +Redistributing datasets via Zenodo +---------------------------------- + +Some upstream datasets are free to use but sit behind an API key or registration +wall (historically the Copernicus land cover data). Where the licence permits +redistribution, GLADE mirrors the exact slice it needs to `Zenodo +<https://zenodo.org>`__ and downloads it during builds with a plain HTTP +request. This removes the per-user credential, pins an immutable, citable +version (each Zenodo version has its own DOI and record id), and gives a single +reusable pattern for any future dataset in the same situation. + +The components are: + +* ``tools/zenodo_publish.py`` -- a dataset-agnostic helper that creates (or + versions) a Zenodo deposition, uploads files, sets metadata, and publishes via + the Zenodo REST API. Reuse it for any redistributable dataset. +* ``tools/mirror_land_cover.py`` -- the land-cover-specific maintainer tool. It + downloads ``satellite-land-cover`` from the Copernicus CDS, extracts + ``lccs_class``, and publishes it to Zenodo under CC-BY-4.0 with the required + Copernicus attribution baked into the deposition metadata. +* The ``download_land_cover`` build rule, which ``curl``\ s the mirrored file + from the record pinned by ``config['data']['land_cover']['zenodo_record']``. + +**Before mirroring a new dataset**, confirm its licence actually permits +redistribution (CC-BY / CC0 / public domain are safe; "use only" or +non-commercial-no-redistribution terms are not) and record the required +attribution in the deposition metadata. 
+ +**Refreshing the land cover mirror** (maintainer, requires a Copernicus CDS +token and a Zenodo token -- see ``config/secrets.yaml.example``):: + + # Optional dry-run against the Zenodo sandbox (leaves an unpublished draft): + pixi run -e dev python tools/mirror_land_cover.py --sandbox --no-publish + + # First publication (creates a new Zenodo record): + pixi run -e dev python tools/mirror_land_cover.py + + # New data version (publishes a new version of an existing record): + pixi run -e dev python tools/mirror_land_cover.py --parent-record <record-id> + +The tool prints the published record id; set it as +``config['data']['land_cover']['zenodo_record']`` in ``config/default.yaml`` and +commit that change so builds pick up the new mirror. diff --git a/docs/development.rst b/docs/development.rst index 8b21fcd2..fccc15ae 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -207,7 +207,7 @@ How It Works Tests call a shared helper ``run_snakemake_target()`` in ``tests/conftest.py`` that invokes the Snakemake Python API directly (no subprocess). The helper layers ``tests/config/test.yaml`` on top of ``config/default.yaml`` and targets specific output files. * **Dryrun test** (``test_workflow_dryrun``): Validates full DAG construction with ``forceall=True`` without executing any rule. Makes no API calls, but the startup credential gate (presence-only, so dummy values suffice) and the manually-downloaded source files must still be satisfied for the DAG to resolve. Catches missing inputs, broken rules, and invalid wildcard patterns. -* **Execution test** (``test_build_solve_analyze``): Runs the actual pipeline through analysis for the default scenario. Requires USDA/ECMWF credentials for data downloads on first run. +* **Execution test** (``test_build_solve_analyze``): Runs the actual pipeline through analysis for the default scenario. Requires a USDA credential for data downloads on first run. 
* **Plot test** (``test_plots``): Generates representative plots from solved model outputs. Tests never delete ``results/test/`` or ``.snakemake/``; Snakemake detects up-to-date outputs and skips them automatically, so subsequent runs are near-instant when code hasn't changed. diff --git a/docs/introduction.rst b/docs/introduction.rst index 6c6af6c8..33e8b888 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -119,14 +119,17 @@ manually: publication (will be released under CC-BY-NC). See :doc:`current_diets` and the :ref:`gdd-ia-dietary-intake` entry in :doc:`data_sources`. -Two API credentials are needed for automatic downloads: +Only one API credential matters for automatic downloads: -* **Copernicus Climate Data Store** — required for satellite land-cover data. - Register at https://cds.climate.copernicus.eu/user/register, accept the - land-cover dataset licence, and copy the API key from your profile. -* **USDA FoodData Central** — optional; the repository ships pre-fetched - nutritional data. A free key from https://fdc.nal.usda.gov/api-key-signup - is only needed if you want to refresh that data. +* **USDA FoodData Central** — a free key from + https://fdc.nal.usda.gov/api-key-signup. The repository ships pre-fetched + nutritional data, so this is only needed if you want to refresh it; for a + default build ``DEMO_KEY`` suffices. + +No Copernicus/ECMWF account is required: the satellite land-cover data is +fetched from a Zenodo mirror (see :ref:`copernicus-land-cover`). A Copernicus +CDS token is only needed by maintainers refreshing that mirror with +``tools/mirror_land_cover.py`` (see :ref:`redistributing-datasets`). Installation ------------ @@ -169,14 +172,12 @@ Installation cp config/secrets.yaml.example config/secrets.yaml - Edit ``config/secrets.yaml`` and fill in your ECMWF Climate Data Store - credentials (and optionally the USDA key). 
Alternatively, set the - equivalent environment variables: + Edit ``config/secrets.yaml`` and fill in your USDA key (or leave the + ``DEMO_KEY`` default for a standard build). Alternatively, set the + equivalent environment variable: .. code-block:: bash - export ECMWF_DATASTORES_URL="https://cds.climate.copernicus.eu/api" - export ECMWF_DATASTORES_KEY="your-uid:your-api-key" export USDA_API_KEY="your-usda-api-key" 4. **Download the manually-licensed datasets**: follow the diff --git a/tools/mirror_land_cover.py b/tools/mirror_land_cover.py new file mode 100644 index 00000000..6720e865 --- /dev/null +++ b/tools/mirror_land_cover.py @@ -0,0 +1,273 @@ +# SPDX-FileCopyrightText: 2026 Koen van Greevenbroek +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Mirror the Copernicus ESA CCI land-cover map onto Zenodo. + +This is a MAINTAINER tool, not part of the build. It: + +1. downloads the ``satellite-land-cover`` dataset for a given year/version from + the Copernicus Climate Data Store (needs an ECMWF/CDS token), +2. extracts the ``lccs_class`` variable (the only one the model uses), and +3. uploads the result to Zenodo under CC-BY-4.0 with the Copernicus attribution + required by the dataset licence. + +Ordinary builds then fetch the file from Zenodo with no API key (see the +``download_land_cover`` rule and ``config['data']['land_cover']['zenodo_record']``). + +The 2016-onwards C3S land-cover maps are licensed CC-BY-4.0, which permits this +redistribution provided the Copernicus attribution and source DOI are retained; +both are embedded in the Zenodo deposition metadata below. 
+ +Run inside the project environment, e.g.:: + + # First publication (creates a new Zenodo record): + pixi run -e dev python tools/mirror_land_cover.py + + # Refresh / new data version (new version of an existing record): + pixi run -e dev python tools/mirror_land_cover.py --parent-record 1234567 + + # Dry-run against the Zenodo sandbox, leaving an unpublished draft: + pixi run -e dev python tools/mirror_land_cover.py --sandbox --no-publish + +Credentials are read from environment variables (``ECMWF_DATASTORES_URL`` / +``ECMWF_DATASTORES_KEY`` / ``ZENODO_TOKEN``) or from ``config/secrets.yaml`` +(``credentials.ecmwf`` and ``credentials.zenodo``). + +After publishing, set ``config['data']['land_cover']['zenodo_record']`` in +``config/default.yaml`` to the printed record id. +""" + +import argparse +import os +from pathlib import Path +import sys + +import yaml + +# Source DOI of the Copernicus CDS land-cover dataset (all versions/years). +SOURCE_DOI = "10.24381/cds.006f2c9a" + +# Attribution wording required by the Copernicus product licence (CC-BY-4.0). +COPERNICUS_ATTRIBUTION = ( + "Generated using Copernicus Climate Change Service information {year}. " + "Neither the European Commission nor ECMWF is responsible for any use that " + "may be made of the Copernicus information or data it contains." +) + +PROJECT_ROOT = Path(__file__).resolve().parent.parent + + +def _load_secret(env_var: str, *yaml_path: str) -> str | None: + """Return a secret from an env var, falling back to config/secrets.yaml. + + ``yaml_path`` is the nested key path under the top-level ``credentials`` + mapping, e.g. ``("ecmwf", "key")``. 
+ """ + if value := os.getenv(env_var): + return value + + secrets_file = PROJECT_ROOT / "config" / "secrets.yaml" + if not secrets_file.exists(): + return None + with open(secrets_file) as handle: + secrets = yaml.safe_load(handle) or {} + node = secrets.get("credentials", {}) + for key in yaml_path: + if not isinstance(node, dict): + return None + node = node.get(key) + return node if isinstance(node, str) else None + + +def _default_year_and_version() -> tuple[int, str]: + """Read the baseline year and land-cover version from config/default.yaml.""" + with open(PROJECT_ROOT / "config" / "default.yaml") as handle: + config = yaml.safe_load(handle) + return config["baseline_year"], config["data"]["land_cover"]["version"] + + +def _build_metadata(year: int, version: str) -> dict: + """Zenodo deposition metadata carrying the required Copernicus attribution.""" + attribution = COPERNICUS_ATTRIBUTION.format(year=year) + description = ( + f"

Land cover classification (lccs_class variable only) " + f"for {year}, Copernicus ESA CCI land cover version {version}, at 300 m " + f"global resolution. Extracted from the Copernicus Climate Data Store " + f"satellite-land-cover dataset and redistributed here so " + f"that the GLADE model can be built without a Copernicus CDS API key.

" + f"

This is a mirror of a third-party product, provided under the " + f"Creative Commons Attribution 4.0 International licence (CC-BY-4.0).

" + f"

Attribution: {attribution}

" + f"

Source: Copernicus Climate Change Service, Climate " + f"Data Store (2019): Land cover classification gridded maps from 1992 to " + f"present derived from satellite observation. " + f"DOI: https://doi.org/{SOURCE_DOI}

" + ) + return { + "title": ( + f"Copernicus ESA CCI land cover (lccs_class), {year}, " + f"{version} - GLADE mirror" + ), + "upload_type": "dataset", + "description": description, + "creators": [ + {"name": "Copernicus Climate Change Service (C3S)"}, + {"name": "UCLouvain"}, + ], + "license": "cc-by-4.0", + "access_right": "open", + "keywords": ["land cover", "ESA CCI", "Copernicus", "C3S", "GLADE"], + "related_identifiers": [ + { + "identifier": SOURCE_DOI, + "relation": "isDerivedFrom", + "scheme": "doi", + "resource_type": "dataset", + } + ], + "notes": attribution, + } + + +def main() -> None: + default_year, default_version = _default_year_and_version() + + parser = argparse.ArgumentParser( + description="Mirror the Copernicus ESA CCI land-cover map onto Zenodo.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--year", type=int, default=default_year, help="Land-cover map year." + ) + parser.add_argument( + "--version", default=default_version, help="ESA CCI land-cover version." + ) + parser.add_argument( + "--parent-record", + default=None, + help="Existing Zenodo record id to publish a new version of (refresh).", + ) + parser.add_argument( + "--work-dir", + type=Path, + default=PROJECT_ROOT / "data" / "downloads", + help="Directory for the downloaded archive and extracted NetCDF.", + ) + parser.add_argument( + "--sandbox", + action="store_true", + help="Use sandbox.zenodo.org instead of the production service.", + ) + parser.add_argument( + "--no-publish", + dest="publish", + action="store_false", + help="Leave the Zenodo deposition as an unpublished draft for review.", + ) + parser.add_argument( + "--skip-download", + action="store_true", + help="Reuse an existing extracted NetCDF in --work-dir (skip the CDS download).", + ) + parser.add_argument( + "--publish-record", + default=None, + help=( + "Publish an existing draft deposition by record id (e.g. after " + "reviewing a draft created with --no-publish) and exit. 
Irreversible." + ), + ) + args = parser.parse_args() + + # Make the workflow scripts and sibling tools importable. + sys.path.insert(0, str(PROJECT_ROOT / "workflow" / "scripts")) + sys.path.insert(0, str(PROJECT_ROOT / "tools")) + import download_land_cover + import extract_land_cover_class + from zenodo_publish import publish_dataset, publish_draft + + if args.publish_record: + zenodo_token = _load_secret("ZENODO_TOKEN", "zenodo", "token") + if not zenodo_token: + parser.error( + "Missing Zenodo token. Set ZENODO_TOKEN, or credentials.zenodo.token " + "in config/secrets.yaml." + ) + result = publish_draft(zenodo_token, args.publish_record, sandbox=args.sandbox) + print("Published.") + print(f" record id : {result['record_id']}") + print(f" doi : {result['doi']}") + print( + "\nSet this in config/default.yaml under data.land_cover:\n" + f' zenodo_record: "{result["record_id"]}"' + ) + return + + args.work_dir.mkdir(parents=True, exist_ok=True) + target_name = f"land_cover_lccs_class_{args.year}_{args.version}.nc" + extracted = args.work_dir / target_name + + if args.skip_download: + if not extracted.exists(): + parser.error(f"--skip-download given but {extracted} does not exist") + print(f"Reusing existing {extracted}") + else: + ecmwf_url = _load_secret("ECMWF_DATASTORES_URL", "ecmwf", "url") + ecmwf_key = _load_secret("ECMWF_DATASTORES_KEY", "ecmwf", "key") + if not ecmwf_url or not ecmwf_key: + parser.error( + "Missing Copernicus CDS credentials. Set ECMWF_DATASTORES_URL and " + "ECMWF_DATASTORES_KEY, or credentials.ecmwf.{url,key} in " + "config/secrets.yaml." 
+ ) + + archive = args.work_dir / f"land_cover_{args.year}_{args.version}.zip" + print(f"Downloading satellite-land-cover {args.version} for {args.year} ...") + download_land_cover.main( + dataset="satellite-land-cover", + request={ + "variable": "all", + "year": [str(args.year)], + "version": [args.version], + }, + output=archive, + url=ecmwf_url, + key=ecmwf_key, + ) + print(f"Extracting lccs_class -> {extracted} ...") + extract_land_cover_class.main(input_path=archive, output_path=extracted) + archive.unlink(missing_ok=True) + + zenodo_token = _load_secret("ZENODO_TOKEN", "zenodo", "token") + if not zenodo_token: + parser.error( + "Missing Zenodo token. Set ZENODO_TOKEN, or credentials.zenodo.token " + "in config/secrets.yaml." + ) + + print("Publishing to Zenodo ...") + result = publish_dataset( + token=zenodo_token, + files=[extracted], + metadata=_build_metadata(args.year, args.version), + parent_record=args.parent_record, + sandbox=args.sandbox, + publish=args.publish, + ) + + print("\nDone.") + print(f" record id : {result['record_id']}") + print(f" doi : {result['doi']}") + if result["draft"]: + edit_link = result["links"].get("html") or result["links"].get("latest_draft") + print(f" status : DRAFT (not published) - review at {edit_link}") + else: + print( + "\nSet this in config/default.yaml under data.land_cover:\n" + f' zenodo_record: "{result["record_id"]}"' + ) + + +if __name__ == "__main__": + main() diff --git a/tools/zenodo_publish.py b/tools/zenodo_publish.py new file mode 100644 index 00000000..d2f60133 --- /dev/null +++ b/tools/zenodo_publish.py @@ -0,0 +1,166 @@ +# SPDX-FileCopyrightText: 2026 Koen van Greevenbroek +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Reusable Zenodo REST API publisher for redistributing input datasets. + +This module is dataset-agnostic: it creates (or versions) a Zenodo deposition, +uploads one or more files, sets metadata, and optionally publishes. 
Use it for +any dataset whose licence permits redistribution but whose original source is +behind an API key or registration wall, so that ordinary builds can fetch a +mirror with a plain HTTP download. + +The first consumer is tools/mirror_land_cover.py. See docs/data_sources.rst for +the overall mirroring workflow. + +Requires a Zenodo personal access token with the ``deposit:write`` and +``deposit:actions`` scopes (create one at +https://zenodo.org/account/settings/applications/tokens/new/). +""" + +from pathlib import Path + +import requests + +ZENODO_BASE = "https://zenodo.org" +ZENODO_SANDBOX_BASE = "https://sandbox.zenodo.org" + + +def _raise_for_status(response: requests.Response) -> None: + """Raise a descriptive error including the Zenodo response body.""" + if not response.ok: + raise RuntimeError( + f"Zenodo API error {response.status_code} for " + f"{response.request.method} {response.url}: {response.text}" + ) + + +def publish_draft(token: str, record_id: str, *, sandbox: bool = False) -> dict: + """Publish an existing draft deposition (e.g. after manual review). + + This finalizes a draft created earlier with ``publish=False`` without + re-uploading anything. Publishing is irreversible. + + Returns ``{"record_id": str, "doi": str | None, "draft": False, "links": dict}``. 
+ """ + base = ZENODO_SANDBOX_BASE if sandbox else ZENODO_BASE + session = requests.Session() + session.headers["Authorization"] = f"Bearer {token}" + response = session.post( + f"{base}/api/deposit/depositions/{record_id}/actions/publish" + ) + _raise_for_status(response) + deposition = response.json() + return { + "record_id": str(deposition["id"]), + "doi": deposition.get("doi"), + "draft": False, + "links": deposition.get("links", {}), + } + + +def publish_dataset( + token: str, + files: list[Path], + metadata: dict, + *, + parent_record: str | None = None, + sandbox: bool = False, + publish: bool = True, +) -> dict: + """Create or version a Zenodo deposition, upload files, set metadata, publish. + + Parameters + ---------- + token + Zenodo personal access token (deposit:write + deposit:actions scopes). + files + Files to upload as the deposition content. When versioning, these + replace any files inherited from the previous version. + metadata + Zenodo deposition metadata (the value of the API "metadata" key), e.g. + title, upload_type, description, creators, license, access_right. + parent_record + If given, create a new version of this existing published record id + (used to refresh a mirror). Otherwise create a brand-new deposition. + sandbox + Target sandbox.zenodo.org instead of the production service. + publish + Publish the deposition (irreversible). If False, leave it as a draft + for manual review in the Zenodo web UI. + + Returns + ------- + dict + ``{"record_id": str, "doi": str | None, "draft": bool, "links": dict}``. + """ + base = ZENODO_SANDBOX_BASE if sandbox else ZENODO_BASE + api = f"{base}/api" + + # Authenticate via a bearer header rather than a query parameter so the + # token never ends up in a URL, error message, or log line. 
+ session = requests.Session() + session.headers["Authorization"] = f"Bearer {token}" + json_headers = {"Content-Type": "application/json"} + + if parent_record: + # Open a new-version draft from the latest published record. + response = session.post( + f"{api}/deposit/depositions/{parent_record}/actions/newversion", + ) + _raise_for_status(response) + latest_draft_url = response.json()["links"]["latest_draft"] + response = session.get(latest_draft_url) + _raise_for_status(response) + deposition = response.json() + else: + response = session.post( + f"{api}/deposit/depositions", json={}, headers=json_headers + ) + _raise_for_status(response) + deposition = response.json() + + deposition_id = deposition["id"] + bucket_url = deposition["links"]["bucket"] + + # When versioning, drop files inherited from the previous version so the new + # version contains exactly `files`. + for existing in deposition.get("files", []): + response = session.delete( + f"{api}/deposit/depositions/{deposition_id}/files/{existing['id']}", + ) + _raise_for_status(response) + + # Upload via the bucket API (streams large files without multipart overhead). + for file_path in files: + file_path = Path(file_path) + with open(file_path, "rb") as handle: + response = session.put(f"{bucket_url}/{file_path.name}", data=handle) + _raise_for_status(response) + + # Attach metadata. 
+ response = session.put( + f"{api}/deposit/depositions/{deposition_id}", + headers=json_headers, + json={"metadata": metadata}, + ) + _raise_for_status(response) + deposition = response.json() + + prereserved = deposition.get("metadata", {}).get("prereserved_doi") + doi = prereserved.get("doi") if isinstance(prereserved, dict) else None + + if publish: + response = session.post( + f"{api}/deposit/depositions/{deposition_id}/actions/publish" + ) + _raise_for_status(response) + deposition = response.json() + doi = deposition.get("doi", doi) + + return { + "record_id": str(deposition_id), + "doi": doi, + "draft": not publish, + "links": deposition.get("links", {}), + } diff --git a/workflow/rules/retrieve.smk b/workflow/rules/retrieve.smk index 6181b749..5c070144 100644 --- a/workflow/rules/retrieve.smk +++ b/workflow/rules/retrieve.smk @@ -970,40 +970,30 @@ rule download_luicube_grassland: rule download_land_cover: + # Copernicus ESA CCI land cover (lccs_class only) for the baseline year, + # fetched from our Zenodo mirror (CC-BY-4.0) so builds need no Copernicus + # CDS API key. Refresh the mirror with tools/mirror_land_cover.py and update + # config['data']['land_cover']['zenodo_record'] to the new record id. 
output: - temp("data/downloads/land_cover.zip"), + "data/downloads/land_cover_lccs_class.nc", params: - dataset="satellite-land-cover", - request={ - "variable": "all", - "year": [str(config["baseline_year"])], - "version": [config["data"]["land_cover"]["version"]], - }, + url=( + f"https://zenodo.org/records/{config['data']['land_cover']['zenodo_record']}" + f"/files/land_cover_lccs_class_{config['baseline_year']}" + f"_{config['data']['land_cover']['version']}.nc?download=1" + ), resources: - runtime="60m", + runtime="30m", mem_mb=500, log: "/shared/download_land_cover.log", benchmark: "/shared/download_land_cover.tsv" - script: - "../scripts/download_land_cover.py" - - -rule extract_land_cover_class: - input: - "data/downloads/land_cover.zip", - output: - "data/downloads/land_cover_lccs_class.nc", - resources: - runtime="15m", - mem_mb=13000, - log: - "/shared/extract_land_cover_class.log", - benchmark: - "/shared/extract_land_cover_class.tsv" - script: - "../scripts/extract_land_cover_class.py" + shell: + r""" + mkdir -p "$(dirname {output})" + curl -L --fail --progress-bar -o "{output}" "{params.url}" > {log} 2>&1 + """ rule download_biomass_cci: diff --git a/workflow/scripts/download_land_cover.py b/workflow/scripts/download_land_cover.py index 5cd815f8..f4bdf062 100644 --- a/workflow/scripts/download_land_cover.py +++ b/workflow/scripts/download_land_cover.py @@ -4,12 +4,11 @@ """Download land cover data using the ECMWF datastores client. -Credentials are sourced from config/secrets.yaml or environment variables -(ECMWF_DATASTORES_URL and ECMWF_DATASTORES_KEY). No longer relies on -the ~/.ecmwfdatastoresrc configuration file. - -Snakemake passes the ``snakemake`` object into this module; no standalone CLI -usage is supported. +This is a library module used by the maintainer tool +``tools/mirror_land_cover.py`` (which imports :func:`main`). It is no longer +wired into a Snakemake rule: ordinary builds fetch the mirrored data from +Zenodo. 
Credentials are passed in by the caller, sourced from environment +variables (ECMWF_DATASTORES_URL / ECMWF_DATASTORES_KEY) or config/secrets.yaml. """ from pathlib import Path @@ -37,13 +36,3 @@ def main(dataset: str, request: dict, output: Path, url: str, key: str) -> None: client = Client(url=url, key=key) client.retrieve(dataset, request, target=str(output)) - - -if __name__ == "__main__": - main( - dataset=snakemake.params.dataset, - request=snakemake.params.request, - output=Path(snakemake.output[0]), - url=snakemake.config["credentials"]["ecmwf"]["url"], - key=snakemake.config["credentials"]["ecmwf"]["key"], - ) diff --git a/workflow/scripts/extract_land_cover_class.py b/workflow/scripts/extract_land_cover_class.py index f1e9de2e..64da347d 100644 --- a/workflow/scripts/extract_land_cover_class.py +++ b/workflow/scripts/extract_land_cover_class.py @@ -10,8 +10,10 @@ This script extracts just that variable to reduce file size from ~2.2GB to ~440MB. It operates on the ZIP archive distributed by CDS, which bundles the NetCDF file. -Snakemake passes the ``snakemake`` object into this module; no standalone CLI -usage is supported. +This is a library module used by the maintainer tool +``tools/mirror_land_cover.py`` (which imports :func:`main`). It is no longer +wired into a Snakemake rule: ordinary builds fetch the already-extracted file +from Zenodo. 
""" from pathlib import Path @@ -74,10 +76,3 @@ def main(input_path: Path, output_path: Path) -> None: finally: if cleanup_dir is not None: cleanup_dir.cleanup() - - -if __name__ == "__main__": - main( - input_path=Path(snakemake.input[0]), - output_path=Path(snakemake.output[0]), - ) diff --git a/workflow/scripts/solve_namespace.py b/workflow/scripts/solve_namespace.py index a629c19d..204a7f16 100644 --- a/workflow/scripts/solve_namespace.py +++ b/workflow/scripts/solve_namespace.py @@ -162,7 +162,6 @@ def validate_scenario_config_schemas( if not merged.get("credentials"): merged["credentials"] = { "usda": {"api_key": "unused"}, - "ecmwf": {"url": "unused", "key": "unused"}, } try: validate_config_schema(merged, Path(project_root)) diff --git a/workflow/validation/secrets.py b/workflow/validation/secrets.py index 471424b5..20aeca36 100644 --- a/workflow/validation/secrets.py +++ b/workflow/validation/secrets.py @@ -11,15 +11,18 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: - """Load API credentials from secrets file or environment variables. + """Load build-time API credentials from secrets file or environment variables. Environment variables take precedence over the secrets file. This allows overriding file-based credentials in CI/CD or testing environments. + Only USDA credentials are required to build and solve the model. Copernicus + CDS credentials are not part of the build: the land-cover data is fetched + from a Zenodo mirror, and the CDS key is only needed by + tools/mirror_land_cover.py when refreshing that mirror. 
+ Environment variables: USDA_API_KEY: USDA FoodData Central API key - ECMWF_DATASTORES_URL: ECMWF datastores API URL - ECMWF_DATASTORES_KEY: ECMWF datastores API key Parameters ---------- @@ -31,8 +34,7 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: dict Dictionary with credentials structure: { - "usda": {"api_key": str}, - "ecmwf": {"url": str, "key": str} + "usda": {"api_key": str} } Raises @@ -41,18 +43,12 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: If any required credentials are missing from both environment variables and the secrets file. """ - credentials = {"usda": {}, "ecmwf": {}} + credentials = {"usda": {}} # Check environment variables first (highest priority) if usda_key := os.getenv("USDA_API_KEY"): credentials["usda"]["api_key"] = usda_key - if ecmwf_url := os.getenv("ECMWF_DATASTORES_URL"): - credentials["ecmwf"]["url"] = ecmwf_url - - if ecmwf_key := os.getenv("ECMWF_DATASTORES_KEY"): - credentials["ecmwf"]["key"] = ecmwf_key - # Try secrets file as fallback secrets_file = project_root / "config" / "secrets.yaml" if secrets_file.exists(): @@ -61,7 +57,7 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: # Merge file secrets (env vars take precedence) if file_secrets and "credentials" in file_secrets: - for service in ["usda", "ecmwf"]: + for service in ["usda"]: if service in file_secrets["credentials"]: for key, value in file_secrets["credentials"][service].items(): credentials[service].setdefault(key, value) @@ -72,14 +68,6 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: missing.append( "USDA API key (set USDA_API_KEY env var or add to config/secrets.yaml)" ) - if not credentials["ecmwf"].get("url"): - missing.append( - "ECMWF URL (set ECMWF_DATASTORES_URL env var or add to config/secrets.yaml)" - ) - if not credentials["ecmwf"].get("key"): - missing.append( - "ECMWF key (set ECMWF_DATASTORES_KEY env var or add to config/secrets.yaml)" - ) if missing: error_msg = f""" 
@@ -94,15 +82,12 @@ def load_secrets_with_env_fallback(project_root: Path) -> dict: Option 2 - Environment variables (recommended for CI/CD): export USDA_API_KEY="your-usda-key" - export ECMWF_DATASTORES_URL="https://cds.climate.copernicus.eu/api" - export ECMWF_DATASTORES_KEY="your-ecmwf-key" Missing credentials: {chr(10).join(' - ' + m for m in missing)} Get API keys: - USDA: https://fdc.nal.usda.gov/api-guide.html - - ECMWF: https://cds.climate.copernicus.eu/api-how-to """ raise ValueError(error_msg) From 5d79343f448c0a93041e152ac312eb469eb0daf6 Mon Sep 17 00:00:00 2001 From: Koen van Greevenbroek Date: Tue, 30 Jun 2026 15:59:26 -0700 Subject: [PATCH 2/2] chore: tidy land-cover comments, move ecmwf client to dev env Drop references to the old Copernicus API-key requirement from config and docstring comments so they describe only the current Zenodo-mirror flow. The ecmwf-datastores-client is now used solely by tools/mirror_land_cover.py (run under the dev env), so move it out of the default build dependencies. --- config/default.yaml | 8 +++----- config/secrets.yaml.example | 7 +++---- pixi.lock | 4 ---- pixi.toml | 3 ++- workflow/rules/retrieve.smk | 6 +++--- workflow/scripts/download_land_cover.py | 11 +++++------ workflow/scripts/extract_land_cover_class.py | 10 ++++------ 7 files changed, 20 insertions(+), 29 deletions(-) diff --git a/config/default.yaml b/config/default.yaml index b350b248..ec4ff785 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -1565,11 +1565,9 @@ data: fat: "Total lipid (fat)" cal: "Energy" land_cover: - # Copernicus ESA CCI land cover. The map for `baseline_year` is mirrored on - # Zenodo (CC-BY-4.0) so normal builds need no Copernicus CDS API key. The - # `zenodo_record` is the numeric Zenodo record id hosting the mirrored - # `lccs_class` file; refresh it with tools/mirror_land_cover.py (see - # docs/data_sources.rst). + # Copernicus ESA CCI land cover, mirrored on Zenodo (CC-BY-4.0). 
`zenodo_record` + # is the record id holding the `lccs_class` file for `baseline_year`; refresh + # it with tools/mirror_land_cover.py (see docs/data_sources.rst). version: "v2_1_1" zenodo_record: "21085632" # https://doi.org/10.5281/zenodo.21085632 faostat: diff --git a/config/secrets.yaml.example b/config/secrets.yaml.example index d87f073b..e1563fc6 100644 --- a/config/secrets.yaml.example +++ b/config/secrets.yaml.example @@ -10,11 +10,10 @@ # Alternatively, you can set environment variables instead: # export USDA_API_KEY="your-key" # -# Only the `usda` credential is required to build and solve the model. The +# Only the `usda` credential is needed to build and solve the model. The # `ecmwf` and `zenodo` credentials below are MAINTAINER-ONLY: they are used -# exclusively by tools/mirror_land_cover.py to refresh the Copernicus -# land-cover data mirrored on Zenodo. Ordinary builds download that data from -# Zenodo and need neither. +# only by tools/mirror_land_cover.py to refresh the Copernicus land-cover data +# mirrored on Zenodo. 
credentials: usda: diff --git a/pixi.lock b/pixi.lock index a245626d..c42ce96b 100644 --- a/pixi.lock +++ b/pixi.lock @@ -338,7 +338,6 @@ environments: - pypi: https://files.pythonhosted.org/packages/1d/54/a46920229d12c3a6e9f0081d1bdaeffad23c1826353ace95714faee926e5/dask-2025.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/05/d1/8952806fbf9583004ab479d8f58a9496c3d35f6b6009ddd458bdd9978eaf/dpath-2.2.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/a3/cf/7feb3222d770566ca9eaf0bf6922745fadd1ed7ab11832520063a515c240/ecmwf_datastores_client-0.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d0/c3/f133edf2a7ea49cf54f40d1d5f1898ff62ab8b9f4cbca75a9c6e6394fdd8/exactextract-0.2.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/51/ac/e5d886f892666d2d1e5cb8c1a41146e1d79ae8896477b1153a21711d3b44/fasteners-0.20-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl @@ -360,7 +359,6 @@ environments: - pypi: https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - - pypi: 
https://files.pythonhosted.org/packages/93/cf/be4e93afbfa0def2cd6fac9302071db0bd6d0617999ecbf53f92b9398de3/multiurl-0.3.7-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0b/9a/c6f79de7ba3a0a8473129936b7b90aa461d3d46fec6f1627672b1dccf4e9/narwhals-2.12.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/97/1a/78b19893197ed7525edfa7f124a461626541e82aec694a468ba97755c24e/netcdf4-1.7.3-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl @@ -1808,7 +1806,6 @@ environments: - pypi: https://files.pythonhosted.org/packages/1d/54/a46920229d12c3a6e9f0081d1bdaeffad23c1826353ace95714faee926e5/dask-2025.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/05/d1/8952806fbf9583004ab479d8f58a9496c3d35f6b6009ddd458bdd9978eaf/dpath-2.2.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/a3/cf/7feb3222d770566ca9eaf0bf6922745fadd1ed7ab11832520063a515c240/ecmwf_datastores_client-0.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d0/c3/f133edf2a7ea49cf54f40d1d5f1898ff62ab8b9f4cbca75a9c6e6394fdd8/exactextract-0.2.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/51/ac/e5d886f892666d2d1e5cb8c1a41146e1d79ae8896477b1153a21711d3b44/fasteners-0.20-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl @@ -1831,7 +1828,6 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/93/cf/be4e93afbfa0def2cd6fac9302071db0bd6d0617999ecbf53f92b9398de3/multiurl-0.3.7-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0b/9a/c6f79de7ba3a0a8473129936b7b90aa461d3d46fec6f1627672b1dccf4e9/narwhals-2.12.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/97/1a/78b19893197ed7525edfa7f124a461626541e82aec694a468ba97755c24e/netcdf4-1.7.3-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl diff --git a/pixi.toml b/pixi.toml index 3c36e64d..f1c5ce1e 100644 --- a/pixi.toml +++ b/pixi.toml @@ -73,7 +73,6 @@ linopy = { git = "https://github.com/koen-vg/linopy.git", tag = "v0.8.0+glade2" exactextract = ">=0.2.2, <0.3" # Fast zonal statistics from rasters # Data retrieval -ecmwf-datastores-client = ">=0.4.1, <0.5" # ECMWF climate data access gsutil = ">=5.35, <6" # Google Cloud Storage CLI # Validation @@ -108,6 +107,8 @@ myst-nb = ">=1.1, <2" # Render Jupyter notebooks in Sphinx [feature.dev.pypi-dependencies] snakefmt = ">=0.11.4, <0.12" # Snakemake code formatter pytest = ">=8.0.0, <9" # Test framework +# Maintainer-only: Copernicus CDS access for tools/mirror_land_cover.py +ecmwf-datastores-client = ">=0.4.1, <0.5" [activation.env] # Enable new PyPSA components API globally diff --git a/workflow/rules/retrieve.smk 
b/workflow/rules/retrieve.smk index 5c070144..2b03788c 100644 --- a/workflow/rules/retrieve.smk +++ b/workflow/rules/retrieve.smk @@ -971,9 +971,9 @@ rule download_luicube_grassland: rule download_land_cover: # Copernicus ESA CCI land cover (lccs_class only) for the baseline year, - # fetched from our Zenodo mirror (CC-BY-4.0) so builds need no Copernicus - # CDS API key. Refresh the mirror with tools/mirror_land_cover.py and update - # config['data']['land_cover']['zenodo_record'] to the new record id. + # fetched from our Zenodo mirror (CC-BY-4.0). Refresh the mirror with + # tools/mirror_land_cover.py and point + # config['data']['land_cover']['zenodo_record'] at the new record id. output: "data/downloads/land_cover_lccs_class.nc", params: diff --git a/workflow/scripts/download_land_cover.py b/workflow/scripts/download_land_cover.py index f4bdf062..e50ae337 100644 --- a/workflow/scripts/download_land_cover.py +++ b/workflow/scripts/download_land_cover.py @@ -2,13 +2,12 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Download land cover data using the ECMWF datastores client. +"""Download land cover data from the Copernicus CDS via the datastores client. -This is a library module used by the maintainer tool -``tools/mirror_land_cover.py`` (which imports :func:`main`). It is no longer -wired into a Snakemake rule: ordinary builds fetch the mirrored data from -Zenodo. Credentials are passed in by the caller, sourced from environment -variables (ECMWF_DATASTORES_URL / ECMWF_DATASTORES_KEY) or config/secrets.yaml. +Library module for the maintainer tool ``tools/mirror_land_cover.py`` (which +imports :func:`main`) to fetch the source data before mirroring it to Zenodo. +Credentials are passed in by the caller, sourced from environment variables +(ECMWF_DATASTORES_URL / ECMWF_DATASTORES_KEY) or config/secrets.yaml. 
""" from pathlib import Path diff --git a/workflow/scripts/extract_land_cover_class.py b/workflow/scripts/extract_land_cover_class.py index 64da347d..57397976 100644 --- a/workflow/scripts/extract_land_cover_class.py +++ b/workflow/scripts/extract_land_cover_class.py @@ -7,13 +7,11 @@ The downloaded land cover dataset contains multiple variables (lccs_class, processed_flag, current_pixel_state, observation_count, change_count), but only the lccs_class variable (land cover classification) is needed for the model. -This script extracts just that variable to reduce file size from ~2.2GB to ~440MB. -It operates on the ZIP archive distributed by CDS, which bundles the NetCDF file. +This extracts just that variable to reduce file size from ~2.2GB to ~440MB. It +operates on the ZIP archive distributed by CDS, which bundles the NetCDF file. -This is a library module used by the maintainer tool -``tools/mirror_land_cover.py`` (which imports :func:`main`). It is no longer -wired into a Snakemake rule: ordinary builds fetch the already-extracted file -from Zenodo. +Library module for the maintainer tool ``tools/mirror_land_cover.py`` (which +imports :func:`main`) to prepare the file before mirroring it to Zenodo. """ from pathlib import Path