Sustainable-Solutions-Lab · koen-vg · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -67,6 +67,4 @@ jobs:
       - name: Snakemake dryrun test
         env:
           USDA_API_KEY: dummy
-          ECMWF_DATASTORES_URL: https://cds.climate.copernicus.eu/api
-          ECMWF_DATASTORES_KEY: dummy
         run: pixi run --environment dev pytest -m integration -k dryrun -v
diff --git a/config/default.yaml b/config/default.yaml
@@ -1565,9 +1565,11 @@ data:
       fat: "Total lipid (fat)"
       cal: "Energy"
   land_cover:
-    # ECMWF credentials: configure in config/secrets.yaml or via environment variables
-    # See config/secrets.yaml.example for setup instructions
+    # Copernicus ESA CCI land cover, mirrored on Zenodo (CC-BY-4.0). `zenodo_record`
+    # is the record id holding the `lccs_class` file for `baseline_year`; refresh
+    # it with tools/mirror_land_cover.py (see docs/data_sources.rst).
     version: "v2_1_1"
+    zenodo_record: "21085632"  # https://doi.org/10.5281/zenodo.21085632
   faostat:
     qcl_production_element_code: 5510  # "Production" in tonnes (QCL dataset, covers crops and livestock)
     fbs_food_supply_element_code: 645  # "Food supply quantity (kg/capita/yr)" in FBS dataset

diff --git a/config/schemas/config.schema.yaml b/config/schemas/config.schema.yaml
@@ -1361,12 +1361,16 @@ properties:
                 type: string
       land_cover:
         type: object
-        required: [version]
+        required: [version, zenodo_record]
         additionalProperties: false
         properties:
           version:
             type: string
             description: "Version of land cover dataset"
+          zenodo_record:
+            type: string
+            minLength: 1
+            description: "Zenodo record id hosting the mirrored lccs_class file (refresh with tools/mirror_land_cover.py)"
       faostat:
         type: object
         required: [qcl_production_element_code, fbs_food_supply_element_code, fbs_other_uses_element_code, fbs_production_element_code]
@@ -2162,9 +2166,9 @@ properties:
 
   credentials:
     type: object
-    required: [usda, ecmwf]
+    required: [usda]
     additionalProperties: false
-    description: "API credentials for external data sources (configure via config/secrets.yaml or environment variables)"
+    description: "API credentials required by the build (configure via config/secrets.yaml or environment variables). Copernicus CDS credentials are NOT needed for builds; they are only used by tools/mirror_land_cover.py to refresh the mirrored land-cover data."
     properties:
       usda:
         type: object
@@ -2175,20 +2179,6 @@ properties:
             type: string
             minLength: 1
             description: "USDA FoodData Central API key (or set USDA_API_KEY env var)"
-      ecmwf:
-        type: object
-        required: [url, key]
-        additionalProperties: false
-        properties:
-          url:
-            type: string
-            minLength: 1
-            format: uri
-            description: "ECMWF datastores API URL (or set ECMWF_DATASTORES_URL env var)"
-          key:
-            type: string
-            minLength: 1
-            description: "ECMWF datastores API key (or set ECMWF_DATASTORES_KEY env var)"
 
   sensitivity:
     type: object

diff --git a/config/secrets.yaml.example b/config/secrets.yaml.example
@@ -9,8 +9,11 @@
 #
 # Alternatively, you can set environment variables instead:
 #   export USDA_API_KEY="your-key"
-#   export ECMWF_DATASTORES_URL="https://cds.climate.copernicus.eu/api"
-#   export ECMWF_DATASTORES_KEY="your-key"
+#
+# Only the `usda` credential is needed to build and solve the model. The
+# `ecmwf` and `zenodo` credentials below are MAINTAINER-ONLY: they are used
+# only by tools/mirror_land_cover.py to refresh the Copernicus land-cover data
+# mirrored on Zenodo.
 
 credentials:
   usda:
@@ -19,12 +22,21 @@ credentials:
     # For testing, you can use "DEMO_KEY" but it has very limited rate limits
     api_key: "DEMO_KEY"  # Replace with your actual key
 
+  # MAINTAINER-ONLY (tools/mirror_land_cover.py). Safe to omit for builds.
   ecmwf:
     # ECMWF Climate Data Store credentials
     # Get your credentials from: https://cds.climate.copernicus.eu/api-how-to
     # You need to:
     #   1. Register at https://cds.climate.copernicus.eu/user/register
     #   2. Accept the dataset license at https://cds.climate.copernicus.eu/datasets/satellite-land-cover
-    #   3. Get your UID and API key from your profile page
+    #   3. Get your personal access token from your profile page
+    # Or set ECMWF_DATASTORES_URL / ECMWF_DATASTORES_KEY env vars instead.
     url: "https://cds.climate.copernicus.eu/api"
-    key: "UID:API_KEY"  # Replace with your actual credentials
+    key: "API_KEY"  # Replace with your actual token
+
+  # MAINTAINER-ONLY (tools/mirror_land_cover.py). Safe to omit for builds.
+  zenodo:
+    # Zenodo personal access token with the "deposit:write" and
+    # "deposit:actions" scopes. Create one at https://zenodo.org/account/settings/applications/tokens/new/
+    # Or set ZENODO_TOKEN env var instead.
+    token: "ZENODO_TOKEN"  # Replace with your actual token
diff --git a/docs/data_sources.rst b/docs/data_sources.rst
@@ -24,9 +24,7 @@ Several licensed datasets cannot be fetched automatically. While their use is fr
 3. Download the IHME 2023 dietary risk exposure estimates (two archives, ``IHME_GBD_2023_RISK_EXPOSURE_DIET_1`` and ``_2``) (:ref:`ihme-diet-risk-exposure`).
 4. Obtain the **GDD-IA** intake CSVs by personal request to the Global Dietary Database team and place them as ``data/manually_downloaded/GDD-IA-intake_grams_{year}.csv`` and ``data/manually_downloaded/GDD-IA-intake_kcals_{year}.csv`` (:ref:`gdd-ia-dietary-intake`).
 
-**Required API key setup:**
-
-5. Register for a Copernicus Climate Data Store account and configure your API key to enable automatic retrieval of land cover data (:ref:`copernicus-land-cover`).
+No Copernicus/ECMWF API key is required: the land cover data is fetched from a Zenodo mirror (:ref:`copernicus-land-cover`). The only API credential needed for an automated build is the USDA FoodData Central key (see :doc:`introduction`).
 
 
 .. _weight-bases:
@@ -560,28 +558,23 @@ Copernicus Satellite Land Cover
 
 **Description**: Global land cover classification gridded maps from 1992 to present derived from satellite observations. The dataset describes the land surface into 22 classes including various vegetation types, water bodies, built-up areas, and bare land.
 
-**Version**: v2.1.1 (2016 onwards); NetCDF format via the Copernicus Climate Data Store API
+**Version**: v2.1.1 (2016 onwards); NetCDF format
 
 **Coverage**:
   * Spatial: Global (Plate Carree projection), 300 m resolution
   * Temporal: Annual (with approximately one-year publication delay)
 
-**Access**: https://cds.climate.copernicus.eu/datasets/satellite-land-cover (`API documentation <https://cds.climate.copernicus.eu/how-to-api>`__)
-
-**License**: Multiple licenses apply including ESA CCI licence, CC-BY licence, and VITO licence. Users must also cite the Climate Data Store entry and provide attribution to the Copernicus program. (`Terms of use <https://cds.climate.copernicus.eu/terms-of-use>`__)
+**Access**: Original source: https://cds.climate.copernicus.eu/datasets/satellite-land-cover. For builds, GLADE downloads a mirror of the single year/version it needs from Zenodo (see *Retrieval* below), so no Copernicus account or API key is required.
 
-**Citation**: Copernicus Climate Change Service, Climate Data Store, (2019): Land cover classification gridded maps from 1992 to present derived from satellite observation. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). https://doi.org/10.24381/cds.006f2c9a
+**License**: CC-BY-4.0. The 2016-onwards C3S maps (the ones GLADE uses, since ``baseline_year`` is 2020) are released under the Creative Commons Attribution 4.0 International licence, as stated in the authoritative C3S/Copernicus metadata. This permits redistribution provided the Copernicus attribution and source DOI are retained; both are embedded in the Zenodo deposition. (The CDS download page also bundles the ESA CCI licence -- which governs the pre-2016 v2.0.7 maps that GLADE does not use -- and the VITO licence, which restricts only near-real-time PROBA-V products, not historical annual maps.)
 
-**Retrieval**: Automatic via the ``download_land_cover`` and ``extract_land_cover_class`` Snakemake rules. The full dataset (~2.2GB) contains multiple variables but only the land cover classification (``lccs_class``) is needed. The extraction rule outputs ``data/downloads/land_cover_lccs_class.nc`` (~440MB) and deletes the full download.
+**Required attribution**: "Generated using Copernicus Climate Change Service information 2020. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains."
 
-**Manual setup required**:
+**Citation**: Copernicus Climate Change Service, Climate Data Store, (2019): Land cover classification gridded maps from 1992 to present derived from satellite observation. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). https://doi.org/10.24381/cds.006f2c9a
 
-1. Register for a free CDS account at https://cds.climate.copernicus.eu/user/register
-2. Accept the required dataset licenses at https://cds.climate.copernicus.eu/datasets/satellite-land-cover?tab=download#manage-licences
-3. Obtain an API key from your account settings
-4. Configure the API key in ``~/.ecmwfdatastoresrc`` or via environment variables (see API documentation for setup instructions)
+**Retrieval**: Automatic via the ``download_land_cover`` Snakemake rule, which uses ``curl`` to fetch the pre-extracted land cover classification (``lccs_class`` only, ~320 MB NetCDF) from our Zenodo mirror -- no API key needed. The rule writes ``data/downloads/land_cover_lccs_class.nc``. The mirror itself is produced from the upstream CDS dataset by the maintainer tool ``tools/mirror_land_cover.py`` (see :ref:`redistributing-datasets`).
 
-**Configuration**: The land cover year is derived from the top-level ``baseline_year`` parameter. The version can be configured via ``config['data']['land_cover']['version']`` (default: v2_1_1).
+**Configuration**: The land cover year is derived from the top-level ``baseline_year`` parameter, and the version from ``config['data']['land_cover']['version']`` (default: v2_1_1). The mirror to download from is pinned by ``config['data']['land_cover']['zenodo_record']`` (the numeric Zenodo record id); the download URL and file name are derived from these three values.
 
 **Usage**: Spatial analysis of agricultural land availability and land use constraints.
 
@@ -1065,7 +1058,7 @@ Most datasets used in this project require attribution. Some disallow redistribu
 **Open licenses (attribution required, redistribution allowed)**:
 
 * **CC0 1.0 / Public domain** (USDA FoodData Central, IFA FUBC, BLS CPI-U): No restrictions; attribution requested
-* **CC BY 4.0** (GAEZ, FAOSTAT, GLEAM 3.0 Feed Intake, SoilGrids, Cook-Patton, LUIcube, LAMASUS, ISIMIP2a / LPJmL grassland yield): Requires attribution
+* **CC BY 4.0** (GAEZ, FAOSTAT, GLEAM 3.0 Feed Intake, SoilGrids, Cook-Patton, LUIcube, LAMASUS, ISIMIP2a / LPJmL grassland yield, Copernicus Land Cover 2016+): Requires attribution
 * **CC BY 3.0 IGO** (UN WPP): Requires attribution to UN
 * **CC BY** (USDA Costs, USDA Livestock Costs): Requires attribution
 * **Eurostat copyright** (Eurostat apro_cpsh1): Free reuse with attribution
@@ -1077,4 +1070,50 @@ Most datasets used in this project require attribution. Some disallow redistribu
 * **Pending publication — CC-BY-NC on release** (GDD-IA): Available upon personal request from Marco Springmann; will be re-licensed under CC-BY-NC when published
 * **Non-commercial with attribution** (GADM, FADN): Free for academic/non-commercial use; GADM prohibits redistribution, FADN requires EU attribution
 * **FAO terms** (GLEAM 3.0 Supplement, FAO Nutrient Conversion): Non-commercial reuse with FAO acknowledgement; commercial use requires prior permission
-* **Custom terms** (ESA Biomass CCI, Copernicus Land Cover, Water Footprint Network): Various provider-specific terms; see individual entries above
+* **Custom terms** (ESA Biomass CCI, Water Footprint Network): Various provider-specific terms; see individual entries above
+
+.. _redistributing-datasets:
+
+Redistributing datasets via Zenodo
+----------------------------------
+
+Some upstream datasets are free to use but sit behind an API key or registration
+wall (historically the Copernicus land cover data). Where the licence permits
+redistribution, GLADE mirrors the exact slice it needs to `Zenodo
+<https://zenodo.org/>`__ and downloads it during builds with a plain HTTP
+request. This removes the per-user credential, pins an immutable, citable
+version (each Zenodo version has its own DOI and record id), and gives a single
+reusable pattern for any future dataset in the same situation.
+
+The components are:
+
+* ``tools/zenodo_publish.py`` -- a dataset-agnostic helper that creates (or
+  versions) a Zenodo deposition, uploads files, sets metadata, and publishes via
+  the Zenodo REST API. Reuse it for any redistributable dataset.
+* ``tools/mirror_land_cover.py`` -- the land-cover-specific maintainer tool. It
+  downloads ``satellite-land-cover`` from the Copernicus CDS, extracts
+  ``lccs_class``, and publishes it to Zenodo under CC-BY-4.0 with the required
+  Copernicus attribution baked into the deposition metadata.
+* The ``download_land_cover`` build rule, which ``curl``\ s the mirrored file
+  from the record pinned by ``config['data']['land_cover']['zenodo_record']``.
+
+**Before mirroring a new dataset**, confirm its licence actually permits
+redistribution (CC-BY / CC0 / public domain are safe; "use only" or
+non-commercial-no-redistribution terms are not) and record the required
+attribution in the deposition metadata.
+
+**Refreshing the land cover mirror** (maintainer-only; requires a Copernicus CDS
+token and a Zenodo token -- see ``config/secrets.yaml.example``)::
+
+    # Optional dry-run against the Zenodo sandbox (leaves an unpublished draft):
+    pixi run -e dev python tools/mirror_land_cover.py --sandbox --no-publish
+
+    # First publication (creates a new Zenodo record):
+    pixi run -e dev python tools/mirror_land_cover.py
+
+    # New data version (publishes a new version of an existing record):
+    pixi run -e dev python tools/mirror_land_cover.py --parent-record <record-id>
+
+The tool prints the published record id; set it as
+``config['data']['land_cover']['zenodo_record']`` in ``config/default.yaml`` and
+commit that change so builds pick up the new mirror.
diff --git a/docs/development.rst b/docs/development.rst
@@ -207,7 +207,7 @@ How It Works
 Tests call a shared helper ``run_snakemake_target()`` in ``tests/conftest.py`` that invokes the Snakemake Python API directly (no subprocess). The helper layers ``tests/config/test.yaml`` on top of ``config/default.yaml`` and targets specific output files.
 
 * **Dryrun test** (``test_workflow_dryrun``): Validates full DAG construction with ``forceall=True`` without executing any rule. Makes no API calls, but the startup credential gate (presence-only, so dummy values suffice) and the manually-downloaded source files must still be satisfied for the DAG to resolve. Catches missing inputs, broken rules, and invalid wildcard patterns.
-* **Execution test** (``test_build_solve_analyze``): Runs the actual pipeline through analysis for the default scenario. Requires USDA/ECMWF credentials for data downloads on first run.
+* **Execution test** (``test_build_solve_analyze``): Runs the actual pipeline through analysis for the default scenario. Requires a USDA credential for data downloads on first run.
 * **Plot test** (``test_plots``): Generates representative plots from solved model outputs.
 
 Tests never delete ``results/test/`` or ``.snakemake/``; Snakemake detects up-to-date outputs and skips them automatically, so subsequent runs are near-instant when code hasn't changed.

diff --git a/docs/introduction.rst b/docs/introduction.rst
@@ -119,14 +119,17 @@ manually:
   publication (will be released under CC-BY-NC). See :doc:`current_diets`
   and the :ref:`gdd-ia-dietary-intake` entry in :doc:`data_sources`.
 
-Two API credentials are needed for automatic downloads:
+Only one API credential is used for automatic downloads:
 
-* **Copernicus Climate Data Store** — required for satellite land-cover data.
-  Register at https://cds.climate.copernicus.eu/user/register, accept the
-  land-cover dataset licence, and copy the API key from your profile.
-* **USDA FoodData Central** — optional; the repository ships pre-fetched
-  nutritional data. A free key from https://fdc.nal.usda.gov/api-key-signup
-  is only needed if you want to refresh that data.
+* **USDA FoodData Central** — a free key from
+  https://fdc.nal.usda.gov/api-key-signup. The repository ships pre-fetched
+  nutritional data, so this is only needed if you want to refresh it; for a
+  default build ``DEMO_KEY`` suffices.
+
+No Copernicus/ECMWF account is required: the satellite land-cover data is
+fetched from a Zenodo mirror (see :ref:`copernicus-land-cover`). A Copernicus
+CDS token is only needed by maintainers refreshing that mirror with
+``tools/mirror_land_cover.py`` (see :ref:`redistributing-datasets`).
 
 Installation
 ------------
@@ -169,14 +172,12 @@ Installation
 
       cp config/secrets.yaml.example config/secrets.yaml
 
-   Edit ``config/secrets.yaml`` and fill in your ECMWF Climate Data Store
-   credentials (and optionally the USDA key). Alternatively, set the
-   equivalent environment variables:
+   Edit ``config/secrets.yaml`` and fill in your USDA key (or leave the
+   ``DEMO_KEY`` default for a standard build). Alternatively, set the
+   equivalent environment variable:
 
    .. code-block:: bash
 
-      export ECMWF_DATASTORES_URL="https://cds.climate.copernicus.eu/api"
-      export ECMWF_DATASTORES_KEY="your-uid:your-api-key"
       export USDA_API_KEY="your-usda-api-key"
 
 4. **Download the manually-licensed datasets**: follow the