Sustainable-Solutions-Lab · koen-vg · Jul 2, 2026 · Jul 2, 2026
diff --git a/.claude/skills/model-calibration/SKILL.md b/.claude/skills/model-calibration/SKILL.md
@@ -13,11 +13,13 @@ SPDX-License-Identifier: CC-BY-4.0
 
 The default workflow consumes five calibration artefact groups organized
 in per-config *sets* under `data/curated/calibration/<source>/`, selected
-by the `calibration.source` config key. Two sets are git-tracked:
-`default` (fit against the anchoring-off baseline diet of the health-off
-default config) and `gbd-anchored` (fit against the GBD-anchored diet;
-consumed by the health-enabled configs gsa, gsa_fixed_diet, validation
-and the doc configs). `tools/calibrate` resolves the base config's
+by the `calibration.source` config key. Three sets are git-tracked:
+`default` (fit against the FBS-derived baseline diet of the health-off
+default config), `gdd-ia` (fit against the anchoring-off GDD-IA diet;
+consumed by central, ghg_sensitivity_* and region_resolution configs)
+and `gbd-anchored` (fit against the GBD-anchored GDD-IA diet; consumed
+by the health-enabled configs gsa, gsa_fixed_diet, validation and the
+doc configs). `tools/calibrate` resolves the base config's
 diet.anchor_groups_to_gbd sentinel once and pins it across all five
 steps, and provenance stamps record the *resolved* anchoring. Each
 artefact group is produced by a dedicated validation-mode solve and

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -47,17 +47,17 @@ jobs:
           cache: true
           environments: dev
 
-      # The manually-downloaded, license-restricted source datasets (GDD, IHME)
-      # have no producing rule, so the DAG can't resolve unless the paths exist.
-      # A dryrun never reads their contents, so empty stubs suffice. Paths track
-      # the rules in workflow/rules/{diet,health}.smk (baseline_year: 2020).
+      # The manually-downloaded, license-restricted IHME source datasets have
+      # no producing rule, so the DAG can't resolve unless the paths exist.
+      # A dryrun never reads their contents, so empty stubs suffice. Paths
+      # track the rules in workflow/rules/health.smk (baseline_year: 2020).
+      # The GDD-IA dietary data needs no stubs: the test config uses the
+      # default FBS-derived baseline diet (diet.source: fbs).
       - name: Stub manually-downloaded inputs
         run: |
           d=data/manually_downloaded
           mkdir -p "$d" "$d/IHME_GBD_2023_RISK_EXPOSURE_DIET_1" "$d/IHME_GBD_2023_RISK_EXPOSURE_DIET_2"
-          touch "$d/GDD-IA-intake_grams_2020.csv" \
-                "$d/GDD-IA-intake_kcals_2020.csv" \
-                "$d/IHME-GBD_2023-death-rates-2020.csv" \
+          touch "$d/IHME-GBD_2023-death-rates-2020.csv" \
                 "$d/IHME_GBD_2019_RELATIVE_RISKS_Y2020M10D15.XLSX"
 
       # Validates the full Snakemake DAG without executing any rule (see

diff --git a/AGENTS.md b/AGENTS.md
@@ -328,10 +328,11 @@ pixi run -e dev pytest -v         # verbose output
 
 Five calibrations feed the default workflow. Their outputs are organized
 in per-config artefact *sets* under `data/curated/calibration/<source>/`
-(selected by the `calibration.source` config key; the `default` set --
-fit against the anchoring-off baseline diet -- and the `gbd-anchored`
-set -- consumed by the health-enabled configs -- are git-tracked) and
-builds depend on them. When upstream data or build logic changes
+(selected by the `calibration.source` config key; three sets are
+git-tracked: `default`, fit against the FBS-derived baseline diet;
+`gdd-ia`, fit against the anchoring-off GDD-IA diet and consumed by the
+health-off GDD-IA configs; and `gbd-anchored`, consumed by the
+health-enabled configs) and builds depend on them. When upstream data or build logic changes
 materially, regenerate in this order:
 
 1. **feed** — `config/calibration/feed.yaml` → `grassland_yield.csv`,

diff --git a/config/default.yaml b/config/default.yaml
@@ -838,6 +838,19 @@ weight_conversion:
 
 # --- section: diet ---
 diet:
+  # Source of the per-(country, food-group) baseline dietary intake totals:
+  #   - "fbs": derived from FAOSTAT Food Balance Sheet energy supply
+  #     (auto-retrieved; no manual downloads). Group masses are derived
+  #     from FBS kcal supply at model-basis energy densities and
+  #     waste-corrected to consumer-eaten intake; see the fbs block below
+  #     and docs/current_diets.rst.
+  #   - "gdd_ia": Global Dietary Database intake aggregation (survey-based;
+  #     manual download of a not-yet-public dataset). Used for the
+  #     published results; see docs/data_sources.rst.
+  # The baseline diet is structural: calibration artefact sets are fit
+  # against it, so changing the source requires a matching
+  # calibration.source (see docs/calibration.rst).
+  source: fbs
   baseline_age: "All ages"
   # Whether to anchor the per-country baseline-diet totals of the health
   # risk-factor food groups (health.risk_factors, e.g. fruits, vegetables,
@@ -960,7 +973,21 @@ diet:
   # high-volume per-country exceptions in the CSV (it scales better
   # than a YAML block) and reserve this YAML for the cross-cutting
   # global defaults.
-  # GDD-IA pipeline configuration.
+  # FBS diet-source configuration (used when source: fbs).
+  fbs:
+    # FBS reports wheat (item 2511) and rice (item 2807) supply without a
+    # whole-grain vs refined split, while the model carries them as
+    # separate foods (flour-wholemeal/rice-brown vs flour-white/
+    # rice-white) in separate food groups. These fractions allocate each
+    # item's energy supply to the whole-grain food; the remainder goes to
+    # the refined counterpart. The defaults come from a population-weighted
+    # least-squares fit of per-country FBS-derived whole-grain intake to
+    # the survey-based (GDD-IA) estimate; they reproduce GDD-IA's global
+    # population-weighted whole-grain intake (~42 g/day) within ~1 g/day.
+    whole_grain_shares:
+      flour-wholemeal: 0.11
+      rice-brown: 0.04
+  # GDD-IA pipeline configuration (used when source: gdd_ia).
   gdd_ia:
     # GDD-IA reports meat in cooked weight (implied kcal/g for beef
     # ≈ 2.4, between raw 2.15 and cooked 2.50). Apply cooked-to-raw
@@ -1618,6 +1645,7 @@ data:
   faostat:
     qcl_production_element_code: 5510  # "Production" in tonnes (QCL dataset, covers crops and livestock)
     fbs_food_supply_element_code: 645  # "Food supply quantity (kg/capita/yr)" in FBS dataset
+    fbs_food_kcal_element_code: 664  # "Food supply (kcal/capita/day)" in FBS dataset
     fbs_other_uses_element_code: 5154  # "Other uses (non-food)" in 1000 tonnes (FBS dataset)
     fbs_production_element_code: 5511  # "Production" in 1000 tonnes (FBS dataset)
   soilgrids:

diff --git a/config/example.yaml b/config/example.yaml
@@ -32,13 +32,14 @@ name: "example"
 # default) none of that data is needed. Enabling health also anchors the
 # baseline diet to GBD intake exposure (diet.anchor_groups_to_gbd defaults to
 # "match_health"); set that key explicitly to decouple the two.
-# With anchoring on, also consume the calibration artefacts fit against the
-# anchored diet (the default set is fit against the anchoring-off diet).
+# The gbd-anchored calibration artefact set was fit against the anchored
+# GDD-IA diet, so it matches exactly only with diet.source: gdd_ia (which
+# needs the manually-obtained GDD-IA data, see docs/current_diets.rst).
 # health:
 #   enabled: true
 #   value_per_yll: 50000  # USD per year of life lost (0 = evaluate post-hoc only)
 # diet:
-#   anchor_groups_to_gbd: true  # or false to keep the GDD/FAOSTAT-only diet
+#   anchor_groups_to_gbd: true  # or false to keep the unanchored diet.source diet
 # calibration:
 #   source: "gbd-anchored"
 

diff --git a/config/gsa.yaml b/config/gsa.yaml
@@ -31,6 +31,11 @@
 
 name: "gsa"
 
+# The published results use the GDD-IA survey-based baseline diet (the
+# calibration artefact set consumed here was fit against it).
+diet:
+  source: gdd_ia
+
 health:
   enabled: true
 

diff --git a/config/gsa_fixed_diet.yaml b/config/gsa_fixed_diet.yaml
@@ -123,6 +123,9 @@ validation:
   enforce_baseline_diet: true
 
 diet:
+  # The published results use the GDD-IA survey-based baseline diet (the
+  # gbd-anchored calibration artefact set was fit against it).
+  source: gdd_ia
   # Anchor to GBD so the fixed baseline diet matches the health-enabled
   # gsa.yaml diet (with health off, the sentinel would resolve to false).
   anchor_groups_to_gbd: true

diff --git a/config/schemas/config.schema.yaml b/config/schemas/config.schema.yaml
@@ -987,12 +987,18 @@ properties:
   diet:
     type: object
     required:
+      - source
       - baseline_age
       - anchor_groups_to_gbd
       - source_basis
+      - fbs
       - gdd_ia
     additionalProperties: false
     properties:
+      source:
+        type: string
+        enum: ["fbs", "gdd_ia"]
+        description: "Source of the per-(country, food-group) baseline dietary intake totals: 'fbs' (FAOSTAT Food Balance Sheets, auto-retrieved) or 'gdd_ia' (Global Dietary Database intake aggregation, manual download)"
       baseline_age:
         type: string
         description: "Age group for baseline diet (e.g., 'All ages')"
@@ -1033,6 +1039,19 @@ properties:
           additionalProperties:
             type: string
             enum: ["dry", "fresh", "cooked", "carcass", "brewed"]
+      fbs:
+        type: object
+        required: [whole_grain_shares]
+        additionalProperties: false
+        properties:
+          whole_grain_shares:
+            type: object
+            description: "Per-whole-grain-food fraction of the shared FBS item's supply (wheat 2511, rice 2807) allocated to the whole-grain food; the remainder goes to the refined counterpart in the same FBS item"
+            additionalProperties:
+              type: number
+              minimum: 0.0
+              maximum: 1.0
+        description: "FBS diet-source configuration (used when diet.source is 'fbs')."
       gdd_ia:
         type: object
         required: [cooked_to_raw]
@@ -1394,7 +1413,7 @@ properties:
             description: "Zenodo record id hosting the mirrored lccs_class file (refresh with tools/mirror_land_cover.py)"
       faostat:
         type: object
-        required: [qcl_production_element_code, fbs_food_supply_element_code, fbs_other_uses_element_code, fbs_production_element_code]
+        required: [qcl_production_element_code, fbs_food_supply_element_code, fbs_food_kcal_element_code, fbs_other_uses_element_code, fbs_production_element_code]
         additionalProperties: false
         properties:
           qcl_production_element_code:
@@ -1403,6 +1422,9 @@ properties:
           fbs_food_supply_element_code:
             type: integer
             description: "FAOSTAT element code for 'Food supply quantity (kg/capita/yr)' in FBS dataset"
+          fbs_food_kcal_element_code:
+            type: integer
+            description: "FAOSTAT element code for 'Food supply (kcal/capita/day)' in FBS dataset"
           fbs_other_uses_element_code:
             type: integer
             description: "FAOSTAT element code for 'Other uses (non-food)' in 1000 tonnes (FBS dataset)"

diff --git a/config/validation.yaml b/config/validation.yaml
@@ -8,6 +8,11 @@
 # and feed) to compare model behaviour against observed 2020 outcomes.
 
 name: "validation"
+
+# The published results use the GDD-IA survey-based baseline diet (the
+# calibration artefact set consumed here was fit against it).
+diet:
+  source: gdd_ia
 scenarios:
   default: {}