Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ requires-python = ">=3.10"
dependencies = [
"cffi",
"metkitlib",
"findlibs"
"findlibs",
"pyyaml",
"requests",
"platformdirs",
]

[tool.setuptools.dynamic]
Expand Down
1 change: 1 addition & 0 deletions python/pymetkit/src/pymetkit/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .pymetkit import *
from .pymetkit import ParamDB
156 changes: 156 additions & 0 deletions python/pymetkit/src/pymetkit/generate_parameter_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""
Standalone script to generate:
- parameter_metadata.yaml — one entry per ECMWF parameter
- unit_metadata.yaml — one entry per ECMWF unit

Usage
-----
python -m pymetkit.generate_parameter_metadata
# or directly:
python generate_parameter_metadata.py
"""

import requests
import yaml
from pathlib import Path

PARAM_URL = "https://codes.ecmwf.int/parameter-database/api/v1/param/"
UNIT_URL = "https://codes.ecmwf.int/parameter-database/api/v1/unit/"

# Output paths: canonical location is share/metkit/ at the repo root, which is
# four parent directories above this module file:
# python/pymetkit/src/pymetkit/ -> python/pymetkit/src/ -> python/pymetkit/
# -> python/ -> <repo_root>
_REPO_ROOT = Path(__file__).parents[4]
PARAM_OUTPUT = _REPO_ROOT / "share" / "metkit" / "parameter_metadata.yaml"
UNIT_OUTPUT = _REPO_ROOT / "share" / "metkit" / "unit_metadata.yaml"

#: Timeout in seconds for HTTP requests to the ECMWF parameter database API.
REQUEST_TIMEOUT = 30

Comment on lines +17 to +30
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The generator writes parameter_metadata.yaml and unit_metadata.yaml next to the Python module (Path(__file__).parent), but the PR adds the YAML under share/metkit/ and ParamDB’s fallback search also expects share/metkit/parameter_metadata.yaml. Regenerating with this script will therefore write to a different location than the committed data. Align the output paths with the repository’s canonical YAML location (or update the rest of the codebase to consume the module-adjacent files).

Copilot uses AI. Check for mistakes.

# ---------------------------------------------------------------------------
# Units
# ---------------------------------------------------------------------------


def fetch_units(url: str = UNIT_URL) -> tuple[list[dict], dict[int, str]]:
    """
    Download every unit record from the ECMWF parameter database API.

    Returns
    -------
    units : list[dict]
        Normalised unit records, sorted by id, ready to be written to
        unit_metadata.yaml.
    unit_map : dict[int, str]
        Mapping of unit id -> unit name string for use in parameter enrichment.
    """
    print(f"Fetching units from {url} ...")
    reply = requests.get(url, timeout=REQUEST_TIMEOUT)
    reply.raise_for_status()
    payload = reply.json()
    print(f" Received {len(payload)} units.")

    records: list[dict] = []
    names_by_id: dict[int, str] = {}

    for item in payload:
        unit_id = int(item["id"])
        # The unit string may be stored under 'name', 'symbol' or 'label';
        # take the first non-empty one.
        label = item.get("name") or item.get("symbol") or item.get("label") or ""

        # Keep every field the API returned, with 'id' forced to the front.
        record = {
            "id": unit_id,
            **{key: value for key, value in item.items() if key != "id"},
        }
        # A canonical 'name' is always present so the YAML schema does not
        # depend on which of the alternative keys the API used.
        record["name"] = label

        records.append(record)
        names_by_id[unit_id] = label

    return sorted(records, key=lambda rec: rec["id"]), names_by_id


def write_unit_yaml(units: list[dict], output_path: Path = UNIT_OUTPUT) -> None:
    """
    Write the unit list to a YAML file.

    Parameters
    ----------
    units : list[dict]
        Unit records to serialise; they are written in the order given.
    output_path : Path
        Destination file. It is opened in write mode, so existing content
        is replaced.
    """
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim
    # instead of escaping them, so the file must be opened as UTF-8
    # explicitly: the platform default encoding may be unable to encode them.
    with output_path.open("w", encoding="utf-8") as fh:
        yaml.dump(
            units,
            fh,
            default_flow_style=False,
            allow_unicode=True,
            # Keep the key order the records were built with.
            sort_keys=False,
        )
    print(f"Written {len(units)} units to {output_path}")


# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------


def fetch_parameters(
    url: str = PARAM_URL, unit_map: dict[int, str] | None = None
) -> list[dict]:
    """
    Fetch all parameters from the ECMWF parameter database API.

    Parameters
    ----------
    url : str
        Endpoint to query; the response is expected to be a JSON list of
        parameter objects.
    unit_map : dict[int, str] | None
        Mapping of unit id -> unit name string. When omitted or empty, every
        parameter gets an empty ``units`` string.

    Returns
    -------
    list[dict]
        One record per parameter with the keys ``id``, ``shortname``,
        ``longname``, ``units`` and ``description``, sorted by ``id``.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the API answers with an error
        status.
    """
    print(f"Fetching parameters from {url} ...")
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    params = response.json()
    print(f" Received {len(params)} parameters.")

    result = []
    for raw in params:
        # Resolve short name (API may return 'shortname', 'shortName', or
        # 'short_name'); the first non-empty one wins.
        shortname = (
            raw.get("shortname") or raw.get("shortName") or raw.get("short_name") or ""
        )

        # Resolve units via unit_map if available; ids missing from the map
        # fall back to an empty string.
        unit_id = raw.get("unit_id")
        if unit_map and unit_id is not None:
            units = unit_map.get(int(unit_id), "")
        else:
            units = ""

        # NOTE(review): 'name' and 'description' are passed through as-is, so
        # an explicit null from the API ends up as null in the YAML rather
        # than "" — confirm that consumers accept this.
        entry = {
            "id": int(raw["id"]),
            "shortname": shortname,
            "longname": raw.get("name", ""),
            "units": units,
            "description": raw.get("description", ""),
        }
        result.append(entry)

    result.sort(key=lambda e: e["id"])
    return result


def write_param_yaml(params: list[dict], output_path: Path = PARAM_OUTPUT) -> None:
    """
    Write the parameter list to a YAML file.

    Parameters
    ----------
    params : list[dict]
        Parameter records to serialise; they are written in the order given.
    output_path : Path
        Destination file. It is opened in write mode, so existing content
        is replaced.
    """
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim
    # instead of escaping them, so the file must be opened as UTF-8
    # explicitly: the platform default encoding may be unable to encode them.
    with output_path.open("w", encoding="utf-8") as fh:
        yaml.dump(
            params,
            fh,
            default_flow_style=False,
            allow_unicode=True,
            # Keep the key order the records were built with.
            sort_keys=False,
        )
    print(f"Written {len(params)} parameters to {output_path}")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    # Units go first: the id -> name map they produce is needed to fill in
    # the 'units' field of each parameter record.
    units, unit_map = fetch_units()
    write_unit_yaml(units)

    write_param_yaml(fetch_parameters(unit_map=unit_map))
Loading
Loading