Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions dfetch-hub.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ label = "conan"

[[source]]
name = "clib"
strategy = "git-wiki"
strategy = "catalog-file"
# The GitHub wiki is a regular git repo at <repo>.wiki.git
url = "https://github.com/clibs/clib.wiki.git"
# Which file in the wiki root holds the package index
# Which file in the repo holds the package index (only this file is fetched)
manifest = "Packages.md"
label = "clib"

Expand Down Expand Up @@ -119,6 +119,20 @@ path = "packages"
manifest = "readme"
label = "ts-monorepo"

# ── EXAMPLE: catalog-file — Zephyr project dependencies ─────────────────────
# Parses the west.yml in the Zephyr RTOS repository and catalogues every
# project listed in manifest.projects (HAL modules, MCUboot, mbedTLS, …).
# Each project becomes a separate catalog entry pointing to its upstream repo.
# Uses strategy='catalog-file' with manifest='west.yml' — only that single file
# is fetched; the manifest field selects the parser
# (west.yml → WestProject; Packages.md → CLibPackage, …).
Comment thread
spoorcc marked this conversation as resolved.
[[source]]
name = "zephyr-west"
strategy = "catalog-file"
url = "https://github.com/zephyrproject-rtos/zephyr"
manifest = "west.yml"
label = "zephyr"

# # ── EXAMPLE: SVN — single repo ────────────────────────────────────────────────
# [[source]]
# name = "my-svn-lib"
Expand Down
266 changes: 266 additions & 0 deletions dfetch_hub/catalog/sources/west.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
"""Parse west manifest files (west.yml) to discover Zephyr project dependencies."""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

import yaml
from dfetch.log import get_logger

from dfetch_hub.catalog.sources import BaseManifest, fetch_readme_for_homepage

if TYPE_CHECKING:
from pathlib import Path

logger = get_logger(__name__)

# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------


@dataclass
class WestProject(BaseManifest):
    """A single project entry read from a west manifest.

    Adds the west-specific group membership on top of the fields inherited
    from :class:`BaseManifest`.

    Attributes:
        groups: Names of the west groups this project belongs to, for
            example ``["hal"]`` or ``["optional"]``.  Group membership is
            recorded only; a project such as one in ``groups: [babblesim]``
            (off by default in Zephyr) is still catalogued.
    """

    # A fresh list per instance; a shared mutable default would leak between
    # instances.
    groups: list[str] = field(default_factory=list)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _build_remote_map(remotes_raw: object) -> dict[str, str]:
"""Return a ``{name: url_base}`` mapping built from the *remotes* list.

Args:
remotes_raw: The ``manifest.remotes`` value from west YAML (expected
to be a list of dicts, each with ``name`` and ``url-base``).

Returns:
A dict mapping remote name to base URL (trailing slash stripped).
Returns an empty dict when *remotes_raw* is not a list.
"""
if not isinstance(remotes_raw, list):
return {}
result: dict[str, str] = {}
for item in remotes_raw:
if not isinstance(item, dict):
continue
name = item.get("name")
url_base = item.get("url-base")
if isinstance(name, str) and isinstance(url_base, str):
result[name] = url_base.rstrip("/")
return result


def _project_url(
entry: dict[str, object],
remote_bases: dict[str, str],
default_remote: str,
) -> str | None:
"""Derive the upstream repository URL for a west project entry.

Precedence (west specification):

1. Explicit ``url:`` field.
2. ``{remote.url-base}/{repo-path}`` where ``repo-path`` defaults to ``name``.

Args:
entry: Parsed west project dict.
remote_bases: ``{remote_name: url_base}`` from the manifest remotes.
default_remote: Name of the manifest default remote.

Returns:
The upstream URL string, or ``None`` when it cannot be determined.
"""
explicit = entry.get("url")
if isinstance(explicit, str) and explicit:
return explicit

remote = entry.get("remote") or default_remote
if not isinstance(remote, str):
remote = default_remote

url_base = remote_bases.get(remote, "")
if not url_base:
name = entry.get("name", "<unknown>")
logger.debug("No url-base for remote %r, skipping project %s", remote, name)
return None

repo_path = entry.get("repo-path") or entry.get("name") or ""
if not isinstance(repo_path, str) or not repo_path:
return None

return f"{url_base}/{repo_path}"


def _extract_groups(entry: dict[str, object]) -> list[str]:
"""Return the list of west group names for *entry*.

Args:
entry: Parsed west project dict.

Returns:
A list of group name strings, or an empty list.
"""
raw = entry.get("groups")
if not isinstance(raw, list):
return []
return [str(g) for g in raw if g]


def _build_west_project(
    entry: dict[str, object],
    remote_bases: dict[str, str],
    default_remote: str,
) -> WestProject | None:
    """Turn one raw ``manifest.projects`` dict into a :class:`WestProject`.

    Args:
        entry: Raw project dict from ``manifest.projects``.
        remote_bases: Mapping of remote name to base URL.
        default_remote: Name of the manifest-level default remote.

    Returns:
        The populated :class:`WestProject`, or ``None`` when the entry lacks
        a non-empty string ``name`` or its upstream URL cannot be resolved.
        Both skip cases are reported at debug level.
    """
    project_name = entry.get("name")
    if not (isinstance(project_name, str) and project_name):
        logger.debug("Skipping west project entry with no name: %r", entry)
        return None

    repo_url = _project_url(entry, remote_bases, default_remote)
    if not repo_url:
        logger.debug("Could not determine URL for west project %r — skipped", project_name)
        return None

    raw_revision = entry.get("revision")
    raw_description = entry.get("description", "")

    return WestProject(
        entry_name=project_name.lower(),
        package_name=project_name,
        # Missing or falsy description collapses to the empty string.
        description=str(raw_description) if raw_description else "",
        homepage=repo_url,
        license=None,
        # Missing or falsy revision is recorded as "no version".
        version=str(raw_revision) if raw_revision else None,
        groups=_extract_groups(entry),
        readme_content=fetch_readme_for_homepage(repo_url),
        urls={"Repository": repo_url},
        in_project_repo=False,
    )


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def _extract_default_remote(manifest_dict: dict[str, object]) -> str:
"""Return the default remote name from a west manifest dict, or an empty string."""
defaults = manifest_dict.get("defaults")
if isinstance(defaults, dict):
dr = defaults.get("remote", "")
return str(dr) if dr else ""
return ""


def _collect_projects(
projects_raw: list[object],
remote_bases: dict[str, str],
default_remote: str,
limit: int | None,
) -> list[WestProject]:
"""Build :class:`WestProject` instances from a raw west project list.

Args:
projects_raw: Raw ``manifest.projects`` list from west YAML.
remote_bases: Mapping of remote name to base URL.
default_remote: Name of the manifest-level default remote.
limit: Maximum number of projects to return (``None`` = unlimited).

Returns:
A list of :class:`WestProject` instances for projects with resolvable URLs.
"""
projects: list[WestProject] = []
for entry in projects_raw:
if limit is not None and len(projects) >= limit:
break
if not isinstance(entry, dict):
continue
project = _build_west_project(entry, remote_bases, default_remote)
if project is not None:
projects.append(project)
return projects


def _load_west_manifest(west_yaml: "Path") -> dict[str, object] | None:
    """Load and validate a west YAML file, returning the ``manifest`` sub-dict.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``.  Every
    failure mode is reported with a warning and turned into ``None`` so that
    one broken manifest never aborts the caller.

    Args:
        west_yaml: Path to the ``west.yml`` file.

    Returns:
        The ``manifest`` mapping, or ``None`` when the file cannot be read,
        is not valid UTF-8, is not valid YAML, is not a mapping at the top
        level, or has no mapping under the ``manifest`` key.
    """
    try:
        data: object = yaml.safe_load(west_yaml.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly; otherwise a non-UTF-8 file would raise instead
        # of yielding None.
        logger.warning("Could not parse %s: %s", west_yaml, exc)
        return None
    if not isinstance(data, dict):
        logger.warning("Ignoring non-mapping west YAML in %s", west_yaml)
        return None
    manifest = data.get("manifest")
    if not isinstance(manifest, dict):
        logger.warning("No 'manifest' key found in %s", west_yaml)
        return None
    return manifest


def parse_west_yaml(west_yaml: "Path", limit: int | None = None) -> list[WestProject]:
    """Read a ``west.yml`` manifest and return its projects.

    The manifest's remotes and default remote are used to resolve each
    project's upstream URL.  Projects whose URL cannot be resolved are left
    out; the helpers report those at debug level.

    Args:
        west_yaml: Path to the ``west.yml`` (or equivalent) manifest file.
        limit: Maximum number of projects to return.  ``None`` = unlimited.

    Returns:
        One :class:`WestProject` per resolvable project, in manifest order.
        An empty list when the file cannot be loaded or when
        ``manifest.projects`` is not a list.
    """
    manifest = _load_west_manifest(west_yaml)
    if manifest is None:
        return []

    raw_projects = manifest.get("projects", [])
    if not isinstance(raw_projects, list):
        logger.warning("'manifest.projects' is not a list in %s", west_yaml)
        return []

    found = _collect_projects(
        raw_projects,
        _build_remote_map(manifest.get("remotes", [])),
        _extract_default_remote(manifest),
        limit,
    )
    logger.debug("Parsed %d project(s) from %s", len(found), west_yaml)
    return found
Loading