diff --git a/.github/workflows/refresh-profile.yml b/.github/workflows/refresh-profile.yml index 98ab198..a177a2d 100644 --- a/.github/workflows/refresh-profile.yml +++ b/.github/workflows/refresh-profile.yml @@ -1,16 +1,20 @@ -name: Update Profile README +name: Update Profile and Deploy Pages on: schedule: - cron: '0 5 * * *' workflow_dispatch: -permissions: - contents: read +concurrency: + group: pages + cancel-in-progress: false jobs: refresh: runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write steps: - uses: actions/checkout@v4 @@ -23,29 +27,49 @@ jobs: - run: uv sync --all-groups --frozen - - name: Update README - env: - GITHUB_TOKEN: ${{ secrets.SCORE_BOT_PAT }} - run: uv run generate-profile-readme + - name: Restore snapshot cache + uses: actions/cache@v4 + with: + path: profile/cache/repo_overview.json + key: snapshot-${{ github.run_id }} + restore-keys: snapshot- - name: Collect metrics env: GITHUB_TOKEN: ${{ secrets.SCORE_BOT_PAT }} - run: uv run python scripts/collect_metrics.py + run: uv run generate-repo-overview collect + + - name: Render overview (MD) + run: uv run generate-repo-overview render-overview - name: Create Pull Request uses: peter-evans/create-pull-request@v7 with: - title: Update profile README and metrics + title: Update profile README author: eclipse-score-bot <187756813+eclipse-score-bot@users.noreply.github.com> committer: eclipse-score-bot <187756813+eclipse-score-bot@users.noreply.github.com> body: | - This PR updates: - - The repository descriptions and grouping in `profile/README.md` - - The cross-repo metrics report in `profile/metrics.md` - Please review and merge if everything looks good. - commit-message: "chore: auto-refresh profile readme and metrics" + Auto-generated update of `profile/README.md` from the latest snapshot. + The HTML metrics dashboard is deployed separately to GitHub Pages. + commit-message: "chore: auto-refresh profile readme" base: main branch: bot/status-update env: GITHUB_TOKEN: ${{ secrets.SCORE_BOT_PAT }} + + - name: Render details (HTML) + run: uv run generate-repo-overview render-details + + - uses: actions/upload-pages-artifact@v5 + with: + path: _site + + deploy: + needs: refresh + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + steps: + - id: deployment + uses: actions/deploy-pages@v5 diff --git a/.gitignore b/.gitignore index c903b31..a3b98d6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ __pycache__/ *.py[cod] .pytest_cache/ .ruff_cache/ +/.codex +/profile/cache/bazel_registry_checkout/ +/profile/cache/reference_integration_checkout/ +/profile/cache/repo_overview.json +/_site/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..3050c44 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,37 @@ +# AGENTS.md + +Context file for AI coding assistants. See `docs/repo-overview-tool-design.md` for architecture, data model, and caching details. 
+ +## Quick reference + +```sh +uv sync --all-groups --frozen # install deps +uv run generate-repo-overview collect # GitHub API → snapshot JSON +uv run generate-repo-overview render-overview # snapshot → profile/README.md +uv run generate-repo-overview render-details # snapshot → _site/ (index + per-repo pages) +uv run pytest # run tests +uv run ruff check src/ tests/ # lint +uv run basedpyright src/ # type check +``` + +## Key files for website work + +``` +src/generate_repo_overview/ + metrics_html.py — HTML renderer (index + per-repo detail pages) + metrics_report.py — shared helpers: grouping, version comparison, badges + models.py — RepoEntry, RepoSnapshot, signal dataclasses + cli.py — render-details writes all pages from render_all_pages() + constants.py — default paths (DEFAULT_METRICS_HTML_OUTPUT = _site/) +tests/ + test_cli_render.py — render output tests +``` + +## Website rendering notes + +- No static site generator or template engine — pure Python string concatenation. +- CSS is inlined per page via the `CSS` constant in `_html_common.py`. Dark theme using CSS variables. +- `render_all_pages(snapshot)` returns `dict[str, str]` of relative path to HTML content. +- Index page: tabs, filters, sortable columns — all client-side JS in `_render_script()`. +- Detail pages (`/index.html`): static HTML, no JS. +- Repo name links on the index go to detail pages; GitHub links use a separate icon. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..8f62ad3 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +See [AGENTS.md](AGENTS.md) for project context, key files, and dev commands. diff --git a/README.md b/README.md index 0c8f05a..c53fcfd 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ This repository hosts the start page when you visit the eclipse-score GitHub organization. It contains links to the Eclipse Score website, documentation, and other resources related to the Eclipse Score project. +The Python tool in this repo now acts as a small repo-overview generator: it collects a cached snapshot of organization metadata once, then renders multiple Markdown views from that shared snapshot. ## Development @@ -11,18 +12,55 @@ Use `uv` to create a virtual environment and install the project dependencies: uv sync --all-groups ``` -To generate the organization profile README: +The CLI now has a built-in overview: +```sh +uv run generate-repo-overview +``` + +For a cache-only re-render of the profile README and the HTML dashboard: + +```sh +uv run generate-repo-overview render-overview +uv run generate-repo-overview render-details +``` + +For a fresh GitHub pull before rendering, run: + +```sh +uv run generate-repo-overview collect +``` + +By default, `collect` now does a cache-aware refresh: it checks fast, high-level +repository state and reuses cached deep details for repositories whose default +branch SHA has not changed. Use this for regular updates. + +For volatile repository metrics (open PRs/issues, release counters, and recent +activity), fast mode keeps a per-repository fetch timestamp and refreshes those +values automatically when they are older than 1 hour. + +You can tune this freshness window with `REPO_OVERVIEW_VOLATILE_TTL_MINUTES` +(default: `60`). 
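+
+As a rough sketch of how this freshness window is applied (the helper name and the
+ISO-format timestamp are illustrative, not the actual collector code), the check
+boils down to comparing the stored per-repository fetch time against the TTL:
+
+```python
+import os
+from datetime import datetime, timedelta, timezone
+
+
+def volatile_metrics_are_stale(fetched_at: str | None) -> bool:
+    """Return True when cached volatile metrics should be re-fetched."""
+    if fetched_at is None:
+        return True  # never collected before
+    ttl_minutes = int(os.getenv("REPO_OVERVIEW_VOLATILE_TTL_MINUTES", "60"))
+    # Assumes the stored timestamp is ISO-8601 with a timezone offset.
+    age = datetime.now(timezone.utc) - datetime.fromisoformat(fetched_at)
+    return age > timedelta(minutes=ttl_minutes)
+```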
+ +If you need a full deep refresh for every repository, run: + +```sh +uv run generate-repo-overview collect --deep ``` -uv run generate-profile-readme + +If you only want the profile README: + +```sh +uv run generate-repo-overview render-overview ``` Category order and category descriptions are configured in -`src/profile_readme_generator/profile_readme_config.toml`. Pass +`src/generate_repo_overview/profile_readme_config.toml`. Pass `--config /path/to/file.toml` to use a different config file. -The generator reads repository custom properties from GitHub and expects `GITHUB_TOKEN` to be set. -If `GITHUB_TOKEN` is not set, it falls back to `gh auth token`. +The generator reads repository custom properties from GitHub and expects `GITHUB_TOKEN` to be set. If `GITHUB_TOKEN` is not set, it falls back to `gh auth token`. + +Architecture notes for the package live in [src/generate_repo_overview/README.md](src/generate_repo_overview/README.md). The broader design notes are in [docs/repo-overview-tool-design.md](docs/repo-overview-tool-design.md). To run the local checks: diff --git a/docs/repo-overview-tool-design.md b/docs/repo-overview-tool-design.md new file mode 100644 index 0000000..355c665 --- /dev/null +++ b/docs/repo-overview-tool-design.md @@ -0,0 +1,105 @@ +# Repo Overview Tool Design + +## Goals + +- Collect GitHub organization data once and reuse it across multiple reports. +- Keep local iteration fast by rendering from a cached snapshot instead of re-querying GitHub on every run. +- Separate GitHub collection, content enrichment, and rendering so new views are easy to add. +- Extend the profile README workflow with cross-repo metrics — Markdown, HTML dashboard, and GitHub Pages deployment — using a shared snapshot. + +## Architecture + +The tool is split into three layers: + +1. `collector/` + - Connects to GitHub. + - Loads active repositories and custom properties. + - Derives content-based signals such as `has_ci`, `has_lint_config`, `has_coverage_config`, `bazel_version`, and `referenced_by_reference_integration`. + - Writes and reads a local JSON snapshot cache. +2. `profile_readme.py`, `metrics_report.py`, `metrics_html.py` (with `_html_index.py`, `_html_detail.py`, `_html_common.py`) + - Render different views (Markdown and HTML) from the same normalized data model. + - Keep presentation decisions out of the collection layer. +3. `cli.py` + - Orchestrates cache-aware commands: `collect`, `render-overview`, and `render-details`. + +## Data Model + +The shared model lives in `models.py`. + +- `RepoEntry` contains both grouping metadata and overview metrics. +- `RepoSnapshot` stores: + - schema version + - organization name + - generation timestamp + - normalized repositories + +The snapshot is intentionally renderer-agnostic. It stores neutral values such as booleans and plain strings rather than Markdown-specific markers. + +## Caching Strategy + +The default cache file is `profile/cache/repo_overview.json`. + +The cache is used in two ways: + +- Render commands read the snapshot directly and never contact GitHub. +- Collection commands reuse content-derived signals for repositories whose default-branch SHA has not changed. + +That means changing a template or report layout is a local-only operation, and refreshing the snapshot only re-fetches file-tree data for repositories whose content likely changed. 
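+
+In code terms, the reuse decision amounts to a cheap SHA comparison before any deep
+inspection runs. The sketch below is illustrative only — `CachedEntry` and
+`should_reuse_deep_signals` are made-up names, and the real logic lives in the
+collector — but it shows the shape of the check:
+
+```python
+from dataclasses import dataclass
+
+from github.Repository import Repository
+
+
+@dataclass
+class CachedEntry:
+    """Stand-in for the snapshot's per-repository record (illustrative)."""
+
+    name: str
+    default_branch_sha: str
+
+
+def should_reuse_deep_signals(
+    repo: Repository, cached: CachedEntry | None, *, deep: bool
+) -> bool:
+    """Reuse content-derived signals when the default-branch SHA is unchanged."""
+    if deep or cached is None:
+        return False
+    current_sha = repo.get_branch(repo.default_branch).commit.sha
+    return current_sha == cached.default_branch_sha
+```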
+ +## Why The Tool Uses The GitHub API Instead Of Cloning Repositories + +The current report set mainly needs: + +- repository metadata +- custom properties +- release dates +- open pull request counts +- file-presence checks +- a few small file contents such as `.bazelversion` +- cloned shared metadata repositories such as `bazel_registry` and `reference_integration` + +For those needs, API access is cheaper and simpler than cloning every repository. + +The collector uses: + +- repository metadata from the organization API +- repository trees to detect whether files or directories exist +- targeted file-content reads only when a detector needs a small file + +Cloning remains a future option if the project later needs heavyweight analysis such as line counting, local static analysis, or parsing large groups of files. + +## Command Surface + +The generic entry point is: + +```sh +uv run generate-repo-overview +``` + +Built-in commands: + +- `collect` + - Sync the cached snapshot from GitHub and write it to disk. + - Use `--deep` to force a full refresh for every repository instead of reusing cached signals for unchanged ones. +- `render-overview` + - Render the profile README from an existing snapshot. +- `render-details` + - Render the HTML metrics page from an existing snapshot. + +The `collect` command always performs a sync. The render commands never contact GitHub. + +## Extension Points + +To add a new view: + +1. Extend `RepoEntry` only if the new view needs new normalized data. +2. Add or update detectors in `collector/` if new collection logic is required. +3. Create a new renderer that accepts `RepoSnapshot` or `list[RepoEntry]`. +4. Add a CLI command that reads the cached snapshot and calls the renderer. + +To add a new detector, prefer: + +- tree-based file existence checks for simple presence signals +- targeted small-file reads for version or config parsing + +Avoid coupling detectors directly to output format. The collector should produce plain data; the renderer should decide how that data is displayed. diff --git a/profile/README.md b/profile/README.md index 45f6ca5..1d66786 100644 --- a/profile/README.md +++ b/profile/README.md @@ -1,5 +1,7 @@ # Welcome to eclipse-score +**[Metrics, Status, etc](https://eclipse-score.github.io/.github/)** — for all repositories in this organization. + This Github organization contains artifacts developed by the [Eclipse S-CORE Project](https://projects.eclipse.org/projects/automotive.score). ## Introduction to Eclipse S-CORE Project @@ -32,6 +34,7 @@ Core S-CORE modules, libraries, and APIs. | [communication](https://github.com/eclipse-score/communication) | Repository for the communication module LoLa | | [config_management](https://github.com/eclipse-score/config_management) | Repository for config management | | [feo](https://github.com/eclipse-score/feo) | Repository for the Fixed Order Execution (FEO) framework | +| [inc_security_crypto](https://github.com/eclipse-score/inc_security_crypto) | Incubation repository for Security & Cryptography feature | | [kyron](https://github.com/eclipse-score/kyron) | Safe async runtime for Rust | | [lifecycle](https://github.com/eclipse-score/lifecycle) | Repository for the lifecycle feature | | [logging](https://github.com/eclipse-score/logging) | Repository for logging daemon | @@ -93,6 +96,12 @@ Integration repositories for various systems and components. 
| [rules_imagefs](https://github.com/eclipse-score/rules_imagefs) | Repository for Image FileSystem Bazel rules and toolchains definitions | | [testing_tools](https://github.com/eclipse-score/testing_tools) | Repository for testing utilities | +#### testing + +| Repository | Description | +|------------|-------------| +| [qnx_unit_tests](https://github.com/eclipse-score/qnx_unit_tests) | Infrastructure for running unit tests in QNX VMs | + #### Toolchains Toolchain repositories for compilers, linters, and other development tools. diff --git a/profile/metrics.md b/profile/metrics.md deleted file mode 100644 index d7a4408..0000000 --- a/profile/metrics.md +++ /dev/null @@ -1,77 +0,0 @@ -# Cross-Repo Metrics Report - -Generated on 2026-02-19T14:20:36.089865+00:00 - -| Repo |Last Commit | Issues | PRs | Bazel | Lint | CI | Test Coverage | Latest Release | Stars | Forks | -|------|------------|--------|-----|-------|------|----|---------------|----------------|-------|-------| -| [.eclipsefdn](https://github.com/eclipse-score/.eclipsefdn) | 2026-02-19 | 4 | 3 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 31 | -| [.github](https://github.com/eclipse-score/.github) | 2026-02-19 | 2 | 2 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 8 | -| [apt-install](https://github.com/eclipse-score/apt-install) | 2025-07-24 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 2 | 1 | -| [baselibs](https://github.com/eclipse-score/baselibs) | 2026-02-18 | 42 | 16 | 8.3.1 | ❌ no | ✅ yes | ❌ no | 2026-02-16 | 20 | 39 | -| [baselibs_rust](https://github.com/eclipse-score/baselibs_rust) | 2026-02-19 | 12 | 2 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2026-02-10 | 1 | 5 | -| [bazel-tools-cc](https://github.com/eclipse-score/bazel-tools-cc) | 2026-01-23 | 1 | 1 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2025-12-15 | 1 | 3 | -| [bazel-tools-python](https://github.com/eclipse-score/bazel-tools-python) | 2025-12-15 | 1 | 1 | 7.5.0 | ✅ yes | ✅ yes | ❌ no | 2025-11-25 | 0 | 2 | -| [bazel_cpp_toolchains](https://github.com/eclipse-score/bazel_cpp_toolchains) | 2026-02-18 | 5 | 2 | ⚠️ missing | ❌ no | ❌ no | ❌ no | 2026-02-18 | 2 | 4 | -| [bazel_platforms](https://github.com/eclipse-score/bazel_platforms) | 2026-02-18 | 0 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | 2026-02-18 | 1 | 7 | -| [bazel_registry](https://github.com/eclipse-score/bazel_registry) | 2026-02-19 | 2 | 0 | 7.4.0 | ❌ no | ✅ yes | ❌ no | 2025-12-22 | 3 | 17 | -| [bazel_registry_ui](https://github.com/eclipse-score/bazel_registry_ui) | 2026-02-19 | 4 | 3 | 8.4.2 | ✅ yes | ✅ yes | ❌ no | - | 0 | 2 | -| [cicd-workflows](https://github.com/eclipse-score/cicd-workflows) | 2026-02-17 | 13 | 5 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 7 | -| [communication](https://github.com/eclipse-score/communication) | 2026-02-18 | 53 | 18 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2025-12-19 | 35 | 45 | -| [config_management](https://github.com/eclipse-score/config_management) | 2026-01-29 | 1 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 2 | -| [dash-license-scan](https://github.com/eclipse-score/dash-license-scan) | 2026-02-09 | 2 | 1 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | 2025-12-19 | 1 | 1 | -| [devcontainer](https://github.com/eclipse-score/devcontainer) | 2026-02-17 | 9 | 4 | ⚠️ missing | ✅ yes | ✅ yes | ❌ no | 2025-11-28 | 2 | 7 | -| [docs-as-code](https://github.com/eclipse-score/docs-as-code) | 2026-02-19 | 40 | 12 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-13 | 5 | 20 | -| [eclipse-score-website](https://github.com/eclipse-score/eclipse-score-website) | 2026-02-19 | 1 | 1 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 
0 | 9 | -| [eclipse-score-website-preview](https://github.com/eclipse-score/eclipse-score-website-preview) | 2026-02-19 | 0 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [eclipse-score-website-published](https://github.com/eclipse-score/eclipse-score-website-published) | 2026-02-19 | 2 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [eclipse-score.github.io](https://github.com/eclipse-score/eclipse-score.github.io) | 2026-01-08 | 6 | 3 | 7.4.0 | ✅ yes | ✅ yes | ❌ no | - | 7 | 14 | -| [examples](https://github.com/eclipse-score/examples) | 2025-07-24 | 1 | 1 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 4 | -| [feo](https://github.com/eclipse-score/feo) | 2026-02-19 | 1 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-19 | 2 | 10 | -| [ferrocene_toolchain_builder](https://github.com/eclipse-score/ferrocene_toolchain_builder) | 2026-01-26 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | 2026-01-14 | 0 | 1 | -| [inc_abi_compatible_datatypes](https://github.com/eclipse-score/inc_abi_compatible_datatypes) | 2025-12-18 | 1 | 1 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 2 | -| [inc_ai_platform](https://github.com/eclipse-score/inc_ai_platform) | 2025-10-27 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [inc_config_management](https://github.com/eclipse-score/inc_config_management) | 2025-11-03 | 2 | 2 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 1 | 2 | -| [inc_daal](https://github.com/eclipse-score/inc_daal) | 2026-01-20 | 2 | 2 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 4 | 3 | -| [inc_diagnostics](https://github.com/eclipse-score/inc_diagnostics) | 2025-10-27 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [inc_feo](https://github.com/eclipse-score/inc_feo) | 2025-09-15 | 2 | 2 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 8 | 9 | -| [inc_gen_ai](https://github.com/eclipse-score/inc_gen_ai) | 2025-10-27 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [inc_json](https://github.com/eclipse-score/inc_json) | 2025-08-12 | 1 | 0 | 7.4.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [inc_mw_com](https://github.com/eclipse-score/inc_mw_com) | 2026-01-14 | 6 | 4 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 5 | 9 | -| [inc_mw_log](https://github.com/eclipse-score/inc_mw_log) | 2025-10-29 | 5 | 5 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 7 | -| [inc_os_autosd](https://github.com/eclipse-score/inc_os_autosd) | 2026-02-19 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 6 | -| [inc_process_test_management](https://github.com/eclipse-score/inc_process_test_management) | 2025-07-24 | 1 | 1 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 3 | -| [inc_process_variant_management](https://github.com/eclipse-score/inc_process_variant_management) | 2025-08-07 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 3 | -| [inc_score_codegen](https://github.com/eclipse-score/inc_score_codegen) | 2026-01-22 | 0 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [inc_security_crypto](https://github.com/eclipse-score/inc_security_crypto) | 2026-01-22 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [inc_someip_gateway](https://github.com/eclipse-score/inc_someip_gateway) | 2026-02-18 | 22 | 13 | 8.3.0 | ✅ yes | ✅ yes | ❌ no | - | 1 | 3 | -| [inc_time](https://github.com/eclipse-score/inc_time) | 2026-02-06 | 3 | 3 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 3 | -| [infrastructure](https://github.com/eclipse-score/infrastructure) | 2026-02-12 | 0 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [itf](https://github.com/eclipse-score/itf) | 2026-02-13 | 2 | 0 | 8.1.0 | ❌ no | ✅ yes | ✅ yes | 2025-09-23 | 0 | 12 | -| 
[kyron](https://github.com/eclipse-score/kyron) | 2026-02-18 | 18 | 2 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-17 | 1 | 3 | -| [lifecycle](https://github.com/eclipse-score/lifecycle) | 2026-02-19 | 29 | 11 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2026-02-17 | 0 | 14 | -| [logging](https://github.com/eclipse-score/logging) | 2026-02-19 | 11 | 5 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-19 | 0 | 10 | -| [module_template](https://github.com/eclipse-score/module_template) | 2026-02-03 | 4 | 3 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 3 | 12 | -| [more-disk-space](https://github.com/eclipse-score/more-disk-space) | 2026-01-20 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [nlohmann_json](https://github.com/eclipse-score/nlohmann_json) | 2026-02-19 | 5 | 4 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 1 | 5 | -| [operating_system](https://github.com/eclipse-score/operating_system) | 2025-07-24 | 2 | 1 | 8.1.0 | ❌ no | ❌ no | ❌ no | - | 1 | 2 | -| [orchestrator](https://github.com/eclipse-score/orchestrator) | 2026-02-17 | 15 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-17 | 4 | 11 | -| [os_images](https://github.com/eclipse-score/os_images) | 2025-11-27 | 3 | 2 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 3 | -| [persistency](https://github.com/eclipse-score/persistency) | 2026-02-19 | 31 | 11 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2026-02-17 | 1 | 25 | -| [process_description](https://github.com/eclipse-score/process_description) | 2026-02-19 | 68 | 4 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-02-05 | 1 | 17 | -| [reference_integration](https://github.com/eclipse-score/reference_integration) | 2026-02-19 | 9 | 8 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2025-12-22 | 3 | 22 | -| [rules_imagefs](https://github.com/eclipse-score/rules_imagefs) | 2026-02-18 | 1 | 1 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [rules_rust](https://github.com/eclipse-score/rules_rust) | 2026-02-18 | 2 | 2 | 8.4.2 | ✅ yes | ✅ yes | ❌ no | 2026-02-18 | 0 | 1 | -| [score](https://github.com/eclipse-score/score) | 2026-02-19 | 590 | 21 | 8.3.0 | ✅ yes | ✅ yes | ❌ no | 2026-02-10 | 91 | 81 | -| [score-crates](https://github.com/eclipse-score/score-crates) | 2026-02-09 | 2 | 2 | ⚠️ missing | ❌ no | ❌ no | ❌ no | 2026-02-09 | 1 | 9 | -| [score_rust_policies](https://github.com/eclipse-score/score_rust_policies) | 2026-02-05 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | 2026-02-05 | 0 | 3 | -| [scrample](https://github.com/eclipse-score/scrample) | 2026-02-12 | 5 | 3 | 8.3.0 | ❌ no | ✅ yes | ❌ no | 2026-01-26 | 2 | 8 | -| [test_integration](https://github.com/eclipse-score/test_integration) | 2025-10-10 | 0 | 0 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 0 | 1 | -| [test_module_a](https://github.com/eclipse-score/test_module_a) | 2025-10-10 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [test_module_b](https://github.com/eclipse-score/test_module_b) | 2025-10-10 | 0 | 0 | 8.3.0 | ❌ no | ✅ yes | ❌ no | - | 0 | 1 | -| [testing_tools](https://github.com/eclipse-score/testing_tools) | 2026-02-19 | 0 | 0 | 8.4.2 | ❌ no | ✅ yes | ❌ no | 2026-02-19 | 1 | 4 | -| [toolchains_gcc](https://github.com/eclipse-score/toolchains_gcc) | 2025-12-02 | 5 | 2 | ⚠️ missing | ❌ no | ❌ no | ❌ no | 2025-12-02 | 6 | 12 | -| [toolchains_gcc_packages](https://github.com/eclipse-score/toolchains_gcc_packages) | 2026-01-21 | 0 | 0 | ⚠️ missing | ❌ no | ✅ yes | ❌ no | - | 0 | 7 | -| [toolchains_qnx](https://github.com/eclipse-score/toolchains_qnx) | 2026-02-09 | 6 | 1 | 8.1.0 | ❌ no | ✅ yes | ❌ no | 2026-02-09 | 4 | 8 | -| 
[toolchains_rust](https://github.com/eclipse-score/toolchains_rust) | 2026-02-10 | 2 | 1 | ⚠️ missing | ❌ no | ❌ no | ❌ no | 2026-02-10 | 1 | 7 | -| [tooling](https://github.com/eclipse-score/tooling) | 2026-02-16 | 16 | 8 | 8.3.1 | ❌ no | ✅ yes | ❌ no | 2026-02-13 | 5 | 16 | -| [tools](https://github.com/eclipse-score/tools) | 2026-02-16 | 1 | 1 | ⚠️ missing | ❌ no | ❌ no | ❌ no | - | 1 | 1 | \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index edc3868..7020eb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,16 +3,17 @@ requires = ["hatchling>=1.27.0"] build-backend = "hatchling.build" [project] -name = "profile-readme-generator" +name = "generate-repo-overview" version = "0.0.0" -description = "Generate the eclipse-score organization profile README from GitHub repository properties" +description = "Collect cached GitHub organization overviews and render Markdown reports" requires-python = ">=3.12" dependencies = [ "PyGithub", + "tqdm", ] [project.scripts] -generate-profile-readme = "profile_readme_generator.generator:main" +generate-repo-overview = "generate_repo_overview.cli:main" [dependency-groups] dev = [ @@ -23,21 +24,18 @@ dev = [ ] [tool.hatch.build.targets.wheel] -packages = ["src/profile_readme_generator"] +packages = ["src/generate_repo_overview"] [tool.hatch.build.targets.wheel.force-include] -"src/profile_readme_generator/templates" = "profile_readme_generator/templates" -"src/profile_readme_generator/profile_readme_config.toml" = "profile_readme_generator/profile_readme_config.toml" +"src/generate_repo_overview/templates" = "generate_repo_overview/templates" +"src/generate_repo_overview/profile_readme_config.toml" = "generate_repo_overview/profile_readme_config.toml" [tool.uv] package = true [tool.ruff] -src = ["src", "tests", "scripts"] +src = ["src", "tests"] target-version = "py312" -exclude = [ - "scripts/collect_metrics.py", -] [tool.ruff.lint] select = [ @@ -67,7 +65,7 @@ ignore = [ ] [tool.ruff.lint.isort] -known-first-party = ["profile_readme_generator"] +known-first-party = ["generate_repo_overview"] combine-as-imports = true [tool.ruff.lint.per-file-ignores] @@ -93,7 +91,6 @@ reportUnknownVariableType = "warning" reportUnusedVariable = "warning" include = [ "src", - "scripts", "tests", ] exclude = [ @@ -102,7 +99,6 @@ exclude = [ ".venv*/**", "build", "dist", - "scripts/collect_metrics.py", ] venvPath = "." venv = ".venv" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c803fed..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -PyGithub==2.7.0 diff --git a/scripts/collect_metrics.py b/scripts/collect_metrics.py deleted file mode 100644 index 426939a..0000000 --- a/scripts/collect_metrics.py +++ /dev/null @@ -1,167 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2025 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* - -#!/usr/bin/env python3 -""" -Collect extended metrics across all repositories in eclipse-score -and write them into a Markdown report file. 
-""" - -import os -import pathlib -import re -from dataclasses import dataclass -from datetime import datetime, timedelta, timezone -from typing import Optional - -from github import Github - -ORG = "eclipse-score" -OUTPUT_DIR = pathlib.Path("profile") -OUTPUT_FILE = OUTPUT_DIR / "metrics.md" - -gh = Github(os.getenv("GITHUB_TOKEN")) -NOW = datetime.now(timezone.utc) - -@dataclass -class RepoData: - name: str - description: str - last_commit: Optional[str] - open_issues: int - open_prs: int - bazel_version: str - lint_config: str - ci_setup: str - test_coverage: str - latest_release: Optional[str] - stars: int - forks: int - -def file_exists(repo, path): - try: - repo.get_contents(path) - return True - except: - return False - - -def detect_bazel_version(repo): - try: - content = repo.get_contents(".bazelversion").decoded_content.decode() - for line in content.splitlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - return line - except Exception: - pass - - pattern = re.compile(r'\b\d+\.\d+(?:\.\d+)?\b') - for ws_name in ["WORKSPACE", "WORKSPACE.bzlmod"]: - try: - content = repo.get_contents(ws_name).decoded_content.decode() - for line in content.splitlines(): - line = line.strip() - if line.startswith("#"): - continue - match = pattern.search(line) - if match: - return match.group(0) - except Exception: - continue - - return "⚠️ missing" - - -def detect_lint_config(repo): - for candidate in [".gitlint", ".editorconfig", ".pre-commit-config.yaml"]: - if file_exists(repo, candidate): - return "✅ yes" - return "❌ no" - -def detect_ci_setup(repo): - for candidate in [".github/workflows", "Jenkinsfile"]: - if file_exists(repo, candidate): - return "✅ yes" - return "❌ no" - -def detect_test_coverage(repo): - for candidate in ["coverage.yml", "coverage.xml", "pytest.ini", ".coveragerc"]: - if file_exists(repo, candidate): - return "✅ yes" - return "❌ no" - -def get_latest_release_date(repo): - try: - release = repo.get_latest_release() - return release.published_at.date().isoformat() - except: - return None - -def query_github_org_for_repo_data(gh: Github, org: str): - repo_data_list = [] - user = gh.get_user(org) - for repo in user.get_repos(): - description = repo.description or "" - last_commit = repo.pushed_at.date().isoformat() if repo.pushed_at else None - open_issues = repo.open_issues_count - open_prs = repo.get_pulls(state="open").totalCount - bazel_version = detect_bazel_version(repo) - lint_config = detect_lint_config(repo) - ci_setup = detect_ci_setup(repo) - test_coverage = detect_test_coverage(repo) - latest_release = get_latest_release_date(repo) - stars = repo.stargazers_count - forks = repo.forks_count - - repo_data_list.append( - RepoData( - name=repo.name, - description=description.replace("|", "‖"), - last_commit=last_commit, - open_issues=open_issues, - open_prs=open_prs, - bazel_version=bazel_version, - lint_config=lint_config, - ci_setup=ci_setup, - test_coverage=test_coverage, - latest_release=latest_release, - stars=stars, - forks=forks, - ) - ) - return repo_data_list - -def render_markdown(repos): - header = ( - f"# Cross-Repo Metrics Report\n\n" - f"Generated on {NOW.isoformat()}\n\n" - "| Repo |Last Commit | Issues | PRs | Bazel | Lint | CI | Test Coverage | Latest Release | Stars | Forks |\n" - "|------|------------|--------|-----|-------|------|----|---------------|----------------|-------|-------|" - ) - rows = [] - for r in sorted(repos, key=lambda x: x.name.lower()): - rows.append( - f"| 
[{r.name}](https://github.com/{ORG}/{r.name}) | {r.last_commit or '-'} | " - f"{r.open_issues} | {r.open_prs} | {r.bazel_version} | {r.lint_config} | " - f"{r.ci_setup} | {r.test_coverage} | {r.latest_release or '-'} | {r.stars} | {r.forks} |" - ) - return "\n".join([header] + rows) - -if __name__ == "__main__": - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - repos = query_github_org_for_repo_data(gh, ORG) - md = render_markdown(repos) - OUTPUT_FILE.write_text(md, encoding="utf-8") - print(f"Wrote {len(repos)} repos to {OUTPUT_FILE}") diff --git a/src/generate_repo_overview/README.md b/src/generate_repo_overview/README.md new file mode 100644 index 0000000..96e1369 --- /dev/null +++ b/src/generate_repo_overview/README.md @@ -0,0 +1,187 @@ +# `generate_repo_overview` Architecture + +This package is organized around a single idea: + +- collect a normalized snapshot of GitHub organization data +- cache that snapshot on disk +- render multiple views (Markdown and HTML) from the same cached data + +This document explains the package structure and cache behavior. It intentionally does not cover CLI usage. + +## Module Layout + +- `cli.py` + - Wires the top-level commands together. + - Decides whether a command should read the cache only, reuse the cache when possible, or perform a live collection. +- `collector/` + - Subpackage that talks to GitHub and manages the snapshot cache. + - `__init__.py` — orchestration: connects to GitHub, dispatches parallel collection, writes the snapshot. + - `repo_entry.py` — per-repository collection logic: fast/medium/slow paths, volatile metrics. + - `signal_detection.py` — deep content inspection: Bazel, CI, lint, coverage, CODEOWNERS, languages. + - `reference_integration.py` — detects which repos are `bazel_dep` dependencies of `reference_integration`. + - `registry_metadata.py` — parses the `bazel_registry` for maintainers and latest module versions. + - `git_checkout.py` — manages shallow git checkouts for local inspection. + - `snapshot_io.py` — reads and writes the JSON snapshot cache. +- `models.py` + - Defines the normalized data structures shared by collection and rendering. + - The key types are `RepoEntry` and `RepoSnapshot`. +- `profile_readme.py` + - Renders the organization profile README from normalized repository data. + - Owns category config parsing, grouping, and README-oriented table rendering. +- `metrics_report.py` + - Renders the cross-repository Markdown metrics report. +- `metrics_html.py` + - Coordinates HTML page rendering and exposes `render_all_pages()`. +- `_html_common.py` + - Shared HTML building blocks: CSS, icons, language badges, version badges. +- `_html_index.py` + - Renders the main HTML metrics dashboard (tabs, filters, sortable columns). +- `_html_detail.py` + - Renders per-repository HTML detail pages. +- `constants.py` + - Centralizes default org, cache, and output paths. +- `console.py` + - Keeps status output formatting in one place. + +## Data Flow + +The package has three layers: + +1. Collection + - `collector/` fetches live GitHub data and converts it into `RepoEntry` values. +2. Snapshot + - The collected repos are stored inside a `RepoSnapshot`. +3. Rendering + - `profile_readme.py` renders the Markdown profile README. + - `metrics_report.py` renders a Markdown metrics report. + - `metrics_html.py` (with `_html_index.py`, `_html_detail.py`, `_html_common.py`) renders the HTML dashboard. + +The renderers do not talk to GitHub directly. They only consume normalized data. 
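+
+To make that boundary concrete, a render-only path is essentially "load the snapshot,
+hand it to a renderer". The sketch below is not the real renderer; it assumes the JSON
+keys mirror the model's `org_name` and `repos` fields and exists only to show that
+rendering needs no GitHub access:
+
+```python
+import json
+from pathlib import Path
+
+SNAPSHOT_PATH = Path("profile/cache/repo_overview.json")
+
+
+def render_repo_list() -> str:
+    """Toy render-only path: read the cached snapshot, never contact GitHub."""
+    # Key names below are assumed to match the serialized RepoSnapshot fields.
+    snapshot = json.loads(SNAPSHOT_PATH.read_text(encoding="utf-8"))
+    lines = [f"# {snapshot['org_name']} repositories", ""]
+    lines += [f"- {repo['name']}" for repo in snapshot["repos"]]
+    return "\n".join(lines) + "\n"
+```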
+ +## What Is Cached + +The main cache file is: + +- `profile/cache/repo_overview.json` + +That file stores a serialized `RepoSnapshot` containing: + +- schema version +- organization name +- generation timestamp +- all normalized repositories + +The cache loader only accepts the current schema version. If the snapshot schema does not match, the cache is treated as unusable and collection falls back to a fresh GitHub fetch. + +For each repository, the snapshot currently stores: + +- repository identity and grouping + - `name` + - `description` + - `category` + - `subcategory` +- branch identity used for cache reuse + - `default_branch` + - `default_branch_sha` +- volatile metrics (refreshed on a TTL, see below) + - `last_push_date` (default-branch last commit date when available; falls back to repository pushed timestamp) + - `merged_prs_30_days` + - `open_issues` + - `open_prs` + - `open_ready_prs` + - `open_draft_prs` + - `latest_release_version` + - `latest_release_date` + - `commits_since_latest_release` + - `release_bazel_version` + - `release_bazel_deps` + - `volatile_metrics_fetched_at` +- registry metadata + - `maintainers_in_bazel_registry` + - `latest_bazel_registry_version` +- top-level fields + - `stars` + - `forks` +- content-derived signals (reused when `default_branch_sha` is unchanged) + - `is_bazel_repo` + - `bazel_version` + - `codeowners` + - `docs_as_code_version` + - `referenced_by_reference_integration` + - `has_lint_config` + - `has_gitlint_config` + - `has_pyproject_toml` + - `has_pre_commit_config` + - `has_ci` + - `uses_cicd_daily_workflow` + - `has_coverage_config` + - `top_languages` + - `bazel_deps` + +## What Is Cached Where + +There is only one persistent cache file today: + +- `profile/cache/repo_overview.json` + +There is no separate per-repository cache directory and no checked-out repository mirror. + +Instead, the snapshot itself carries enough information to support selective reuse: + +- `default_branch_sha` is stored per repository +- on the next live collection, that SHA is compared with the current GitHub default-branch SHA +- if the SHA has not changed, the existing content-derived signals are reused from the snapshot + +That means the persistent cache lives in one JSON file, while reuse decisions happen per repository inside the collector. + +## What Is Not Cached Separately + +The package does not currently maintain separate caches for: + +- raw GitHub API responses +- repository trees +- individual file contents +- cloned repositories +- rendered Markdown outputs beyond whatever files the CLI writes + +Rendered outputs such as `profile/README.md` and `_site/` are products of the snapshot, not part of the snapshot cache itself. + +## Cache Semantics By Layer + +- Render-only paths read `profile/cache/repo_overview.json` and do not contact GitHub. +- Collection paths always contact GitHub for current repository metadata. +- During collection, some content-derived fields can still be reused from the previous snapshot when the repository content fingerprint (`default_branch_sha`) matches. + +The `collect` command defaults to a cache-aware mode for unchanged repositories: + +- it still fetches high-level state (including current default-branch SHA) +- if the SHA matches the previous snapshot, it reuses cached deep details +- if the SHA changed, it runs the slower deep inspection path for that repository + +Volatile metrics (for example PR/issue counts and release deltas) are tracked +with a per-repository `volatile_metrics_fetched_at` timestamp. 
In fast mode, +those values are reused only while they are fresh (1 hour by default); once the +timestamp is older than the configured TTL, only volatile metrics are refreshed +while deep content signals remain cached. + +Set `REPO_OVERVIEW_VOLATILE_TTL_MINUTES` to adjust this freshness window. + +Use `collect --deep` when you need a full deep refresh for every repository. + +This is why cached rendering is fast, while live collection is incremental rather than “download everything again.” + +## Why The Package Uses API Access Instead Of Cloning Repositories + +The current reports mostly need: + +- repository metadata +- custom properties +- release dates +- pull request counts +- file and directory presence checks +- a few small text files such as `.bazelversion` +- cloned shared metadata repositories such as `bazel_registry` and `reference_integration` + +For that workload, API access is cheaper and simpler than maintaining local clones for every repository. + +If the project later needs heavyweight analysis such as line counting, large-scale parsing, or local static analysis across many files, a clone-based backend could be added as a separate collection strategy. diff --git a/src/generate_repo_overview/__init__.py b/src/generate_repo_overview/__init__.py new file mode 100644 index 0000000..faedc7b --- /dev/null +++ b/src/generate_repo_overview/__init__.py @@ -0,0 +1 @@ +"""Collect cached GitHub organization overviews and render Markdown reports.""" diff --git a/src/generate_repo_overview/_html_common.py b/src/generate_repo_overview/_html_common.py new file mode 100644 index 0000000..0f3abb3 --- /dev/null +++ b/src/generate_repo_overview/_html_common.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import html +from pathlib import Path +from typing import TYPE_CHECKING + +from .metrics_report import parse_version_key + +if TYPE_CHECKING: + from .models import RepoEntry + +_TEMPLATES = Path(__file__).parent / "templates" + +CSS = (_TEMPLATES / "styles.css").read_text(encoding="utf-8") + +BAZEL_ICON = ( + 'Bazel' +) + +GITHUB_ICON = ( + '' + '' + "" +) + + +_LANGUAGE_COLORS: dict[str, str] = { + "Python": "#3572A5", + "C++": "#f34b7d", + "C": "#555555", + "Go": "#00ADD8", + "Rust": "#dea584", + "Java": "#b07219", + "Kotlin": "#A97BFF", + "TypeScript": "#3178c6", + "JavaScript": "#f1e05a", + "Starlark": "#76d275", + "Shell": "#89e051", + "CMake": "#DA3434", + "Makefile": "#427819", +} + + +def e(text: str) -> str: + return html.escape(text, quote=True) + + +def language_badge(lang: str | None) -> str: + if not lang: + return '' + color = _LANGUAGE_COLORS.get(lang, "#888888") + return ( + f'' + f"{e(lang)}" + ) + + +def repo_name_cell(entry: RepoEntry, org_name: str, *, bazel_icon: bool = True) -> str: + detail_url = f"{e(entry.name)}/" + github_url = f"https://github.com/{org_name}/{entry.name}" + cell = f'{e(entry.name)}' + if bazel_icon and entry.content.is_bazel_repo: + cell += f" {BAZEL_ICON}" + cell += ( + f' {GITHUB_ICON}' + ) + return cell + + +def version_badge( + version: str | None, + max_bazel: tuple[int, ...] 
| None, + *, + latest_dac: str | None, + is_bazel: bool, +) -> str: + if version is None or not version.strip(): + return '' + + cleaned = version.strip() + parsed = parse_version_key(cleaned) + + if is_bazel: + if parsed is not None and max_bazel is not None and parsed == max_bazel: + return f'{e(cleaned)}' + return f'{e(cleaned)}' + + if latest_dac is None: + return f'{e(cleaned)}' + latest_cleaned = latest_dac.strip() + if cleaned == latest_cleaned: + return f'{e(cleaned)}' + if parsed is not None: + latest_parsed = parse_version_key(latest_cleaned) + if ( + latest_parsed is not None + and len(parsed) >= 2 + and len(latest_parsed) >= 2 + and parsed[:2] == latest_parsed[:2] + ): + return f'{e(cleaned)}' + return f'{e(cleaned)}' diff --git a/src/generate_repo_overview/_html_detail.py b/src/generate_repo_overview/_html_detail.py new file mode 100644 index 0000000..e9df6f4 --- /dev/null +++ b/src/generate_repo_overview/_html_detail.py @@ -0,0 +1,344 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ._html_common import BAZEL_ICON, CSS, GITHUB_ICON, e, language_badge, version_badge + +if TYPE_CHECKING: + from .models import RepoEntry, RepoSnapshot + + +def render_detail_page( + entry: RepoEntry, + org_name: str, + snapshot: RepoSnapshot, + max_bazel: tuple[int, ...] | None, + latest_dac: str | None, +) -> str: + return ( + "\n" + '\n\n' + ' \n' + ' \n' + f" {e(entry.name)} — {e(org_name)}\n" + f" \n" + "\n\n" + + _render_hero(entry, org_name) + + _render_stat_grid(entry) + + _render_release_section(entry) + + _render_dep_diff_section(entry) + + _render_tooling_section(entry) + + _render_ownership_section(entry) + + _render_versions_section(entry, max_bazel, latest_dac) + + _render_footer(snapshot) + + "\n\n" + ) + + +def _render_hero(entry: RepoEntry, org_name: str) -> str: + github_url = f"https://github.com/{org_name}/{entry.name}" + name_html = e(entry.name) + if entry.content.is_bazel_repo: + name_html += f" {BAZEL_ICON}" + + chips = f'{e(entry.category)}' + if entry.subcategory and entry.subcategory != entry.category: + chips += f' {e(entry.subcategory)}' + for lang in entry.content.top_languages: + chips += f" {language_badge(lang)}" + + desc = e(entry.description) if entry.description else "" + + return ( + "
\n" + ' \n" + f"

{name_html}" + f' {GITHUB_ICON}' + f"

\n" + f'

{desc}

\n' + f'
{chips}
\n' + "
\n\n" + ) + + +def _render_stat_grid(entry: RepoEntry) -> str: + v = entry.volatile + last_push = e(v.last_push_date) if v.last_push_date else "—" + prs_text = f"{v.open_ready_prs}+{v.open_draft_prs}" + + cards = [ + (str(entry.stars), "Stars"), + (str(entry.forks), "Forks"), + (str(v.open_issues), "Open Issues"), + (prs_text, "Open PRs (ready+draft)"), + (str(v.merged_prs_30_days), "Merged PRs (30d)"), + (last_push, "Last Push"), + ] + + items = "\n".join( + f'
' + f'
{e(val)}
' + f'
{label}
' + f"
" + for val, label in cards + ) + return f'
\n{items}\n
\n\n' + + +def _render_release_section(entry: RepoEntry) -> str: + v = entry.volatile + if v.latest_release_version is None and v.latest_release_date is None: + version_html = 'No releases' + return ( + '
\n' + '
Release
\n' + f'
{version_html}
\n' + "
\n\n" + ) + + items: list[str] = [] + if v.latest_release_version: + items.append( + f'
' + f'
Latest Version
' + f'{e(v.latest_release_version)}' + f"
" + ) + if v.latest_release_date: + items.append( + f'
' + f'
Release Date
' + f"{e(v.latest_release_date)}" + f"
" + ) + if v.commits_since_latest_release is not None: + count = v.commits_since_latest_release + badge_class = ( + "green" if count == 0 else ("yellow" if count <= 20 else "red") + ) + items.append( + f'
' + f'
Commits Since Release
' + f'{count}' + f"
" + ) + + return ( + '
\n' + '
Release
\n' + f'
{"".join(items)}
\n' + "
\n\n" + ) + + +def _render_dep_diff_section(entry: RepoEntry) -> str: + v = entry.volatile + if v.latest_release_version is None: + return "" + + head_deps = dict(entry.content.bazel_deps) + release_deps = dict(v.release_bazel_deps) + + all_names = sorted(set(head_deps) | set(release_deps)) + + head_bazel = entry.content.bazel_version + release_bazel = v.release_bazel_version + + rows: list[str] = [] + + bazel_status, bazel_class = _dep_diff_status(head_bazel, release_bazel) + rows.append( + f" \n" + f" Bazel\n" + f" {e(head_bazel) if head_bazel else ''}\n" + f" {e(release_bazel) if release_bazel else ''}\n" + f" {_dep_status_badge(bazel_status, bazel_class)}\n" + f" " + ) + + for name in all_names: + head_ver = head_deps.get(name) + rel_ver = release_deps.get(name) + status, css_class = _dep_diff_status(head_ver, rel_ver) + rows.append( + f" \n" + f" {e(name)}\n" + f" {e(head_ver) if head_ver else ''}\n" + f" {e(rel_ver) if rel_ver else ''}\n" + f" {_dep_status_badge(status, css_class)}\n" + f" " + ) + + changed_count = sum( + 1 + for r in rows + if "badge yellow" in r or "badge green" in r or "badge red" in r + ) + + if changed_count == 0 and v.commits_since_latest_release: + summary = ( + f'

No dependency changes since ' + f"{e(v.latest_release_version)}.

" + ) + else: + summary = "" + + release_label = e(v.latest_release_version) + table = ( + f" \n" + f" \n" + f" \n" + f" \n" + f" \n" + f" \n" + f" \n" + f" \n" + + "\n".join(rows) + + "\n \n
DependencyHEADRelease ({release_label})Status
" + ) + + return ( + '
\n' + '
' + 'Dependencies: HEAD vs. Release' + "
\n" + f"
{summary}{table}
\n" + "
\n\n" + ) + + +def _dep_diff_status( + head: str | None, release: str | None +) -> tuple[str, str]: + if head is None and release is None: + return "—", "muted" + if release is None: + return "added", "green" + if head is None: + return "removed", "red" + if head == release: + return "—", "muted" + return "changed", "yellow" + + +def _dep_status_badge(status: str, css_class: str) -> str: + if status == "—": + return '' + return f'{e(status)}' + + +def _render_tooling_section(entry: RepoEntry) -> str: + c = entry.content + signals = [ + (c.has_ci, "GitHub Actions (CI)"), + (c.uses_cicd_daily_workflow, "Daily Workflow"), + (c.has_lint_config, "Lint Config"), + (c.has_gitlint_config, "Gitlint"), + (c.has_pre_commit_config, "Pre-commit"), + (c.has_pyproject_toml, "pyproject.toml"), + (c.has_coverage_config, "Coverage Config"), + (c.is_bazel_repo, "Bazel Repo"), + ] + + items = "\n".join( + f'
' + f'' + f'{"✓" if val else "—"} {e(label)}
' + for val, label in signals + ) + return ( + '
\n' + '
Build & Tooling
\n' + f'
\n{items}\n
\n' + "
\n\n" + ) + + +def _render_ownership_section(entry: RepoEntry) -> str: + parts: list[str] = [] + if entry.content.codeowners: + handles = ", ".join(e(h) for h in entry.content.codeowners) + parts.append( + f'
' + f'
Codeowners
{handles}
' + ) + if entry.registry.maintainers_in_bazel_registry: + handles = ", ".join( + e(h) for h in entry.registry.maintainers_in_bazel_registry + ) + parts.append( + f'
' + f'
Registry Maintainers
{handles}
' + ) + + if not parts: + parts.append('No ownership information available') + + return ( + '
\n' + '
Ownership
\n' + f'
{"".join(parts)}
\n' + "
\n\n" + ) + + +def _render_versions_section( + entry: RepoEntry, + max_bazel: tuple[int, ...] | None, + latest_dac: str | None, +) -> str: + items: list[str] = [] + + bazel_badge = version_badge( + entry.content.bazel_version, max_bazel, latest_dac=None, is_bazel=True + ) + items.append( + f'
' + f'
Bazel Version
{bazel_badge}
' + ) + + dac_badge = version_badge( + entry.content.docs_as_code_version, None, latest_dac=latest_dac, is_bazel=False + ) + items.append( + f'
' + f'
Docs-As-Code Version
{dac_badge}
' + ) + + refint = ( + 'yes' + if entry.content.referenced_by_reference_integration + else 'no' + ) + items.append( + f'
' + f'
Reference Integration
{refint}
' + ) + + if entry.registry.latest_bazel_registry_version: + items.append( + f'
' + f'
Latest Registry Version
' + f'{e(entry.registry.latest_bazel_registry_version)}' + f"
" + ) + + return ( + '
\n' + '
Versions
\n' + f'
{"".join(items)}
\n' + "
\n\n" + ) + + +def _render_footer(snapshot: RepoSnapshot) -> str: + return ( + "\n\n\n" + ) diff --git a/src/generate_repo_overview/_html_index.py b/src/generate_repo_overview/_html_index.py new file mode 100644 index 0000000..7875e92 --- /dev/null +++ b/src/generate_repo_overview/_html_index.py @@ -0,0 +1,723 @@ +from __future__ import annotations + +import json +from collections import Counter +from datetime import date +from pathlib import Path +from typing import TYPE_CHECKING + +from ._html_common import ( + BAZEL_ICON, + CSS, + e, + language_badge, + repo_name_cell, + version_badge, +) +from .metrics_report import ( + get_latest_docs_as_code_release, + get_max_bazel_version, + group_repos_by_category, + has_latest_release, + parse_version_key, +) + +if TYPE_CHECKING: + from .models import RepoEntry, RepoSnapshot + +_INDEX_JS = (Path(__file__).parent / "templates" / "index.js").read_text( + encoding="utf-8" +) + + +def render_index_page(snapshot: RepoSnapshot) -> str: + repos = sorted(snapshot.repos, key=lambda r: r.name.casefold()) + categories = group_repos_by_category(repos) + return ( + "\n" + '\n\n' + ' \n' + ' \n' + f" Cross-Repo Metrics — {e(snapshot.org_name)}\n" + f" \n" + "\n\n" + + _render_header(snapshot, repos) + + _render_tab_bar() + + _render_filters_placeholder() + + '
\n' + + _render_overview_sections(categories, snapshot.org_name) + + _render_versions_sections(categories, repos, snapshot.org_name) + + _render_automation_sections(categories, snapshot.org_name) + + _render_timeline_section(repos, snapshot.org_name) + + "
\n" + + _render_footer(snapshot) + + _render_script(categories) + + "\n\n" + ) + + +def _render_header(snapshot: RepoSnapshot, repos: list[RepoEntry]) -> str: + total = len(repos) + with_ci = sum(r.content.has_ci for r in repos) + with_releases = sum(has_latest_release(r) for r in repos) + with_lint = sum(r.content.has_lint_config for r in repos) + bazel_repos = sum(r.content.is_bazel_repo for r in repos) + + lang_chips = _render_language_distribution(repos) + + return ( + "
\n" + "

Cross-Repo Metrics Report

\n" + f'

Generated {e(snapshot.generated_at)}

\n' + '
\n' + f' {total} repositories\n' + f' {with_ci} with CI\n' + f' {with_releases} with releases\n' + f' {bazel_repos} Bazel repos\n' + f' {with_lint} with lint config\n' + "
\n" + + (f'
{lang_chips}
\n' if lang_chips else "") + + "
\n\n" + ) + + +def _render_language_distribution(repos: list[RepoEntry]) -> str: + counts = Counter( + r.content.top_languages[0] for r in repos if r.content.top_languages + ) + if not counts: + return "" + top = counts.most_common(4) + other = sum(counts.values()) - sum(c for _, c in top) + parts = [ + f"{language_badge(lang)} {count}" + for lang, count in top + ] + if other > 0: + parts.append(f'+{other} other') + return " ".join(parts) + + +def _render_tab_bar() -> str: + return ( + '
\n' + ' \n' + ' \n' + ' \n' + ' \n' + "
\n\n" + ) + + +def _render_filters_placeholder() -> str: + return '
\n\n' + + +def _render_overview_sections( + categories: list[tuple[str, list[RepoEntry]]], + org_name: str, +) -> str: + parts: list[str] = [] + for category, cat_repos in categories: + rows = "\n".join(_overview_row(r, org_name) for r in cat_repos) + parts.append( + f'
\n' + f'
\n' + f' {e(category)}\n' + f' {len(cat_repos)}\n' + f"
\n" + f" \n" + f" \n" + f' \n' + f' \n' + f' \n' + f' \n' + f' \n' + f' \n' + f" \n" + f" \n{rows}\n \n" + f"
Repository Merged PRs (30d) Open Issues Open PRs Latest Release Stars / Forks
\n" + f"
\n" + ) + return "".join(parts) + + +def _overview_row(entry: RepoEntry, org_name: str) -> str: + name_cell = repo_name_cell(entry, org_name) + repo_url = f"https://github.com/{org_name}/{entry.name}" + + merged = _render_merged_badge(entry.volatile.merged_prs_30_days) + issues_cell = _render_issues_cell(entry.volatile.open_issues, repo_url) + prs_cell = _render_prs_cell( + entry.volatile.open_ready_prs, + entry.volatile.open_draft_prs, + repo_url, + ) + release = _render_release( + entry.volatile.latest_release_version, + entry.volatile.commits_since_latest_release, + ) + stars_forks = f"{entry.stars} / {entry.forks}" + + cnt = entry.volatile.merged_prs_30_days + if cnt == 0: + merged_tip = "No pull requests were merged in the last 30 days." + elif cnt >= 10: + merged_tip = f"\U0001f525 {cnt} pull requests merged in the last 30 days — very active!" + else: + merged_tip = ( + f"{cnt} pull request{'s' if cnt != 1 else ''} merged in the last 30 days." + ) + + n = entry.volatile.open_issues + issues_tip = f"{n} open issue{'s' if n != 1 else ''} in this repository." + + ready = entry.volatile.open_ready_prs + draft = entry.volatile.open_draft_prs + total_prs = ready + draft + prs_tip = f"{ready} ready for review + {draft} in draft — {total_prs} open pull request{'s' if total_prs != 1 else ''} in total." + + ver = entry.volatile.latest_release_version + commits = entry.volatile.commits_since_latest_release + if ver is None: + release_tip = "No release has been published for this repository." + elif commits is None: + release_tip = str(ver) + elif commits == 0: + release_tip = f"{ver} — the main branch is fully up to date with this release." + else: + release_tip = f"{ver} — {commits} commit{'s' if commits != 1 else ''} on the main branch not yet included in a release." + + stars_tip = f"{entry.stars} star{'s' if entry.stars != 1 else ''} · {entry.forks} fork{'s' if entry.forks != 1 else ''}" + + return ( + f' \n' + f" {name_cell}\n" + f' {merged}\n' + f' {issues_cell}\n' + f' {prs_cell}\n' + f' {release}\n' + f' {stars_forks}\n' + f" " + ) + + +def _render_merged_badge(count: int) -> str: + if count >= 10: + return f'\U0001f525 {count}' + return str(count) + + +def _render_issues_cell(issues: int, repo_url: str) -> str: + if issues == 0: + return '' + url = e(f"{repo_url}/issues") + return ( + f'{issues}' + ) + + +def _render_prs_cell(ready_prs: int, draft_prs: int, repo_url: str) -> str: + if ready_prs == 0 and draft_prs == 0: + return '' + url = e(f"{repo_url}/pulls") + if ready_prs > 5: + content = f'{ready_prs}+{draft_prs}' + else: + content = f"{ready_prs}+{draft_prs}" + return ( + f'{content}' + ) + + +def _render_release(version: str | None, commits_since: int | None) -> str: + if version is None and commits_since is None: + return '' + ver = e(version) if version else "—" + if commits_since is None: + return f'{ver}' + badge_class = ( + "green" if commits_since == 0 else ("yellow" if commits_since <= 20 else "red") + ) + icon = "✓" if commits_since == 0 else str(commits_since) + return ( + f'{ver} ' + f'+{icon}' + ) + + +_DAC_DEP_NAME = "score_docs_as_code" + + +def _build_version_tooltip( + *, + dependency_version_as_used_on_main_branch: str | None, + latest_available_dependency_version: str | None, + dependency_version_as_used_in_last_release: str | None, + component_name: str, + last_release_tag: str | None = None, +) -> str: + """Build a human-readable tooltip for version comparison. 
+ + Generic function to compare a component's current version (on main) with the + latest available version and what was used in the last release. + + Args: + dependency_version_as_used_on_main_branch: Version currently in use on main branch + latest_available_dependency_version: Latest available version globally + dependency_version_as_used_in_last_release: Version used in the most recent release + component_name: Human-readable component name (e.g., "Bazel", "Docs-As-Code") + last_release_tag: Optional release tag for "was X at " suffix + + Returns: + Human-readable tooltip text + """ + if dependency_version_as_used_on_main_branch is not None: + assert ( + dependency_version_as_used_on_main_branch + == dependency_version_as_used_on_main_branch.strip() + ) + if latest_available_dependency_version is not None: + assert ( + latest_available_dependency_version + == latest_available_dependency_version.strip() + ) + + # Handle component not in use + if not dependency_version_as_used_on_main_branch: + if dependency_version_as_used_in_last_release: + return ( + f"{component_name} is not currently used on the main branch," + f" but was used in the last release." + ) + else: + return f"{component_name} is not used in this repository." + + # Handle missing latest version (no comparison possible) + if latest_available_dependency_version is None: + return f"{component_name} {dependency_version_as_used_on_main_branch} is in use." + + # Build intro: note if version changed between the last release and main + version_changed = ( + dependency_version_as_used_in_last_release + and last_release_tag + and dependency_version_as_used_in_last_release + != dependency_version_as_used_on_main_branch + ) + if version_changed: + tip = ( + f"{component_name} was {dependency_version_as_used_in_last_release}" + f" at {last_release_tag}, updated to" + f" {dependency_version_as_used_on_main_branch} on the main branch" + ) + else: + tip = f"{component_name} {dependency_version_as_used_on_main_branch}" + + # Append up-to-date status + if dependency_version_as_used_on_main_branch == latest_available_dependency_version: + tip += " — now up to date." if version_changed else " — up to date (latest known version)." + else: + current_parts = parse_version_key(dependency_version_as_used_on_main_branch) + latest_parts = parse_version_key(latest_available_dependency_version) + is_patch_only = ( + current_parts + and latest_parts + and len(current_parts) >= 2 + and len(latest_parts) >= 2 + and current_parts[:2] == latest_parts[:2] + ) + if is_patch_only: + tip += f" — a patch update to {latest_available_dependency_version} is available." + else: + tip += f" — an update to {latest_available_dependency_version} is available." + + return tip + + +def _render_dep_changes( + entry: RepoEntry, excluded_deps: frozenset[str] = frozenset() +) -> tuple[str, str]: + """Return (cell_html, tooltip) for the Other Dep Changes column.""" + if entry.volatile.latest_release_version is None: + return '', "No release has been published — nothing to compare against." 
+ + head_deps = dict(entry.content.bazel_deps) + release_deps = dict(entry.volatile.release_bazel_deps) + + changes: list[str] = [] + all_names = sorted(set(head_deps) | set(release_deps)) + for name in all_names: + if name in excluded_deps: + continue + hv = head_deps.get(name) + rv = release_deps.get(name) + if hv != rv: + changes.append(f"{name}: {rv or '—'} → {hv or '—'}") + + count = len(changes) + if count == 0: + tip = f"No dependency changes between {entry.volatile.latest_release_version} and the current main branch." + cell = 'no changes' + return cell, tip + + badge_class = "yellow" if count <= 5 else "red" + cell = f'{count} changed' + tip = "; ".join(changes[:8]) + if len(changes) > 8: + tip += f" (+{len(changes) - 8} more)" + return cell, tip + + +def _render_versions_sections( + categories: list[tuple[str, list[RepoEntry]]], + repos: list[RepoEntry], + org_name: str, +) -> str: + max_bazel = get_max_bazel_version(repos) + latest_dac = get_latest_docs_as_code_release(repos) + parts: list[str] = [] + for category, cat_repos in categories: + rows = "\n".join( + _versions_row(r, org_name, max_bazel, latest_dac) for r in cat_repos + ) + parts.append( + f'\n" + ) + return "".join(parts) + + +def _versions_row( + entry: RepoEntry, + org_name: str, + max_bazel: tuple[int, ...] | None, + latest_dac: str | None, +) -> str: + name_cell = repo_name_cell(entry, org_name) + + bazel_cell = version_badge( + entry.content.bazel_version, max_bazel, latest_dac=None, is_bazel=True + ) + release_bazel = entry.volatile.release_bazel_version + if release_bazel and release_bazel != entry.content.bazel_version: + bazel_cell = ( + f'{e(release_bazel)} → {bazel_cell}' + ) + + release_deps = dict(entry.volatile.release_bazel_deps) + release_dac = release_deps.get(_DAC_DEP_NAME) + dac_cell = version_badge( + entry.content.docs_as_code_version, None, latest_dac=latest_dac, is_bazel=False + ) + if release_dac and release_dac != entry.content.docs_as_code_version: + dac_cell = f'{e(release_dac)} → {dac_cell}' + + # Deps rendered in their own column — excluded from "Other Dep Changes" + dedicated_deps = frozenset({_DAC_DEP_NAME}) + refint = ( + 'yes' + if entry.content.referenced_by_reference_integration + else 'no' + ) + + # Format latest Bazel version as string for generic comparison + max_bazel_str = ".".join(str(x) for x in max_bazel) if max_bazel else None + bazel_tip = _build_version_tooltip( + dependency_version_as_used_on_main_branch=entry.content.bazel_version, + latest_available_dependency_version=max_bazel_str, + dependency_version_as_used_in_last_release=release_bazel, + component_name="Bazel", + last_release_tag=entry.volatile.latest_release_version, + ) + + # Generate Docs-As-Code version comparison tooltip + dac_tip = _build_version_tooltip( + dependency_version_as_used_on_main_branch=entry.content.docs_as_code_version, + latest_available_dependency_version=latest_dac, + dependency_version_as_used_in_last_release=release_dac, + component_name="Docs-As-Code", + last_release_tag=entry.volatile.latest_release_version, + ) + + refint_tip = ( + "This repository is included in the shared reference integration." + if entry.content.referenced_by_reference_integration + else "This repository is not included in the shared reference integration." 
+ ) + + release = _render_release( + entry.volatile.latest_release_version, + entry.volatile.commits_since_latest_release, + ) + ver = entry.volatile.latest_release_version + commits = entry.volatile.commits_since_latest_release + if ver is None: + release_tip = "No release has been published for this repository." + elif commits is None: + release_tip = str(ver) + elif commits == 0: + release_tip = f"{ver} — the main branch is fully up to date with this release." + else: + release_tip = f"{ver} — {commits} commit{'s' if commits != 1 else ''} on the main branch not yet included in a release." + + dep_changes_cell, dep_changes_tip = _render_dep_changes(entry, dedicated_deps) + + return ( + f" \n" + f" {name_cell}\n" + f' {bazel_cell}\n' + f' {dac_cell}\n' + f' {refint}\n' + f' {release}\n' + f' {dep_changes_cell}\n' + f" " + ) + + +def _render_automation_sections( + categories: list[tuple[str, list[RepoEntry]]], + org_name: str, +) -> str: + parts: list[str] = [] + for category, cat_repos in categories: + rows = "\n".join(_automation_row(r, org_name) for r in cat_repos) + parts.append( + f'\n" + ) + return "".join(parts) + + +def _automation_row(entry: RepoEntry, org_name: str) -> str: + name_cell = repo_name_cell(entry, org_name, bazel_icon=False) + c = entry.content + + def _presence(val: bool, icon: str) -> str: + if val: + return f'{icon}' + return '' + + def _yesno(val: bool) -> str: + if val: + return 'yes' + return 'no' + + tips = { + "bazel": "This repository uses Bazel as its build system." + if c.is_bazel_repo + else "This repository does not use Bazel.", + "gitlint": "This repository enforces commit message formatting rules (gitlint)." + if c.has_gitlint_config + else "This repository has no commit message formatting rules configured.", + "pyproject": "This repository has a pyproject.toml (standard Python project configuration)." + if c.has_pyproject_toml + else "This repository does not have a pyproject.toml.", + "precommit": "This repository runs automated checks (formatting, linting, etc.) before each commit is accepted." + if c.has_pre_commit_config + else "This repository has no automated pre-commit checks configured.", + "ci": "This repository has automated CI/CD pipelines that run on every push or pull request." + if c.has_ci + else "This repository has no automated CI/CD pipelines.", + "daily": "This repository has a scheduled daily job that runs automated tests and checks." + if c.uses_cicd_daily_workflow + else "This repository has no scheduled daily automated checks.", + "coverage": "This repository measures test coverage — tracking how much of the code is exercised by automated tests." 
+ if c.has_coverage_config + else "This repository does not measure test coverage.", + } + + langs = entry.content.top_languages + lang_cell = ( + " ".join(language_badge(lang) for lang in langs) + if langs + else '' + ) + lang_tip = ", ".join(langs) if langs else "Language unknown" + + return ( + f" \n" + f" {name_cell}\n" + f' {lang_cell}\n' + f' {_presence(c.is_bazel_repo, BAZEL_ICON)}\n' + f' {_presence(c.has_gitlint_config, "\U0001f50d")}\n' + f' {_presence(c.has_pyproject_toml, "\U0001f40d")}\n' + f' {_presence(c.has_pre_commit_config, "\U0001fa9d")}\n' + f' {_presence(c.has_ci, "⚙️")}\n' + f' {_yesno(c.uses_cicd_daily_workflow)}\n' + f' {_yesno(c.has_coverage_config)}\n' + f" " + ) + + +_TIMELINE_TIERS: list[tuple[str, int, int | None]] = [ + ("Released in the last 30 days", 0, 30), + ("Released this quarter (30-90 days ago)", 30, 90), + ("Released more than 90 days ago", 90, None), +] + + +def _parse_release_date(r: RepoEntry) -> date | None: + raw = r.volatile.latest_release_date + if not raw: + return None + try: + return date.fromisoformat(raw) + except ValueError: + return None + + +def _build_timeline_tier_html( + with_release: list[tuple[RepoEntry, date]], + org_name: str, + today: date, +) -> str: + html_parts: list[str] = [] + remaining = list(with_release) + for label, min_days, max_days in _TIMELINE_TIERS: + tier_rows: list[str] = [] + next_remaining: list[tuple[RepoEntry, date]] = [] + for r, d in remaining: + age = (today - d).days + in_tier = age >= min_days and (max_days is None or age < max_days) + if in_tier: + tier_rows.append(_timeline_row(r, org_name, d)) + else: + next_remaining.append((r, d)) + remaining = next_remaining + if tier_rows: + html_parts.append( + f' {e(label)}\n' + + "".join(tier_rows) + ) + return "".join(html_parts) + + +def _render_timeline_section(repos: list[RepoEntry], org_name: str) -> str: + today = date.today() + + with_release = sorted( + ((r, d) for r in repos if (d := _parse_release_date(r)) is not None), + key=lambda rd: rd[1], + reverse=True, + ) + without_release = [r for r in repos if _parse_release_date(r) is None] + + recent_count = sum(1 for _, d in with_release if (today - d).days <= 30) + unreleased_count = len(without_release) + summary = ( + f"{recent_count} release{'s' if recent_count != 1 else ''} in the last 30 days" + ) + if unreleased_count: + summary += f" · {unreleased_count} repo{'s' if unreleased_count != 1 else ''} with no release" + + tier_html = _build_timeline_tier_html(with_release, org_name, today) + + if without_release: + unreleased_rows = "".join( + _timeline_row_unreleased(r, org_name) for r in without_release + ) + tier_html += ( + ' No release\n' + + unreleased_rows + ) + + return ( + '\n" + ) + + +def _timeline_row(entry: RepoEntry, org_name: str, release_date: object) -> str: + name_cell = repo_name_cell(entry, org_name) + ver = entry.volatile.latest_release_version or "—" + freshness = _render_release( + entry.volatile.latest_release_version, + entry.volatile.commits_since_latest_release, + ) + date_str = str(release_date) + return ( + f" \n" + f" {name_cell}\n" + f' {e(ver)}\n' + f" {e(date_str)}\n" + f" {freshness}\n" + f" \n" + ) + + +def _timeline_row_unreleased(entry: RepoEntry, org_name: str) -> str: + name_cell = repo_name_cell(entry, org_name) + return ( + f" \n" + f" {name_cell}\n" + f' —\n' + f' —\n' + f' —\n' + f" \n" + ) + + +def _render_footer(snapshot: RepoSnapshot) -> str: + return ( + f"\n
\n" + f" Cross-repo metrics for {e(snapshot.org_name)} " + f"— generated {e(snapshot.generated_at)}\n" + f"
\n\n" + ) + + +def _render_script( + categories: list[tuple[str, list[RepoEntry]]], +) -> str: + cat_names = json.dumps(["all"] + [c for c, _ in categories]) + return ( + f"\n" + f"\n" + ) diff --git a/src/generate_repo_overview/_text_utils.py b/src/generate_repo_overview/_text_utils.py new file mode 100644 index 0000000..67c7c03 --- /dev/null +++ b/src/generate_repo_overview/_text_utils.py @@ -0,0 +1,6 @@ +from __future__ import annotations + + +def escape_markdown_table_cell(text: str) -> str: + normalized = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ") + return normalized.replace("|", r"\|") diff --git a/src/generate_repo_overview/cli.py b/src/generate_repo_overview/cli.py new file mode 100644 index 0000000..64e0cff --- /dev/null +++ b/src/generate_repo_overview/cli.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +from textwrap import dedent +from typing import TYPE_CHECKING + +from .collector import collect_snapshot, load_snapshot +from .console import print_status +from .constants import ( + DEFAULT_CACHE, + DEFAULT_METRICS_HTML_OUTPUT, + DEFAULT_ORG, + DEFAULT_OUTPUT, + DEFAULT_TOKEN_ENV, +) +from .metrics_html import render_all_pages +from .profile_readme import load_config, load_template, render_readme + +if TYPE_CHECKING: + from collections.abc import Sequence + + from .models import RepoSnapshot + + +CLI_EPILOG = dedent( + f"""\ + Quick start: + uv run generate-repo-overview collect + Sync the cached snapshot from GitHub. + + uv run generate-repo-overview render-overview + Re-render the profile README from the local cache. + + uv run generate-repo-overview render-details + Re-render the HTML metrics page from the local cache. + + Defaults: + Cache: {DEFAULT_CACHE} + README: {DEFAULT_OUTPUT} + + Use `uv run generate-repo-overview --help` for command-specific options. + """ +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=( + "Collect cached GitHub organization repository overviews and render " + "different views from the same snapshot." + ), + epilog=CLI_EPILOG, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + subparsers = parser.add_subparsers( + dest="command", + metavar="command", + ) + + collect_parser = subparsers.add_parser( + "collect", + help="Collect and write the cached repository snapshot.", + ) + collect_parser.add_argument( + "--org", default=DEFAULT_ORG, help="GitHub organization name" + ) + collect_parser.add_argument( + "--cache", type=Path, default=DEFAULT_CACHE, help="JSON snapshot cache file" + ) + collect_parser.add_argument( + "--token-env", + default=DEFAULT_TOKEN_ENV, + help="Environment variable that contains the GitHub token", + ) + collect_parser.add_argument( + "--deep", + action="store_true", + help=( + "Force a deep refresh for every repository. " + "By default, unchanged repositories reuse cached detailed signals." 
+ ), + ) + + overview_parser = subparsers.add_parser( + "render-overview", + help="Render the profile README from a cached snapshot.", + ) + overview_parser.add_argument( + "--input", + type=Path, + default=DEFAULT_CACHE, + help="JSON snapshot file to render from", + ) + overview_parser.add_argument( + "--output", type=Path, default=DEFAULT_OUTPUT, help="Markdown file to write" + ) + overview_parser.add_argument( + "--template", + type=Path, + help="Optional markdown template file with a {{ repo_sections }} placeholder", + ) + overview_parser.add_argument( + "--config", + type=Path, + help="Optional category config file that defines order and descriptions", + ) + + details_parser = subparsers.add_parser( + "render-details", + help="Render the HTML metrics page from a cached snapshot.", + ) + details_parser.add_argument( + "--input", + type=Path, + default=DEFAULT_CACHE, + help="JSON snapshot file to render from", + ) + details_parser.add_argument( + "--output", + type=Path, + default=DEFAULT_METRICS_HTML_OUTPUT, + help="Output directory for HTML pages", + ) + + return parser + + +def main(argv: Sequence[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + command = args.command + + if command is None: + parser.print_help() + return 0 + + if command == "collect": + return run_collect(args) + if command == "render-overview": + return run_render_overview(args) + if command == "render-details": + return run_render_details(args) + raise ValueError(f"Unsupported command {command!r}.") + + +def run_collect(args: argparse.Namespace) -> int: + collect_snapshot( + org_name=args.org, + token_env=args.token_env, + cache_path=args.cache, + reuse_unchanged_repositories=not args.deep, + status_prefix="repo-overview", + ) + return 0 + + +def run_render_overview(args: argparse.Namespace) -> int: + snapshot = load_snapshot(args.input) + markdown = render_profile_readme( + snapshot, + template_path=args.template, + config_path=args.config, + ) + write_text_file(path=args.output, content=markdown, status_prefix="repo-overview") + return 0 + + +def run_render_details(args: argparse.Namespace) -> int: + snapshot = load_snapshot(args.input) + pages = render_all_pages(snapshot) + output_dir: Path = args.output + for relative_path, content in pages.items(): + write_text_file( + path=output_dir / relative_path, + content=content, + status_prefix="repo-overview", + ) + return 0 + + +def render_profile_readme( + snapshot: RepoSnapshot, + *, + template_path: Path | None, + config_path: Path | None, +) -> str: + template = load_template(template_path) + config = load_config(config_path) + return render_readme( + list(snapshot.repos), + template=template, + config=config, + org_name=snapshot.org_name, + ) + + +def write_text_file(*, path: Path, content: str, status_prefix: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + print_status(f"Wrote {path}", prefix=status_prefix) diff --git a/src/generate_repo_overview/collector/__init__.py b/src/generate_repo_overview/collector/__init__.py new file mode 100644 index 0000000..9609eed --- /dev/null +++ b/src/generate_repo_overview/collector/__init__.py @@ -0,0 +1,463 @@ +from __future__ import annotations + +import os +import subprocess +import sys +from concurrent.futures import Future, ThreadPoolExecutor, as_completed +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Any, Protocol, cast + +from tqdm import tqdm + +from 
generate_repo_overview.console import print_status +from generate_repo_overview.constants import ( + DEFAULT_CACHE, + DEFAULT_ORG, + DEFAULT_TOKEN_ENV, +) +from generate_repo_overview.models import ( + SNAPSHOT_SCHEMA_VERSION, + CustomPropertyValue, + RepoEntry, + RepoSnapshot, +) + +from . import reference_integration, registry_metadata, repo_entry +from .registry_metadata import RegistrySignalsPayload +from .snapshot_io import load_snapshot, load_snapshot_if_present, write_snapshot + +if TYPE_CHECKING: + from pathlib import Path + + +class OrganizationLike(Protocol): + @property + def login(self) -> str: ... + + requester: Any + + +class GitHubClientLike(Protocol): + def get_rate_limit(self) -> object: ... + + +@dataclass(frozen=True, slots=True) +class ActiveRepositoryData: + repository: object + custom_properties: dict[str, CustomPropertyValue] + + +DEFAULT_MAX_COLLECTION_WORKERS = 8 + +__all__ = [ + "DEFAULT_MAX_COLLECTION_WORKERS", + "SNAPSHOT_SCHEMA_VERSION", + "ActiveRepositoryData", + "RegistrySignalsPayload", + "collect_snapshot", + "ensure_snapshot", + "fetch_active_repositories", + "fetch_active_repositories_via_rest", + "fetch_repositories", + "fetch_repository_descriptions", + "get_gh_auth_token", + "load_snapshot", + "load_snapshot_if_present", + "paginate_github_rest_list", + "parse_repository_custom_properties", + "resolve_github_token", + "resolve_max_collection_workers", + "write_snapshot", +] + + +def resolve_github_token(token_env: str = DEFAULT_TOKEN_ENV) -> str | None: + token = os.getenv(token_env) + if token: + return token + return get_gh_auth_token() + + +def get_gh_auth_token() -> str | None: + try: + result = subprocess.run( + ["gh", "auth", "token"], + check=True, + capture_output=True, + text=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return None + + token = result.stdout.strip() + return token or None + + +def ensure_snapshot( + *, + org_name: str = DEFAULT_ORG, + cache_path: Path = DEFAULT_CACHE, + token_env: str = DEFAULT_TOKEN_ENV, + refresh: bool = False, + status_prefix: str = "repo-overview", +) -> RepoSnapshot: + if not refresh: + cached_snapshot = load_snapshot_if_present(cache_path) + if cached_snapshot is not None: + print_status( + f"Loading cached snapshot from {cache_path}", + prefix=status_prefix, + ) + return cached_snapshot + + return collect_snapshot( + org_name=org_name, + token_env=token_env, + cache_path=cache_path, + status_prefix=status_prefix, + ) + + +def collect_snapshot( + *, + org_name: str = DEFAULT_ORG, + token_env: str = DEFAULT_TOKEN_ENV, + cache_path: Path | None = DEFAULT_CACHE, + reuse_unchanged_repositories: bool = False, + status_prefix: str = "repo-overview", +) -> RepoSnapshot: + try: + from github import Auth, Github + except ModuleNotFoundError as exc: + raise SystemExit( + "Missing PyGithub. Install project dependencies before running the generator." + ) from exc + + token = resolve_github_token(token_env) + if not token: + message = f"Missing GitHub token. Set {token_env} or authenticate with `gh auth login`." 
+ raise SystemExit(message) + + existing_snapshot = ( + load_snapshot_if_present(cache_path) if cache_path is not None else None + ) + + print_status(f"Connecting to GitHub organization {org_name}", prefix=status_prefix) + github = Github(auth=Auth.Token(token), lazy=True) + print_rest_api_rate_limit( + github, + when="before collection", + status_prefix=status_prefix, + ) + try: + organization = github.get_organization(org_name) + print_status("Collecting repository overview", prefix=status_prefix) + repos = fetch_repositories( + organization, + existing_snapshot=existing_snapshot, + reuse_unchanged_repositories=reuse_unchanged_repositories, + github_token=token, + status_prefix=status_prefix, + ) + + snapshot = RepoSnapshot( + schema_version=SNAPSHOT_SCHEMA_VERSION, + org_name=org_name, + generated_at=datetime.now(UTC).isoformat(), + repos=tuple(repos), + ) + if cache_path is not None: + write_snapshot(snapshot, cache_path) + print_status(f"Wrote snapshot to {cache_path}", prefix=status_prefix) + return snapshot + finally: + print_rest_api_rate_limit( + github, + when="after collection", + status_prefix=status_prefix, + ) + + +def print_rest_api_rate_limit( + github_client: GitHubClientLike, + *, + when: str, + status_prefix: str, +) -> None: + try: + rate_limit = github_client.get_rate_limit() + resources = getattr(rate_limit, "resources", None) + core_rate_limit = getattr(resources, "core", None) + if core_rate_limit is None: + core_rate_limit = getattr(rate_limit, "core", None) + if core_rate_limit is None: + raise AttributeError("Missing core rate limit data.") + except Exception as exc: + print_status( + f"GitHub REST API rate limit {when}: unavailable ({exc})", + prefix=status_prefix, + ) + return + + reset_at = getattr(core_rate_limit, "reset", None) + if isinstance(reset_at, datetime): + reset_display = reset_at.isoformat() + else: + reset_display = "unknown" + + print_status( + "GitHub REST API rate limit " + f"{when}: remaining {getattr(core_rate_limit, 'remaining', 'unknown')}/" + f"{getattr(core_rate_limit, 'limit', 'unknown')}, " + f"used {getattr(core_rate_limit, 'used', 'unknown')}, " + f"resets at {reset_display}", + prefix=status_prefix, + ) + + +def fetch_repositories( + organization: OrganizationLike, + existing_snapshot: RepoSnapshot | None = None, + *, + reuse_unchanged_repositories: bool = False, + github_token: str | None = None, + status_prefix: str = "repo-overview", +) -> list[RepoEntry]: + print_status("Loading active repositories", prefix=status_prefix) + active_repositories = fetch_active_repositories(organization) + print_status( + f"Found {len(active_repositories)} active repositories", + prefix=status_prefix, + ) + print_status( + "Extracting repository custom properties from repo payloads", + prefix=status_prefix, + ) + repositories_with_custom_properties = sum( + 1 + for repository_data in active_repositories.values() + if repository_data.custom_properties + ) + print_status( + "Extracted custom properties for " + f"{repositories_with_custom_properties} repositories", + prefix=status_prefix, + ) + print_status("Loading maintainers in bazel_registry", prefix=status_prefix) + bazel_registry_data = active_repositories.get("bazel_registry") + bazel_registry_metadata_by_repo = ( + registry_metadata.fetch_bazel_registry_metadata_by_repo( + bazel_registry_repository=( + bazel_registry_data.repository + if bazel_registry_data is not None + else None + ), + active_repository_names=set(active_repositories), + github_token=github_token, + ) + ) + print_status( + 
"Loaded bazel_registry metadata for " + f"{len(bazel_registry_metadata_by_repo)} active repositories", + prefix=status_prefix, + ) + print_status( + "Loading reference_integration Bazel dependencies", + prefix=status_prefix, + ) + reference_integration_data = active_repositories.get("reference_integration") + reference_integration_repository_names = ( + reference_integration.fetch_reference_integration_repository_names( + reference_integration_repository=( + reference_integration_data.repository + if reference_integration_data is not None + else None + ), + active_repository_names=set(active_repositories), + github_token=github_token, + ) + ) + print_status( + "Loaded reference_integration Bazel dependencies for " + f"{len(reference_integration_repository_names)} active repositories", + prefix=status_prefix, + ) + + cached_by_name = ( + {repo.name: repo for repo in existing_snapshot.repos} + if existing_snapshot is not None + else {} + ) + sorted_repositories = sorted( + active_repositories.items(), + key=lambda item: item[0].casefold(), + ) + + total_repositories = len(sorted_repositories) + if total_repositories == 0: + return [] + + max_workers = min(resolve_max_collection_workers(), total_repositories) + print_status( + f"Collecting repository details with up to {max_workers} parallel workers", + prefix=status_prefix, + ) + + repos_by_index: dict[int, RepoEntry] = {} + with ( + ThreadPoolExecutor(max_workers=max_workers) as executor, + tqdm( + total=total_repositories, + desc="Finished", + unit="repo", + file=sys.stderr, + disable=not sys.stderr.isatty(), + ) as progress, + ): + futures: dict[Future[RepoEntry], tuple[int, str]] = {} + for index, (repository_name, repository_data) in enumerate( + sorted_repositories, + start=1, + ): + cached_entry = cached_by_name.get(repository_name) + future = executor.submit( + repo_entry.collect_repository_entry, + repository_name=repository_name, + repository=repository_data.repository, + custom_properties=repository_data.custom_properties, + bazel_registry_metadata=bazel_registry_metadata_by_repo.get( + repository_name + ), + cached_entry=cached_entry, + referenced_by_reference_integration=( + repository_name in reference_integration_repository_names + ), + reuse_cached_entry_when_unchanged=reuse_unchanged_repositories, + ) + futures[future] = (index, repository_name) + + for future in as_completed(futures): + index, repository_name = futures[future] + repos_by_index[index] = future.result() + progress.update(1) + progress.set_postfix_str(repository_name) + + return [repos_by_index[index] for index in range(1, total_repositories + 1)] + + +def resolve_max_collection_workers() -> int: + raw_value = os.getenv("REPO_OVERVIEW_MAX_WORKERS", "").strip() + if raw_value: + try: + parsed = int(raw_value) + except ValueError: + return DEFAULT_MAX_COLLECTION_WORKERS + if parsed > 0: + return parsed + return DEFAULT_MAX_COLLECTION_WORKERS + + +def fetch_active_repositories( + organization: OrganizationLike, +) -> dict[str, ActiveRepositoryData]: + return fetch_active_repositories_via_rest( + requester=organization.requester, + org_login=organization.login, + ) + + +def fetch_active_repositories_via_rest( + *, + requester: Any, + org_login: str, +) -> dict[str, ActiveRepositoryData]: + from github.Repository import Repository + + active_repositories: dict[str, ActiveRepositoryData] = {} + repo_items = paginate_github_rest_list( + requester=requester, + path=f"/orgs/{org_login}/repos", + parameters={"type": "all", "sort": "full_name", "direction": "asc"}, + ) + 
for response_headers, payload in repo_items: + repository = Repository( + requester=requester, + headers=response_headers, + attributes=payload, + completed=True, + ) + repository_name = cast("str | None", getattr(repository, "name", None)) + if repository_name is None or cast( + "bool", getattr(repository, "archived", False) + ): + continue + active_repositories[repository_name] = ActiveRepositoryData( + repository=repository, + custom_properties=parse_repository_custom_properties(repository), + ) + return active_repositories + + +def paginate_github_rest_list( + *, + requester: Any, + path: str, + parameters: dict[str, Any] | None = None, + per_page: int = 100, +) -> list[tuple[dict[str, Any], dict[str, object]]]: + page = 1 + items: list[tuple[dict[str, Any], dict[str, object]]] = [] + while True: + page_parameters = dict(parameters or {}) + page_parameters["per_page"] = per_page + page_parameters["page"] = page + response_headers, data = requester.requestJsonAndCheck( + "GET", + path, + parameters=page_parameters, + ) + if not isinstance(data, list): + raise RuntimeError( + f"GitHub API call to {path} returned a non-list payload." + ) + page_items = [item for item in data if isinstance(item, dict)] + items.extend( + (cast("dict[str, Any]", response_headers), item) for item in page_items + ) + if len(data) < per_page: + break + page += 1 + return items + + +def fetch_repository_descriptions( + organization: OrganizationLike, +) -> dict[str, str | None]: + return { + name: cast( + "str | None", getattr(repository_data.repository, "description", None) + ) + for name, repository_data in fetch_active_repositories(organization).items() + } + + +def parse_repository_custom_properties( + repository: object, +) -> dict[str, CustomPropertyValue]: + repository_fields = vars(repository) + preloaded_attribute = repository_fields.get("_custom_properties") + preloaded_value = getattr(preloaded_attribute, "value", None) + if not isinstance(preloaded_value, dict): + return {} + + parsed: dict[str, CustomPropertyValue] = {} + for key, value in preloaded_value.items(): + if not isinstance(key, str): + continue + if value is None or isinstance(value, str): + parsed[key] = value + continue + if isinstance(value, list): + parsed[key] = [item for item in value if isinstance(item, str)] + return parsed diff --git a/src/generate_repo_overview/collector/git_checkout.py b/src/generate_repo_overview/collector/git_checkout.py new file mode 100644 index 0000000..af813cd --- /dev/null +++ b/src/generate_repo_overview/collector/git_checkout.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import shutil +import subprocess +from typing import TYPE_CHECKING +from urllib.parse import quote, urlsplit, urlunsplit + +if TYPE_CHECKING: + from pathlib import Path + + +def sync_repository_checkout( + *, + clone_url: str, + default_branch: str, + github_token: str | None, + checkout_path: Path, +) -> Path | None: + authenticated_url = build_authenticated_clone_url(clone_url, github_token) + checkout_path.parent.mkdir(parents=True, exist_ok=True) + + if update_existing_checkout(checkout_path, default_branch): + return checkout_path + + if not clone_fresh_checkout( + authenticated_url=authenticated_url, + default_branch=default_branch, + checkout_path=checkout_path, + ): + return None + + return checkout_path + + +def update_existing_checkout(checkout_path: Path, default_branch: str) -> bool: + git_dir = checkout_path / ".git" + if not git_dir.exists(): + return False + + fetch_ok = run_git_command( + [ + "git", + 
"-C", + str(checkout_path), + "fetch", + "--depth", + "1", + "origin", + default_branch, + ] + ) + checkout_ok = run_git_command( + [ + "git", + "-C", + str(checkout_path), + "checkout", + "--force", + "--detach", + "FETCH_HEAD", + ] + ) + if not (fetch_ok and checkout_ok): + return False + + run_git_command(["git", "-C", str(checkout_path), "clean", "-fdx"]) + return True + + +def clone_fresh_checkout( + *, + authenticated_url: str, + default_branch: str, + checkout_path: Path, +) -> bool: + shutil.rmtree(checkout_path, ignore_errors=True) + return run_git_command( + [ + "git", + "clone", + "--depth", + "1", + "--branch", + default_branch, + authenticated_url, + str(checkout_path), + ] + ) + + +def run_git_command(command: list[str]) -> bool: + try: + subprocess.run( + command, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except (OSError, subprocess.CalledProcessError): + return False + return True + + +def build_authenticated_clone_url(clone_url: str, github_token: str | None) -> str: + if github_token is None: + return clone_url + + parsed = urlsplit(clone_url) + auth = f"x-access-token:{quote(github_token, safe='')}" + netloc = f"{auth}@{parsed.netloc}" + return urlunsplit( + (parsed.scheme, netloc, parsed.path, parsed.query, parsed.fragment) + ) diff --git a/src/generate_repo_overview/collector/reference_integration.py b/src/generate_repo_overview/collector/reference_integration.py new file mode 100644 index 0000000..c952dcc --- /dev/null +++ b/src/generate_repo_overview/collector/reference_integration.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import TYPE_CHECKING, cast +from urllib.parse import urlsplit + +from .git_checkout import sync_repository_checkout +from .registry_metadata import ( + BAZEL_REGISTRY_LOCAL_CHECKOUT, + parse_bazel_registry_metadata, +) +from .signal_detection import dedupe_preserving_order + +if TYPE_CHECKING: + from collections.abc import Iterable + +REFERENCE_INTEGRATION_LOCAL_CHECKOUT = Path( + "profile/cache/reference_integration_checkout" +) +ROOT_MODULE_PATH = Path("MODULE.bazel") +INCLUDE_PATTERN = re.compile(r'\binclude\s*\(\s*"(?P