From a203ffea8fc5ad5317ed2c200d87d50360d1f04d Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Tue, 26 May 2026 16:26:46 -0700 Subject: [PATCH 1/3] chore: configure linting for the project --- .editorconfig | 8 +++++ .github/instructions/python.instructions.md | 36 +++++++++++++++++++++ .vscode/extensions.json | 9 ++++++ .vscode/settings.json | 36 ++++++++++++++++----- pyrightconfig.json | 19 +++++++++++ ruff.toml | 26 +++++++++++++++ 6 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 .editorconfig create mode 100644 .github/instructions/python.instructions.md create mode 100644 .vscode/extensions.json create mode 100644 pyrightconfig.json create mode 100644 ruff.toml diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000000..f55b338de90 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +# https://EditorConfig.org + +root = true + +[*] +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/.github/instructions/python.instructions.md b/.github/instructions/python.instructions.md new file mode 100644 index 00000000000..94ac6d3d719 --- /dev/null +++ b/.github/instructions/python.instructions.md @@ -0,0 +1,36 @@ +--- +applyTo: "**/*.py" +description: "ALWAYS read these when reading or modifying Python files. Covers ruff lint/format and pyright type-checking expectations, scope, and the pre-commit workflow." +--- + +# Python Files + +This repo uses [`ruff`](https://docs.astral.sh/ruff/) for linting/formatting and [`pyright`](https://microsoft.github.io/pyright/) for type checking. Config: [`ruff.toml`](../../ruff.toml), [`pyrightconfig.json`](../../pyrightconfig.json). + +## Before committing + +After editing any `.py` file, run both checks on the files you touched: + +```bash +ruff check # add --fix to auto-apply safe fixes +ruff format # formatting +pyright # type checking +``` + +Or check everything at once: + +```bash +ruff check +pyright +``` + +## Expectations + +- **Don't introduce new violations.** If `ruff check` or `pyright` reports new errors against files you modified, fix them before committing. +- **Pre-existing violations** in files you didn't touch are out of scope — leave them for whoever next edits that file. +- **Auto-fixes** (`ruff check --fix`) are safe to apply on files you're already editing. Review the diff before committing. +- **`# noqa` and `# type: ignore` comments** require a justification comment on the same line (e.g. `# noqa: F401 — re-exported`) and should be used extremely sparingly. + +## Scope + +Both tools currently scan: `.github/`, `base/`, `scripts/`. Generated/vendored paths (`base/build`, `base/out`, `specs`, `**/__pycache__`, `**/.venv`, `**/venv`, `**/node_modules`) are excluded. diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000000..7b12f58a90c --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,9 @@ +{ + "recommendations": [ + "charliermarsh.ruff", + "editorconfig.editorconfig", + "DavidAnson.vscode-markdownlint", + "timonwong.shellcheck", + "ms-python.vscode-pylance" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index c5c141b6e84..3b1a29c837a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,7 +3,8 @@ // These files should not be manually edited — modify the TOML sources instead. // Prevent accidental edits to rendered specs. "files.readonlyInclude": { - "specs/**": true + "specs/**": true, + "locks/**": true }, // Suppress diagnostics from rendered specs — they are not source files. // Unfortunately, vscode does not provide a way to exclude all problem @@ -13,16 +14,35 @@ "shellcheck.ignorePatterns": { "specs/**": true }, - "python.analysis.exclude": [ - "specs", - "**/node_modules", - "**/__pycache__", - "**/.*" - ], // Using Azure Pipelines schema for validation of ADO YAMLs. "azure-pipelines.1ESPipelineTemplatesSchemaFile": true, // Associate ADO pipelines with the "azure-pipelines" type instead of GitHub Actions. "files.associations": { "**/.github/workflows/ado/**/*.yml": "azure-pipelines" - } + }, + // General lint rules + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll.markdownlint": "explicit", + "source.fixAll.shellcheck": "explicit" + }, + "shellcheck.enable": true, + "shellcheck.enableQuickFix": true, + // Lint rules for python + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll.ruff": "explicit", + "source.organizeImports.ruff": "explicit", + "source.fixAll": "explicit", + "source.fixAll.pylance": "explicit" + } + }, + "python.analysis.diagnosticsSource": "Pylance", + "python.missingPackage.severity": "Error", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "ruff.configuration": "${workspaceFolder}/ruff.toml", + "ruff.lint.enable": true } diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000000..9eb1d441231 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,19 @@ +{ + "exclude": [ + "**/node_modules", + "**/__pycache__", + "**/.venv", + "**/venv", + "base/build", + "base/out", + "specs" + ], + "typeCheckingMode": "standard", + "pythonVersion": "3.12", + "pythonPlatform": "Linux", + "reportUntypedFunctionDecorator": "error", + "reportMissingTypeStubs": "warning", + "reportMissingParameterType": "error", + "reportMissingTypeArgument": "error", + "reportUnknownParameterType": "error" +} diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000000..e264df7ff23 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,26 @@ +extend-exclude = ["base/build", "base/out", "specs"] + +line-length = 120 + +target-version = "py312" + +[lint] +select = ["ALL"] # Everything +ignore = [ + "COM812", # Conflicts with formatter. + "S603", # Check for validation of input to subprocess calls, almost always a false positive + "S607", # Requiring abspath for all binaries used is very restrictive + "T201", # These scripts are fine to use `print` for output +] + +# We want D401 still, even though google style omits it. +extend-select = ["D401"] + +fixable = ["ALL"] + +[lint.isort] +# Just force this, its better than dealing with a mishmash of modules that have annotations and those that don't +required-imports = ["from __future__ import annotations"] + +[lint.pydocstyle] +convention = "google" From ff02d3709d8fbd4024785c494a122de02556ae89 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Tue, 26 May 2026 16:37:13 -0700 Subject: [PATCH 2/3] chore(lint): run formatter on python files --- .../scripts/components/compute_render_set.py | 10 +- .../workflows/scripts/control-tower/client.py | 44 +-- .../control-tower/run_package_build.py | 13 +- .../scripts/control-tower/run_prcheck.py | 31 +- .../scripts/locks-check/post_locks_comment.py | 35 +- .../check_rendered_specs.py | 48 +-- .../render-specs-check/post_render_comment.py | 44 +-- .../spec-review/create_check_annotations.py | 50 +-- .../scripts/spec-review/format_pr_comment.py | 8 +- .../scripts/spec-review/spec_review_schema.py | 20 +- .vscode/mcps/_mcp_utils.py | 14 +- .vscode/mcps/fedora-distgit-mcp.py | 52 ++- .vscode/mcps/koji-mcp.py | 16 +- .../cases/container-base/test_container.py | 5 +- base/images/tests/cases/test_packages.py | 4 +- .../images/tests/cases/vm-base/test_kernel.py | 6 +- .../tests/cases/vm-base/test_partitions.py | 16 +- base/images/tests/conftest.py | 16 +- base/images/tests/utils/disk.py | 6 +- base/images/tests/utils/extract.py | 61 ++- base/images/tests/utils/parsers.py | 4 +- base/images/tests/utils/pytest_plugin.py | 20 +- scripts/repo/_repo_layout.py | 22 +- scripts/repo/synthesize-repodata.py | 355 +++++++++--------- scripts/update-components-list.py | 25 +- 25 files changed, 422 insertions(+), 503 deletions(-) diff --git a/.github/workflows/scripts/components/compute_render_set.py b/.github/workflows/scripts/components/compute_render_set.py index 3dbdb8a1841..32677decfe6 100644 --- a/.github/workflows/scripts/components/compute_render_set.py +++ b/.github/workflows/scripts/components/compute_render_set.py @@ -74,10 +74,12 @@ def main() -> None: # changed list AND with hand-edited specs would otherwise print twice, # and a component with N modified spec files would print N times. # dict.fromkeys preserves first-seen order. - names = dict.fromkeys([ - *from_changed(entries), - *from_specs_diff(args.specs_diff_file, args.specs_dir, renderable), - ]) + names = dict.fromkeys( + [ + *from_changed(entries), + *from_specs_diff(args.specs_diff_file, args.specs_dir, renderable), + ] + ) for name in names: print(name) diff --git a/.github/workflows/scripts/control-tower/client.py b/.github/workflows/scripts/control-tower/client.py index ca65a4b4faf..2b824f409da 100644 --- a/.github/workflows/scripts/control-tower/client.py +++ b/.github/workflows/scripts/control-tower/client.py @@ -28,9 +28,7 @@ # (azl-ControlTower/ControlTower/Shared/Models/Jobs/JobStatus.cs). NON_TERMINAL_STATUSES = frozenset({"Queued", "Pending", "Running"}) SUCCESS_STATUS = "Completed" -TERMINAL_FAILURE_STATUSES = frozenset( - {"Failed", "Cancelled", "CancelledByAdmin", "Unknown", "TimedOut"} -) +TERMINAL_FAILURE_STATUSES = frozenset({"Failed", "Cancelled", "CancelledByAdmin", "Unknown", "TimedOut"}) @dataclass @@ -86,9 +84,7 @@ def format_error(response: requests.Response) -> str: * ASP.NET validation: ``{"title", "errors": {field: [msg, ...]}}`` """ method = response.request.method if response.request is not None else "?" - lines: list[str] = [ - f"HTTP {response.status_code} {response.reason} from {method} {response.url}" - ] + lines: list[str] = [f"HTTP {response.status_code} {response.reason} from {method} {response.url}"] body: Any try: @@ -178,8 +174,7 @@ def _parse_json_object(response: requests.Response, context: str) -> dict: body = response.json() except ValueError as exc: raise RuntimeError( - f"{context} returned HTTP {response.status_code} " - f"but the body was not valid JSON:\n{response.text}" + f"{context} returned HTTP {response.status_code} but the body was not valid JSON:\n{response.text}" ) from exc if not isinstance(body, dict): raise RuntimeError( @@ -219,9 +214,7 @@ def post_scenario( json_payload=payload, ) if not response.ok: - raise RuntimeError( - f"Control Tower '{context}' request failed.\n" + format_error(response) - ) + raise RuntimeError(f"Control Tower '{context}' request failed.\n" + format_error(response)) return _parse_json_object(response, f"Control Tower '{context}'") @@ -235,13 +228,9 @@ def get_job_status( ) -> dict: """GET the job status. Refreshes the bearer token on 401 and retries once.""" url = f"{base_url}/api/Workflow/jobs/status/{job_id}" - response = _request_with_refresh( - session, "GET", url, credential, audience, token_holder - ) + response = _request_with_refresh(session, "GET", url, credential, audience, token_holder) if not response.ok: - raise RuntimeError( - "Control Tower job status request failed.\n" + format_error(response) - ) + raise RuntimeError("Control Tower job status request failed.\n" + format_error(response)) return _parse_json_object(response, "Control Tower job status") @@ -284,19 +273,13 @@ def poll_until_terminal( job_status_object: dict = {} while True: - job_status_object = get_job_status( - session, base_url, credential, audience, token_holder, job_id - ) + job_status_object = get_job_status(session, base_url, credential, audience, token_holder, job_id) current_status = job_status_object.get("status", "Unknown") elapsed = int(time.monotonic() - start) if current_status != previous_status: task_summary = _summarize_tasks(job_status_object.get("tasks")) - transition = ( - f"{previous_status} -> {current_status}" - if previous_status is not None - else current_status - ) + transition = f"{previous_status} -> {current_status}" if previous_status is not None else current_status suffix = f" | {task_summary}" if task_summary else "" print( f"Job {job_id} status: {transition} (elapsed {elapsed}s){suffix}", @@ -342,14 +325,7 @@ def report_failure(final: dict) -> None: tasks = final.get("tasks") if isinstance(tasks, list): - failed = [ - t - for t in tasks - if isinstance(t, dict) and t.get("status") in TERMINAL_FAILURE_STATUSES - ] + failed = [t for t in tasks if isinstance(t, dict) and t.get("status") in TERMINAL_FAILURE_STATUSES] for task in failed: name = task.get("taskName") or task.get("taskId") - print( - f"##[error]task '{name}' status={task.get('status')} " - f"attempt={task.get('attemptNumber')}" - ) + print(f"##[error]task '{name}' status={task.get('status')} attempt={task.get('attemptNumber')}") diff --git a/.github/workflows/scripts/control-tower/run_package_build.py b/.github/workflows/scripts/control-tower/run_package_build.py index 779376b4ef8..68db9d3185d 100644 --- a/.github/workflows/scripts/control-tower/run_package_build.py +++ b/.github/workflows/scripts/control-tower/run_package_build.py @@ -39,16 +39,12 @@ def _load_build_components(path: Path) -> list[str]: try: raw = path.read_text(encoding="utf-8") except OSError as exc: - raise SystemExit( - f"##[error]Failed to read --changed-components-file {path!s}: {exc}" - ) from exc + raise SystemExit(f"##[error]Failed to read --changed-components-file {path!s}: {exc}") from exc try: entries = json.loads(raw) except json.JSONDecodeError as exc: - raise SystemExit( - f"##[error]--changed-components-file {path!s} is not valid JSON: {exc}" - ) from exc + raise SystemExit(f"##[error]--changed-components-file {path!s} is not valid JSON: {exc}") from exc if not isinstance(entries, list): raise SystemExit( @@ -212,10 +208,7 @@ def main() -> None: job_id = build_response.get("jobId") if not job_id: - print( - "##[error]Control Tower 'package' response did not include a 'jobId'. " - "Cannot confirm job acceptance." - ) + print("##[error]Control Tower 'package' response did not include a 'jobId'. Cannot confirm job acceptance.") sys.exit(1) # ── Brief poll — just confirm the job was accepted ─────────────── diff --git a/.github/workflows/scripts/control-tower/run_prcheck.py b/.github/workflows/scripts/control-tower/run_prcheck.py index e44381b4344..aac9b3830e7 100644 --- a/.github/workflows/scripts/control-tower/run_prcheck.py +++ b/.github/workflows/scripts/control-tower/run_prcheck.py @@ -49,16 +49,12 @@ def _load_components_from_file(path: Path) -> list[str]: try: raw = path.read_text(encoding="utf-8") except OSError as exc: - raise SystemExit( - f"##[error]Failed to read --changed-components-file {path!s}: {exc}" - ) from exc + raise SystemExit(f"##[error]Failed to read --changed-components-file {path!s}: {exc}") from exc try: entries = json.loads(raw) except json.JSONDecodeError as exc: - raise SystemExit( - f"##[error]--changed-components-file {path!s} is not valid JSON: {exc}" - ) from exc + raise SystemExit(f"##[error]--changed-components-file {path!s} is not valid JSON: {exc}") from exc if not isinstance(entries, list): raise SystemExit( @@ -89,9 +85,7 @@ def _parse_args() -> argparse.Namespace: required=True, help="Entra ID audience URI (e.g. api://)", ) - parser.add_argument( - "--api-base-url", required=True, help="Base URL of the Control Tower service" - ) + parser.add_argument("--api-base-url", required=True, help="Base URL of the Control Tower service") parser.add_argument( "--build-reason", required=True, @@ -182,16 +176,11 @@ def main() -> None: print(json.dumps(payload, indent=2)) if args.build_reason == "PullRequest": - print( - "Skipping Control Tower call - pull request triggers are not supported, yet." - ) + print("Skipping Control Tower call - pull request triggers are not supported, yet.") return if not components: - print( - "No affected components detected between source and target commits; " - "skipping Control Tower call." - ) + print("No affected components detected between source and target commits; skipping Control Tower call.") return # ── Acquire bearer token ───────────────────────────────────────── @@ -221,17 +210,11 @@ def main() -> None: job_id = prcheck_response.get("jobId") if not job_id: - print( - "##[error]Control Tower 'prcheck' response did not include a 'jobId'. " - "Cannot poll for job status." - ) + print("##[error]Control Tower 'prcheck' response did not include a 'jobId'. Cannot poll for job status.") sys.exit(1) # ── Poll for job completion ────────────────────────────────────── - print( - f"Polling job {job_id} every {args.poll_interval_seconds}s " - f"(timeout {args.poll_timeout_seconds}s)..." - ) + print(f"Polling job {job_id} every {args.poll_interval_seconds}s (timeout {args.poll_timeout_seconds}s)...") try: final, timed_out = ct.poll_until_terminal( session, diff --git a/.github/workflows/scripts/locks-check/post_locks_comment.py b/.github/workflows/scripts/locks-check/post_locks_comment.py index 3264b0017c6..c91c5cdf42c 100644 --- a/.github/workflows/scripts/locks-check/post_locks_comment.py +++ b/.github/workflows/scripts/locks-check/post_locks_comment.py @@ -117,19 +117,12 @@ def parse_update_output(path: Path) -> list[dict]: if not isinstance(data, list): raise SystemExit( - f"Error: update output has unexpected shape (expected null or list, " - f"got {type(data).__name__})" + f"Error: update output has unexpected shape (expected null or list, got {type(data).__name__})" ) for entry in data: - if ( - not isinstance(entry, dict) - or "component" not in entry - or "changed" not in entry - ): - raise SystemExit( - f"Error: update output entry has unexpected shape: {entry!r}" - ) + if not isinstance(entry, dict) or "component" not in entry or "changed" not in entry: + raise SystemExit(f"Error: update output entry has unexpected shape: {entry!r}") return [entry for entry in data if entry["changed"] is True] @@ -160,9 +153,7 @@ def format_comment( # inject arbitrary commands into a maintainer's terminal. Fall back to # `-a` if any name fails the same regex used for display so the # printed command is always safe to run as-is. - use_all = n_changed > 30 or any( - not _SAFE_NAME_RE.match(name) for name in comp_names - ) + use_all = n_changed > 30 or any(not _SAFE_NAME_RE.match(name) for name in comp_names) remediation_cmd = _update_command([] if use_all else comp_names, use_all=use_all) lines: list[str] = [ @@ -248,9 +239,7 @@ def format_comment( def _gh(*args: str) -> str: - return subprocess.run( - ["gh", *args], capture_output=True, text=True, check=True - ).stdout.strip() + return subprocess.run(["gh", *args], capture_output=True, text=True, check=True).stdout.strip() def find_existing_comments(repo: str, pr: str) -> list[str]: @@ -267,11 +256,7 @@ def find_existing_comments(repo: str, pr: str) -> list[str]: "--paginate", f"/repos/{repo}/issues/{pr}/comments", "--jq", - ( - f'.[] | select(.user.login == "{BOT_AUTHOR}") ' - f'| select(.body | contains("{COMMENT_MARKER}")) ' - "| .id" - ), + (f'.[] | select(.user.login == "{BOT_AUTHOR}") | select(.body | contains("{COMMENT_MARKER}")) | .id'), ) except subprocess.CalledProcessError: return [] @@ -342,9 +327,7 @@ def delete_comment_if_exists(repo: str, pr: str) -> None: def main() -> int: - parser = argparse.ArgumentParser( - description="Post `azldev component update` drift as a PR comment." - ) + parser = argparse.ArgumentParser(description="Post `azldev component update` drift as a PR comment.") parser.add_argument( "--update-output", type=Path, @@ -353,9 +336,7 @@ def main() -> int: ) parser.add_argument("--repo", required=True, help="GitHub repo (owner/repo)") parser.add_argument("--pr", required=True, help="PR number") - parser.add_argument( - "--artifacts-url", default=None, help="Direct URL to patch artifact" - ) + parser.add_argument("--artifacts-url", default=None, help="Direct URL to patch artifact") parser.add_argument("--run-id", default=None, help="GitHub Actions run ID") args = parser.parse_args() diff --git a/.github/workflows/scripts/render-specs-check/check_rendered_specs.py b/.github/workflows/scripts/render-specs-check/check_rendered_specs.py index e06e6e73479..3b1e9ec22a0 100755 --- a/.github/workflows/scripts/render-specs-check/check_rendered_specs.py +++ b/.github/workflows/scripts/render-specs-check/check_rendered_specs.py @@ -128,9 +128,7 @@ def classify_changes(specs_dir: Path) -> tuple[list[str], list[str], list[str]]: missing = _git_lines_z("ls-files", "-z", "--deleted", "--", sd) # Filter out .gitignore / .gitattributes anywhere under specs_dir — they # shouldn't be in rendered output and shouldn't influence our enumeration. - extra = [ - p for p in extra_raw if Path(p).name not in (".gitignore", ".gitattributes") - ] + extra = [p for p in extra_raw if Path(p).name not in (".gitignore", ".gitattributes")] return changed, extra, missing @@ -172,8 +170,7 @@ def build_content_diffs(changed_files: list[str], specs_dir: Path) -> list[dict] sha = head_blobs.get(path_str, "") if not sha: print( - f"::warning::could not resolve HEAD blob for {path_str}; " - "treating as drift", + f"::warning::could not resolve HEAD blob for {path_str}; treating as drift", file=sys.stderr, ) real_diffs.append( @@ -188,8 +185,7 @@ def build_content_diffs(changed_files: list[str], specs_dir: Path) -> list[dict] committed_bytes = _git_bytes("cat-file", "blob", sha) except subprocess.CalledProcessError as exc: print( - f"::warning::git cat-file blob {sha} ({path_str}) failed: {exc}; " - "treating as drift", + f"::warning::git cat-file blob {sha} ({path_str}) failed: {exc}; treating as drift", file=sys.stderr, ) real_diffs.append( @@ -273,14 +269,8 @@ def build_report( """Build the JSON-serialisable report.""" return { "content_diffs": content_diffs, - "extra_files": [ - {"path": p, "component": component_from_path(p, specs_dir)} - for p in extra_files - ], - "missing_files": [ - {"path": p, "component": component_from_path(p, specs_dir)} - for p in missing_files - ], + "extra_files": [{"path": p, "component": component_from_path(p, specs_dir)} for p in extra_files], + "missing_files": [{"path": p, "component": component_from_path(p, specs_dir)} for p in missing_files], } @@ -340,11 +330,7 @@ def generate_patch( return b"" # NUL-separated stdin for --pathspec-file-nul. See docstring for why. - extra_stdin = ( - b"\x00".join(p.encode("utf-8") for p in extra_files) + b"\x00" - if extra_files - else b"" - ) + extra_stdin = b"\x00".join(p.encode("utf-8") for p in extra_files) + b"\x00" if extra_files else b"" # Mark untracked files as intent-to-add so `git diff` picks them up as # "new file" entries instead of silently skipping them. @@ -380,8 +366,7 @@ def generate_patch( except subprocess.CalledProcessError as exc: stderr = (exc.stderr or b"").decode("utf-8", errors="replace").strip() print( - f"::warning::git diff failed while generating patch: " - f"exit={exc.returncode}: {stderr}", + f"::warning::git diff failed while generating patch: exit={exc.returncode}: {stderr}", file=sys.stderr, ) patch = b"" @@ -403,8 +388,7 @@ def generate_patch( except subprocess.CalledProcessError as exc: stderr = (exc.stderr or b"").decode("utf-8", errors="replace").strip() print( - f"::warning::git reset (undo intent-to-add) failed: " - f"exit={exc.returncode}: {stderr}", + f"::warning::git reset (undo intent-to-add) failed: exit={exc.returncode}: {stderr}", file=sys.stderr, ) @@ -444,9 +428,7 @@ def main() -> int: # 1. Classify changes changed, extra, missing = classify_changes(specs_dir) - print( - f"Raw counts: changed={len(changed)} extra={len(extra)} missing={len(missing)}" - ) + print(f"Raw counts: changed={len(changed)} extra={len(extra)} missing={len(missing)}") # 2. Build content diffs content_diffs = build_content_diffs(changed, specs_dir) @@ -476,16 +458,8 @@ def main() -> int: print("All rendered specs are up to date.") return 0 - print( - f"::error::{len(content_diffs)} content diff(s), " - f"{len(extra)} extra file(s), {len(missing)} missing file(s)" - ) - all_comps = sorted( - set( - _unique_components(content_diffs) - + _unique_components(report.get("extra_files", [])) - ) - ) + print(f"::error::{len(content_diffs)} content diff(s), {len(extra)} extra file(s), {len(missing)} missing file(s)") + all_comps = sorted(set(_unique_components(content_diffs) + _unique_components(report.get("extra_files", [])))) if missing: print(f"Remediation: {_render_command([], use_all=True)}") elif all_comps: diff --git a/.github/workflows/scripts/render-specs-check/post_render_comment.py b/.github/workflows/scripts/render-specs-check/post_render_comment.py index a749aab57c2..d6ade5e5242 100644 --- a/.github/workflows/scripts/render-specs-check/post_render_comment.py +++ b/.github/workflows/scripts/render-specs-check/post_render_comment.py @@ -116,9 +116,7 @@ def format_comment( n_extra = len(extra_files) n_missing = len(missing_files) - all_comps: list[str] = sorted( - {item["component"] for item in content_diffs + extra_files} - ) + all_comps: list[str] = sorted({item["component"] for item in content_diffs + extra_files}) # Missing files (in HEAD but not produced by render) are orphans that # need `--clean-stale`, which only works with `-a`. Extras (produced by # render but not committed) are handled fine by a per-component render. @@ -187,10 +185,7 @@ def format_comment( for item in content_diffs: if shown >= MAX_INLINE_DIFFS: remaining = n_diff - shown - lines.append( - f"*… and {remaining} more file(s). " - "Run the remediation command above to see all changes.*" - ) + lines.append(f"*… and {remaining} more file(s). Run the remediation command above to see all changes.*") lines.append("") break path = _safe_path(item["path"]) @@ -201,12 +196,7 @@ def format_comment( # code formatting: the path is rendered as code in the summary, and # the diff body is inside a dynamically chosen fence longer than any # backtick run in the diff text. - block = ( - "
\n" - f"`{path}`\n\n" - f"{fence}diff\n{diff_text}\n{fence}\n\n" - "
\n" - ) + block = f"
\n`{path}`\n\n{fence}diff\n{diff_text}\n{fence}\n\n
\n" if body_so_far + len(block) > budget_cap: remaining = n_diff - shown lines.append( @@ -266,16 +256,14 @@ def _append_file_list( if extra_files: _append_file_list( "### Files to add", - "These files are produced by `azldev component render` but are " - "missing from your branch. Add them.", + "These files are produced by `azldev component render` but are missing from your branch. Add them.", extra_files, ) if missing_files: _append_file_list( "### Files to remove", - "These files are in your branch but are not produced by render. " - "Remove them.", + "These files are in your branch but are not produced by render. Remove them.", missing_files, ) @@ -288,9 +276,7 @@ def _append_file_list( def _gh(*args: str) -> str: - return subprocess.run( - ["gh", *args], capture_output=True, text=True, check=True - ).stdout.strip() + return subprocess.run(["gh", *args], capture_output=True, text=True, check=True).stdout.strip() def find_existing_comments(repo: str, pr: str) -> list[str]: @@ -307,11 +293,7 @@ def find_existing_comments(repo: str, pr: str) -> list[str]: "--paginate", f"/repos/{repo}/issues/{pr}/comments", "--jq", - ( - f'.[] | select(.user.login == "{BOT_AUTHOR}") ' - f'| select(.body | contains("{COMMENT_MARKER}")) ' - "| .id" - ), + (f'.[] | select(.user.login == "{BOT_AUTHOR}") | select(.body | contains("{COMMENT_MARKER}")) | .id'), ) except subprocess.CalledProcessError: return [] @@ -382,9 +364,7 @@ def delete_comment_if_exists(repo: str, pr: str) -> None: def main() -> int: - parser = argparse.ArgumentParser( - description="Post rendered-spec drift results as a PR comment." - ) + parser = argparse.ArgumentParser(description="Post rendered-spec drift results as a PR comment.") parser.add_argument( "--report", type=Path, @@ -393,9 +373,7 @@ def main() -> int: ) parser.add_argument("--repo", required=True, help="GitHub repo (owner/repo)") parser.add_argument("--pr", required=True, help="PR number") - parser.add_argument( - "--artifacts-url", default=None, help="Direct URL to patch artifact" - ) + parser.add_argument("--artifacts-url", default=None, help="Direct URL to patch artifact") parser.add_argument("--run-id", default=None, help="GitHub Actions run ID") args = parser.parse_args() @@ -407,9 +385,7 @@ def main() -> int: return 1 total = ( - len(report.get("content_diffs", [])) - + len(report.get("extra_files", [])) - + len(report.get("missing_files", [])) + len(report.get("content_diffs", [])) + len(report.get("extra_files", [])) + len(report.get("missing_files", [])) ) body: str | None = None diff --git a/.github/workflows/scripts/spec-review/create_check_annotations.py b/.github/workflows/scripts/spec-review/create_check_annotations.py index 771461057a0..6432a4caa17 100755 --- a/.github/workflows/scripts/spec-review/create_check_annotations.py +++ b/.github/workflows/scripts/spec-review/create_check_annotations.py @@ -18,9 +18,9 @@ # Mapping from finding category to (workflow command level, checks API level, checks API title) _SEVERITY_MAP = { - "errors": ("error", "failure", "Spec Error"), - "warnings": ("warning", "warning", "Spec Warning"), - "suggestions": ("notice", "notice", "Suggestion"), + "errors": ("error", "failure", "Spec Error"), + "warnings": ("warning", "warning", "Spec Warning"), + "suggestions": ("notice", "notice", "Suggestion"), } @@ -52,13 +52,7 @@ def escape_workflow_command(s: str) -> str: See https://github.com/actions/toolkit/issues/193 Order matters: % must be escaped first to avoid double-escaping. """ - return ( - s.replace("%", "%25") - .replace("\r", "%0D") - .replace("\n", "%0A") - .replace(":", "%3A") - .replace(",", "%2C") - ) + return s.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A").replace(":", "%3A").replace(",", "%2C") def generate_workflow_commands(report: dict, repo_root: Optional[Path] = None) -> list[str]: @@ -80,26 +74,34 @@ def generate_check_annotations(report: dict, repo_root: Optional[Path] = None) - _, api_level, title = _SEVERITY_MAP[category] line = finding.get("line") or 1 msg = _format_message(finding) - annotations.append({ - "path": spec_file, - "start_line": line, - "end_line": line, - "annotation_level": api_level, - "message": msg, - "title": title, - }) + annotations.append( + { + "path": spec_file, + "start_line": line, + "end_line": line, + "annotation_level": api_level, + "message": msg, + "title": title, + } + ) return annotations def main() -> int: parser = argparse.ArgumentParser(description="Generate check annotations from spec review") parser.add_argument("file", type=Path, help="Path to report JSON") - parser.add_argument("--workflow-commands", action="store_true", - help="Output GitHub Actions workflow commands") - parser.add_argument("--json", action="store_true", - help="Output annotations as JSON for Checks API") - parser.add_argument("--repo-root", type=Path, default=None, - help="Repository root for converting absolute paths to relative (default: auto-detect via git)") + parser.add_argument( + "--workflow-commands", + action="store_true", + help="Output GitHub Actions workflow commands", + ) + parser.add_argument("--json", action="store_true", help="Output annotations as JSON for Checks API") + parser.add_argument( + "--repo-root", + type=Path, + default=None, + help="Repository root for converting absolute paths to relative (default: auto-detect via git)", + ) args = parser.parse_args() try: diff --git a/.github/workflows/scripts/spec-review/format_pr_comment.py b/.github/workflows/scripts/spec-review/format_pr_comment.py index 27212c68a5f..e85cb56a17c 100755 --- a/.github/workflows/scripts/spec-review/format_pr_comment.py +++ b/.github/workflows/scripts/spec-review/format_pr_comment.py @@ -118,8 +118,12 @@ def main() -> int: parser.add_argument("file", type=Path, help="Path to report JSON") parser.add_argument("--repo", required=True, help="GitHub repo (owner/repo)") parser.add_argument("--sha", required=True, help="Commit SHA for file links") - parser.add_argument("--repo-root", type=Path, default=None, - help="Repository root for converting absolute paths to relative") + parser.add_argument( + "--repo-root", + type=Path, + default=None, + help="Repository root for converting absolute paths to relative", + ) args = parser.parse_args() try: diff --git a/.github/workflows/scripts/spec-review/spec_review_schema.py b/.github/workflows/scripts/spec-review/spec_review_schema.py index e9db91c65a5..f172eac8304 100755 --- a/.github/workflows/scripts/spec-review/spec_review_schema.py +++ b/.github/workflows/scripts/spec-review/spec_review_schema.py @@ -75,7 +75,9 @@ def has_errors(self) -> bool: def print_summary(self): status = "ERRORS FOUND" if self.has_errors else "No errors" print(f"{status}") - print(f"Specs: {len(self.spec_reviews)} | Errors: {self.total_errors} | Warnings: {self.total_warnings} | Suggestions: {self.total_suggestions}") + print( + f"Specs: {len(self.spec_reviews)} | Errors: {self.total_errors} | Warnings: {self.total_warnings} | Suggestions: {self.total_suggestions}" + ) def print_errors(self): for review in self.spec_reviews: @@ -145,8 +147,14 @@ def compare_reports( print(f"┌─{'─' * col_w}─┬────────┬──────────┬─────────────┐") print(f"│ {'Model':<{col_w}} │ Errors │ Warnings │ Suggestions │") print(f"├─{'─' * col_w}─┼────────┼──────────┼─────────────┤") - for label, report in [(label_a, report_a), (label_b, report_b), (label_final, report_final)]: - print(f"│ {label:<{col_w}} │ {report.total_errors:>6} │ {report.total_warnings:>8} │ {report.total_suggestions:>11} │") + for label, report in [ + (label_a, report_a), + (label_b, report_b), + (label_final, report_final), + ]: + print( + f"│ {label:<{col_w}} │ {report.total_errors:>6} │ {report.total_warnings:>8} │ {report.total_suggestions:>11} │" + ) print(f"└─{'─' * col_w}─┴────────┴──────────┴─────────────┘") print() @@ -206,7 +214,11 @@ def main() -> int: parser.add_argument("report_final", type=Path, help="Final synthesized report") parser.add_argument("--label-a", default="Reviewer A", help="Display label for reviewer A") parser.add_argument("--label-b", default="Reviewer B", help="Display label for reviewer B") - parser.add_argument("--label-final", default="Synthesized", help="Display label for final report") + parser.add_argument( + "--label-final", + default="Synthesized", + help="Display label for final report", + ) args = parser.parse_args() try: diff --git a/.vscode/mcps/_mcp_utils.py b/.vscode/mcps/_mcp_utils.py index 7e2a75b34b0..d76c6b3a597 100644 --- a/.vscode/mcps/_mcp_utils.py +++ b/.vscode/mcps/_mcp_utils.py @@ -69,9 +69,11 @@ def load_env(env_path: str | Path | None = None) -> None: print(f"[mcp] Loaded .env from {c}", file=sys.stderr) return - print("[mcp] No .env file found (checked: " - + ", ".join(str(p) for p in candidates) + ")", - file=sys.stderr) + print( + "[mcp] No .env file found (checked: " + ", ".join(str(p) for p in candidates) + ")", + file=sys.stderr, + ) + # Valid RPM package name characters: starts with alphanumeric, then # alphanumerics plus . _ + - @@ -118,11 +120,7 @@ def check_ssrf(base_url: str, constructed_url: str) -> str | None: base_port = effective_port(parsed_base) url_port = effective_port(parsed_url) - if ( - parsed_url.hostname != parsed_base.hostname - or parsed_url.scheme != parsed_base.scheme - or url_port != base_port - ): + if parsed_url.hostname != parsed_base.hostname or parsed_url.scheme != parsed_base.scheme or url_port != base_port: return ( "Constructed URL does not match the configured endpoint " f"(scheme/host/port mismatch: {parsed_url.scheme!r}://{parsed_url.hostname!r}:{url_port!r} " diff --git a/.vscode/mcps/fedora-distgit-mcp.py b/.vscode/mcps/fedora-distgit-mcp.py index 98e11d7f124..8d233ad6412 100644 --- a/.vscode/mcps/fedora-distgit-mcp.py +++ b/.vscode/mcps/fedora-distgit-mcp.py @@ -50,9 +50,7 @@ _DEFAULT_BASE_URL = "https://src.fedoraproject.org" _base_url: str = _DEFAULT_BASE_URL -_scratch_dir: str = os.path.join( - os.environ.get("AZLDEV_WORK_DIR", "base/build/work"), "scratch", "distgit" -) +_scratch_dir: str = os.path.join(os.environ.get("AZLDEV_WORK_DIR", "base/build/work"), "scratch", "distgit") _repos_dir: str = os.path.join(_scratch_dir, "repos") _fetch_dir: str = os.path.join(_scratch_dir, "fetched") @@ -199,6 +197,7 @@ def _ensure_repo(package: str, auto_clean: bool, base_url: str) -> tuple[str, st # Tools # --------------------------------------------------------------------------- + @mcp.tool() def distgit_status() -> StatusDict: """Return current MCP server state. @@ -340,8 +339,12 @@ def distgit_search( if mode == "pickaxe": ref_args = ["--all"] if ref == "--all" else [ref] cmd = [ - "git", "--git-dir", git_dir, "log", - "--oneline", "-20", + "git", + "--git-dir", + git_dir, + "log", + "--oneline", + "-20", f"-S{query}", *ref_args, "--", @@ -353,21 +356,36 @@ def distgit_search( full=False, ) cmd = [ - "git", "--git-dir", git_dir, "grep", - "-n", "-i", "-e", query, ref, "--", + "git", + "--git-dir", + git_dir, + "grep", + "-n", + "-i", + "-e", + query, + ref, + "--", ] elif mode == "log-grep": ref_args = ["--all"] if ref == "--all" else [ref] cmd = [ - "git", "--git-dir", git_dir, "log", - "--oneline", "-20", + "git", + "--git-dir", + git_dir, + "log", + "--oneline", + "-20", f"--grep={query}", *ref_args, ] try: result = subprocess.run( - cmd, capture_output=True, text=True, timeout=30, + cmd, + capture_output=True, + text=True, + timeout=30, ) except subprocess.TimeoutExpired: return _add_status({"error": "Search timed out after 30s."}, full=False) @@ -387,7 +405,10 @@ def distgit_search( if not output.strip(): return _add_status( - {"output": f"No matches found for {query!r} in {package} ({mode} on {ref}).", "repo_dir": repo_dir}, + { + "output": f"No matches found for {query!r} in {package} ({mode} on {ref}).", + "repo_dir": repo_dir, + }, full=False, ) @@ -441,7 +462,9 @@ def distgit_show( try: result = subprocess.run( ["git", "--git-dir", git_dir, "show", "--stat", "--patch", commit, "--"], - capture_output=True, text=True, timeout=30, + capture_output=True, + text=True, + timeout=30, ) except subprocess.TimeoutExpired: return _add_status({"error": "git show timed out after 30s."}, full=False) @@ -450,7 +473,10 @@ def distgit_show( if result.returncode != 0: stderr = result.stderr.strip() - return _add_status({"error": f"git show failed (exit {result.returncode}): {stderr}"}, full=False) + return _add_status( + {"error": f"git show failed (exit {result.returncode}): {stderr}"}, + full=False, + ) output = result.stdout diff --git a/.vscode/mcps/koji-mcp.py b/.vscode/mcps/koji-mcp.py index 0f5040f2200..0840c3b9bd3 100755 --- a/.vscode/mcps/koji-mcp.py +++ b/.vscode/mcps/koji-mcp.py @@ -64,9 +64,15 @@ _flags = [] if _base_url in _insecure_urls: _flags.append("insecure=yes") - print(f"[koji-mcp] Base URL: {_base_url}" + (f" ({', '.join(_flags)})" if _flags else ""), file=sys.stderr) + print( + f"[koji-mcp] Base URL: {_base_url}" + (f" ({', '.join(_flags)})" if _flags else ""), + file=sys.stderr, + ) if _insecure_urls: - print(f"[koji-mcp] Pre-approved insecure URLs: {', '.join(sorted(_insecure_urls))}", file=sys.stderr) + print( + f"[koji-mcp] Pre-approved insecure URLs: {', '.join(sorted(_insecure_urls))}", + file=sys.stderr, + ) def _add_status(result: StatusDict, *, full: bool) -> StatusDict: @@ -140,7 +146,8 @@ def koji_allow_insecure(override_base_url: str | None = None) -> StatusDict: if not url: return _add_status( - {"error": "No Koji URL available. Pass override_base_url or call set_koji_url first."}, full=False + {"error": "No Koji URL available. Pass override_base_url or call set_koji_url first."}, + full=False, ) if url not in _ssl_errors_seen: @@ -182,7 +189,8 @@ def koji_fetch(path: str, override_base_url: str | None = None) -> StatusDict: if not base: return _add_status( - {"error": "No Koji URL available. Pass override_base_url or call set_koji_url first."}, full=False + {"error": "No Koji URL available. Pass override_base_url or call set_koji_url first."}, + full=False, ) if not path.startswith("/"): diff --git a/base/images/tests/cases/container-base/test_container.py b/base/images/tests/cases/container-base/test_container.py index 61a6fd5c43e..42f2897b312 100644 --- a/base/images/tests/cases/container-base/test_container.py +++ b/base/images/tests/cases/container-base/test_container.py @@ -14,7 +14,4 @@ def test_no_kernel_modules(rootfs: Path) -> None: ): if modules_dir.exists(): versions = list(modules_dir.iterdir()) - assert not versions, ( - f"Container image has kernel modules under {modules_dir}: " - f"{[v.name for v in versions]}" - ) + assert not versions, f"Container image has kernel modules under {modules_dir}: {[v.name for v in versions]}" diff --git a/base/images/tests/cases/test_packages.py b/base/images/tests/cases/test_packages.py index 464dfb7ec7f..719ace29cf7 100644 --- a/base/images/tests/cases/test_packages.py +++ b/base/images/tests/cases/test_packages.py @@ -32,7 +32,5 @@ def test_required_packages_installed(installed_packages: set[str]) -> None: @pytest.mark.require_capability("runtime-package-management") @pytest.mark.parametrize("pkg", sorted(BLOCKLISTED_PACKAGES)) -def test_blocklisted_package_absent( - pkg: str, installed_packages: set[str] -) -> None: +def test_blocklisted_package_absent(pkg: str, installed_packages: set[str]) -> None: assert pkg not in installed_packages, f"Blocklisted package installed: {pkg}" diff --git a/base/images/tests/cases/vm-base/test_kernel.py b/base/images/tests/cases/vm-base/test_kernel.py index 8d695dbac64..90b7111b94b 100644 --- a/base/images/tests/cases/vm-base/test_kernel.py +++ b/base/images/tests/cases/vm-base/test_kernel.py @@ -42,8 +42,4 @@ def test_config_lsm_matches_upstream(rootfs: Path) -> None: """ expected = "lockdown,yama,integrity,selinux,bpf,landlock,ipe" actual = _parse_config_lsm(rootfs) - assert actual == expected, ( - f"CONFIG_LSM does not match upstream.\n" - f" Expected: {expected}\n" - f" Actual: {actual}" - ) + assert actual == expected, f"CONFIG_LSM does not match upstream.\n Expected: {expected}\n Actual: {actual}" diff --git a/base/images/tests/cases/vm-base/test_partitions.py b/base/images/tests/cases/vm-base/test_partitions.py index 0b7677ec749..53c2b7cd77e 100644 --- a/base/images/tests/cases/vm-base/test_partitions.py +++ b/base/images/tests/cases/vm-base/test_partitions.py @@ -11,22 +11,12 @@ def test_has_root_partition(partition_table: list[PartitionInfo]) -> None: """Image must define exactly one root ('/') filesystem.""" root_parts = [p for p in partition_table if p.mountpoint == "/"] - assert len(root_parts) == 1, ( - f"Expected exactly one root partition, found {len(root_parts)}: " - f"{root_parts}" - ) + assert len(root_parts) == 1, f"Expected exactly one root partition, found {len(root_parts)}: {root_parts}" def test_has_efi_partition(partition_table: list[PartitionInfo]) -> None: """UEFI VM images must have a vfat EFI system partition.""" efi_mountpoints = {"/boot/efi", "/efi"} - efi_parts = [ - p - for p in partition_table - if p.mountpoint in efi_mountpoints and p.type == "vfat" - ] + efi_parts = [p for p in partition_table if p.mountpoint in efi_mountpoints and p.type == "vfat"] if not efi_parts: - pytest.fail( - "No vfat EFI partition found " - f"(expected mountpoint in {sorted(efi_mountpoints)})" - ) + pytest.fail(f"No vfat EFI partition found (expected mountpoint in {sorted(efi_mountpoints)})") diff --git a/base/images/tests/conftest.py b/base/images/tests/conftest.py index 91823ae3223..66f06ba29b3 100644 --- a/base/images/tests/conftest.py +++ b/base/images/tests/conftest.py @@ -68,7 +68,9 @@ def image_path(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") def image_type( - request: pytest.FixtureRequest, capabilities: set[str], image_path: Path, + request: pytest.FixtureRequest, + capabilities: set[str], + image_path: Path, ) -> str: """``'vm'`` or ``'container'`` — from ``--image-type``, capabilities, or file extension.""" explicit = request.config.getoption("--image-type") @@ -197,14 +199,18 @@ def installed_packages(rootfs: Path) -> set[str]: @pytest.fixture(scope="session") -def partition_table( - disk_info: DiskInfo | None, image_type: str -) -> list[PartitionInfo]: +def partition_table(disk_info: DiskInfo | None, image_type: str) -> list[PartitionInfo]: """Partition metadata — auto-skips for container images.""" if image_type != "vm": pytest.skip("partition_table not applicable to container images") assert disk_info is not None logger.info("Partition table: %d partitions", len(disk_info.partitions)) for p in disk_info.partitions: - logger.debug(" %s: type=%s mount=%s size=%d", p.device, p.type, p.mountpoint, p.size_bytes) + logger.debug( + " %s: type=%s mount=%s size=%d", + p.device, + p.type, + p.mountpoint, + p.size_bytes, + ) return disk_info.partitions diff --git a/base/images/tests/utils/disk.py b/base/images/tests/utils/disk.py index 929a3f67f48..cf17b67a1fe 100644 --- a/base/images/tests/utils/disk.py +++ b/base/images/tests/utils/disk.py @@ -74,7 +74,11 @@ def _parse_virt_inspector(xml_output: str) -> DiskInfo: logger.debug( "Partition: dev=%s type=%s label=%s mount=%s size=%d", - device, fs_type, label, mountpoint, size, + device, + fs_type, + label, + mountpoint, + size, ) partitions.append( diff --git a/base/images/tests/utils/extract.py b/base/images/tests/utils/extract.py index 170e5dc90e9..7a99fa953b4 100644 --- a/base/images/tests/utils/extract.py +++ b/base/images/tests/utils/extract.py @@ -73,6 +73,7 @@ def _run(cmd: list[str], **kwargs: object) -> subprocess.CompletedProcess[str]: # -- VM image mounting (libguestfs FUSE) ------------------------------------ + def _guestfs_env() -> dict[str, str]: """Build environment with direct libguestfs backend.""" return {**os.environ, "LIBGUESTFS_BACKEND": "direct"} @@ -90,15 +91,23 @@ def mount_vm_image(image_path: Path, mountpoint: Path) -> Path: cmd = [ GUESTMOUNT.name, "--ro", - "-a", str(image_path), - "-i", str(mountpoint), + "-a", + str(image_path), + "-i", + str(mountpoint), # Aggressive caching — safe because the mount is read-only. - "-o", "kernel_cache", - "-o", "entry_timeout=3600", - "-o", "attr_timeout=3600", - "-o", "negative_timeout=3600", - "-o", "noforget", - "--dir-cache-timeout", "3600", + "-o", + "kernel_cache", + "-o", + "entry_timeout=3600", + "-o", + "attr_timeout=3600", + "-o", + "negative_timeout=3600", + "-o", + "noforget", + "--dir-cache-timeout", + "3600", ] _run(cmd, env=_guestfs_env()) return mountpoint @@ -122,7 +131,9 @@ def unmount_vm_image(mountpoint: Path) -> None: if result.returncode != 0: logger.warning( "guestunmount failed for %s (rc=%d): %s", - mountpoint, result.returncode, result.stderr.strip(), + mountpoint, + result.returncode, + result.stderr.strip(), ) @@ -143,19 +154,27 @@ def mount_container_image(image_path: Path, extract_dir: Path) -> Path: # Convert OCI archive → OCI layout logger.info("Converting OCI archive to layout: %s", image_path) - _run([ - SKOPEO.name, "copy", - f"oci-archive:{image_path}", - f"oci:{oci_layout}:latest", - ]) + _run( + [ + SKOPEO.name, + "copy", + f"oci-archive:{image_path}", + f"oci:{oci_layout}:latest", + ] + ) # Unpack into an OCI runtime bundle (rootless, no user-ns required) logger.info("Unpacking OCI layout to bundle: %s", bundle) - _run([ - UMOCI.name, "unpack", "--rootless", - "--image", f"{oci_layout}:latest", - str(bundle), - ]) + _run( + [ + UMOCI.name, + "unpack", + "--rootless", + "--image", + f"{oci_layout}:latest", + str(bundle), + ] + ) rootfs = bundle / "rootfs" logger.info("Container rootfs at %s", rootfs) @@ -179,5 +198,7 @@ def unmount_container_image(extract_dir: Path) -> None: if result.returncode != 0: logger.warning( "buildah unshare rm failed for %s (rc=%d): %s", - extract_dir, result.returncode, result.stderr.strip(), + extract_dir, + result.returncode, + result.stderr.strip(), ) diff --git a/base/images/tests/utils/parsers.py b/base/images/tests/utils/parsers.py index 21fa3808013..930a412d598 100644 --- a/base/images/tests/utils/parsers.py +++ b/base/images/tests/utils/parsers.py @@ -56,9 +56,7 @@ def query_rpm_packages(rootfs: Path) -> set[str]: check=False, ) if result.returncode != 0: - raise RuntimeError( - f"rpm query failed (rc={result.returncode}): {result.stderr.strip()}" - ) + raise RuntimeError(f"rpm query failed (rc={result.returncode}): {result.stderr.strip()}") pkgs = {line.strip() for line in result.stdout.splitlines() if line.strip()} logger.debug("rpm query returned %d packages", len(pkgs)) return pkgs diff --git a/base/images/tests/utils/pytest_plugin.py b/base/images/tests/utils/pytest_plugin.py index 6d3d26c4083..01054828dcb 100644 --- a/base/images/tests/utils/pytest_plugin.py +++ b/base/images/tests/utils/pytest_plugin.py @@ -73,10 +73,7 @@ def pytest_addoption(parser) -> None: # type: ignore[no-untyped-def] "--image-type", choices=("vm", "container"), default=None, - help=( - "Image type: 'vm' or 'container'. " - "If omitted, derived from --capabilities or --image-path extension." - ), + help=("Image type: 'vm' or 'container'. If omitted, derived from --capabilities or --image-path extension."), ) group.addoption( "--capabilities", @@ -91,10 +88,7 @@ def pytest_addoption(parser) -> None: # type: ignore[no-untyped-def] group.addoption( "--workdir", default=None, - help=( - "Working directory for temporary files (mounts, extractions). " - "Defaults to a temporary directory." - ), + help=("Working directory for temporary files (mounts, extractions). Defaults to a temporary directory."), ) @@ -127,10 +121,7 @@ def pytest_configure(config) -> None: # type: ignore[no-untyped-def] missing = check_tools(when=image_type) if missing: names = ", ".join(t.name for t in missing) - hints = "\n".join( - f" - {t.name}: {t.reason} (install: {t.package_hint})" - for t in missing - ) + hints = "\n".join(f" - {t.name}: {t.reason} (install: {t.package_hint})" for t in missing) raise pytest.UsageError( f"Missing required native tool(s): {names}\n{hints}\n\n" "Run 'uv run python -m utils.tools' for a full status check." @@ -161,10 +152,7 @@ def pytest_runtest_setup(item: pytest.Item) -> None: continue if image_name and image_name.startswith(expected + "-"): continue - pytest.skip( - f"test is specific to image family '{expected}' " - f"(running: '{image_name}')" - ) + pytest.skip(f"test is specific to image family '{expected}' (running: '{image_name}')") def pytest_collection_modifyitems(config, items) -> None: # type: ignore[no-untyped-def] diff --git a/scripts/repo/_repo_layout.py b/scripts/repo/_repo_layout.py index d8334d8db79..2999beae066 100644 --- a/scripts/repo/_repo_layout.py +++ b/scripts/repo/_repo_layout.py @@ -28,20 +28,20 @@ class SubrepoSpec: """One sub-repo in the standard layout.""" - name: str # stable short identifier (e.g. "base", "sdk-srpms") - channel: str # one of CHANNELS - kind: str # one of ALL_KINDS - per_arch: bool # True iff `subpath` contains $basearch - subpath: str # path under a layout prefix + name: str # stable short identifier (e.g. "base", "sdk-srpms") + channel: str # one of CHANNELS + kind: str # one of ALL_KINDS + per_arch: bool # True iff `subpath` contains $basearch + subpath: str # path under a layout prefix SUBREPOS: tuple[SubrepoSpec, ...] = ( - SubrepoSpec("base", "base", KIND_MAIN, True, "base/$basearch"), - SubrepoSpec("base-debuginfo", "base", KIND_DEBUGINFO, True, "base/debuginfo/$basearch"), - SubrepoSpec("base-srpms", "base", KIND_SRPMS, False, "base/srpms"), - SubrepoSpec("sdk", "sdk", KIND_MAIN, True, "sdk/$basearch"), - SubrepoSpec("sdk-debuginfo", "sdk", KIND_DEBUGINFO, True, "sdk/debuginfo/$basearch"), - SubrepoSpec("sdk-srpms", "sdk", KIND_SRPMS, False, "sdk/srpms"), + SubrepoSpec("base", "base", KIND_MAIN, True, "base/$basearch"), + SubrepoSpec("base-debuginfo", "base", KIND_DEBUGINFO, True, "base/debuginfo/$basearch"), + SubrepoSpec("base-srpms", "base", KIND_SRPMS, False, "base/srpms"), + SubrepoSpec("sdk", "sdk", KIND_MAIN, True, "sdk/$basearch"), + SubrepoSpec("sdk-debuginfo", "sdk", KIND_DEBUGINFO, True, "sdk/debuginfo/$basearch"), + SubrepoSpec("sdk-srpms", "sdk", KIND_SRPMS, False, "sdk/srpms"), ) diff --git a/scripts/repo/synthesize-repodata.py b/scripts/repo/synthesize-repodata.py index 791f588949c..435bc3c733f 100755 --- a/scripts/repo/synthesize-repodata.py +++ b/scripts/repo/synthesize-repodata.py @@ -77,7 +77,7 @@ USER_AGENT = "synthesize-repodata/1" HTTP_TIMEOUT = 60.0 HTTP_RETRIES = 3 -HTTP_BACKOFF_BASE = 1.0 # seconds; doubled per attempt. +HTTP_BACKOFF_BASE = 1.0 # seconds; doubled per attempt. # repomd record types we generate ourselves in the output. The synth # tool only emits these — auxiliary records (updateinfo, group, @@ -88,10 +88,16 @@ # Consumers who need updateinfo/groups should fetch them from the # upstream repos directly (e.g. via a layered repo config) rather # than relying on the synth output. -PACKAGE_RECORD_TYPES = frozenset({ - "primary", "filelists", "other", - "primary_db", "filelists_db", "other_db", -}) +PACKAGE_RECORD_TYPES = frozenset( + { + "primary", + "filelists", + "other", + "primary_db", + "filelists_db", + "other_db", + } +) # When the Phase-4 channel-inheritance fallback finds two or more channels # tied for the top spot among a component's published sibling rpms, prefer @@ -107,6 +113,7 @@ # Logging helpers (everything goes to stderr; stdout left clean) # --------------------------------------------------------------------------- + def log(msg: str) -> None: print(msg, file=sys.stderr, flush=True) @@ -124,14 +131,15 @@ def fatal(msg: str) -> int: # Input-repo modelling # --------------------------------------------------------------------------- + @dataclass(frozen=True) class InputRepo: """One concrete (post-`$basearch`-expansion) upstream repo to ingest.""" - kind: str # main | debuginfo | srpms - arch: str # x86_64 | aarch64 | src - url: str # e.g. https://.../base/x86_64 - origin: str # 'prefix' (404 silent) | 'explicit' (404 fatal) + kind: str # main | debuginfo | srpms + arch: str # x86_64 | aarch64 | src + url: str # e.g. https://.../base/x86_64 + origin: str # 'prefix' (404 silent) | 'explicit' (404 fatal) def cache_key(self) -> str: # Stable, filesystem-safe; uniqueness comes from the full URL. @@ -145,15 +153,23 @@ def expand_repo_prefix(prefix: str, arches: Iterable[str]) -> list[InputRepo]: for sub in SUBREPOS: if sub.per_arch: for arch in arches: - out.append(InputRepo( - sub.kind, arch, - f"{base}/{sub.subpath.replace('$basearch', arch)}", - "prefix", - )) + out.append( + InputRepo( + sub.kind, + arch, + f"{base}/{sub.subpath.replace('$basearch', arch)}", + "prefix", + ) + ) else: - out.append(InputRepo( - sub.kind, SRPM_ARCH, f"{base}/{sub.subpath}", "prefix", - )) + out.append( + InputRepo( + sub.kind, + SRPM_ARCH, + f"{base}/{sub.subpath}", + "prefix", + ) + ) return out @@ -170,32 +186,27 @@ def parse_explicit_repo(spec: str, arches: Iterable[str]) -> list[InputRepo]: `srpms` URLs are arch-agnostic and rejected if they contain `$basearch`. """ if ":" not in spec: - raise ValueError( - f"--repo {spec!r}: expected TYPE:URL where TYPE in " - f"{{{', '.join(ALL_KINDS)}}}" - ) + raise ValueError(f"--repo {spec!r}: expected TYPE:URL where TYPE in {{{', '.join(ALL_KINDS)}}}") kind, url = spec.split(":", 1) kind = kind.strip().lower() url = url.strip() if kind not in ALL_KINDS: - raise ValueError( - f"--repo {spec!r}: unknown TYPE {kind!r}; expected one of " - f"{{{', '.join(ALL_KINDS)}}}" - ) + raise ValueError(f"--repo {spec!r}: unknown TYPE {kind!r}; expected one of {{{', '.join(ALL_KINDS)}}}") if kind == KIND_SRPMS: if "$basearch" in url: - raise ValueError( - f"--repo {spec!r}: srpms repos are arch-agnostic; " - f"`$basearch` is not allowed in the URL" - ) + raise ValueError(f"--repo {spec!r}: srpms repos are arch-agnostic; `$basearch` is not allowed in the URL") return [InputRepo(KIND_SRPMS, SRPM_ARCH, url.rstrip("/"), "explicit")] out: list[InputRepo] = [] if "$basearch" in url: for arch in arches: - out.append(InputRepo( - kind, arch, url.replace("$basearch", arch).rstrip("/"), - "explicit", - )) + out.append( + InputRepo( + kind, + arch, + url.replace("$basearch", arch).rstrip("/"), + "explicit", + ) + ) else: # No $basearch: caller is asserting "this URL is for one specific # arch". We can't tell which from the URL alone, so we infer from the @@ -235,9 +246,14 @@ def dedup_input_repos(repos: Iterable[InputRepo]) -> list[InputRepo]: # Phase 1: download repodata # --------------------------------------------------------------------------- + def _http_get( - url: str, dest: Path, ssl_context: ssl.SSLContext | None, - *, timeout: float = HTTP_TIMEOUT, retries: int = HTTP_RETRIES, + url: str, + dest: Path, + ssl_context: ssl.SSLContext | None, + *, + timeout: float = HTTP_TIMEOUT, + retries: int = HTTP_RETRIES, ) -> None: """Download *url* to *dest* with timeout, User-Agent, and bounded retry. @@ -252,16 +268,21 @@ def _http_get( last_exc: BaseException | None = None for attempt in range(retries): try: - with urllib.request.urlopen( - req, timeout=timeout, context=ssl_context, - ) as resp, open(dest, "wb") as fh: + with ( + urllib.request.urlopen( + req, + timeout=timeout, + context=ssl_context, + ) as resp, + open(dest, "wb") as fh, + ): shutil.copyfileobj(resp, fh) return except urllib.error.HTTPError as e: if 500 <= e.code < 600 and attempt < retries - 1: last_exc = e log(f" HTTP {e.code} fetching {url}; retrying") - time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + time.sleep(HTTP_BACKOFF_BASE * (2**attempt)) continue raise except urllib.error.URLError as e: @@ -270,14 +291,14 @@ def _http_get( if attempt < retries - 1: last_exc = e log(f" URL error fetching {url} ({e.reason}); retrying") - time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + time.sleep(HTTP_BACKOFF_BASE * (2**attempt)) continue raise except (TimeoutError, OSError) as e: if attempt < retries - 1: last_exc = e log(f" transport error fetching {url} ({e}); retrying") - time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + time.sleep(HTTP_BACKOFF_BASE * (2**attempt)) continue raise # Defensive: loop only exits via return/raise above. @@ -289,6 +310,7 @@ def _http_get( # SSL configuration # --------------------------------------------------------------------------- + def build_ssl_context(ca_bundle: Path | None, insecure: bool) -> ssl.SSLContext | None: """Return an SSLContext honouring --ca-bundle / --insecure, or None for Python's default behaviour. @@ -297,8 +319,7 @@ def build_ssl_context(ca_bundle: Path | None, insecure: bool) -> ssl.SSLContext ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE - warn("TLS certificate verification disabled (--insecure); " - "connections are NOT authenticated") + warn("TLS certificate verification disabled (--insecure); connections are NOT authenticated") return ctx if ca_bundle is not None: ctx = ssl.create_default_context(cafile=str(ca_bundle)) @@ -308,7 +329,8 @@ def build_ssl_context(ca_bundle: Path | None, insecure: bool) -> ssl.SSLContext def download_repo_metadata( - repo: InputRepo, cache_root: Path, + repo: InputRepo, + cache_root: Path, ssl_context: ssl.SSLContext | None, ) -> Path | None: """Download every record listed in *repo*'s repomd into a cache dir. @@ -325,8 +347,7 @@ def download_repo_metadata( repodata_dir = cache_dir / "repodata" repodata_dir.mkdir(parents=True, exist_ok=True) - repomd_url = urllib.parse.urljoin(repo.url.rstrip("/") + "/", - "repodata/repomd.xml") + repomd_url = urllib.parse.urljoin(repo.url.rstrip("/") + "/", "repodata/repomd.xml") repomd_path = repodata_dir / "repomd.xml" log(f" fetching {repomd_url}") try: @@ -342,10 +363,7 @@ def download_repo_metadata( # URLError(FileNotFoundError) rather than HTTPError(404); treat # that as the local-fs equivalent so prefix-derived sub-repos # under ``file://`` fixtures are silently skipped just like 404s. - if ( - isinstance(e.reason, FileNotFoundError) - and repo.origin == "prefix" - ): + if isinstance(e.reason, FileNotFoundError) and repo.origin == "prefix": log(f" -> not found, skipping (prefix-derived, non-fatal)") shutil.rmtree(cache_dir, ignore_errors=True) return None @@ -369,9 +387,7 @@ def download_repo_metadata( # repomd can't write outside cache_dir. safe_rel = href.lstrip("/") if ".." in Path(safe_rel).parts: - raise RuntimeError( - f"refusing to write metadata record outside cache: {href!r}" - ) + raise RuntimeError(f"refusing to write metadata record outside cache: {href!r}") dest = cache_dir / safe_rel log(f" fetching {url}") _http_get(url, dest, ssl_context) @@ -437,9 +453,7 @@ def _find_metadata_path(repo_dir: Path, kind: str) -> str: """Return the absolute path of *kind* (primary|filelists|other) for the cached repo at *repo_dir*.""" repomd = cr.Repomd() - cr.xml_parse_repomd( - str(repo_dir / "repodata" / "repomd.xml"), repomd, lambda *_: True - ) + cr.xml_parse_repomd(str(repo_dir / "repodata" / "repomd.xml"), repomd, lambda *_: True) for rec in repomd.records: if rec.type == kind: return str(repo_dir / rec.location_href) @@ -501,18 +515,12 @@ def pkgcb(pkg, *, _repo=repo): else: # Same NEVRA listed twice within one repo -> broken upstream # metadata. Worth a note but not alarming. - log( - f" note: NEVRA {nevra} appears multiple times in " - f"{_repo.url}; deduping" - ) + log(f" note: NEVRA {nevra} appears multiple times in {_repo.url}; deduping") - cr.xml_parse_primary( - primary, pkgcb=pkgcb, do_files=False, warningcb=lambda *_: True - ) + cr.xml_parse_primary(primary, pkgcb=pkgcb, do_files=False, warningcb=lambda *_: True) rpm_source_map = sorted( - ({"packageName": pn, "sourcePackageName": sn} - for pn, sn in src_map_set), + ({"packageName": pn, "sourcePackageName": sn} for pn, sn in src_map_set), key=lambda r: (r["packageName"], r["sourcePackageName"]), ) return universe, rpm_source_map @@ -522,6 +530,7 @@ def pkgcb(pkg, *, _repo=repo): # Phase 3: ask azldev for routing # --------------------------------------------------------------------------- + def query_known_components(repo_root: Path) -> set[str]: """Return the set of legitimate Azure Linux component names. @@ -533,7 +542,10 @@ def query_known_components(repo_root: Path) -> set[str]: log(" querying azldev comp list for legitimate component names") proc = subprocess.run( ["azldev", "comp", "list", "-a", "-q", "-O", "json"], - capture_output=True, text=True, cwd=repo_root, check=False, + capture_output=True, + text=True, + cwd=repo_root, + check=False, ) if proc.returncode != 0: sys.stderr.write(proc.stderr) @@ -572,9 +584,11 @@ def query_azldev( log(f" invoking azldev (map: {len(rpm_source_map)} entries)") proc = subprocess.run( - ["azldev", "package", "list", "--rpm-file", str(map_path), - "-q", "-O", "json"], - capture_output=True, text=True, cwd=repo_root, check=False, + ["azldev", "package", "list", "--rpm-file", str(map_path), "-q", "-O", "json"], + capture_output=True, + text=True, + cwd=repo_root, + check=False, ) if proc.returncode != 0: sys.stderr.write(proc.stderr) @@ -588,10 +602,7 @@ def query_azldev( rtype = row.get("type", "") component = row.get("component", "") or "" raw_channel = row.get("publishChannel", "") or "" - channel = ( - raw_channel[len(CHANNEL_PREFIX):] - if raw_channel.startswith(CHANNEL_PREFIX) else raw_channel - ) + channel = raw_channel[len(CHANNEL_PREFIX) :] if raw_channel.startswith(CHANNEL_PREFIX) else raw_channel if component and component not in known_components: # Foreign package: azldev synthesised a default channel for # something not actually built by AZL. Track and skip. @@ -599,7 +610,7 @@ def query_azldev( continue record = { "component": component, - "channel": channel, # may be "" + "channel": channel, # may be "" "raw_channel": raw_channel, "group": row.get("group", "") or "", } @@ -607,8 +618,7 @@ def query_azldev( routing.srpm[name] = record else: # default to rpm routing.rpm[name] = record - if (channel and component - and channel in ALLOWED_OUTPUT_CHANNELS): + if channel and component and channel in ALLOWED_OUTPUT_CHANNELS: component_channels[component][channel] += 1 routing.component_channels = dict(component_channels) return routing @@ -618,15 +628,16 @@ def query_azldev( # Phase 4: route each universe entry -> (channel, kind, arch) destination # --------------------------------------------------------------------------- + @dataclass class RoutingDecision: """Per-universe-entry decision: where the package should land, or why it was excluded.""" - dest_channel: str | None = None # 'base' | 'sdk' | None (=excluded) - reason: str = "" # human-readable provenance - inherited: bool = False # was Phase-4 inheritance used? - tie_break_used: bool = False # did inheritance pick via tie-break? + dest_channel: str | None = None # 'base' | 'sdk' | None (=excluded) + reason: str = "" # human-readable provenance + inherited: bool = False # was Phase-4 inheritance used? + tie_break_used: bool = False # did inheritance pick via tie-break? def _inherit_channel( @@ -657,9 +668,7 @@ def _inherit_channel( tied = [ch for ch, n in ranked if n == top_count] if len(tied) > 1: - picked = ( - tie_break_default if tie_break_default in tied else sorted(tied)[0] - ) + picked = tie_break_default if tie_break_default in tied else sorted(tied)[0] reason = ( f"inherited from sibling rpms (component={component}, " f"channels={dict(ranked)}, tied at {top_count}, picked " @@ -669,13 +678,12 @@ def _inherit_channel( picked = tied[0] if len(counts) == 1: - return picked, ( - f"inherited from sibling rpms (component={component})" - ), False - return picked, ( - f"inherited from sibling rpms (component={component}, " - f"channels={dict(ranked)}, picked {picked})" - ), False + return picked, (f"inherited from sibling rpms (component={component})"), False + return ( + picked, + (f"inherited from sibling rpms (component={component}, channels={dict(ranked)}, picked {picked})"), + False, + ) def decide_routing( @@ -718,9 +726,7 @@ def decide_routing( else: row = routing.rpm.get(name) if row is None: - decisions[key] = RoutingDecision( - None, "no azldev entry for package" - ) + decisions[key] = RoutingDecision(None, "no azldev entry for package") continue channel = row["channel"] if channel: @@ -732,9 +738,7 @@ def decide_routing( f"({sorted(ALLOWED_OUTPUT_CHANNELS)})", ) continue - decisions[key] = RoutingDecision( - channel, f"azldev publishChannel={row['raw_channel']!r}" - ) + decisions[key] = RoutingDecision(channel, f"azldev publishChannel={row['raw_channel']!r}") continue # Empty channel -> inheritance fallback (TODO above). inherited, why, tie_break_used = _inherit_channel( @@ -743,17 +747,12 @@ def decide_routing( tie_break_default, ) if inherited is None: - decisions[key] = RoutingDecision( - None, f"azldev publishChannel empty and {why}" - ) + decisions[key] = RoutingDecision(None, f"azldev publishChannel empty and {why}") else: # Warn at most once per tied component: the tie depends # only on (component, component_channels) so emitting on # every affected NEVRA would just spam. - if ( - tie_break_used - and row["component"] not in tied_components_warned - ): + if tie_break_used and row["component"] not in tied_components_warned: tied_components_warned.add(row["component"]) warn( f"channel-inheritance tie for component " @@ -775,11 +774,12 @@ def decide_routing( # Phase 5: per-destination writers # --------------------------------------------------------------------------- + @dataclass(frozen=True) class Destination: - channel: str # 'base' | 'sdk' - kind: str # main | debuginfo | srpms - arch: str # x86_64 | aarch64 | src + channel: str # 'base' | 'sdk' + kind: str # main | debuginfo | srpms + arch: str # x86_64 | aarch64 | src def relpath(self) -> str: if self.kind == KIND_MAIN: @@ -796,9 +796,9 @@ class _RepoWriter: # (xml record name, db record name, xml class, db class) _STREAMS: tuple[tuple[str, str, type, type], ...] = ( - ("primary", "primary_db", cr.PrimaryXmlFile, cr.PrimarySqlite), + ("primary", "primary_db", cr.PrimaryXmlFile, cr.PrimarySqlite), ("filelists", "filelists_db", cr.FilelistsXmlFile, cr.FilelistsSqlite), - ("other", "other_db", cr.OtherXmlFile, cr.OtherSqlite), + ("other", "other_db", cr.OtherXmlFile, cr.OtherSqlite), ) def __init__(self, dest: Destination, output_dir: Path, pkg_count: int): @@ -815,9 +815,7 @@ def __init__(self, dest: Destination, output_dir: Path, pkg_count: int): xml = xml_cls(xml_path) db = db_cls(db_path) xml.set_num_of_pkgs(pkg_count) - self._streams.append( - (xml_name, db_name, xml_path, db_path, xml, db) - ) + self._streams.append((xml_name, db_name, xml_path, db_path, xml, db)) self.added = 0 def add_pkg(self, pkg: cr.Package) -> None: @@ -885,14 +883,16 @@ def emit_repos( nameslot = (kind, arch, name) if nameslot not in unpub_seen: unpub_seen.add(nameslot) - unpublished.append({ - "name": name, - "kind": kind, - "arch": arch, - "source_repo": entry.repo.url, - "source_package": entry.source_pkg_name, - "reason": decision.reason, - }) + unpublished.append( + { + "name": name, + "kind": kind, + "arch": arch, + "source_repo": entry.repo.url, + "source_package": entry.source_pkg_name, + "reason": decision.reason, + } + ) continue dest = Destination(decision.dest_channel, kind, arch) dest_counts[dest] += 1 @@ -900,21 +900,21 @@ def emit_repos( nameslot = (kind, arch, name) if nameslot not in fb_seen: fb_seen.add(nameslot) - fallbacks.append({ - "name": name, - "kind": kind, - "arch": arch, - "source_repo": entry.repo.url, - "source_package": entry.source_pkg_name, - "dest_channel": decision.dest_channel, - "reason": decision.reason, - "tie_break_used": decision.tie_break_used, - }) + fallbacks.append( + { + "name": name, + "kind": kind, + "arch": arch, + "source_repo": entry.repo.url, + "source_package": entry.source_pkg_name, + "dest_channel": decision.dest_channel, + "reason": decision.reason, + "tie_break_used": decision.tie_break_used, + } + ) # Open writers up-front with correct counts. - writers: dict[Destination, _RepoWriter] = { - d: _RepoWriter(d, output_dir, n) for d, n in dest_counts.items() - } + writers: dict[Destination, _RepoWriter] = {d: _RepoWriter(d, output_dir, n) for d, n in dest_counts.items()} # Iterate each input repo's full metadata and route packages. emitted: set[UniverseKey] = set() @@ -952,9 +952,7 @@ def emit_repos( # in the input (in case the input repo already published one) # and respects any xml:base on the input package. input_base = pkg.location_base or repo_base - absolute_href = urllib.parse.urljoin( - input_base, pkg.location_href or "" - ) + absolute_href = urllib.parse.urljoin(input_base, pkg.location_href or "") pkg.location_href = absolute_href pkg.location_base = "" writers[dest].add_pkg(pkg) @@ -963,10 +961,7 @@ def emit_repos( for dest, writer in writers.items(): writer.finish() if writer.added != dest_counts[dest]: - warn( - f"writer for {dest.relpath()} expected " - f"{dest_counts[dest]} pkgs but emitted {writer.added}" - ) + warn(f"writer for {dest.relpath()} expected {dest_counts[dest]} pkgs but emitted {writer.added}") return dict(dest_counts), unpublished, fallbacks @@ -975,9 +970,8 @@ def emit_repos( # Phase 7: unpublished-packages and fallback-channel reports # --------------------------------------------------------------------------- -def write_unpublished_report( - unpublished: list[dict], output_dir: Path -) -> tuple[Path, Path]: + +def write_unpublished_report(unpublished: list[dict], output_dir: Path) -> tuple[Path, Path]: json_path = output_dir / "unpublished-packages.json" txt_path = output_dir / "unpublished-packages.txt" json_path.write_text(json.dumps(unpublished, indent=2)) @@ -1004,9 +998,7 @@ def write_unpublished_report( return json_path, txt_path -def write_fallback_report( - fallbacks: list[dict], output_dir: Path -) -> tuple[Path, Path]: +def write_fallback_report(fallbacks: list[dict], output_dir: Path) -> tuple[Path, Path]: """Mirror :func:`write_unpublished_report` for inheritance-fallback routings. These packages WERE routed (so they appear in the published repos) but only because Phase-4 inferred a channel from sibling rpms @@ -1050,6 +1042,7 @@ def write_fallback_report( # CLI / orchestration # --------------------------------------------------------------------------- + class _OrderedRepoSourceAction(argparse.Action): """Append (option_string, value) into a single shared list across --repo-prefix and --repo, preserving CLI order. @@ -1069,15 +1062,21 @@ def __call__(self, parser, namespace, values, option_string=None): def parse_args(argv: list[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( - "--output-dir", required=True, type=Path, + "--output-dir", + required=True, + type=Path, help="Directory to write the routed per-channel/per-arch repos into.", ) parser.add_argument( - "--repo-prefix", action=_OrderedRepoSourceAction, - dest="repo_sources", default=None, metavar="URL", + "--repo-prefix", + action=_OrderedRepoSourceAction, + dest="repo_sources", + default=None, + metavar="URL", help=( "URL prefix assumed to host the Standard Azure Linux Repo " "Layout. Expanded into all six sub-repos (404s on any are " @@ -1086,8 +1085,11 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: ), ) parser.add_argument( - "--repo", action=_OrderedRepoSourceAction, - dest="repo_sources", default=None, metavar="TYPE:URL", + "--repo", + action=_OrderedRepoSourceAction, + dest="repo_sources", + default=None, + metavar="TYPE:URL", help=( "Explicit single repo: TYPE:URL where TYPE is main, debuginfo, " "or srpms. URL may contain `$basearch` for main/debuginfo. " @@ -1096,23 +1098,27 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: ), ) parser.add_argument( - "--repo-root", type=Path, default=DEFAULT_REPO_ROOT, + "--repo-root", + type=Path, + default=DEFAULT_REPO_ROOT, help="Path to the azurelinux project root (default: %(default)s).", ) parser.add_argument( - "--arch", action="append", default=[], - help=( - f"Arch to expand `$basearch` into (default: " - f"{', '.join(DEFAULT_ARCHES)}). Repeatable." - ), + "--arch", + action="append", + default=[], + help=(f"Arch to expand `$basearch` into (default: {', '.join(DEFAULT_ARCHES)}). Repeatable."), ) parser.add_argument( - "--keep-cache", action="store_true", + "--keep-cache", + action="store_true", help="Don't delete the metadata cache dir under /.cache/.", ) tls = parser.add_mutually_exclusive_group() tls.add_argument( - "--ca-bundle", type=Path, default=None, + "--ca-bundle", + type=Path, + default=None, help=( "Path to a PEM-encoded CA bundle to trust for HTTPS repo " "fetches (e.g. for repos served by a self-signed CA). " @@ -1120,7 +1126,8 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: ), ) tls.add_argument( - "--insecure", action="store_true", + "--insecure", + action="store_true", help=( "Disable TLS certificate verification entirely for HTTPS repo " "fetches. Use only for trusted networks; prefer --ca-bundle " @@ -1168,14 +1175,10 @@ def main(argv: list[str] | None = None) -> int: try: cache_dir = download_repo_metadata(repo, cache_root, ssl_context) except urllib.error.HTTPError as e: - return fatal( - f"HTTP {e.code} fetching {repo.url}/repodata/repomd.xml " - f"(origin={repo.origin})" - ) + return fatal(f"HTTP {e.code} fetching {repo.url}/repodata/repomd.xml (origin={repo.origin})") if cache_dir is not None: repo_to_dir[repo] = cache_dir - log(f" {len(repo_to_dir)} repo(s) successfully downloaded " - f"({len(repos) - len(repo_to_dir)} skipped)") + log(f" {len(repo_to_dir)} repo(s) successfully downloaded ({len(repos) - len(repo_to_dir)} skipped)") if not repo_to_dir: return fatal("no input repos with usable repodata; nothing to route") @@ -1183,18 +1186,17 @@ def main(argv: list[str] | None = None) -> int: # ---- Phase 2: build package universe + source map ------------------ log("==> Building package universe ...") universe, src_map = build_package_universe(repo_to_dir) - log(f" {len(universe)} unique (kind, arch, NEVRA) entries; " - f"{len(src_map)} unique (pkg, srpm) pairs for azldev") + log(f" {len(universe)} unique (kind, arch, NEVRA) entries; {len(src_map)} unique (pkg, srpm) pairs for azldev") # ---- Phase 3: query azldev ----------------------------------------- log("==> Querying azldev for routing ...") known_components = query_known_components(args.repo_root) - routing = query_azldev( - args.repo_root, src_map, output_dir, known_components - ) - log(f" azldev returned {len(routing.rpm)} rpm row(s), " + routing = query_azldev(args.repo_root, src_map, output_dir, known_components) + log( + f" azldev returned {len(routing.rpm)} rpm row(s), " f"{len(routing.srpm)} srpm row(s), " - f"{len(routing.foreign_names)} foreign name(s) (excluded)") + f"{len(routing.foreign_names)} foreign name(s) (excluded)" + ) # ---- Phase 4: per-entry routing decisions -------------------------- log("==> Computing routing decisions ...") @@ -1202,14 +1204,11 @@ def main(argv: list[str] | None = None) -> int: n_pub = sum(1 for d in decisions.values() if d.dest_channel is not None) n_unpub = sum(1 for d in decisions.values() if d.dest_channel is None) n_inh = sum(1 for d in decisions.values() if d.inherited) - log(f" routed: {n_pub} | unpublished: {n_unpub} | " - f"inheritance-fallback used: {n_inh}") + log(f" routed: {n_pub} | unpublished: {n_unpub} | inheritance-fallback used: {n_inh}") # ---- Phase 5+6: open writers and emit ------------------------------ log("==> Writing per-destination repos ...") - dest_counts, unpublished, fallbacks = emit_repos( - repo_to_dir, universe, decisions, output_dir - ) + dest_counts, unpublished, fallbacks = emit_repos(repo_to_dir, universe, decisions, output_dir) # ---- Phase 7: unpublished + fallback reports ----------------------- log("==> Writing unpublished-packages report ...") diff --git a/scripts/update-components-list.py b/scripts/update-components-list.py index 4dd778b0114..5db304e73b3 100755 --- a/scripts/update-components-list.py +++ b/scripts/update-components-list.py @@ -51,11 +51,7 @@ def component_name_from_header(line: str) -> str: def resolve_source_packages(packages_file: Path) -> set[str]: """Map binary package names from a .packages file to source package names.""" - names = [ - l.split("|")[0].strip() - for l in packages_file.read_text().splitlines() - if l.strip() - ] + names = [l.split("|")[0].strip() for l in packages_file.read_text().splitlines() if l.strip()] r = subprocess.run( [ "xargs", @@ -144,9 +140,7 @@ def add_and_sort_components(components_toml: Path, new: list[str]) -> None: def cmd_add_missing(args: argparse.Namespace) -> int: """Handler for the ``add-missing`` subcommand.""" repo_root = args.repo_root.resolve() - components_toml = ( - args.components_toml or repo_root / "base" / "comps" / "components.toml" - ).resolve() + components_toml = (args.components_toml or repo_root / "base" / "comps" / "components.toml").resolve() if not args.packages_file.is_file(): sys.exit(f"Error: Packages file not found: {args.packages_file}") @@ -154,9 +148,7 @@ def cmd_add_missing(args: argparse.Namespace) -> int: sys.exit(f"Error: components.toml not found: {components_toml}") print("Finding missing components...") - missing = sorted( - resolve_source_packages(args.packages_file) - get_existing_components() - ) + missing = sorted(resolve_source_packages(args.packages_file) - get_existing_components()) if not missing: print("No missing components found!") @@ -241,10 +233,7 @@ def cmd_update(args: argparse.Namespace) -> int: shutil.rmtree(comp_dir) if kept: - print( - f"\nKept {len(kept)} dedicated component dir(s) " - f"(present in sources list)" - ) + print(f"\nKept {len(kept)} dedicated component dir(s) (present in sources list)") if removed: verb = "Would remove" if args.dry_run else "Removed" print(f"\n{verb} {len(removed)} stale dedicated component dir(s):") @@ -297,8 +286,7 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: "--packages-file", type=Path, required=True, - help="Path to the .packages file " - "(e.g. ./base/out/images/vm-base-dev/azl4-vm-base.x86_64-0.1.packages)", + help="Path to the .packages file (e.g. ./base/out/images/vm-base-dev/azl4-vm-base.x86_64-0.1.packages)", ) add_missing.add_argument( "-r", @@ -343,8 +331,7 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: "--components-root", type=Path, default=None, - help="Path to the folder containing component definitions " - "(default: /base/comps)", + help="Path to the folder containing component definitions (default: /base/comps)", ) update.add_argument( "-o", From e1e6365127c6f0cf7bd55ea42ff575003cd69b29 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Wed, 27 May 2026 19:54:32 -0700 Subject: [PATCH 3/3] fix(mcp): move mcp scripts to folder linters can read --- .vscode/mcp.json | 6 +++--- DEVELOPING.md | 4 ++-- scripts/batch-triage/triage.sh | 15 +++++++++------ {.vscode => scripts}/mcps/.env.example | 2 +- {.vscode => scripts}/mcps/_mcp_utils.py | 14 ++++++++------ {.vscode => scripts}/mcps/fedora-distgit-mcp.py | 0 {.vscode => scripts}/mcps/koji-mcp.py | 2 +- {.vscode => scripts}/mcps/requirements.txt | 0 8 files changed, 24 insertions(+), 19 deletions(-) rename {.vscode => scripts}/mcps/.env.example (97%) rename {.vscode => scripts}/mcps/_mcp_utils.py (92%) rename {.vscode => scripts}/mcps/fedora-distgit-mcp.py (100%) rename {.vscode => scripts}/mcps/koji-mcp.py (99%) rename {.vscode => scripts}/mcps/requirements.txt (100%) diff --git a/.vscode/mcp.json b/.vscode/mcp.json index bd89230a9f8..0b7d49ba19d 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -4,14 +4,14 @@ "type": "stdio", "command": "python3", "args": [ - ".vscode/mcps/koji-mcp.py" + "scripts/mcps/koji-mcp.py" ] }, "fedora-distgit": { "type": "stdio", "command": "python3", "args": [ - ".vscode/mcps/fedora-distgit-mcp.py" + "scripts/mcps/fedora-distgit-mcp.py" ] }, "azure-mcp-server": { @@ -70,4 +70,4 @@ "password": false } ] -} \ No newline at end of file +} diff --git a/DEVELOPING.md b/DEVELOPING.md index 1bdf5c283a5..6a2edbbfde6 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -34,10 +34,10 @@ The `azl-diagnose` agent and Koji-related tools require: 1. **MCP Python packages** — the MCP servers won't start without them: ```bash - pip3 install --user -r .vscode/mcps/requirements.txt + pip3 install --user -r scripts/mcps/requirements.txt ``` 2. **Network access to the internal Koji instance** — The internal Koji is only accessible via VPN or the corporate network. If the agent reports connection errors, verify you are connected before retrying. -3. **(Optional) `.env` configuration** — Create a `.env` file (in the workspace root or `.vscode/mcps/`) to pre-configure MCP server settings like the Koji base URL and pre-approved insecure URLs. This avoids the agent having to set the URL or approve self-signed certificates every session. See [.vscode/mcps/.env.example](.vscode/mcps/.env.example) for available variables. +3. **(Optional) `.env` configuration** — Create a `.env` file (in the workspace root or `scripts/mcps/`) to pre-configure MCP server settings like the Koji base URL and pre-approved insecure URLs. This avoids the agent having to set the URL or approve self-signed certificates every session. See [scripts/mcps/.env.example](scripts/mcps/.env.example) for available variables. Ask Copilot about any aspect of the project — it can reference the instructions and skills to provide detailed, actionable answers or perform tasks. For example: diff --git a/scripts/batch-triage/triage.sh b/scripts/batch-triage/triage.sh index 10a1816917a..68e844f78ed 100755 --- a/scripts/batch-triage/triage.sh +++ b/scripts/batch-triage/triage.sh @@ -78,7 +78,7 @@ REPO_ROOT="$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)" # Always build — layer caching makes this fast when nothing changed # Copy requirements.txt into the build context (COPY can't reach outside it) -cp "${REPO_ROOT}/.vscode/mcps/requirements.txt" "${SCRIPT_DIR}/requirements.txt" +cp "${REPO_ROOT}/scripts/mcps/requirements.txt" "${SCRIPT_DIR}/requirements.txt" # Trap will not fire for exec, but will cover cleanup if the build step fails trap 'rm -f "${SCRIPT_DIR}/requirements.txt"' EXIT @@ -103,13 +103,16 @@ DOCKER_ARGS+=( -u "$(id -u):$(id -g)" ) # Require a .env file for MCP server config (Koji URL, auth, etc.) ENV_FILE="" -if [[ -f "${REPO_ROOT}/.vscode/mcps/.env" ]]; then - ENV_FILE="${REPO_ROOT}/.vscode/mcps/.env" -elif [[ -f "${REPO_ROOT}/.env" ]]; then +if [[ -f "${REPO_ROOT}/.env" ]]; then ENV_FILE="${REPO_ROOT}/.env" +elif [[ -f "${REPO_ROOT}/scripts/mcps/.env" ]]; then + ENV_FILE="${REPO_ROOT}/scripts/mcps/.env" +elif [[ -f "${REPO_ROOT}/.vscode/mcps/.env" ]]; then + # Legacy location from when MCP scripts lived under .vscode/mcps/ + ENV_FILE="${REPO_ROOT}/.vscode/mcps/.env" else - echo "Error: No .env file found. Create one at .env in the repo root or .vscode/mcps/.env" >&2 - echo "See .vscode/mcps/.env.example for required variables." >&2 + echo "Error: No .env file found. Create one at .env in the repo root or scripts/mcps/.env" >&2 + echo "See scripts/mcps/.env.example for required variables." >&2 echo "Koji MCP will not auto-configure without this file." >&2 exit 1 fi diff --git a/.vscode/mcps/.env.example b/scripts/mcps/.env.example similarity index 97% rename from .vscode/mcps/.env.example rename to scripts/mcps/.env.example index 241740c34c6..45a74dc69d5 100644 --- a/.vscode/mcps/.env.example +++ b/scripts/mcps/.env.example @@ -2,7 +2,7 @@ # # Copy this file to .env to one of the following locations to set environment variables for the MCP servers: # - .env in the workspace root -# - .vscode/mcps/.env +# - scripts/mcps/.env # # These variables are loaded by the MCP servers at startup. They can also # be set as regular environment variables (env vars take precedence). diff --git a/.vscode/mcps/_mcp_utils.py b/scripts/mcps/_mcp_utils.py similarity index 92% rename from .vscode/mcps/_mcp_utils.py rename to scripts/mcps/_mcp_utils.py index d76c6b3a597..cd300a444ca 100644 --- a/.vscode/mcps/_mcp_utils.py +++ b/scripts/mcps/_mcp_utils.py @@ -1,5 +1,4 @@ -""" -Shared utilities for Azure Linux MCP servers. +"""Shared utilities for Azure Linux MCP servers. Provides common helpers for URL validation, SSRF guards, output handling, and .env file loading so individual MCP servers stay DRY. @@ -18,7 +17,7 @@ # Return type for all MCP tools — values may be str, int, list, or None. StatusDict = dict[str, Any] -_INSTALL_HINT = "Install with: pip3 install --user -r .vscode/mcps/requirements.txt" +_INSTALL_HINT = "Install with: pip3 install --user -r scripts/mcps/requirements.txt" try: from dotenv import load_dotenv @@ -50,8 +49,9 @@ def load_env(env_path: str | Path | None = None) -> None: 1. Explicit ``env_path`` argument 2. ``.env`` in the current working directory 3. ``.env`` in the workspace root (two dirs up from this source file) - 4. ``.vscode/mcps/.env`` relative to the working directory - 5. ``.vscode/mcps/.env`` next to this source file + 4. ``scripts/mcps/.env`` next to this source file + 5. ``.vscode/mcps/.env`` relative to the working directory (legacy) + 6. ``.vscode/mcps/.env`` in the workspace root (legacy) Uses python-dotenv for parsing. Existing env vars are NOT overridden. """ @@ -60,8 +60,10 @@ def load_env(env_path: str | Path | None = None) -> None: candidates.append(Path(env_path)) candidates.append(Path.cwd() / ".env") candidates.append(Path(__file__).resolve().parent.parent.parent / ".env") - candidates.append(Path.cwd() / ".vscode" / "mcps" / ".env") candidates.append(Path(__file__).resolve().parent / ".env") + # Legacy locations from when MCP scripts lived under .vscode/mcps/ + candidates.append(Path.cwd() / ".vscode" / "mcps" / ".env") + candidates.append(Path(__file__).resolve().parent.parent.parent / ".vscode" / "mcps" / ".env") for c in candidates: if c.is_file(): diff --git a/.vscode/mcps/fedora-distgit-mcp.py b/scripts/mcps/fedora-distgit-mcp.py similarity index 100% rename from .vscode/mcps/fedora-distgit-mcp.py rename to scripts/mcps/fedora-distgit-mcp.py diff --git a/.vscode/mcps/koji-mcp.py b/scripts/mcps/koji-mcp.py similarity index 99% rename from .vscode/mcps/koji-mcp.py rename to scripts/mcps/koji-mcp.py index 0840c3b9bd3..8e67fe9a177 100755 --- a/.vscode/mcps/koji-mcp.py +++ b/scripts/mcps/koji-mcp.py @@ -75,7 +75,7 @@ ) -def _add_status(result: StatusDict, *, full: bool) -> StatusDict: +def _add_status(result: StatusDict, full: bool) -> StatusDict: """Build a snapshot of the MCP server's current state.""" status = { "default_base_url": _base_url, diff --git a/.vscode/mcps/requirements.txt b/scripts/mcps/requirements.txt similarity index 100% rename from .vscode/mcps/requirements.txt rename to scripts/mcps/requirements.txt