From d098028c8208909ae9d86ca2f5925de0f4b8a305 Mon Sep 17 00:00:00 2001 From: fausto Date: Thu, 23 Apr 2026 17:24:47 +0200 Subject: [PATCH 1/2] Add GitLab pipeline artifacts support with list and grep commands (#36) Introduce `pipelines artifacts list` and `pipelines artifacts grep` commands for GitLab to enumerate and search job artifact contents. Add artifact download, extraction, and caching logic with local ripgrep-based search. Extend CLI parser to register artifacts subcommands under pipelines (GitLab-only). Add `_require_gitlab_target` validation, artifact tree API integration, and comprehensive error handling for missing artifacts or an unavailable artifact tree API. --- skills/smith/SKILL.md | 4 +- src/smith/cli/handlers.py | 41 ++ src/smith/cli/parser.py | 78 ++++ src/smith/client.py | 77 ++++ src/smith/formatting.py | 11 +- src/smith/providers/gitlab_builds.py | 427 +++++++++++++++++- tests/contract/test_cli_contracts.py | 75 +++ tests/contract/test_client_contracts.py | 85 ++++ .../test_gitlab_provider_contracts.py | 126 ++++++ tests/unit/test_formatting_contracts.py | 15 + tests/unit/test_parser.py | 83 +++- 11 files changed, 1016 insertions(+), 6 deletions(-) diff --git a/skills/smith/SKILL.md b/skills/smith/SKILL.md index 386a8f8..7342a45 100644 --- a/skills/smith/SKILL.md +++ b/skills/smith/SKILL.md @@ -40,7 +40,7 @@ Full vocabulary and flags live in `references/usage-recipes.md`. The minimum you | Discovery | `smith orgs`, `smith orgs`, `smith groups`, `smith repos `, `smith repos`, `smith repos` | | Focused grep | `smith code grep ""`, `smith code grep ""`, `smith code grep ""` | | PRs / MRs | `smith prs search`, `smith prs search`, `smith prs search`, `smith prs list `, `smith prs list ` | -| Pipelines | `smith pipelines list `, `smith pipelines list `, `smith pipelines grep ""`, `smith pipelines grep ""` | +| Pipelines | `smith pipelines list `, `smith pipelines list `, `smith pipelines grep ""`, `smith pipelines grep ""`, `smith pipelines artifacts list `, `smith pipelines artifacts grep ""` | | Stories / Issues | `smith stories search --query`, `smith stories search --query`, `smith stories search --query` | Rules that save retries: @@ -51,6 +51,7 @@ Rules that save retries: - **YouTrack**: no repo arg; only issue IDs (e.g. `RAD-1055`) and `--query`. - Global `smith code search` and `smith prs search` target every enabled remote and reject `--project` or `--repo`. Use `smith ...` to narrow. - `pipelines grep ... ` expects a pipeline/run/build ID. For a specific job or log, call `pipelines list ...` first to find the parent ID, then `pipelines grep ... ".*" --log-id `. +- `pipelines artifacts ... ` is GitLab-only. Use `artifacts list` to enumerate archive paths and `artifacts grep` to search their contents. - `pipelines list ... ` prints a compact DAG (`@` pipelines, `#` stages, `*` jobs, inline `>` downstream). GitLab traverses child pipelines via GraphQL (REST fallback emits header-only rows with a warning). Filter with `--status`, `--grep`, `--skip`/`--take`, `--max-depth` (gitlab only, default 0 = unlimited). Full grammar lives in `references/pipelines-format.md`. Use `--help` on any command for flags. @@ -67,6 +68,7 @@ Use `--help` on any command for flags. ### Pipeline Analysis 1. Use `smith pipelines list --status failed` to focus on failed jobs. 2. Once you know the pipeline log ID, use `smith pipelines grep --reverse` to analyze the logs. +3. For GitLab artifact-backed failures, use `smith pipelines artifacts list ` and then `... artifacts grep ... ""`. 
## Stop Conditions Stop narrowing and answer when any of these is true: diff --git a/src/smith/cli/handlers.py b/src/smith/cli/handlers.py index f82990b..aff7af9 100644 --- a/src/smith/cli/handlers.py +++ b/src/smith/cli/handlers.py @@ -633,6 +633,47 @@ def handle_ci_grep(client: SmithClient, args: argparse.Namespace) -> int: ) +def handle_ci_artifacts_list(client: SmithClient, args: argparse.Namespace) -> int: + data = client.execute_ci_artifacts_list( + remote_or_provider=_selected_target(args), + project=getattr(args, "project", None), + repo=getattr(args, "repo", None), + pipeline_id=args.id, + job_id=args.job_id, + ) + return _emit_success( + args=args, + command=args.command_id, + data=data, + partial=_is_partial_result(data), + ) + + +def handle_ci_artifacts_grep(client: SmithClient, args: argparse.Namespace) -> int: + data = client.execute_ci_artifacts_grep( + remote_or_provider=_selected_target(args), + project=getattr(args, "project", None), + repo=getattr(args, "repo", None), + pipeline_id=args.id, + job_id=args.job_id, + pattern=args.pattern, + path=getattr(args, "path", None), + glob=getattr(args, "glob", None), + output_mode=args.output_mode, + case_insensitive=not args.case_sensitive, + context_lines=args.context_lines, + from_line=args.from_line, + to_line=args.to_line, + reverse=getattr(args, "reverse", False), + ) + return _emit_success( + args=args, + command=args.command_id, + data=data, + partial=_is_partial_result(data), + ) + + def handle_work_get(client: SmithClient, args: argparse.Namespace) -> int: request_kwargs: dict[str, Any] = { "remote_or_provider": _selected_target(args), diff --git a/src/smith/cli/parser.py b/src/smith/cli/parser.py index 132288d..7299ad1 100644 --- a/src/smith/cli/parser.py +++ b/src/smith/cli/parser.py @@ -7,6 +7,8 @@ from smith.cli.handlers import ( _csv_list, handle_cache_clean, + handle_ci_artifacts_grep, + handle_ci_artifacts_list, handle_ci_grep, handle_ci_list, handle_code_grep, @@ -183,6 +185,32 @@ def _add_ci_grep_options(parser: argparse.ArgumentParser) -> None: parser.add_argument("--case-sensitive", action="store_true") +def _add_artifact_grep_options(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--path", help="Artifact path scope within the extracted archive") + parser.add_argument("--glob", help="Artifact filename glob filter") + parser.add_argument( + "pattern", + help=( + 'Regex pattern. Use ".*" to match all. 
' + 'Form: smith pipelines artifacts grep ""' + ), + ) + parser.add_argument( + "--output-mode", + choices=["content", "files_with_matches", "count"], + default="content", + ) + parser.add_argument("--context-lines", type=int, default=3) + parser.add_argument("--from-line", type=int) + parser.add_argument("--to-line", type=int) + parser.add_argument( + "--reverse", + action="store_true", + help="Emit matches in reverse order so the most recent hits appear first.", + ) + parser.add_argument("--case-sensitive", action="store_true") + + def _add_work_search_filters(parser: argparse.ArgumentParser, *, include_area: bool = True) -> None: if include_area: parser.add_argument("--area") @@ -549,6 +577,56 @@ def _add_remote_pipelines_group(remote_subparsers: Any, *, remote: RemoteConfig) _add_output_format(pipelines_grep) _set_handler(pipelines_grep, handle_ci_grep, "pipelines.grep", primary_path="pipelines grep") + if remote.provider == "gitlab": + pipelines_artifacts = _add_parser( + pipelines_sub, + "artifacts", + help_text="List and grep GitLab job artifacts", + ) + pipelines_artifacts_sub = pipelines_artifacts.add_subparsers( + dest="pipelines_artifacts_action", + required=True, + ) + + pipelines_artifacts_list = _add_parser( + pipelines_artifacts_sub, + "list", + help_text="List artifact paths for a job", + ) + _add_pipeline_positional_args( + pipelines_artifacts_list, + remote=remote, + id_label=id_label, + ) + pipelines_artifacts_list.add_argument("job_id", type=int, help="Job ID") + _add_output_format(pipelines_artifacts_list) + _set_handler( + pipelines_artifacts_list, + handle_ci_artifacts_list, + "pipelines.artifacts.list", + primary_path="pipelines artifacts list", + ) + + pipelines_artifacts_grep = _add_parser( + pipelines_artifacts_sub, + "grep", + help_text="Search extracted GitLab job artifacts", + ) + _add_pipeline_positional_args( + pipelines_artifacts_grep, + remote=remote, + id_label=id_label, + ) + pipelines_artifacts_grep.add_argument("job_id", type=int, help="Job ID") + _add_artifact_grep_options(pipelines_artifacts_grep) + _add_output_format(pipelines_artifacts_grep) + _set_handler( + pipelines_artifacts_grep, + handle_ci_artifacts_grep, + "pipelines.artifacts.grep", + primary_path="pipelines artifacts grep", + ) + def _add_remote_stories_group(remote_subparsers: Any, *, remote: RemoteConfig) -> None: stories = _add_parser(remote_subparsers, "stories", help_text="Get, search, and get mine") diff --git a/src/smith/client.py b/src/smith/client.py index 9ce592e..cb313fe 100755 --- a/src/smith/client.py +++ b/src/smith/client.py @@ -80,6 +80,15 @@ def _require_single_target(remote_or_provider: str, *, command: str) -> str: raise ValueError(f"{command} does not support target 'all'. 
Use a configured remote name.") return target + def _require_gitlab_target(self, remote_or_provider: str, *, command: str) -> str: + target = self._require_single_target(remote_or_provider, command=command) + remotes = self._resolve_remotes(target) + if not remotes: + raise ValueError(f"No enabled remote found for '{target}'") + if len(remotes) != 1 or remotes[0].provider != "gitlab": + raise ValueError(f"{command} is only supported for GitLab remotes.") + return target + def _get_provider_for_remote(self, remote: RemoteConfig) -> BaseProvider: if remote.name in self._provider_cache: return self._provider_cache[remote.name] @@ -725,6 +734,74 @@ def execute_ci_grep( }, ) + def execute_ci_artifacts_list( + self, + *, + remote_or_provider: str, + project: str | None, + repo: str | None, + pipeline_id: int, + job_id: int, + ) -> dict[str, Any]: + target = self._require_gitlab_target( + remote_or_provider, + command="pipelines.artifacts.list", + ) + effective_repo = repo or project + return self._fanout( + remote_or_provider=target, + operations={ + "gitlab": lambda r: self._gitlab_provider(r).list_job_artifacts( + repo=str(effective_repo), + pipeline_id=pipeline_id, + job_id=job_id, + ), + }, + ) + + def execute_ci_artifacts_grep( + self, + *, + remote_or_provider: str, + project: str | None, + repo: str | None, + pipeline_id: int, + job_id: int, + pattern: str | None, + path: str | None, + glob: str | None, + output_mode: Literal["content", "files_with_matches", "count"], + case_insensitive: bool, + context_lines: int | None, + from_line: int | None, + to_line: int | None, + reverse: bool = False, + ) -> dict[str, Any]: + target = self._require_gitlab_target( + remote_or_provider, + command="pipelines.artifacts.grep", + ) + effective_repo = repo or project + return self._fanout( + remote_or_provider=target, + operations={ + "gitlab": lambda r: self._gitlab_provider(r).grep_job_artifacts( + repo=str(effective_repo), + pipeline_id=pipeline_id, + job_id=job_id, + pattern=pattern, + path=path, + glob=glob, + output_mode=output_mode, + case_insensitive=case_insensitive, + context_lines=context_lines, + from_line=from_line, + to_line=to_line, + reverse=reverse, + ), + }, + ) + def execute_work_get( self, *, diff --git a/src/smith/formatting.py b/src/smith/formatting.py index dfa88e3..e75c773 100755 --- a/src/smith/formatting.py +++ b/src/smith/formatting.py @@ -239,6 +239,11 @@ def _render_grep(data: Any) -> str: return text +def _render_artifacts_list(data: Any) -> str: + paths = data.get("paths", []) if isinstance(data, dict) else [] + return "\n".join(str(path) for path in paths if str(path).strip()) + + def _render_cache_clean(data: Any) -> str: cleaned = data.get("cleaned", []) if isinstance(data, dict) else [] missing = data.get("missing", []) if isinstance(data, dict) else [] @@ -758,6 +763,8 @@ def _render_needs(needs: Any, *, name_to_id: dict[str, Any]) -> str: "cache.clean": _render_cache_clean, "pipelines.list": _render_pipelines_list, "pipelines.grep": _render_grep, + "pipelines.artifacts.list": _render_artifacts_list, + "pipelines.artifacts.grep": _render_grep, "prs.search": _render_pr_list, "prs.list": _render_pr_list, "prs.get": _render_pr_get, @@ -805,7 +812,7 @@ def _render_remote_grouped(command: str, payload: dict[str, Any]) -> str: lines.append(rendered) warnings = _visible_remote_warnings(command, remote_data, entry.get("warnings") or []) - if command not in {"code.grep", "pipelines.grep"}: + if command not in {"code.grep", "pipelines.grep", "pipelines.artifacts.grep"}: for 
warning in warnings: lines.append(f"warning: {warning}") return "\n".join(lines).rstrip() @@ -851,7 +858,7 @@ def _render_remote_grouped(command: str, payload: dict[str, Any]) -> str: output_lines.append(rendered) warnings = _visible_remote_warnings(command, remote_data, entry.get("warnings") or []) - if command not in {"code.grep", "pipelines.grep"}: + if command not in {"code.grep", "pipelines.grep", "pipelines.artifacts.grep"}: for warning in warnings: output_lines.append(f"warning: {warning}") output_lines.append("") diff --git a/src/smith/providers/gitlab_builds.py b/src/smith/providers/gitlab_builds.py index 186ea52..1e2118a 100644 --- a/src/smith/providers/gitlab_builds.py +++ b/src/smith/providers/gitlab_builds.py @@ -1,8 +1,15 @@ from __future__ import annotations import logging +import os import re +import shutil +import stat +import subprocess +import tempfile +import zipfile from concurrent.futures import ThreadPoolExecutor +from pathlib import Path from typing import TYPE_CHECKING, Any, Literal from smith.errors import SmithApiError @@ -14,7 +21,15 @@ build_pipeline_row, normalize_gitlab_status, ) -from smith.providers.helpers import grep_build_logs_core +from smith.providers import local_checkout as _local_checkout +from smith.providers.helpers import ( + build_grep_result, + grep_build_logs_core, + grep_compile_error_result, + grep_match_lines, + grep_too_many_files_result, +) +from smith.utils import compile_search_pattern, normalize_path, slice_lines if TYPE_CHECKING: # pragma: no cover - import-time only pass @@ -181,6 +196,7 @@ def _pipeline_query_by_iid(query_text: str) -> str: _GID_PIPELINE_PATTERN = re.compile(r"/(?:Ci::Pipeline|CiPipeline)/(\d+)$") _GID_JOB_PATTERN = re.compile(r"/(?:Ci::Build|Ci::Bridge|CiJob)/(\d+)$") _GID_PROJECT_PATTERN = re.compile(r"/Project/(\d+)$") +_ARTIFACT_TREE_UNAVAILABLE_HINT = "This command requires GitLab 18.8+ and a job with downloadable artifacts." 
def _extract_numeric_id(gid: Any, pattern: re.Pattern[str]) -> int | str | None: @@ -207,6 +223,13 @@ def _pipeline_gid(pipeline_id: int | str) -> str: return f"gid://gitlab/Ci::Pipeline/{pipeline_id}" +def _ids_match(left: Any, right: Any) -> bool: + try: + return int(left) == int(right) + except (TypeError, ValueError): + return str(left or "").strip() == str(right or "").strip() + + def _is_graphql_schema_error(exc: SmithApiError) -> bool: message = str(exc) markers = ( @@ -316,6 +339,408 @@ def _get_content(lid: int) -> str: reverse=reverse, ) + def _pipeline_jobs( + self: Any, + *, + repo: str, + pipeline_id: int, + ) -> list[dict[str, Any]]: + return [ + item + for item in self._get_paginated_list( + f"/projects/{self._project_id(repo)}/pipelines/{pipeline_id}/jobs" + ) + if isinstance(item, dict) + ] + + @staticmethod + def _job_has_downloadable_artifacts(job: dict[str, Any]) -> bool: + artifacts_file = job.get("artifacts_file") + if isinstance(artifacts_file, dict) and str(artifacts_file.get("filename") or "").strip(): + return True + artifacts = job.get("artifacts") + if not isinstance(artifacts, list): + return False + return any( + isinstance(item, dict) + and str(item.get("file_type") or "").strip().lower() == "archive" + for item in artifacts + ) + + def _pipeline_job_with_artifacts( + self: Any, + *, + repo: str, + pipeline_id: int, + job_id: int, + ) -> dict[str, Any]: + for job in self._pipeline_jobs(repo=repo, pipeline_id=pipeline_id): + if _ids_match(job.get("id"), job_id): + if self._job_has_downloadable_artifacts(job): + return job + raise ValueError( + f"Job {job_id} in pipeline {pipeline_id} has no downloadable artifacts." + ) + raise ValueError(f"Job {job_id} not found in pipeline {pipeline_id}.") + + @staticmethod + def _artifacts_cache_root() -> str: + return str(Path(tempfile.gettempdir()) / "smith-gitlab-artifacts") + + def _artifacts_checkout_path( + self: Any, + *, + repo: str, + pipeline_id: int, + job_id: int, + ) -> str: + return os.path.join( + self._artifacts_cache_root(), + self._sanitize_cache_component(self._gitlab_host()), + self._sanitize_cache_component(self._full_project_path(repo)), + str(pipeline_id), + str(job_id), + ) + + @staticmethod + def _artifacts_archive_path(checkout_dir: str) -> str: + return os.path.join(checkout_dir, "artifacts.zip") + + @staticmethod + def _artifacts_extract_path(checkout_dir: str) -> str: + return os.path.join(checkout_dir, "files") + + @staticmethod + def _artifacts_ready_marker(checkout_dir: str) -> str: + return os.path.join(checkout_dir, ".ready") + + def _artifact_tree_entries( + self: Any, + *, + repo: str, + job_id: int, + ) -> list[dict[str, Any]]: + try: + return [ + item + for item in self._get_paginated_list( + f"/projects/{self._project_id(repo)}/jobs/{job_id}/artifacts/tree", + params={"recursive": "true"}, + ) + if isinstance(item, dict) + ] + except SmithApiError as exc: + if exc.status_code == 404: + raise SmithApiError( + f"GitLab artifact tree is unavailable for job {job_id}. 
{_ARTIFACT_TREE_UNAVAILABLE_HINT}", + status_code=exc.status_code, + ) from exc + raise + + def list_job_artifacts( + self: Any, + *, + repo: str, + pipeline_id: int, + job_id: int, + ) -> dict[str, Any]: + self._pipeline_job_with_artifacts(repo=repo, pipeline_id=pipeline_id, job_id=job_id) + paths = sorted( + { + str(item.get("path") or "").strip() + for item in self._artifact_tree_entries(repo=repo, job_id=job_id) + if str(item.get("path") or "").strip() + } + ) + return {"paths": paths} + + def _download_artifacts_archive( + self: Any, + *, + repo: str, + job_id: int, + archive_path: str, + ) -> None: + try: + response = self._request_response( + "GET", + f"/projects/{self._project_id(repo)}/jobs/{job_id}/artifacts", + ) + except SmithApiError as exc: + if exc.status_code == 404: + raise SmithApiError( + f"Artifacts archive not found for job {job_id}.", + status_code=exc.status_code, + ) from exc + raise + os.makedirs(os.path.dirname(archive_path), exist_ok=True) + with open(archive_path, "wb") as handle: + handle.write(bytes(response.content)) + + @staticmethod + def _extract_artifacts_archive( + archive_path: str, + extract_dir: str, + ) -> None: + extract_root = os.path.realpath(extract_dir) + with zipfile.ZipFile(archive_path) as archive: + for member in archive.infolist(): + member_name = str(member.filename or "") + if not member_name: + continue + mode = member.external_attr >> 16 + if stat.S_ISLNK(mode): + raise SmithApiError("Artifacts archive contains an unsupported symlink entry.") + target_path = os.path.realpath(os.path.join(extract_dir, member_name)) + if os.path.commonpath([extract_root, target_path]) != extract_root: + raise SmithApiError("Artifacts archive contains an invalid path traversal entry.") + archive.extract(member, path=extract_dir) + + def _ensure_artifacts_checkout( + self: Any, + *, + repo: str, + pipeline_id: int, + job_id: int, + ) -> str: + self._pipeline_job_with_artifacts(repo=repo, pipeline_id=pipeline_id, job_id=job_id) + checkout_dir = self._artifacts_checkout_path( + repo=repo, + pipeline_id=pipeline_id, + job_id=job_id, + ) + extract_dir = self._artifacts_extract_path(checkout_dir) + ready_marker = self._artifacts_ready_marker(checkout_dir) + archive_path = self._artifacts_archive_path(checkout_dir) + checkout_lock = self._cache_lock(checkout_dir) + + with checkout_lock: + if os.path.isdir(extract_dir) and os.path.isfile(ready_marker): + return extract_dir + + if os.path.isdir(extract_dir): + shutil.rmtree(extract_dir, ignore_errors=True) + if os.path.isfile(ready_marker): + os.remove(ready_marker) + + os.makedirs(checkout_dir, exist_ok=True) + self._download_artifacts_archive( + repo=repo, + job_id=job_id, + archive_path=archive_path, + ) + try: + self._extract_artifacts_archive(archive_path, extract_dir) + except Exception: + shutil.rmtree(extract_dir, ignore_errors=True) + raise + Path(ready_marker).touch() + return extract_dir + + def _artifact_candidate_paths( + self: Any, + *, + checkout_dir: str, + pattern: str, + case_insensitive: bool, + path: str | None, + glob: str | None, + reverse: bool, + ) -> list[str]: + rg_binary = _local_checkout.require_ripgrep() + checkout_root = os.path.realpath(checkout_dir) + normalized_path = normalize_path(path) + prefix = normalized_path.strip("/") + + if prefix: + target = os.path.join(checkout_dir, prefix.replace("/", os.sep)) + if not _local_checkout.is_path_within_checkout(target, checkout_root) or os.path.islink(target): + return [] + if _local_checkout.is_internal_local_path(prefix): + return [] + 
if not os.path.exists(target): + return [] + + base_args = [ + rg_binary, + "--no-messages", + "--no-config", + "--hidden", + "--glob", + "!.git", + ] + if case_insensitive: + base_args.append("-i") + if glob: + base_args.extend(["--glob", glob]) + base_args.extend(["-l", "--sortr" if reverse else "--sort", "path", "-e", pattern]) + search_target = os.path.join(checkout_dir, prefix) if prefix else checkout_dir + + try: + result = subprocess.run( + [*base_args, search_target], + check=False, + capture_output=True, + text=True, + ) + except Exception as exc: + raise SmithApiError(f"ripgrep failed to execute for {checkout_dir}: {exc}") from exc + + return_code = int(getattr(result, "returncode", 2)) + if return_code not in (0, 1): + stderr = str(getattr(result, "stderr", "") or "").strip() + raise SmithApiError( + f"ripgrep exited with status {return_code} for {checkout_dir}: {stderr or 'unknown error'}" + ) + + matches: list[str] = [] + seen: set[str] = set() + for line in str(getattr(result, "stdout", "") or "").splitlines(): + raw_path = line.strip() + if not raw_path: + continue + if os.path.isabs(raw_path): + try: + rel_path = os.path.relpath(raw_path, checkout_dir) + except ValueError: + continue + else: + rel_path = raw_path + rel_path = rel_path.replace(os.sep, "/").lstrip("./") + if not rel_path or rel_path.startswith("../") or _local_checkout.is_internal_local_path(rel_path): + continue + if rel_path in seen: + continue + seen.add(rel_path) + matches.append(rel_path) + return matches + + def _grep_artifacts_with_line_window( + self: Any, + *, + checkout_dir: str, + pattern: str, + search_pattern: re.Pattern[str], + output_mode: Literal["content", "files_with_matches", "count"], + case_insensitive: bool, + context_lines: int | None, + from_line: int | None, + to_line: int | None, + reverse: bool, + path: str | None, + glob: str | None, + ) -> dict[str, Any]: + matching = self._artifact_candidate_paths( + checkout_dir=checkout_dir, + pattern=pattern, + case_insensitive=case_insensitive, + path=path, + glob=glob, + reverse=reverse, + ) + if len(matching) > self._config.grep_max_files: + return grep_too_many_files_result(len(matching), self._config.grep_max_files) + + output_lines: list[str] = [] + warnings: list[str] = [] + files_matched = 0 + + for rel_path in matching: + full_path = os.path.join(checkout_dir, rel_path.replace("/", os.sep)) + try: + content = Path(full_path).read_text(encoding="utf-8", errors="replace") + except Exception as exc: + warnings.append(f"failed to read /{rel_path}: {exc}") + continue + + lines = slice_lines( + content.splitlines(), + from_line=from_line, + to_line=to_line, + ) + line_offset = (from_line - 1) if from_line and from_line > 0 else 0 + matched_lines, count = grep_match_lines( + lines=lines, + search_pattern=search_pattern, + file_label=f"/{rel_path}", + output_mode=output_mode, + context_lines=context_lines or 0, + line_offset=line_offset, + reverse=reverse, + ) + if not count: + continue + files_matched += count + output_lines.extend(matched_lines) + + return build_grep_result( + output_lines=output_lines, + matched_count=files_matched, + warnings=warnings, + max_output_chars=self.max_output_chars, + truncation_hint="Use from_line/to_line to read specific ranges, or narrow with --path/--glob.", + ) + + def grep_job_artifacts( + self: Any, + *, + repo: str, + pipeline_id: int, + job_id: int, + pattern: str | None = None, + path: str | None = None, + glob: str | None = None, + output_mode: Literal["content", "files_with_matches", "count"] = 
"content", + case_insensitive: bool = True, + context_lines: int | None = 3, + from_line: int | None = None, + to_line: int | None = None, + reverse: bool = False, + ) -> dict[str, Any]: + regex_pattern = pattern or ".*" + search_pattern, compile_error = compile_search_pattern( + regex_pattern, + case_insensitive=case_insensitive, + ) + if compile_error or search_pattern is None: + return grep_compile_error_result(compile_error or "Invalid pattern") + + checkout_dir = self._ensure_artifacts_checkout( + repo=repo, + pipeline_id=pipeline_id, + job_id=job_id, + ) + + if from_line is None and to_line is None: + return _local_checkout.ripgrep_local_result( + checkout_dir=checkout_dir, + pattern=regex_pattern, + case_insensitive=case_insensitive, + path=path, + glob=glob, + filename_filter=re.compile(r".*"), + output_mode=output_mode, + context_lines=context_lines or 0, + reverse=reverse, + max_output_chars=self.max_output_chars, + grep_max_files=self._config.grep_max_files, + truncation_hint="Use from_line/to_line to read specific ranges, or narrow with --path/--glob.", + ) + + return self._grep_artifacts_with_line_window( + checkout_dir=checkout_dir, + pattern=regex_pattern, + search_pattern=search_pattern, + output_mode=output_mode, + case_insensitive=case_insensitive, + context_lines=context_lines, + from_line=from_line, + to_line=to_line, + reverse=reverse, + path=path, + glob=glob, + ) + def list_pipelines( self: Any, *, diff --git a/tests/contract/test_cli_contracts.py b/tests/contract/test_cli_contracts.py index 16ec669..0dd7c86 100644 --- a/tests/contract/test_cli_contracts.py +++ b/tests/contract/test_cli_contracts.py @@ -49,6 +49,7 @@ def _make_args(**overrides: Any) -> Namespace: "no_clone": False, "cache_remote": "all", "id": 42, + "job_id": 17, "log_id": 9, "wiql": "SELECT [System.Id] FROM WorkItems", "area": "Ops", @@ -291,6 +292,80 @@ def test_handle_ci_grep_forwards_reverse(monkeypatch: Any, capsys: Any) -> None: assert client.calls[0][1]["reverse"] is True +def test_handle_ci_artifacts_list_forwards_expected_arguments(monkeypatch: Any, capsys: Any) -> None: + client = _RecordingClient(payload={"marker": "ci-artifacts-list"}) + args = _make_args( + command_id="pipelines.artifacts.list", + remote="gitlab", + remote_provider="gitlab", + repo="group/project", + project=None, + job_id=18, + ) + monkeypatch.setattr(handlers, "render_text", lambda command, data: f"{command}:{data['marker']}") + + exit_code = handlers.handle_ci_artifacts_list(client, args) + _ = capsys.readouterr() + + assert exit_code == handlers.EXIT_OK + assert client.calls == [ + ( + "execute_ci_artifacts_list", + { + "remote_or_provider": "gitlab", + "project": None, + "repo": "group/project", + "pipeline_id": 42, + "job_id": 18, + }, + ) + ] + + +def test_handle_ci_artifacts_grep_forwards_expected_arguments(monkeypatch: Any, capsys: Any) -> None: + client = _RecordingClient(payload={"marker": "ci-artifacts-grep"}) + args = _make_args( + command_id="pipelines.artifacts.grep", + remote="gitlab", + remote_provider="gitlab", + repo="group/project", + project=None, + path="reports", + glob="*.txt", + job_id=18, + from_line=None, + to_line=None, + reverse=True, + ) + monkeypatch.setattr(handlers, "render_text", lambda command, data: f"{command}:{data['marker']}") + + exit_code = handlers.handle_ci_artifacts_grep(client, args) + _ = capsys.readouterr() + + assert exit_code == handlers.EXIT_OK + assert client.calls == [ + ( + "execute_ci_artifacts_grep", + { + "remote_or_provider": "gitlab", + "project": None, + "repo": 
"group/project", + "pipeline_id": 42, + "job_id": 18, + "pattern": "error", + "path": "reports", + "glob": "*.txt", + "output_mode": "content", + "case_insensitive": True, + "context_lines": 2, + "from_line": None, + "to_line": None, + "reverse": True, + }, + ) + ] + + def test_handle_code_grep_uses_named_remote(monkeypatch: Any, capsys: Any) -> None: client = _RecordingClient(payload={"marker": "code-grep"}) args = _make_args(command_id="code.grep", remote="gitlab-infra", remote_provider="gitlab", project=None) diff --git a/tests/contract/test_client_contracts.py b/tests/contract/test_client_contracts.py index a2902fc..4a1af90 100644 --- a/tests/contract/test_client_contracts.py +++ b/tests/contract/test_client_contracts.py @@ -563,6 +563,58 @@ def test_execute_cache_clean_removes_requested_cache_dirs(monkeypatch: Any, tmp_ "reverse": True, }, ), + ( + "execute_ci_artifacts_list", + { + "remote_or_provider": "gitlab", + "project": None, + "repo": "group/project", + "pipeline_id": 19, + "job_id": 5, + }, + "gitlab", + "list_job_artifacts", + { + "repo": "group/project", + "pipeline_id": 19, + "job_id": 5, + }, + ), + ( + "execute_ci_artifacts_grep", + { + "remote_or_provider": "gitlab", + "project": None, + "repo": "group/project", + "pipeline_id": 19, + "job_id": 5, + "pattern": "error", + "path": "reports", + "glob": "*.txt", + "output_mode": "files_with_matches", + "case_insensitive": False, + "context_lines": 0, + "from_line": 1, + "to_line": 2, + "reverse": True, + }, + "gitlab", + "grep_job_artifacts", + { + "repo": "group/project", + "pipeline_id": 19, + "job_id": 5, + "pattern": "error", + "path": "reports", + "glob": "*.txt", + "output_mode": "files_with_matches", + "case_insensitive": False, + "context_lines": 0, + "from_line": 1, + "to_line": 2, + "reverse": True, + }, + ), ( "execute_work_get", {"remote_or_provider": "github", "project": "proj-a", "repo": None, "work_item_id": 21}, @@ -666,6 +718,39 @@ def test_execute_methods_dispatch_to_remote_operations( assert remote_entry["kwargs"] == expected_kwargs +@pytest.mark.parametrize("method_name", ["execute_ci_artifacts_list", "execute_ci_artifacts_grep"]) +def test_execute_ci_artifacts_commands_require_gitlab_remote( + monkeypatch: Any, + method_name: str, +) -> None: + runtime = make_runtime_config() + _install_client_fakes(monkeypatch, runtime) + client = SmithClient(session=object(), smith_config=_make_smith_config(runtime)) + + kwargs: dict[str, Any] = { + "remote_or_provider": "github", + "project": None, + "repo": "repo-a", + "pipeline_id": 19, + "job_id": 5, + } + if method_name == "execute_ci_artifacts_grep": + kwargs.update( + pattern="error", + path=None, + glob=None, + output_mode="content", + case_insensitive=True, + context_lines=3, + from_line=None, + to_line=None, + reverse=False, + ) + + with pytest.raises(ValueError, match="only supported for GitLab remotes"): + getattr(client, method_name)(**kwargs) + + def test_execute_pr_search_fans_out_to_all_supported_providers(monkeypatch: Any) -> None: runtime = make_runtime_config() calls = _install_client_fakes(monkeypatch, runtime) diff --git a/tests/contract/test_gitlab_provider_contracts.py b/tests/contract/test_gitlab_provider_contracts.py index 2f4c297..5d5c3d4 100644 --- a/tests/contract/test_gitlab_provider_contracts.py +++ b/tests/contract/test_gitlab_provider_contracts.py @@ -1,8 +1,10 @@ from __future__ import annotations +import io import json import os import time +import zipfile from typing import Any import pytest @@ -1723,6 +1725,130 @@ def 
_fake_paginated_list(path: str, **kwargs: Any) -> list[dict[str, Any]]: } +def test_gitlab_list_job_artifacts_uses_tree_api(monkeypatch: Any) -> None: + provider = _provider() + calls: list[dict[str, Any]] = [] + + def _fake_paginated_list(path: str, **kwargs: Any) -> list[dict[str, Any]]: + calls.append({"path": path, **kwargs}) + if path == f"/projects/{_ROOT_PROJECT_TOKEN}/pipelines/77/jobs": + return [ + { + "id": 88, + "name": "sonar", + "artifacts_file": {"filename": "artifacts.zip"}, + } + ] + if path == f"/projects/{_ROOT_PROJECT_TOKEN}/jobs/88/artifacts/tree": + return [ + {"path": "reports/"}, + {"path": "reports/sonar.log"}, + {"path": "coverage/index.html"}, + ] + raise AssertionError(f"unexpected paginated list: {path}") + + monkeypatch.setattr(provider, "_get_paginated_list", _fake_paginated_list) + + result = provider.list_job_artifacts(repo=_FULL_REPO, pipeline_id=77, job_id=88) + + assert result == { + "paths": [ + "coverage/index.html", + "reports/", + "reports/sonar.log", + ] + } + assert calls == [ + {"path": f"/projects/{_ROOT_PROJECT_TOKEN}/pipelines/77/jobs"}, + { + "path": f"/projects/{_ROOT_PROJECT_TOKEN}/jobs/88/artifacts/tree", + "params": {"recursive": "true"}, + }, + ] + + +def test_gitlab_grep_job_artifacts_downloads_extracts_and_reuses_temp_checkout( + monkeypatch: Any, + tmp_path: Any, +) -> None: + provider = _provider() + archive_buffer = io.BytesIO() + with zipfile.ZipFile(archive_buffer, "w") as archive: + archive.writestr("reports/sonar.log", "ok\nerror\nok\n") + archive.writestr("coverage/index.html", "ok\n") + archive_bytes = archive_buffer.getvalue() + download_calls: list[str] = [] + + class _BinaryResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def _fake_paginated_list(path: str, **kwargs: Any) -> list[dict[str, Any]]: + if path == f"/projects/{_ROOT_PROJECT_TOKEN}/pipelines/77/jobs": + return [ + { + "id": 88, + "name": "sonar", + "artifacts_file": {"filename": "artifacts.zip"}, + } + ] + raise AssertionError(f"unexpected paginated list: {path}") + + def _fake_request_response(method: str, path: str, **kwargs: Any) -> _BinaryResponse: + assert method == "GET" + download_calls.append(path) + if path == f"/projects/{_ROOT_PROJECT_TOKEN}/jobs/88/artifacts": + return _BinaryResponse(archive_bytes) + raise AssertionError(f"unexpected request response: {path}") + + monkeypatch.setattr(provider, "_get_paginated_list", _fake_paginated_list) + monkeypatch.setattr(provider, "_request_response", _fake_request_response) + monkeypatch.setattr(provider, "_artifacts_cache_root", lambda: str(tmp_path)) + + files_result = provider.grep_job_artifacts( + repo=_FULL_REPO, + pipeline_id=77, + job_id=88, + pattern="error", + output_mode="files_with_matches", + context_lines=0, + ) + count_result = provider.grep_job_artifacts( + repo=_FULL_REPO, + pipeline_id=77, + job_id=88, + pattern="error", + output_mode="count", + context_lines=0, + from_line=2, + to_line=2, + ) + + checkout_dir = ( + tmp_path + / provider._sanitize_cache_component(provider._gitlab_host()) + / provider._sanitize_cache_component(_FULL_REPO) + / "77" + / "88" + ) + extracted_file = checkout_dir / "files" / "reports" / "sonar.log" + + assert files_result == { + "text": "/reports/sonar.log", + "files_matched": 1, + "warnings": [], + "partial": False, + } + assert count_result == { + "text": "/reports/sonar.log:1", + "files_matched": 1, + "warnings": [], + "partial": False, + } + assert download_calls == [f"/projects/{_ROOT_PROJECT_TOKEN}/jobs/88/artifacts"] + assert 
extracted_file.read_text(encoding="utf-8") == "ok\nerror\nok\n" + + def test_gitlab_issue_search_ticket_mapping_and_my_work_items(monkeypatch: Any) -> None: provider = _provider() captured_calls: list[dict[str, Any]] = [] diff --git a/tests/unit/test_formatting_contracts.py b/tests/unit/test_formatting_contracts.py index 3a60838..94f0dcb 100644 --- a/tests/unit/test_formatting_contracts.py +++ b/tests/unit/test_formatting_contracts.py @@ -749,3 +749,18 @@ def test_render_text_pipelines_list_renders_stage_less_jobs_without_stage_header "returned_count: 1\n" "total_count: 1" ) + + +def test_render_text_pipelines_artifacts_list_emits_paths_only() -> None: + rendered = render_text( + "pipelines.artifacts.list", + { + "paths": [ + "reports/", + "reports/sonar.log", + "coverage/index.html", + ] + }, + ) + + assert rendered == "reports/\nreports/sonar.log\ncoverage/index.html" diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 7ab68ea..30ce82b 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -595,6 +595,72 @@ def test_pipelines_grep_parser_accepts_log_id_before_pattern() -> None: assert args.pattern == "error" +def test_gitlab_pipelines_artifacts_list_parser_uses_canonical_command_id() -> None: + parser = _build_test_parser() + args = parser.parse_args( + [ + "gitlab", + "pipelines", + "artifacts", + "list", + "engineering-tools/repo-a", + "42", + "18", + ] + ) + + assert args.command_id == "pipelines.artifacts.list" + assert args.remote == "gitlab" + assert args.remote_provider == "gitlab" + assert args.repo == "engineering-tools/repo-a" + assert args.id == 42 + assert args.job_id == 18 + + +def test_gitlab_pipelines_artifacts_grep_parser_accepts_grep_flags() -> None: + parser = _build_test_parser() + args = parser.parse_args( + [ + "gitlab", + "pipelines", + "artifacts", + "grep", + "engineering-tools/repo-a", + "42", + "18", + "--path", + "reports", + "--glob", + "*.txt", + "--output-mode", + "files_with_matches", + "--context-lines", + "1", + "--from-line", + "10", + "--to-line", + "20", + "--reverse", + "--case-sensitive", + "error", + ] + ) + + assert args.command_id == "pipelines.artifacts.grep" + assert args.repo == "engineering-tools/repo-a" + assert args.id == 42 + assert args.job_id == 18 + assert args.path == "reports" + assert args.glob == "*.txt" + assert args.output_mode == "files_with_matches" + assert args.context_lines == 1 + assert args.from_line == 10 + assert args.to_line == 20 + assert args.reverse is True + assert args.case_sensitive is True + assert args.pattern == "error" + + def test_code_search_parser_rejects_removed_global_filters() -> None: parser = _build_test_parser() @@ -713,7 +779,7 @@ def test_youtrack_remote_help_lists_only_stories(capsys: pytest.CaptureFixture[s assert "orgs" not in output -def test_pipelines_help_lists_list_and_grep(capsys: pytest.CaptureFixture[str]) -> None: +def test_gitlab_pipelines_help_lists_artifacts(capsys: pytest.CaptureFixture[str]) -> None: parser = _build_test_parser() with pytest.raises(SystemExit): @@ -722,13 +788,26 @@ def test_pipelines_help_lists_list_and_grep(capsys: pytest.CaptureFixture[str]) output = capsys.readouterr().out assert "list" in output assert "grep" in output + assert "artifacts" in output assert "List a pipeline and its downstream pipelines" in output assert "Search or read pipeline logs" in output - assert "{list,grep}" in output + assert "List and grep GitLab job artifacts" in output + assert "{list,grep,artifacts}" in output assert "{list,logs}" not in output 
assert "Inspect pipeline logs" not in output +def test_github_pipelines_help_omits_artifacts(capsys: pytest.CaptureFixture[str]) -> None: + parser = _build_test_parser() + + with pytest.raises(SystemExit): + parser.parse_args(["github", "pipelines", "--help"]) + + output = capsys.readouterr().out + assert "artifacts" not in output + assert "{list,grep}" in output + + def test_stories_query_path_fails_to_parse() -> None: parser = _build_test_parser() From 6aba1e6865204779a76978aaae049e91cdb634a0 Mon Sep 17 00:00:00 2001 From: fausto Date: Thu, 23 Apr 2026 17:36:25 +0200 Subject: [PATCH 2/2] Add grep flags reference line to SKILL.md for quick lookup Introduce a concise reference line documenting all common grep flags (--path, --glob, --output-mode, --context-lines, --from-line/--to-line, --reverse, --case-sensitive) and command-specific flags (--branch/--no-clone for code grep, --log-id for pipeline grep) to reduce flag discovery friction. Place the reference immediately after the command table for easy scanning. --- skills/smith/SKILL.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skills/smith/SKILL.md b/skills/smith/SKILL.md index 7342a45..2b54ac6 100644 --- a/skills/smith/SKILL.md +++ b/skills/smith/SKILL.md @@ -43,6 +43,8 @@ Full vocabulary and flags live in `references/usage-recipes.md`. The minimum you | Pipelines | `smith pipelines list `, `smith pipelines list `, `smith pipelines grep ""`, `smith pipelines grep ""`, `smith pipelines artifacts list `, `smith pipelines artifacts grep ""` | | Stories / Issues | `smith stories search --query`, `smith stories search --query`, `smith stories search --query` | +All grep commands (code, pipeline logs, artifacts) support: `--path`, `--glob`, `--output-mode` (content/files_with_matches/count), `--context-lines`, `--from-line`/`--to-line`, `--reverse`, `--case-sensitive`. Code grep adds: `--branch`, `--no-clone`. Pipeline grep adds: `--log-id`. + Rules that save retries: - **GitHub**: repo arg is bare ``, not `org/repo`. Search output may look like `org/repo:path` but commands still take ``.
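Putting the two patches together, a minimal usage sketch of the new commands follows; the remote name `gitlab`, repo `engineering-tools/repo-a`, pipeline `42`, job `18`, and the search pattern are illustrative values reused from the parser and CLI contract tests above, not fixed defaults.

```sh
# Enumerate the archive paths recorded for the job's artifacts (GitLab-only).
smith gitlab pipelines artifacts list engineering-tools/repo-a 42 18

# Search the extracted artifacts for "error", most recent hits first,
# limited to *.txt files and reported as matching file names only.
smith gitlab pipelines artifacts grep engineering-tools/repo-a 42 18 "error" \
  --glob "*.txt" --output-mode files_with_matches --reverse
```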