Add autosolve actions and workflows for automated issue resolution

fantapop · roachdev-claude · fantapop · commit 7d64d1bedab3 · 2026-03-19T10:40:34.000-07:00
Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
diff --git a/.github/workflows/github-issue-autosolve.yml b/.github/workflows/github-issue-autosolve.yml
@@ -77,6 +77,10 @@ on:
         type: string
         required: false
         default: "autosolve[bot]@users.noreply.github.com"
+      timeout_minutes:
+        type: number
+        required: false
+        default: 20
     secrets:
       repo_token:
         required: true
@@ -148,7 +152,7 @@ jobs:
     needs: check
     if: needs.check.outputs.pr_exists != 'true'
     runs-on: ubuntu-latest
-    timeout-minutes: 120
+    timeout-minutes: ${{ inputs.timeout_minutes }}
     permissions:
       contents: read
       issues: write
@@ -165,6 +169,8 @@ jobs:
       - uses: actions/checkout@v5
         with:
           fetch-depth: 0
+          # Prevent the checkout credential helper from overriding the
+          # fork_push_token used later for git push to the fork.
           persist-credentials: false
 
       # Checkout cockroachdb/actions at the ref the caller used in their
@@ -224,13 +230,12 @@ jobs:
           CLAUDE_CODE_USE_VERTEX: ${{ inputs.auth_mode == 'vertex' && '1' || '' }}
           ANTHROPIC_VERTEX_PROJECT_ID: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_project_id || '' }}
           CLOUD_ML_REGION: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_region || '' }}
-          CLAUDE_CODE_USE_BEDROCK: ${{ inputs.auth_mode == 'bedrock' && '1' || '' }}
 
       - name: Install Claude CLI
         shell: bash
         run: ${{ env.ACTIONS_DIR }}/run_step.sh shared install_claude
         env:
-          CLAUDE_CLI_VERSION: "2.1.76"
+          CLAUDE_CLI_VERSION: "2.1.79"
 
       - name: Build assessment prompt
         id: assess_prompt
@@ -252,7 +257,6 @@ jobs:
           CLAUDE_CODE_USE_VERTEX: ${{ inputs.auth_mode == 'vertex' && '1' || '' }}
           ANTHROPIC_VERTEX_PROJECT_ID: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_project_id || '' }}
           CLOUD_ML_REGION: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_region || '' }}
-          CLAUDE_CODE_USE_BEDROCK: ${{ inputs.auth_mode == 'bedrock' && '1' || '' }}
           PROMPT_FILE: ${{ steps.assess_prompt.outputs.prompt_file }}
           INPUT_MODEL: ${{ inputs.model }}
 
@@ -298,7 +302,6 @@ jobs:
           CLAUDE_CODE_USE_VERTEX: ${{ inputs.auth_mode == 'vertex' && '1' || '' }}
           ANTHROPIC_VERTEX_PROJECT_ID: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_project_id || '' }}
           CLOUD_ML_REGION: ${{ inputs.auth_mode == 'vertex' && inputs.vertex_region || '' }}
-          CLAUDE_CODE_USE_BEDROCK: ${{ inputs.auth_mode == 'bedrock' && '1' || '' }}
           PROMPT_FILE: ${{ steps.impl_prompt.outputs.prompt_file }}
           INPUT_MODEL: ${{ inputs.model }}
           INPUT_ALLOWED_TOOLS: ${{ inputs.allowed_tools }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,7 +16,5 @@ Breaking changes are prefixed with "Breaking Change: ".
   security, push to fork, and create PRs using Claude.
 - `github-issue-autosolve` reusable workflow: turnkey GitHub Issues
   integration with issue comments and label management.
-- `jira-autosolve` reusable workflow: turnkey Jira integration composing
-  autosolve/assess + autosolve/implement with ticket comments and transitions.
 - `autotag-from-changelog` action: tag and push from CHANGELOG.md version
   change.
diff --git a/README.md b/README.md
@@ -63,7 +63,8 @@ task is suitable for automated resolution.
 | `result` | Full Claude result text |
 
 **`autosolve/implement`** — Runs Claude to implement a solution, validates
-changes against blocked paths, pushes to a fork, and creates a PR.
+changes against blocked paths, pushes to a fork, and creates a single-commit
+PR.
 
 ```yaml
 - uses: cockroachdb/actions/autosolve/implement@v1
@@ -84,13 +85,12 @@ changes against blocked paths, pushes to a fork, and creates a PR.
 | `allowed_tools` | *(read/write/git tools)* | Claude `--allowedTools` string |
 | `model` | `claude-opus-4-6` | Claude model ID |
 | `max_retries` | `3` | Maximum implementation attempts |
-| `timeout_minutes` | `60` | Maximum wall-clock time |
 | `create_pr` | `true` | Whether to create a PR from the changes |
 | `pr_base_branch` | *(repo default)* | Base branch for the PR |
 | `pr_labels` | `autosolve` | Comma-separated labels to apply |
 | `pr_draft` | `true` | Whether to create as a draft PR |
 | `pr_title` | *(from commit)* | PR title |
-| `pr_body_template` | *(built-in)* | Template with `{{SUMMARY}}`, `{{STATS}}`, `{{BRANCH}}` placeholders |
+| `pr_body_template` | *(built-in)* | Template with `{{SUMMARY}}`, `{{BRANCH}}` placeholders |
 | `fork_owner` | | GitHub user/org that owns the fork |
 | `fork_repo` | | Fork repository name |
 | `fork_push_token` | | PAT with push access to the fork |
@@ -110,26 +110,6 @@ changes against blocked paths, pushes to a fork, and creates a PR.
 
 #### Reusable Workflows
 
-**Jira Autosolve** — Composes assess + implement with Jira comments and ticket
-transitions. Triggered via `workflow_call`.
-
-```yaml
-jobs:
-  solve:
-    uses: cockroachdb/actions/.github/workflows/jira-autosolve.yml@v1
-    with:
-      ticket_id: PROJ-123
-      title: ${{ needs.parse.outputs.title }}
-      description: ${{ needs.parse.outputs.description }}
-      jira_base_url: https://yourcompany.atlassian.net
-      fork_owner: my-bot
-      fork_repo: my-repo
-    secrets:
-      jira_token: ${{ secrets.JIRA_TOKEN }}
-      fork_push_token: ${{ secrets.FORK_PUSH_TOKEN }}
-      pr_create_token: ${{ secrets.PR_CREATE_TOKEN }}
-```
-
 **GitHub Issue Autosolve** — Composes assess + implement with GitHub issue
 comments and label management. Triggered via `workflow_call`.
 
@@ -150,8 +130,8 @@ jobs:
 
 #### Authentication
 
-**Reusable workflows** accept `auth_mode` as an input (`vertex`, `bedrock`, or
-omit for API key) and handle env var setup internally.
+**Reusable workflows** accept `auth_mode` as an input (`vertex` or omit for API
+key) and handle env var setup internally.
 
 **Direct composite action usage** requires the caller to set up auth and pass
 the env vars on each action step:
@@ -174,24 +154,7 @@ the env vars on each action step:
 ```
 
 Alternatively, set `ANTHROPIC_API_KEY` in the environment for direct API
-access, or configure Bedrock with `CLAUDE_CODE_USE_BEDROCK=1` and `AWS_REGION`.
-
-#### Caller checkout
-
-When using `workflow_dispatch`, `actions/checkout` defaults to the branch that
-triggered the workflow. This can include unrelated commits from that branch in
-the autosolve PR. Always check out the PR base branch explicitly:
-
-```yaml
-- uses: actions/checkout@v5
-  with:
-    ref: main  # checkout the PR base branch, not the trigger ref
-    fetch-depth: 0
-    persist-credentials: false  # prevent checkout's credential helper from interfering with fork push
-```
-
-The `issues: [labeled]` trigger doesn't have this problem since it always runs
-on the default branch.
+access.
 
 ## Development
 
diff --git a/actions_helpers.sh b/actions_helpers.sh
@@ -5,6 +5,9 @@
 log_error()   { echo "::error::$*"; }
 log_warning() { echo "::warning::$*"; }
 log_notice()  { echo "::notice::$*"; }
+# Plain informational output — no GitHub annotation, just step log output.
+# Use for multi-line diagnostic data where ::notice:: would be inappropriate.
+log_info()    { echo "$*"; }
 
 # Write a single-line output: set_output key value
 set_output() {
diff --git a/autosolve/assess/action.yml b/autosolve/assess/action.yml
@@ -29,7 +29,7 @@ inputs:
   claude_cli_version:
     description: Claude CLI version to install.
     required: false
-    default: "2.1.76"
+    default: "2.1.79"
 
 outputs:
   assessment:
diff --git a/autosolve/implement/action.yml b/autosolve/implement/action.yml
@@ -26,10 +26,6 @@ inputs:
     description: Maximum implementation attempts.
     required: false
     default: "3"
-  timeout_minutes:
-    description: Maximum wall-clock time for implementation.
-    required: false
-    default: "60"
   create_pr:
     description: Whether to create a PR from the changes.
     required: false
@@ -51,7 +47,7 @@ inputs:
     required: false
     default: ""
   pr_body_template:
-    description: "Template for the PR body. Supports placeholders: {{SUMMARY}}, {{STATS}}, {{BRANCH}}."
+    description: "Template for the PR body. Supports placeholders: {{SUMMARY}}, {{BRANCH}}."
     required: false
     default: ""
   fork_owner:
@@ -89,7 +85,7 @@ inputs:
   claude_cli_version:
     description: Claude CLI version to install.
     required: false
-    default: "2.1.76"
+    default: "2.1.79"
 
 outputs:
   status:
diff --git a/autosolve/prompts/implementation-footer.md b/autosolve/prompts/implementation-footer.md
@@ -4,23 +4,45 @@ Implement the task described above.
 1. Read CLAUDE.md (if it exists) for project conventions, build commands,
    test commands, and commit message format.
 2. Understand the codebase and the task requirements.
-3. Implement the minimal changes required. Prefer backwards-compatible
+3. When fixing bugs, prefer a test-first approach:
+   a. Write a test that demonstrates the bug (verify it fails).
+   b. Apply the fix.
+   c. Verify the test passes.
+   Skip writing a dedicated test when the fix is trivial and self-evident
+   (e.g., adding a timeout, fixing a typo), the behavior is impractical to
+   unit test (e.g., network timeouts, OS-level behavior), or the fix is a
+   documentation-only change. The goal is to prove the bug existed and
+   confirm it's resolved, not to test for testing's sake.
+4. Implement the minimal changes required. Prefer backwards-compatible
    changes wherever possible — avoid breaking existing APIs, interfaces,
    or behavior unless the task explicitly requires it.
-4. Run relevant tests to verify your changes work. Only test the specific
+5. Run relevant tests to verify your changes work. Only test the specific
    packages/files affected by your changes.
-5. If tests fail, fix the issues and re-run. Only report FAILED if you
+6. If tests fail, fix the issues and re-run. Only report FAILED if you
    cannot make tests pass after reasonable effort.
-6. Stage all your changes with `git add`. Do not commit — the action
-   handles committing.
-7. Write a short commit message summary (one line, under 72 characters)
-   and save it to `.autosolve-commit-message` in the repo root. Focus on
-   *why* the change was made, not what files changed. Use imperative mood
-   (e.g., "Fix timeout in retry loop" not "Fixed timeout" or "Changes to
-   retry logic"). If CLAUDE.md specifies a commit message format, follow
-   that instead.
-8. Write a PR description and save it to `.autosolve-pr-body` in the repo
-   root. This will be used as the body of the pull request. Include:
+7. Stage all your changes with `git add`. Do not commit — the action
+   handles committing. All changes will be squashed into a single commit,
+   so organize your work accordingly.
+8. Write a commit message and save it to `.autosolve-commit-message` in
+   the repo root. Use standard git format: a subject line (under 72
+   characters, imperative mood), a blank line, then a body explaining
+   what was changed and why. Since all changes go into a single commit,
+   the message should cover the full scope of the change. Focus on
+   helping a reviewer understand the commit — do NOT list individual
+   files. Example:
+   ```
+   Fix timeout in retry loop
+
+   The retry loop was using a hardcoded 5s timeout which was too short
+   for large payloads. Increased to 30s and made it configurable via
+   the RETRY_TIMEOUT env var. Added a test that verifies retry behavior
+   with slow responses.
+   ```
+   If CLAUDE.md specifies a commit message format, follow that instead.
+9. Write a PR description and save it to `.autosolve-pr-body` in the repo
+   root. This will be used as the body of the pull request. The PR
+   description and commit message serve similar purposes for single-commit
+   PRs, but the PR description should be more reader-friendly. Include:
    - A brief summary of what was changed and why (2-3 sentences max).
    - What testing was done (tests added, tests run, manual verification).
    Do NOT include a list of changed files — reviewers can see that in the
diff --git a/autosolve/run_step.sh b/autosolve/run_step.sh
@@ -1,10 +1,17 @@
 #!/usr/bin/env bash
 # Entry point for autosolve action steps.
 #
-# Usage: run_step.sh <script> <function> [args...]
+# Composite action steps run in a fresh shell, so sourcing scripts directly
+# would leave them cd'd to the scripts/ directory instead of the workspace.
+# This wrapper solves three problems:
+#   1. Sources the target script (which cd's to its own directory for clean
+#      relative imports of shared.sh, actions_helpers.sh, etc.).
+#   2. Restores the original working directory so the function runs in the
+#      caller's workspace (where the repo checkout lives).
+#   3. Manages a shared AUTOSOLVE_TMPDIR across composite action steps
+#      (each step is a new shell process).
 #
-# Sources autosolve/scripts/<script>.sh (which sources its own deps),
-# then calls <function> from the original working directory.
+# Usage: run_step.sh <script> <function> [args...]
 #
 # Examples:
 #   run_step.sh shared   validate_inputs
diff --git a/autosolve/scripts/assess.sh b/autosolve/scripts/assess.sh
@@ -7,12 +7,12 @@ source ../../actions_helpers.sh
 source ./shared.sh
 
 run_assessment() {
-  command -v claude >/dev/null || { log_error "claude CLI not found on PATH"; return 1; }
+  require_command claude
   local prompt_file="${PROMPT_FILE:?PROMPT_FILE must be set}"
-  local model="${INPUT_MODEL:-claude-opus-4-6}"
+  local model="${INPUT_MODEL:?INPUT_MODEL must be set}"
   local output_file="$AUTOSOLVE_TMPDIR/assessment.json"
 
-  echo "Running assessment with model: $model"
+  log_notice "Running assessment with model: $model"
 
   local exit_code=0
   claude --print \
@@ -27,6 +27,8 @@ run_assessment() {
   fi
 
   local result_text
+  # extract_result returns non-zero when the marker isn't found; prevent
+  # set -e from exiting so we can handle missing results below.
   result_text="$(extract_result "$output_file" "ASSESSMENT_RESULT")" || true
 
   if [ -z "$result_text" ]; then
@@ -35,11 +37,14 @@ run_assessment() {
     return 1
   fi
 
+  # Log the full assessment result so it appears in the action run logs.
+  log_info "$result_text"
+
   if echo "$result_text" | grep --quiet "ASSESSMENT_RESULT - PROCEED"; then
-    echo "Assessment: PROCEED"
+    log_notice "Assessment: PROCEED"
     set_output "assessment" "PROCEED"
   elif echo "$result_text" | grep --quiet "ASSESSMENT_RESULT - SKIP"; then
-    echo "Assessment: SKIP"
+    log_notice "Assessment: SKIP"
     set_output "assessment" "SKIP"
   else
     log_error "Assessment result did not contain a valid PROCEED or SKIP marker"
@@ -60,18 +65,15 @@ set_assess_outputs() {
 
   # Extract summary: everything before the ASSESSMENT_RESULT line
   local summary
-  summary="$(echo "$result_text" | sed '/^ASSESSMENT_RESULT/d' | head -50)"
+  summary="$(truncate_output 200 "$(echo "$result_text" | sed '/^ASSESSMENT_RESULT/d')")"
 
   set_output "assessment" "$assessment"
   set_output_multiline "summary" "$summary"
   set_output_multiline "result" "$result_text"
 
-  {
-    echo "## Autosolve Assessment"
-    echo "**Result:** $assessment"
-    if [ -n "$summary" ]; then
-      echo "### Summary"
-      echo "$summary"
-    fi
-  } >> "${GITHUB_STEP_SUMMARY:-/dev/null}"
+  write_step_summary <<EOF
+## Autosolve Assessment
+**Result:** $assessment
+$([ -n "$summary" ] && printf '### Summary\n%s' "$summary")
+EOF
 }
diff --git a/autosolve/scripts/assess_test.sh b/autosolve/scripts/assess_test.sh
@@ -2,7 +2,6 @@
 # Tests for assess.sh functions.
 # shellcheck disable=SC2034  # Variables are read by sourced functions
 set -euo pipefail
-trap 'echo "Error occurred at line $LINENO"; exit 1' ERR
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 source ../../test_helpers.sh
@@ -23,7 +22,7 @@ test_outputs_proceed() {
   printf 'The task is clear and bounded.\nASSESSMENT_RESULT - PROCEED\n' > "$AUTOSOLVE_TMPDIR/assessment_result.txt"
   ASSESS_RESULT=PROCEED
   set_assess_outputs
-  grep -q 'assessment=PROCEED' "$GITHUB_OUTPUT"
+  check_contains 'assessment=PROCEED' "$GITHUB_OUTPUT"
 }
 expect_success "set_assess_outputs: PROCEED" test_outputs_proceed
 
@@ -33,7 +32,7 @@ test_outputs_skip() {
   printf 'Too ambiguous for automation.\nASSESSMENT_RESULT - SKIP\n' > "$AUTOSOLVE_TMPDIR/assessment_result.txt"
   ASSESS_RESULT=SKIP
   set_assess_outputs
-  grep -q 'assessment=SKIP' "$GITHUB_OUTPUT"
+  check_contains 'assessment=SKIP' "$GITHUB_OUTPUT"
 }
 expect_success "set_assess_outputs: SKIP" test_outputs_skip
 
@@ -46,7 +45,7 @@ test_outputs_summary_strips_marker() {
   # Extract just the summary block (between summary<<DELIM and DELIM) and verify marker is absent
   local summary
   summary=$(sed -n '/^summary<</,/^GHEOF_/p' "$GITHUB_OUTPUT")
-  echo "$summary" | grep -q 'This is the reasoning' && ! echo "$summary" | grep -q 'ASSESSMENT_RESULT'
+  echo "$summary" | check_contains 'This is the reasoning' && ! echo "$summary" | check_contains 'ASSESSMENT_RESULT'
 }
 expect_success "set_assess_outputs: summary strips marker" test_outputs_summary_strips_marker
 
@@ -56,8 +55,8 @@ test_outputs_no_result_file() {
   rm -f "$AUTOSOLVE_TMPDIR/assessment_result.txt"
   ASSESS_RESULT=ERROR
   set_assess_outputs
-  grep -q 'assessment=ERROR' "$GITHUB_OUTPUT"
+  check_contains 'assessment=ERROR' "$GITHUB_OUTPUT"
 }
-expect_success "set_assess_outputs: no result file" test_outputs_no_result_file
+expect_success "set_assess_outputs: no result file results in ERROR" test_outputs_no_result_file
 
 print_results
diff --git a/autosolve/scripts/github_issues.sh b/autosolve/scripts/github_issues.sh
diff --git a/autosolve/scripts/implement.sh b/autosolve/scripts/implement.sh
diff --git a/autosolve/scripts/implement_test.sh b/autosolve/scripts/implement_test.sh
diff --git a/autosolve/scripts/shared.sh b/autosolve/scripts/shared.sh
diff --git a/autosolve/scripts/shared_test.sh b/autosolve/scripts/shared_test.sh