From 1821bc840109bad6269b371e8b558fbbe18aa862 Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Mon, 18 May 2026 12:05:02 -0700 Subject: [PATCH] feat: harden EvalOps PR review automation --- .github/actionlint.yaml | 5 + .github/scripts/evalops-pr-lens-review.rb | 307 +++++++++++++++++- .github/scripts/evalopsbot-webhook-relay.rb | 147 +++++++++ .github/workflows/codex-rails-check.yml | 17 + .github/workflows/evalops-pr-lens-review.yml | 55 ++-- .../evalopsbot-review-request-dispatch.yml | 2 +- README.md | 9 +- test/evalops_pr_lens_review_test.rb | 131 +++++++- test/evalopsbot_webhook_relay_test.rb | 63 ++++ 9 files changed, 699 insertions(+), 37 deletions(-) create mode 100644 .github/actionlint.yaml create mode 100644 .github/scripts/evalopsbot-webhook-relay.rb create mode 100644 test/evalopsbot_webhook_relay_test.rb diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000..f48d92a --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,5 @@ +self-hosted-runner: + labels: + - blacksmith-4vcpu-ubuntu-2404 + +config-variables: null diff --git a/.github/scripts/evalops-pr-lens-review.rb b/.github/scripts/evalops-pr-lens-review.rb index d68cba8..003fc45 100644 --- a/.github/scripts/evalops-pr-lens-review.rb +++ b/.github/scripts/evalops-pr-lens-review.rb @@ -1,6 +1,8 @@ #!/usr/bin/env ruby # frozen_string_literal: true +require "base64" +require "fileutils" require "json" require "net/http" require "open3" @@ -73,6 +75,48 @@ module EvalOpsPrLensReview } }.freeze + LENS_PATH_RULES = { + "migration-safety" => [ + %r{\A(db|database|migrations?)/}i, + %r{migrations?/}i, + %r{\.(sql|tf)\z}i, + %r{\A(infrastructure|terraform|helm|charts|k8s|clusters)/}i, + %r{(disk|cache|state|cleanup|backfill|bootstrap|startup)}i + ], + "nats-contract-drift" => [ + %r{(^|/)(nats|jetstream|streams?|consumers?|subjects?)(/|\.)}i, + %r{(^|/)(proto|protos|protobuf|schemas?)/}i, + %r{\.(proto|avsc)\z}i, + %r{(cloudevents?|event[-_ ]?catalog|publisher|subscriber)}i + ], + "argo-manifest-skew" => [ + %r{\A(argocd|argo|clusters|k8s|kubernetes|overlays|base|helm|charts)/}i, + %r{(^|/)(kustomization|values)\.ya?ml\z}i, + %r{(^|/)applications?/}i, + %r{(^|/)(deployment|service|configmap|secret|externalsecret|namespace|ingress)\.ya?ml\z}i + ], + "iam-blast-radius" => [ + %r{\A\.github/workflows/}i, + %r{(^|/)(iam|rbac|serviceaccount|service-account|policy|permissions?)(/|\.)}i, + %r{(^|/)(secrets?|external-secrets?|vault|oidc)(/|\.)}i, + %r{\.(tf|tfvars)\z}i, + %r{(token|credential|workload[-_ ]?identity|rolebinding|clusterrole)}i + ], + "generated-sdk-delta" => [ + %r{(^|/)(gen|generated|sdk|openapi|swagger|proto|protos|protobuf|schemas?)/}i, + %r{\.(proto|openapi\.ya?ml|swagger\.json|schema\.json)\z}i, + %r{(^|/)(package\.json|pyproject\.toml|go\.mod|buf\.yaml|buf\.gen\.yaml)\z}i, + %r{(^|/)(CHANGELOG|release-please-config|\.release-please-manifest)}i + ], + "eval-regression-risk" => [ + %r{(^|/)(evals?|evaluations?|fixtures?|datasets?|goldens?|scenarios?|judges?|prompts?)/}i, + %r{(^|/)(prompt|judge|rubric|score|scoring|golden|fixture)}i, + %r{(^|/)testdata/}i + ] + }.freeze + + DOC_ONLY_PATH = %r{\A(README|SECURITY|CONTRIBUTING|CHANGELOG|docs/|profile/|.*\.(md|mdx|txt))}i + MARKER = "" REVIEW_REQUESTED_DISPATCH_EVENT = "evalopsbot-review-requested" REVIEW_REQUESTED_DISPATCH_SOURCE = "evalopsbot-review-request-dispatch" @@ -126,6 +170,28 @@ def valid_lens!(lens) raise ArgumentError, "unknown lens #{lens.inspect}; expected one of #{LENSES.keys.join(", ")}" end + def lens_reason_for_path(path) + LENS_PATH_RULES.each do |lens, patterns| + return lens if patterns.any? { |pattern| path.match?(pattern) } + end + nil + end + + def lenses_for_paths(paths) + normalized = paths.map(&:to_s).map(&:strip).reject(&:empty?) + return LENSES.keys if normalized.empty? + + lenses = normalized.flat_map do |path| + LENS_PATH_RULES.each_with_object([]) do |(lens, patterns), matches| + matches << lens if patterns.any? { |pattern| path.match?(pattern) } + end + end.uniq + return lenses if lenses.any? + return [] if normalized.all? { |path| path.match?(DOC_ONLY_PATH) } + + ["eval-regression-risk"] + end + def gh_api(*args, input: nil, token: ENV["GH_TOKEN"]) env = {} env["GH_TOKEN"] = token if token && !token.empty? @@ -267,12 +333,19 @@ def dispatch_requested_reviews(owner:, reviewer:, limit:, dry_run:, target_url:, summary end + def pr_files_metadata(repo:, pr:) + gh_api_json("repos/#{repo}/pulls/#{pr}/files?per_page=100") + end + def discover_open_prs(repos:, pr_filter: nil) repos.flat_map do |repo| normalized_repo = normalize_repo(repo) prs = gh_api_json("repos/#{normalized_repo}/pulls?state=open&per_page=100") prs.select! { |pr| pr_filter.fetch(normalized_repo, []).include?(Integer(pr.fetch("number"))) } if pr_filter prs.map do |pr| + files = pr_files_metadata(repo: normalized_repo, pr: Integer(pr.fetch("number"))) + filenames = files.map { |file| file.fetch("filename") } + lenses = lenses_for_paths(filenames) { "repo" => normalized_repo, "repo_slug" => normalized_repo.tr("/", "-"), @@ -283,7 +356,9 @@ def discover_open_prs(repos:, pr_filter: nil) "head_sha" => pr.fetch("head").fetch("sha"), "base_sha" => pr.fetch("base").fetch("sha"), "base_ref" => pr.fetch("base").fetch("ref"), - "head_ref" => pr.fetch("head").fetch("ref") + "head_ref" => pr.fetch("head").fetch("ref"), + "changed_files" => filenames, + "lenses" => lenses } end end @@ -291,7 +366,8 @@ def discover_open_prs(repos:, pr_filter: nil) def matrix_for(prs, lenses: LENSES.keys) prs.flat_map do |pr| - lenses.map do |lens| + pr_lenses = pr.fetch("lenses", lenses) + pr_lenses.map do |lens| valid_lens!(lens) { "repo" => pr.fetch("repo"), @@ -299,7 +375,10 @@ def matrix_for(prs, lenses: LENSES.keys) "pr" => pr.fetch("number"), "lens" => lens, "check_context" => check_context(lens), - "head_sha" => pr.fetch("head_sha") + "head_sha" => pr.fetch("head_sha"), + "base_sha" => pr.fetch("base_sha", nil), + "base_ref" => pr.fetch("base_ref", nil), + "head_ref" => pr.fetch("head_ref", nil) } end end @@ -326,6 +405,55 @@ def post_status(repo:, sha:, context:, state:, description:, target_url: nil) gh_api("--method", "POST", "repos/#{repo}/statuses/#{sha}", *fields) end + def write_json(path, payload) + File.write(path, JSON.pretty_generate(payload)) + payload + end + + def skipped_lens_review(repo:, pr:, lens:, head_sha:, reason:, output:) + write_json( + output, + { + "schema_version" => 1, + "repo" => repo, + "pr" => Integer(pr), + "lens" => lens, + "check_id" => check_context(lens), + "head_sha" => head_sha.to_s, + "generated_at" => Time.now.utc.iso8601, + "status" => "skipped", + "skip_reason" => reason, + "summary" => "Skipped #{lens} lens review: #{reason}", + "confidence_score" => 0.0, + "findings" => [] + } + ) + end + + def write_prepare_outputs(path, outputs) + write_github_outputs(path, outputs) + end + + def git_authorization_header(token) + return nil if token.to_s.empty? + + "AUTHORIZATION: basic #{Base64.strict_encode64("x-access-token:#{token}")}" + end + + def git_capture_auth(workspace, *args, token: nil) + env = { "GIT_TERMINAL_PROMPT" => "0" } + command = ["git"] + command += ["-C", workspace] if workspace + header = git_authorization_header(token) + command += ["-c", "http.https://github.com/.extraheader=#{header}"] if header + command += args + + stdout, stderr, status = Open3.capture3(env, *command) + raise "git #{args.join(" ")} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? + + stdout + end + def git_capture(workspace, *args) stdout, stderr, status = Open3.capture3("git", "-C", workspace, *args) raise "git #{args.join(" ")} failed: #{stderr}" unless status.success? @@ -333,6 +461,76 @@ def git_capture(workspace, *args) stdout end + def prepare_workspace(repo:, pr:, lens:, workspace:, output:, github_output:, snapshot_head_sha:, snapshot_base_sha:, token:) + head_sha = snapshot_head_sha.to_s + begin + pr_json = pr_metadata(repo: repo, pr: pr) + current_state = pr_json.fetch("state", "").downcase + current_head_sha = pr_json.fetch("head").fetch("sha") + current_base_sha = pr_json.fetch("base").fetch("sha") + base_ref = pr_json.fetch("base").fetch("ref") + reason = nil + + if current_state != "open" + reason = "pull request is #{current_state.empty? ? "not open" : current_state}" + elsif !snapshot_head_sha.to_s.empty? && current_head_sha != snapshot_head_sha + reason = "pull request head changed since discovery" + elsif !snapshot_base_sha.to_s.empty? && current_base_sha != snapshot_base_sha + reason = "pull request base changed since discovery" + end + + if reason + skipped_lens_review(repo: repo, pr: pr, lens: lens, head_sha: head_sha.empty? ? current_head_sha : head_sha, reason: reason, output: output) + write_prepare_outputs( + github_output, + "skip" => "true", + "skip_reason" => reason, + "head_sha" => head_sha.empty? ? current_head_sha : head_sha, + "base_sha" => snapshot_base_sha.to_s.empty? ? current_base_sha : snapshot_base_sha + ) + return { "skip" => true, "reason" => reason } + end + + FileUtils.rm_rf(workspace) + FileUtils.mkdir_p(workspace) + git_capture_auth(nil, "init", workspace, token: token) + git_capture_auth(workspace, "remote", "add", "origin", "https://github.com/#{repo}.git", token: token) + git_capture_auth(workspace, "fetch", "--no-tags", "origin", base_ref, "+refs/pull/#{pr}/head:refs/remotes/pull/#{pr}/head", token: token) + git_capture_auth(workspace, "checkout", "--detach", current_head_sha, token: token) + + checked_out = git_capture_auth(workspace, "rev-parse", "HEAD", token: token).strip + if checked_out != current_head_sha + raise "checked out #{checked_out}, expected #{current_head_sha}" + end + + write_prepare_outputs( + github_output, + "skip" => "false", + "skip_reason" => "", + "base_ref" => base_ref, + "base_sha" => current_base_sha, + "head_sha" => current_head_sha + ) + { + "skip" => false, + "base_ref" => base_ref, + "base_sha" => current_base_sha, + "head_sha" => current_head_sha + } + rescue StandardError => e + reason = "target ref unavailable: #{e.message.lines.first.to_s.strip}" + skipped_lens_review(repo: repo, pr: pr, lens: lens, head_sha: head_sha, reason: reason, output: output) + write_prepare_outputs( + github_output, + "skip" => "true", + "skip_reason" => reason, + "head_sha" => head_sha, + "base_sha" => snapshot_base_sha.to_s + ) + { "skip" => true, "reason" => reason } + end + end + def truncated(text, max_bytes) raw = text.to_s return [raw, false] if raw.bytesize <= max_bytes @@ -669,6 +867,11 @@ def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provi end def lens_status_description(review) + if review.fetch("status", "") == "skipped" + reason = review.fetch("skip_reason", "not applicable") + return "Skipped: #{reason}" + end + findings = review.fetch("findings", []) confidence = findings.map { |finding| finding.fetch("confidence_score") }.max || 0.0 "#{findings.length} finding#{findings.length == 1 ? "" : "s"}; top confidence #{format("%.2f", confidence)}" @@ -680,6 +883,23 @@ def read_lens_reviews(root) end end + def read_expected_reviews(root) + Dir.glob(File.join(root, "**", "pr-lens-targets.json")).sort.flat_map do |path| + JSON.parse(File.read(path)).flat_map do |pr| + Array(pr.fetch("lenses", [])).map do |lens| + { + "repo" => pr.fetch("repo"), + "pr" => Integer(pr.fetch("number")), + "lens" => lens, + "head_sha" => pr.fetch("head_sha"), + "check_id" => check_context(lens), + "_artifact_path" => path + } + end + end + end + end + def high_confidence_findings(reviews, min_confidence:) reviews.flat_map do |review| review.fetch("findings", []).map do |finding| @@ -798,12 +1018,18 @@ def delete_marker_comments(repo:, pr:) end end - def meta_state(findings) + def meta_state(findings, coverage_incomplete: false) + return "error" if coverage_incomplete + findings.any? { |finding| finding.fetch("priority") <= 1 } ? "failure" : "success" end - def meta_description(findings) - if findings.empty? + def meta_description(findings, missing_count: 0, skipped_count: 0) + if missing_count.positive? + "PR lens coverage incomplete: #{missing_count} missing" + elsif findings.empty? && skipped_count.positive? + "No findings; #{skipped_count} lens review#{skipped_count == 1 ? "" : "s"} skipped" + elsif findings.empty? "No high-confidence PR lens findings" else "#{findings.length} high-confidence finding#{findings.length == 1 ? "" : "s"}" @@ -812,12 +1038,41 @@ def meta_description(findings) def meta_review(artifact_root:, min_confidence:, output:) reviews = read_lens_reviews(artifact_root) + expected_reviews = read_expected_reviews(artifact_root) + reviews_by_key = reviews.each_with_object({}) do |review, hash| + hash[[review.fetch("repo"), Integer(review.fetch("pr")), review.fetch("lens"), review.fetch("head_sha")]] = review + end ranked = dedupe_and_rank(high_confidence_findings(reviews, min_confidence: min_confidence)) grouped = grouped_by_pr(ranked) target_url = run_url + coverage_by_pr = {} + + expected_reviews.group_by { |review| [review.fetch("repo"), review.fetch("pr"), review.fetch("head_sha")] }.each do |key, rows| + coverage_by_pr[key] = { + "expected" => rows.length, + "missing" => rows.count do |row| + !reviews_by_key.key?([row.fetch("repo"), row.fetch("pr"), row.fetch("lens"), row.fetch("head_sha")]) + end, + "skipped" => rows.count do |row| + review = reviews_by_key[[row.fetch("repo"), row.fetch("pr"), row.fetch("lens"), row.fetch("head_sha")]] + review && review.fetch("status", "") == "skipped" + end, + "lenses" => rows.map { |row| row.fetch("lens") }.sort + } + end - reviews.group_by { |review| [review.fetch("repo"), review.fetch("pr"), review.fetch("head_sha")] }.each_key do |repo, pr, head_sha| + review_keys = reviews.group_by { |review| [review.fetch("repo"), review.fetch("pr"), review.fetch("head_sha")] }.keys + (coverage_by_pr.keys + review_keys).uniq.each do |repo, pr, head_sha| findings = grouped.fetch([repo, pr, head_sha], []) + coverage = coverage_by_pr.fetch( + [repo, pr, head_sha], + { + "expected" => reviews.count { |review| review.fetch("repo") == repo && review.fetch("pr") == pr && review.fetch("head_sha") == head_sha }, + "missing" => 0, + "skipped" => 0, + "lenses" => reviews.select { |review| review.fetch("repo") == repo && review.fetch("pr") == pr && review.fetch("head_sha") == head_sha }.map { |review| review.fetch("lens") }.sort + } + ) if findings.empty? delete_marker_comments(repo: repo, pr: pr) else @@ -831,8 +1086,12 @@ def meta_review(artifact_root:, min_confidence:, output:) repo: repo, sha: head_sha, context: meta_context, - state: meta_state(findings), - description: meta_description(findings), + state: meta_state(findings, coverage_incomplete: coverage.fetch("missing").positive?), + description: meta_description( + findings, + missing_count: coverage.fetch("missing"), + skipped_count: coverage.fetch("skipped") + ), target_url: target_url ) end @@ -842,6 +1101,10 @@ def meta_review(artifact_root:, min_confidence:, output:) "generated_at" => Time.now.utc.iso8601, "min_confidence" => min_confidence, "reviews" => reviews.length, + "expected_reviews" => expected_reviews.length, + "coverage" => coverage_by_pr.map do |(repo, pr, head_sha), coverage| + coverage.merge("repo" => repo, "pr" => pr, "head_sha" => head_sha) + end, "published_findings" => ranked, "run_url" => target_url } @@ -892,6 +1155,30 @@ def meta_review(artifact_root:, min_confidence:, output:) parser.on("--target-url URL") { |value| options[:target_url] = value } end.parse! EvalOpsPrLensReview.post_status(**options) + when "prepare-workspace" + options = { + token: ENV["REVIEW_TOKEN"] || ENV["GH_TOKEN"], + github_output: ENV["GITHUB_OUTPUT"], + snapshot_head_sha: "", + snapshot_base_sha: "" + } + OptionParser.new do |parser| + parser.on("--repo OWNER/REPO") { |value| options[:repo] = value } + parser.on("--pr NUMBER", Integer) { |value| options[:pr] = value } + parser.on("--lens LENS") { |value| options[:lens] = value } + parser.on("--workspace PATH") { |value| options[:workspace] = value } + parser.on("--output PATH") { |value| options[:output] = value } + parser.on("--github-output PATH") { |value| options[:github_output] = value } + parser.on("--snapshot-head-sha SHA") { |value| options[:snapshot_head_sha] = value } + parser.on("--snapshot-base-sha SHA") { |value| options[:snapshot_base_sha] = value } + parser.on("--token TOKEN") { |value| options[:token] = value } + end.parse! + required = %i[repo pr lens workspace output] + missing = required.select { |key| options[key].nil? || options[key].to_s.empty? } + raise OptionParser::MissingArgument, missing.join(", ") unless missing.empty? + + result = EvalOpsPrLensReview.prepare_workspace(**options) + puts(result.fetch("skip") ? "Skipped #{options.fetch(:lens)}: #{result.fetch("reason")}" : "Prepared #{options.fetch(:repo)}##{options.fetch(:pr)}") when "run-lens" options = { provider: ENV.fetch("PR_LENS_PROVIDER", "anthropic"), @@ -964,7 +1251,7 @@ def meta_review(artifact_root:, min_confidence:, output:) ) puts "Found #{result.fetch("candidate_count")} EvalOpsBot review request(s); dispatched #{result.fetch("dispatched_count")}, skipped #{result.fetch("skipped_count")}." else - warn "usage: #{$PROGRAM_NAME} discover|post-status|run-lens|lens-status-description|meta-review|dispatch-review-requests" + warn "usage: #{$PROGRAM_NAME} discover|post-status|prepare-workspace|run-lens|lens-status-description|meta-review|dispatch-review-requests" exit 2 end end diff --git a/.github/scripts/evalopsbot-webhook-relay.rb b/.github/scripts/evalopsbot-webhook-relay.rb new file mode 100644 index 0000000..e4ebcad --- /dev/null +++ b/.github/scripts/evalopsbot-webhook-relay.rb @@ -0,0 +1,147 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "json" +require "net/http" +require "openssl" +require "optparse" +require "time" +require "uri" + +module EvalOpsBotWebhookRelay + DISPATCH_EVENT = "evalopsbot-review-requested" + DISPATCH_SOURCE = "evalopsbot-webhook-relay" + DEFAULT_TARGET_REPO = "evalops/.github" + + module_function + + def secure_compare(left, right) + return false unless left.bytesize == right.bytesize + + left.bytes.zip(right.bytes).reduce(0) { |memo, pair| memo | (pair.fetch(0) ^ pair.fetch(1)) }.zero? + end + + def verify_signature!(body:, signature:, secret:) + return true if secret.to_s.empty? + + expected = "sha256=#{OpenSSL::HMAC.hexdigest("SHA256", secret, body)}" + unless signature.to_s.start_with?("sha256=") && secure_compare(signature.to_s, expected) + raise "invalid GitHub webhook signature" + end + + true + end + + def dispatch_payload(event_name:, body:, reviewer:, delivery: nil) + return skip("unsupported event #{event_name}") unless event_name == "pull_request" + + payload = JSON.parse(body) + return skip("unsupported action #{payload["action"]}") unless payload["action"] == "review_requested" + + requested_reviewer = payload.dig("requested_reviewer", "login").to_s + return skip("review requested for #{requested_reviewer}") unless requested_reviewer == reviewer + + repo = payload.dig("repository", "full_name").to_s + return skip("repository is not in evalops org") unless repo.start_with?("evalops/") + + pr = payload.dig("pull_request", "number") + return skip("missing pull request number") if pr.nil? + + { + "event_type" => DISPATCH_EVENT, + "client_payload" => { + "target_repo" => repo, + "target_pr" => "#{repo}##{Integer(pr)}", + "requested_reviewer" => requested_reviewer, + "source" => DISPATCH_SOURCE, + "delivery" => delivery.to_s + }.reject { |_key, value| value.to_s.empty? } + } + rescue JSON::ParserError + skip("invalid JSON payload") + end + + def skip(reason) + { + "skipped" => true, + "reason" => reason + } + end + + def dispatch_to_github(payload:, token:, target_repo: DEFAULT_TARGET_REPO) + raise "GITHUB_TOKEN is required" if token.to_s.empty? + + uri = URI("https://api.github.com/repos/#{target_repo}/dispatches") + request = Net::HTTP::Post.new(uri) + request["accept"] = "application/vnd.github+json" + request["authorization"] = "Bearer #{token}" + request["content-type"] = "application/json" + request["user-agent"] = "evalopsbot-webhook-relay" + request.body = JSON.generate(payload) + + response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| + http.request(request) + end + unless response.code.to_i.between?(200, 299) + raise "repository dispatch failed with HTTP #{response.code}: #{response.body}" + end + + { + "dispatched" => true, + "target_repo" => target_repo, + "event_type" => payload.fetch("event_type"), + "generated_at" => Time.now.utc.iso8601 + } + end +end + +if $PROGRAM_NAME == __FILE__ + options = { + event_name: ENV["GITHUB_WEBHOOK_EVENT"], + delivery: ENV["GITHUB_WEBHOOK_DELIVERY"], + signature: ENV["GITHUB_WEBHOOK_SIGNATURE_256"], + secret: ENV["GITHUB_WEBHOOK_SECRET"], + reviewer: "EvalOpsBot", + token: ENV["GITHUB_TOKEN"], + target_repo: EvalOpsBotWebhookRelay::DEFAULT_TARGET_REPO, + dry_run: false + } + OptionParser.new do |parser| + parser.on("--event EVENT") { |value| options[:event_name] = value } + parser.on("--delivery ID") { |value| options[:delivery] = value } + parser.on("--signature SIGNATURE") { |value| options[:signature] = value } + parser.on("--secret SECRET") { |value| options[:secret] = value } + parser.on("--reviewer LOGIN") { |value| options[:reviewer] = value } + parser.on("--token TOKEN") { |value| options[:token] = value } + parser.on("--target-repo OWNER/REPO") { |value| options[:target_repo] = value } + parser.on("--input PATH") { |value| options[:input] = value } + parser.on("--output PATH") { |value| options[:output] = value } + parser.on("--dry-run") { options[:dry_run] = true } + end.parse! + + body = options[:input] ? File.read(options[:input]) : STDIN.read + EvalOpsBotWebhookRelay.verify_signature!( + body: body, + signature: options[:signature], + secret: options[:secret] + ) + payload = EvalOpsBotWebhookRelay.dispatch_payload( + event_name: options[:event_name], + body: body, + reviewer: options[:reviewer], + delivery: options[:delivery] + ) + result = if payload["skipped"] + payload + elsif options[:dry_run] + { "would_dispatch" => true, "payload" => payload } + else + EvalOpsBotWebhookRelay.dispatch_to_github( + payload: payload, + token: options[:token], + target_repo: options[:target_repo] + ) + end + File.write(options[:output], JSON.pretty_generate(result)) if options[:output] + puts JSON.pretty_generate(result) +end diff --git a/.github/workflows/codex-rails-check.yml b/.github/workflows/codex-rails-check.yml index b3a9c74..c7b9862 100644 --- a/.github/workflows/codex-rails-check.yml +++ b/.github/workflows/codex-rails-check.yml @@ -9,6 +9,7 @@ on: - "README.md" - ".agents/skills/**" - ".github/agent-mcp/**" + - ".github/actionlint.yaml" - ".github/codex/hooks/**" - ".github/codex/schemas/**" - ".github/contracts/**" @@ -67,6 +68,22 @@ jobs: fi ruby -e 'require "yaml"; ARGV.each { |f| YAML.load_file(f); puts "ok #{f}" }' "${files[@]}" + - name: Lint workflow semantics + shell: bash + run: | + set -euo pipefail + shopt -s nullglob + files=(.github/workflows/*.yml .github/workflows/*.yaml .github/workflow-templates/*.yml .github/workflow-templates/*.yaml) + if [ "${#files[@]}" -eq 0 ]; then + echo "No workflow YAML files found." + exit 0 + fi + if command -v actionlint >/dev/null 2>&1; then + actionlint "${files[@]}" + else + go run github.com/rhysd/actionlint/cmd/actionlint@v1.7.12 "${files[@]}" + fi + - name: Validate workflow template metadata shell: bash run: | diff --git a/.github/workflows/evalops-pr-lens-review.yml b/.github/workflows/evalops-pr-lens-review.yml index 4bf697a..269a0d0 100644 --- a/.github/workflows/evalops-pr-lens-review.yml +++ b/.github/workflows/evalops-pr-lens-review.yml @@ -142,35 +142,29 @@ jobs: --description "Running ${{ matrix.lens }} lens review" \ --target-url "${RUN_URL}" - - name: Checkout target pull request head - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - with: - repository: ${{ matrix.repo }} - ref: refs/pull/${{ matrix.pr }}/head - token: ${{ env.REVIEW_TOKEN }} - fetch-depth: 0 - path: target - - - name: Fetch base and head refs + - name: Prepare target pull request head id: refs shell: bash env: TARGET_REPO: ${{ matrix.repo }} PR_NUMBER: ${{ matrix.pr }} + LENS: ${{ matrix.lens }} + SNAPSHOT_HEAD_SHA: ${{ matrix.head_sha }} + SNAPSHOT_BASE_SHA: ${{ matrix.base_sha }} run: | set -euo pipefail - pr_json="$(gh api "repos/${TARGET_REPO}/pulls/${PR_NUMBER}")" - base_ref="$(jq -r '.base.ref' <<<"${pr_json}")" - base_sha="$(jq -r '.base.sha' <<<"${pr_json}")" - head_sha="$(jq -r '.head.sha' <<<"${pr_json}")" - git -C target fetch --no-tags origin "${base_ref}" "+refs/pull/${PR_NUMBER}/head" - { - echo "base_ref=${base_ref}" - echo "base_sha=${base_sha}" - echo "head_sha=${head_sha}" - } >> "${GITHUB_OUTPUT}" + ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb prepare-workspace \ + --repo "${TARGET_REPO}" \ + --pr "${PR_NUMBER}" \ + --lens "${LENS}" \ + --workspace target \ + --output lens-review.json \ + --github-output "${GITHUB_OUTPUT}" \ + --snapshot-head-sha "${SNAPSHOT_HEAD_SHA}" \ + --snapshot-base-sha "${SNAPSHOT_BASE_SHA}" - name: Run lens reviewer + if: ${{ steps.refs.outputs.skip != 'true' }} shell: bash env: TARGET_REPO: ${{ matrix.repo }} @@ -193,7 +187,7 @@ jobs: --max-diff-bytes "${PR_LENS_MAX_DIFF_BYTES}" - name: Complete lens status - if: ${{ success() }} + if: ${{ success() && steps.refs.outputs.skip != 'true' }} shell: bash run: | set -euo pipefail @@ -209,13 +203,30 @@ jobs: --description "${description}" \ --target-url "${RUN_URL}" + - name: Complete skipped lens status + if: ${{ steps.refs.outputs.skip == 'true' }} + shell: bash + run: | + set -euo pipefail + description="$( + ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb \ + lens-status-description --review-json lens-review.json + )" + ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ + --repo "${{ matrix.repo }}" \ + --sha "${{ steps.refs.outputs.head_sha || matrix.head_sha }}" \ + --context "${{ matrix.check_context }}" \ + --state success \ + --description "${description}" \ + --target-url "${RUN_URL}" + - name: Mark lens errored if: ${{ failure() }} shell: bash run: | ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ --repo "${{ matrix.repo }}" \ - --sha "${{ matrix.head_sha }}" \ + --sha "${{ steps.refs.outputs.head_sha || matrix.head_sha }}" \ --context "${{ matrix.check_context }}" \ --state error \ --description "${{ matrix.lens }} lens review failed" \ diff --git a/.github/workflows/evalopsbot-review-request-dispatch.yml b/.github/workflows/evalopsbot-review-request-dispatch.yml index ffed3d7..dd12ee2 100644 --- a/.github/workflows/evalopsbot-review-request-dispatch.yml +++ b/.github/workflows/evalopsbot-review-request-dispatch.yml @@ -2,7 +2,7 @@ name: EvalOpsBot review request dispatch on: schedule: - - cron: "*/5 * * * *" + - cron: "17 * * * *" workflow_dispatch: inputs: reviewer: diff --git a/README.md b/README.md index 5b6d280..8f79fc9 100644 --- a/README.md +++ b/README.md @@ -356,7 +356,10 @@ per-repository workflow copies. The relay should listen for GitHub - `repository.full_name` is an EvalOps repository - `pull_request.number` is present -When those checks pass, dispatch this repository's review workflow: +`.github/scripts/evalopsbot-webhook-relay.rb` is the checked-in relay core for +that endpoint. It verifies `X-Hub-Signature-256` when `GITHUB_WEBHOOK_SECRET` +is set, ignores non-matching deliveries, and dispatches this repository's +review workflow: ```bash gh api --method POST repos/evalops/.github/dispatches --input - <<'JSON' @@ -375,4 +378,6 @@ The workflow also accepts `target_prs`, `target_repos`, `provider`, `model`, `max_diff_bytes`, and `min_confidence` in `client_payload` for controlled operator overrides. Keep the relay token scoped to dispatching workflows in `evalops/.github`; the review workflow itself owns the cross-repo read/write -token and model-provider credentials. +token and model-provider credentials. The scheduled +`evalopsbot-review-request-dispatch.yml` workflow remains as an hourly fallback +for missed webhook deliveries, not the primary trigger path. diff --git a/test/evalops_pr_lens_review_test.rb b/test/evalops_pr_lens_review_test.rb index 824fba3..c0580a8 100644 --- a/test/evalops_pr_lens_review_test.rb +++ b/test/evalops_pr_lens_review_test.rb @@ -2,6 +2,7 @@ require "json" require "minitest/autorun" +require "tmpdir" require_relative "../.github/scripts/evalops-pr-lens-review" class EvalOpsPrLensReviewTest < Minitest::Test @@ -49,6 +50,46 @@ def test_matrix_for_uses_stable_lens_contexts assert_equal "abc123", matrix.fetch(0).fetch("head_sha") end + def test_matrix_for_uses_classified_lenses + prs = [ + { + "repo" => "evalops/deploy", + "repo_slug" => "evalops-deploy", + "number" => 10, + "head_sha" => "head", + "base_sha" => "base", + "base_ref" => "main", + "head_ref" => "branch", + "lenses" => %w[iam-blast-radius argo-manifest-skew] + } + ] + + matrix = EvalOpsPrLensReview.matrix_for(prs) + + assert_equal %w[iam-blast-radius argo-manifest-skew], matrix.map { |row| row.fetch("lens") } + assert_equal ["base"], matrix.map { |row| row.fetch("base_sha") }.uniq + end + + def test_lenses_for_paths_selects_targeted_review_lenses + lenses = EvalOpsPrLensReview.lenses_for_paths( + [ + ".github/workflows/release.yml", + "clusters/prod/kustomization.yaml", + "proto/platform/v1/service.proto" + ] + ) + + assert_includes lenses, "iam-blast-radius" + assert_includes lenses, "argo-manifest-skew" + assert_includes lenses, "nats-contract-drift" + assert_includes lenses, "generated-sdk-delta" + refute_includes lenses, "eval-regression-risk" + end + + def test_lenses_for_paths_skips_docs_only_prs + assert_empty EvalOpsPrLensReview.lenses_for_paths(["README.md", "docs/runbook.md"]) + end + def test_normalize_lens_review_drops_invalid_findings raw = { "summary" => "Found one issue", @@ -299,11 +340,97 @@ def test_build_lens_prompt_includes_review_context def test_lens_workflow_checks_out_pull_request_head_ref workflow = File.read(File.expand_path("../.github/workflows/evalops-pr-lens-review.yml", __dir__)) - assert_includes workflow, "Checkout target pull request head" - assert_includes workflow, 'ref: refs/pull/${{ matrix.pr }}/head' + assert_includes workflow, "Prepare target pull request head" + assert_includes workflow, "prepare-workspace" refute_includes workflow, 'ref: refs/pull/${{ matrix.pr }}/merge' end + def test_prepare_workspace_writes_skipped_review_when_head_changes + Dir.mktmpdir do |dir| + output = File.join(dir, "lens-review.json") + github_output = File.join(dir, "github-output") + pr_json = { + "state" => "open", + "head" => { "sha" => "new-head" }, + "base" => { "sha" => "base", "ref" => "main" } + } + + EvalOpsPrLensReview.stub(:pr_metadata, ->(**_kwargs) { pr_json }) do + result = EvalOpsPrLensReview.prepare_workspace( + repo: "evalops/deploy", + pr: 10, + lens: "iam-blast-radius", + workspace: File.join(dir, "target"), + output: output, + github_output: github_output, + snapshot_head_sha: "old-head", + snapshot_base_sha: "base", + token: nil + ) + + review = JSON.parse(File.read(output)) + assert_equal true, result.fetch("skip") + assert_equal "skipped", review.fetch("status") + assert_equal "pull request head changed since discovery", review.fetch("skip_reason") + assert_includes File.read(github_output), "skip=true" + end + end + end + + def test_meta_review_marks_incomplete_coverage_when_expected_lens_artifact_is_missing + Dir.mktmpdir do |dir| + discovery_dir = File.join(dir, "pr-lens-discovery") + review_dir = File.join(dir, "pr-lens-evalops-deploy-10-iam-blast-radius") + FileUtils.mkdir_p(discovery_dir) + FileUtils.mkdir_p(review_dir) + File.write( + File.join(discovery_dir, "pr-lens-targets.json"), + JSON.pretty_generate( + [ + { + "repo" => "evalops/deploy", + "number" => 10, + "head_sha" => "head", + "lenses" => %w[iam-blast-radius argo-manifest-skew] + } + ] + ) + ) + File.write( + File.join(review_dir, "lens-review.json"), + JSON.pretty_generate( + { + "schema_version" => 1, + "repo" => "evalops/deploy", + "pr" => 10, + "lens" => "iam-blast-radius", + "check_id" => "evalops-pr-lens/iam-blast-radius", + "head_sha" => "head", + "findings" => [] + } + ) + ) + statuses = [] + + EvalOpsPrLensReview.stub(:run_url, "https://github.com/evalops/.github/actions/runs/1") do + EvalOpsPrLensReview.stub(:delete_marker_comments, ->(**_kwargs) {}) do + EvalOpsPrLensReview.stub(:post_status, ->(**kwargs) { statuses << kwargs }) do + result = EvalOpsPrLensReview.meta_review( + artifact_root: dir, + min_confidence: 0.82, + output: File.join(dir, "meta-review.json") + ) + + assert_equal 2, result.fetch("expected_reviews") + assert_equal 1, result.fetch("coverage").fetch(0).fetch("missing") + assert_equal "error", statuses.fetch(0).fetch(:state) + assert_includes statuses.fetch(0).fetch(:description), "coverage incomplete" + end + end + end + end + end + def test_migration_safety_lens_covers_stateful_infra_rollouts pr_json = { "title" => "Buildfarm disk headroom", diff --git a/test/evalopsbot_webhook_relay_test.rb b/test/evalopsbot_webhook_relay_test.rb new file mode 100644 index 0000000..cf737ea --- /dev/null +++ b/test/evalopsbot_webhook_relay_test.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "openssl" +require_relative "../.github/scripts/evalopsbot-webhook-relay" + +class EvalOpsBotWebhookRelayTest < Minitest::Test + def test_dispatch_payload_for_evalopsbot_review_request + payload = EvalOpsBotWebhookRelay.dispatch_payload( + event_name: "pull_request", + reviewer: "EvalOpsBot", + delivery: "delivery-1", + body: JSON.generate( + "action" => "review_requested", + "requested_reviewer" => { "login" => "EvalOpsBot" }, + "repository" => { "full_name" => "evalops/deploy" }, + "pull_request" => { "number" => 3671 } + ) + ) + + assert_equal "evalopsbot-review-requested", payload.fetch("event_type") + assert_equal "evalops/deploy#3671", payload.dig("client_payload", "target_pr") + assert_equal "evalopsbot-webhook-relay", payload.dig("client_payload", "source") + end + + def test_dispatch_payload_skips_other_reviewers + result = EvalOpsBotWebhookRelay.dispatch_payload( + event_name: "pull_request", + reviewer: "EvalOpsBot", + body: JSON.generate( + "action" => "review_requested", + "requested_reviewer" => { "login" => "someone-else" }, + "repository" => { "full_name" => "evalops/deploy" }, + "pull_request" => { "number" => 1 } + ) + ) + + assert_equal true, result.fetch("skipped") + assert_includes result.fetch("reason"), "someone-else" + end + + def test_verify_signature_accepts_github_sha256_signature + body = JSON.generate("ok" => true) + signature = "sha256=#{OpenSSL::HMAC.hexdigest("SHA256", "secret", body)}" + + assert EvalOpsBotWebhookRelay.verify_signature!( + body: body, + signature: signature, + secret: "secret" + ) + end + + def test_verify_signature_rejects_mismatch + assert_raises RuntimeError do + EvalOpsBotWebhookRelay.verify_signature!( + body: "{}", + signature: "sha256=bad", + secret: "secret" + ) + end + end +end