From ab09d220629ad7c4d2157e3d183e4fcd2e2d1edf Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Tue, 19 May 2026 18:34:47 -0700 Subject: [PATCH] feat: harden evalopsbot requested review control plane --- .github/evalopsbot-review-targets.yml | 31 ++ .github/pr-lens-routing.yml | 18 ++ .github/scripts/evalops-pr-lens-review.rb | 303 ++++++++++++++++-- .../scripts/verify-evalopsbot-review-setup.rb | 140 ++++++++ .github/workflows/evalops-pr-lens-review.yml | 104 +++++- .../workflows/evalopsbot-review-canary.yml | 149 +++++++++ .../evalopsbot-review-request-dispatch.yml | 22 ++ .../evalopsbot-review-setup-audit.yml | 61 ++++ README.md | 46 +-- test/evalops_pr_lens_review_test.rb | 98 ++++++ test/verify_evalopsbot_review_setup_test.rb | 42 +++ 11 files changed, 961 insertions(+), 53 deletions(-) create mode 100644 .github/evalopsbot-review-targets.yml create mode 100644 .github/pr-lens-routing.yml create mode 100644 .github/scripts/verify-evalopsbot-review-setup.rb create mode 100644 .github/workflows/evalopsbot-review-canary.yml create mode 100644 .github/workflows/evalopsbot-review-setup-audit.yml create mode 100644 test/verify_evalopsbot_review_setup_test.rb diff --git a/.github/evalopsbot-review-targets.yml b/.github/evalopsbot-review-targets.yml new file mode 100644 index 0000000..6fd8321 --- /dev/null +++ b/.github/evalopsbot-review-targets.yml @@ -0,0 +1,31 @@ +version: 1 +org: evalops +reviewer: EvalOpsBot +central_repo: evalops/.github +dispatch_secret: EVALOPS_PR_LENS_TOKEN +app_secrets: + - EVALOPS_PR_LENS_APP_ID + - EVALOPS_PR_LENS_APP_PRIVATE_KEY + - EVALOPS_PR_LENS_APP_INSTALLATION_ID +central_workflows: + - .github/workflows/evalops-pr-lens-review.yml + - .github/workflows/evalopsbot-review-request-dispatch.yml + - .github/workflows/evalopsbot-review-canary.yml +target_repositories: + - repo: evalops/cerebro + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/chat + fallback_workflow: 
.github/workflows/evalopsbot-review-request.yml + - repo: evalops/deploy + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/diffscope + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/ensemble + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/maestro + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/maestro-internal + fallback_workflow: .github/workflows/evalopsbot-review-request.yml + - repo: evalops/platform + fallback_workflow: .github/workflows/evalopsbot-review-request.yml +exemptions: [] diff --git a/.github/pr-lens-routing.yml b/.github/pr-lens-routing.yml new file mode 100644 index 0000000..9834560 --- /dev/null +++ b/.github/pr-lens-routing.yml @@ -0,0 +1,18 @@ +defaults: + provider: anthropic + model: claude-opus-4-7 + max_diff_bytes: 180000 + +lenses: + iam-blast-radius: + model: claude-opus-4-7 + max_diff_bytes: 220000 + argo-manifest-skew: + model: claude-opus-4-7 + max_diff_bytes: 220000 + generated-sdk-delta: + model: claude-opus-4-7 + max_diff_bytes: 260000 + eval-regression-risk: + model: claude-opus-4-7 + max_diff_bytes: 220000 diff --git a/.github/scripts/evalops-pr-lens-review.rb b/.github/scripts/evalops-pr-lens-review.rb index 003fc45..48bff3f 100644 --- a/.github/scripts/evalops-pr-lens-review.rb +++ b/.github/scripts/evalops-pr-lens-review.rb @@ -2,13 +2,16 @@ # frozen_string_literal: true require "base64" +require "digest" require "fileutils" require "json" require "net/http" +require "openssl" require "open3" require "optparse" require "time" require "uri" +require "yaml" module EvalOpsPrLensReview TARGET_REPOS = %w[ @@ -122,9 +125,15 @@ module EvalOpsPrLensReview REVIEW_REQUESTED_DISPATCH_SOURCE = "evalopsbot-review-request-dispatch" DEFAULT_MIN_CONFIDENCE = 0.82 DEFAULT_MODEL = "claude-opus-4-7" + DEFAULT_PROVIDER = "anthropic" DEFAULT_MAX_DIFF_BYTES = 180_000 MAX_FINDINGS_PER_COMMENT = 12 
MAX_CONTEXT_ITEMS = 25 + DEFAULT_ROUTING_CONFIG_PATH = ".github/pr-lens-routing.yml" + COMMON_FINGERPRINT_TOKENS = %w[ + the and for with from that this into when are was were has have should would could + evalops review finding issue risk missing unsafe fails failure causes cause because + ].freeze module_function @@ -170,6 +179,29 @@ def valid_lens!(lens) raise ArgumentError, "unknown lens #{lens.inspect}; expected one of #{LENSES.keys.join(", ")}" end + def load_routing_config(path) + return {} if path.to_s.empty? || !File.exist?(path) + + YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) || {} + end + + def routing_for_lens(lens, config) + lens = valid_lens!(lens) + defaults = config.fetch("defaults", {}) || {} + lenses = config.fetch("lenses", {}) || {} + route = lenses.fetch(lens, {}) || {} + defaults.merge(route) + end + + def effective_review_options(lens:, provider:, model:, max_diff_bytes:, routing_config:) + route = routing_for_lens(lens, load_routing_config(routing_config)) + { + provider: route.fetch("provider", provider || DEFAULT_PROVIDER).to_s, + model: route.fetch("model", model || DEFAULT_MODEL).to_s, + max_diff_bytes: Integer(route.fetch("max_diff_bytes", max_diff_bytes || DEFAULT_MAX_DIFF_BYTES)) + } + end + def lens_reason_for_path(path) LENS_PATH_RULES.each do |lens, patterns| return lens if patterns.any? 
{ |pattern| path.match?(pattern) } @@ -288,6 +320,61 @@ def dispatch_review_requested(repo:, pr:, requested_reviewer:) gh_api("--method", "POST", "repos/evalops/.github/dispatches", input: JSON.generate(payload)) end + def base64url(value) + Base64.strict_encode64(value).tr("+/", "-_").delete("=") + end + + def normalize_private_key(raw) + raw.to_s.gsub("\\n", "\n") + end + + def github_app_jwt(app_id:, private_key:, now: Time.now) + key = OpenSSL::PKey.read(normalize_private_key(private_key)) + header = base64url(JSON.generate({ alg: "RS256", typ: "JWT" })) + payload = base64url( + JSON.generate( + { + iat: now.to_i - 60, + exp: now.to_i + 540, + iss: app_id.to_s + } + ) + ) + unsigned = "#{header}.#{payload}" + "#{unsigned}.#{base64url(key.sign(OpenSSL::Digest::SHA256.new, unsigned))}" + end + + def github_app_installation_id(owner:, jwt:) + gh_api_json("orgs/#{owner}/installation", token: jwt).fetch("id") + end + + def create_app_installation_token(app_id:, private_key:, owner:, installation_id: nil, repositories: [], permissions: {}) + jwt = github_app_jwt(app_id: app_id, private_key: private_key) + resolved_installation_id = installation_id.to_s.empty? ? github_app_installation_id(owner: owner, jwt: jwt) : installation_id + payload = {} + repo_names = repositories.map { |repo| repo.to_s.split("/").last }.reject(&:empty?).uniq + payload["repositories"] = repo_names unless repo_names.empty? + payload["permissions"] = permissions unless permissions.empty? 
+ response = gh_api_json( + "--method", + "POST", + "app/installations/#{resolved_installation_id}/access_tokens", + input: JSON.generate(payload), + token: jwt + ) + response.fetch("token") + end + + def default_app_token_permissions + { + "checks" => "write", + "contents" => "write", + "issues" => "write", + "pull_requests" => "write", + "statuses" => "write" + } + end + def mark_review_queued(repo:, head_sha:, target_url:) post_status( repo: repo, @@ -403,6 +490,76 @@ def post_status(repo:, sha:, context:, state:, description:, target_url: nil) fields += ["-f", "target_url=#{target_url}"] if target_url && !target_url.empty? gh_api("--method", "POST", "repos/#{repo}/statuses/#{sha}", *fields) + post_check_run( + repo: repo, + sha: sha, + name: context, + state: state, + description: description, + target_url: target_url + ) + end + + def check_run_external_id(name:, sha:) + digest = Digest::SHA256.hexdigest("#{name}\0#{sha}") + "evalops-pr-lens:#{digest}" + end + + def check_run_state(state) + case state.to_s + when "pending" + ["in_progress", nil] + when "success" + ["completed", "success"] + when "failure" + ["completed", "failure"] + when "error" + ["completed", "failure"] + else + ["completed", "neutral"] + end + end + + def existing_check_run_id(repo:, sha:, name:, external_id:) + encoded_name = URI.encode_www_form_component(name) + response = gh_api_json("repos/#{repo}/commits/#{sha}/check-runs?check_name=#{encoded_name}&per_page=100") + Array(response.fetch("check_runs", [])).find { |row| row["external_id"] == external_id }&.fetch("id", nil) + end + + def post_check_run(repo:, sha:, name:, state:, description:, target_url: nil) + status, conclusion = check_run_state(state) + external_id = check_run_external_id(name: name, sha: sha) + output = { + title: description.to_s[0, 255], + summary: description.to_s.empty? ? 
name : description.to_s + } + payload = { + name: name, + external_id: external_id, + details_url: target_url, + status: status, + output: output + }.compact + if status == "completed" + payload[:conclusion] = conclusion + payload[:completed_at] = Time.now.utc.iso8601 + else + payload[:started_at] = Time.now.utc.iso8601 + end + + existing_id = existing_check_run_id(repo: repo, sha: sha, name: name, external_id: external_id) + if existing_id + gh_api("--method", "PATCH", "repos/#{repo}/check-runs/#{existing_id}", input: JSON.generate(payload)) + else + gh_api( + "--method", + "POST", + "repos/#{repo}/check-runs", + input: JSON.generate(payload.merge(head_sha: sha)) + ) + end + rescue StandardError => e + warn "check-run publish skipped for #{repo}@#{sha} #{name}: #{e.message.lines.first.to_s.strip}" end def write_json(path, payload) @@ -828,7 +985,14 @@ def normalize_lens_review(raw_review, repo:, pr:, lens:, head_sha:) } end - def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provider:, model:, max_diff_bytes:) + def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provider:, model:, max_diff_bytes:, routing_config: nil) + effective = effective_review_options( + lens: lens, + provider: provider, + model: model, + max_diff_bytes: max_diff_bytes, + routing_config: routing_config + ) pr_json = pr_metadata(repo: repo, pr: pr) file_summary = pr_file_summary(repo: repo, pr: pr) review_context = pr_review_context(repo: repo, pr: pr, pr_json: pr_json, head_sha: head_sha) @@ -837,7 +1001,7 @@ def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provi workspace: workspace, base_sha: base_sha, head_sha: head_sha, - max_bytes: max_diff_bytes + max_bytes: effective.fetch(:max_diff_bytes) ) prompt = build_lens_prompt( repo: repo, @@ -852,8 +1016,8 @@ def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provi ) raw_response = call_llm( prompt: prompt, - provider: provider, - model: model + 
provider: effective.fetch(:provider), + model: effective.fetch(:model) ) normalized = normalize_lens_review( extract_json(raw_response), @@ -916,24 +1080,66 @@ def high_confidence_findings(reviews, min_confidence:) end end + def fingerprint_tokens(text) + text.to_s.downcase.scan(/[a-z0-9][a-z0-9_-]{2,}/).map do |token| + normalized = token.sub(/ies\z/, "y").sub(/s\z/, "") + normalized = "repository" if %w[repo repository].include?(normalized) + normalized = "write" if %w[write writes writable].include?(normalized) + normalized + end.reject do |token| + COMMON_FINGERPRINT_TOKENS.include?(token) + end.uniq + end + + def token_similarity(left, right) + left_tokens = fingerprint_tokens(left) + right_tokens = fingerprint_tokens(right) + return 0.0 if left_tokens.empty? || right_tokens.empty? + + intersection = (left_tokens & right_tokens).length + union = (left_tokens | right_tokens).length + intersection.to_f / union + end + + def duplicate_finding?(left, right) + return false unless left.fetch("repo") == right.fetch("repo") + return false unless Integer(left.fetch("pr")) == Integer(right.fetch("pr")) + return false unless left.fetch("head_sha") == right.fetch("head_sha") + + left_location = left.fetch("code_location") + right_location = right.fetch("code_location") + same_path = left_location.fetch("path") == right_location.fetch("path") + return false unless same_path + + line_distance = (Integer(left_location.fetch("line")) - Integer(right_location.fetch("line"))).abs + title_similarity = token_similarity(left.fetch("title"), right.fetch("title")) + body_similarity = token_similarity(left.fetch("body"), right.fetch("body")) + + title_similarity >= 0.74 || body_similarity >= 0.82 || (line_distance <= 5 && title_similarity >= 0.5) + end + + def better_finding(left, right) + [left, right].max_by do |finding| + [ + finding.fetch("confidence_score"), + -finding.fetch("priority"), + finding.fetch("body").to_s.length + ] + end + end + def dedupe_and_rank(findings) - 
best_by_key = {} + deduped = [] findings.each do |finding| - location = finding.fetch("code_location") - key = [ - finding.fetch("repo"), - finding.fetch("pr"), - location.fetch("path"), - location.fetch("line"), - finding.fetch("title").downcase.gsub(/\s+/, " ") - ] - existing = best_by_key[key] - if existing.nil? || finding.fetch("confidence_score") > existing.fetch("confidence_score") - best_by_key[key] = finding + existing_index = deduped.index { |candidate| duplicate_finding?(candidate, finding) } + if existing_index + deduped[existing_index] = better_finding(deduped.fetch(existing_index), finding) + else + deduped << finding end end - best_by_key.values.sort_by do |finding| + deduped.sort_by do |finding| [ -finding.fetch("confidence_score"), finding.fetch("priority"), @@ -1111,6 +1317,34 @@ def meta_review(artifact_root:, min_confidence:, output:) File.write(output, JSON.pretty_generate(result)) result end + + def markdown_meta_report(result) + lines = [ + "## EvalOps PR Lens Review", + "", + "- Reviews: #{result.fetch("reviews")}", + "- Expected reviews: #{result.fetch("expected_reviews")}", + "- Published findings: #{result.fetch("published_findings").length}", + "- Run: #{result.fetch("run_url") || "unavailable"}", + "", + "### Coverage" + ] + result.fetch("coverage", []).each do |row| + lines << "- #{row.fetch("repo")}##{row.fetch("pr")}: #{row.fetch("expected")} expected, #{row.fetch("missing")} missing, #{row.fetch("skipped")} skipped; lenses #{Array(row.fetch("lenses", [])).join(", ")}" + end + if result.fetch("published_findings", []).empty? + lines << "" + lines << "No high-confidence findings cleared the publication threshold." 
+ else + lines << "" + lines << "### Findings" + result.fetch("published_findings").first(MAX_FINDINGS_PER_COMMENT).each do |finding| + location = finding.fetch("code_location") + lines << "- P#{finding.fetch("priority")} #{format("%.2f", finding.fetch("confidence_score"))} #{finding.fetch("repo")}##{finding.fetch("pr")} #{location.fetch("path")}:#{location.fetch("line")} #{finding.fetch("title")}" + end + end + lines.join("\n") + end end if $PROGRAM_NAME == __FILE__ @@ -1181,9 +1415,10 @@ def meta_review(artifact_root:, min_confidence:, output:) puts(result.fetch("skip") ? "Skipped #{options.fetch(:lens)}: #{result.fetch("reason")}" : "Prepared #{options.fetch(:repo)}##{options.fetch(:pr)}") when "run-lens" options = { - provider: ENV.fetch("PR_LENS_PROVIDER", "anthropic"), + provider: ENV.fetch("PR_LENS_PROVIDER", EvalOpsPrLensReview::DEFAULT_PROVIDER), model: ENV.fetch("PR_LENS_MODEL", EvalOpsPrLensReview::DEFAULT_MODEL), - max_diff_bytes: Integer(ENV.fetch("PR_LENS_MAX_DIFF_BYTES", EvalOpsPrLensReview::DEFAULT_MAX_DIFF_BYTES)) + max_diff_bytes: Integer(ENV.fetch("PR_LENS_MAX_DIFF_BYTES", EvalOpsPrLensReview::DEFAULT_MAX_DIFF_BYTES)), + routing_config: ENV.fetch("PR_LENS_ROUTING_CONFIG", nil) } OptionParser.new do |parser| parser.on("--repo OWNER/REPO") { |value| options[:repo] = value } @@ -1196,6 +1431,7 @@ def meta_review(artifact_root:, min_confidence:, output:) parser.on("--provider PROVIDER") { |value| options[:provider] = value } parser.on("--model MODEL") { |value| options[:model] = value } parser.on("--max-diff-bytes BYTES", Integer) { |value| options[:max_diff_bytes] = value } + parser.on("--routing-config PATH") { |value| options[:routing_config] = value } end.parse! required = %i[repo pr lens workspace base_sha head_sha output] missing = required.select { |key| options[key].nil? || options[key].to_s.empty? 
} @@ -1219,10 +1455,13 @@ def meta_review(artifact_root:, min_confidence:, output:) parser.on("--artifact-root PATH") { |value| options[:artifact_root] = value } parser.on("--min-confidence NUMBER", Float) { |value| options[:min_confidence] = value } parser.on("--output PATH") { |value| options[:output] = value } + parser.on("--markdown-output PATH") { |value| options[:markdown_output] = value } end.parse! raise OptionParser::MissingArgument, "artifact-root" if options[:artifact_root].to_s.empty? + markdown_output = options.delete(:markdown_output) result = EvalOpsPrLensReview.meta_review(**options) + File.write(markdown_output, EvalOpsPrLensReview.markdown_meta_report(result)) if markdown_output puts "Published #{result.fetch("published_findings").length} high-confidence finding(s)." when "dispatch-review-requests" options = { @@ -1250,8 +1489,32 @@ def meta_review(artifact_root:, min_confidence:, output:) "skipped_count" => result.fetch("skipped_count") ) puts "Found #{result.fetch("candidate_count")} EvalOpsBot review request(s); dispatched #{result.fetch("dispatched_count")}, skipped #{result.fetch("skipped_count")}." 
+ when "mint-app-token" + options = { + app_id: ENV["EVALOPS_PR_LENS_APP_ID"] || ENV["GITHUB_APP_ID"], + private_key: ENV["EVALOPS_PR_LENS_APP_PRIVATE_KEY"] || ENV["GITHUB_APP_PRIVATE_KEY"], + installation_id: ENV["EVALOPS_PR_LENS_APP_INSTALLATION_ID"] || ENV["GITHUB_APP_INSTALLATION_ID"], + owner: "evalops", + repositories: [], + permissions: EvalOpsPrLensReview.default_app_token_permissions + } + OptionParser.new do |parser| + parser.on("--app-id ID") { |value| options[:app_id] = value } + parser.on("--private-key-file PATH") { |value| options[:private_key] = File.read(value) } + parser.on("--installation-id ID") { |value| options[:installation_id] = value } + parser.on("--owner OWNER") { |value| options[:owner] = value } + parser.on("--repositories CSV") { |value| options[:repositories] = EvalOpsPrLensReview.parse_list(value) } + parser.on("--permission KEY=VALUE") do |value| + key, permission = value.split("=", 2) + options[:permissions][key] = permission + end + end.parse! + raise OptionParser::MissingArgument, "app-id" if options[:app_id].to_s.empty? + raise OptionParser::MissingArgument, "private-key" if options[:private_key].to_s.empty? 
+ + puts EvalOpsPrLensReview.create_app_installation_token(**options) else - warn "usage: #{$PROGRAM_NAME} discover|post-status|prepare-workspace|run-lens|lens-status-description|meta-review|dispatch-review-requests" + warn "usage: #{$PROGRAM_NAME} discover|post-status|prepare-workspace|run-lens|lens-status-description|meta-review|dispatch-review-requests|mint-app-token" exit 2 end end diff --git a/.github/scripts/verify-evalopsbot-review-setup.rb b/.github/scripts/verify-evalopsbot-review-setup.rb new file mode 100644 index 0000000..92bd478 --- /dev/null +++ b/.github/scripts/verify-evalopsbot-review-setup.rb @@ -0,0 +1,140 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "json" +require "open3" +require "optparse" +require "time" +require "yaml" + +module EvalOpsBotReviewSetup + module_function + + def gh_api_json(path) + stdout, stderr, status = Open3.capture3("gh", "api", path) + raise "gh api #{path} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? 
+ + JSON.parse(stdout) + end + + def load_contract(path) + YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) + end + + def workflow_active?(repo, path) + encoded = path.split("/").last + workflow = gh_api_json("repos/#{repo}/actions/workflows/#{encoded}") + workflow.fetch("path") == path && workflow.fetch("state") == "active" + rescue StandardError + false + end + + def selected_secret_repositories(org:, secret:) + response = gh_api_json("orgs/#{org}/actions/secrets/#{secret}/repositories?per_page=100") + Array(response.fetch("repositories", [])).map { |repo| repo.fetch("full_name") }.sort + rescue StandardError + [] + end + + def verify(contract, live: true, generated_at: Time.now.utc) + errors = [] + warnings = [] + org = contract.fetch("org") + central_repo = contract.fetch("central_repo") + dispatch_secret = contract.fetch("dispatch_secret") + target_repos = Array(contract.fetch("target_repositories", [])) + exemptions = Array(contract.fetch("exemptions", [])) + + errors << "reviewer must be EvalOpsBot" unless contract.fetch("reviewer") == "EvalOpsBot" + errors << "target_repositories must not be empty" if target_repos.empty? + errors << "exemptions must be empty unless an owner and expiry are recorded" unless exemptions.all? do |row| + row["repo"].to_s.start_with?("#{org}/") && row["owner"].to_s.length.positive? && row["expires"].to_s.length.positive? 
+ end + + central_workflows = Array(contract.fetch("central_workflows", [])) + central_workflows.each do |path| + local_path = File.expand_path("../../#{path}", __dir__) + errors << "missing central workflow #{path}" unless File.exist?(local_path) + end + + missing_secret_repos = [] + inactive_workflows = [] + if live + secret_repos = selected_secret_repositories(org: org, secret: dispatch_secret) + target_repos.each do |target| + repo = target.fetch("repo") + fallback_workflow = target.fetch("fallback_workflow") + missing_secret_repos << repo unless secret_repos.include?(repo) + inactive_workflows << "#{repo}:#{fallback_workflow}" unless workflow_active?(repo, fallback_workflow) + end + central_workflows.each do |path| + errors << "central workflow #{path} is not active" unless workflow_active?(central_repo, path) + end + end + + errors.concat(missing_secret_repos.map { |repo| "#{repo} is missing from #{dispatch_secret} selected repositories" }) + errors.concat(inactive_workflows.map { |entry| "#{entry} is not active" }) + app_secrets = Array(contract.fetch("app_secrets", [])) + warnings << "GitHub App secrets are declared but cannot be value-verified by this audit" unless app_secrets.empty? + + { + "schema_version" => 1, + "generated_at" => generated_at.iso8601, + "status" => errors.empty? ? 
"pass" : "fail", + "org" => org, + "reviewer" => contract.fetch("reviewer"), + "central_repo" => central_repo, + "target_repository_count" => target_repos.length, + "central_workflows" => central_workflows, + "missing_secret_repositories" => missing_secret_repos, + "inactive_fallback_workflows" => inactive_workflows, + "warnings" => warnings, + "errors" => errors + } + end + + def markdown_report(report) + lines = [ + "## EvalOpsBot Review Setup Audit", + "", + "- Status: `#{report.fetch("status")}`", + "- Reviewer: `#{report.fetch("reviewer")}`", + "- Central repo: `#{report.fetch("central_repo")}`", + "- Target repos: #{report.fetch("target_repository_count")}", + "", + "### Errors" + ] + errors = report.fetch("errors") + lines.concat(errors.empty? ? ["- None"] : errors.map { |error| "- #{error}" }) + warnings = report.fetch("warnings") + unless warnings.empty? + lines << "" + lines << "### Warnings" + lines.concat(warnings.map { |warning| "- #{warning}" }) + end + lines.join("\n") + end +end + +if $PROGRAM_NAME == __FILE__ + options = { + contract: ".github/evalopsbot-review-targets.yml", + live: true, + output: "evalopsbot-review-setup-audit.json" + } + OptionParser.new do |parser| + parser.on("--contract PATH") { |value| options[:contract] = value } + parser.on("--offline") { options[:live] = false } + parser.on("--output PATH") { |value| options[:output] = value } + parser.on("--markdown-output PATH") { |value| options[:markdown_output] = value } + end.parse! + + report = EvalOpsBotReviewSetup.verify( + EvalOpsBotReviewSetup.load_contract(options.fetch(:contract)), + live: options.fetch(:live) + ) + File.write(options.fetch(:output), JSON.pretty_generate(report)) + File.write(options[:markdown_output], EvalOpsBotReviewSetup.markdown_report(report)) if options[:markdown_output] + puts JSON.pretty_generate(report) + exit(report.fetch("status") == "pass" ? 
0 : 1) +end diff --git a/.github/workflows/evalops-pr-lens-review.yml b/.github/workflows/evalops-pr-lens-review.yml index 269a0d0..854f1e7 100644 --- a/.github/workflows/evalops-pr-lens-review.yml +++ b/.github/workflows/evalops-pr-lens-review.yml @@ -51,9 +51,31 @@ jobs: GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} TARGET_REPOS: ${{ github.event_name == 'repository_dispatch' && (github.event.client_payload.target_repos || github.event.client_payload.target_repo) || inputs.target_repos || 'evalops/platform,evalops/deploy,evalops/maestro-internal' }} TARGET_PRS: ${{ github.event_name == 'repository_dispatch' && (github.event.client_payload.target_prs || github.event.client_payload.target_pr) || inputs.target_prs || '' }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + --repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + - name: Require cross-repo GitHub token shell: bash run: | @@ -98,9 +120,10 @@ jobs: REVIEW_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY || secrets.EVALOPS_ANTHROPIC_API_KEY }} OPENAI_API_KEY: 
${{ secrets.OPENAI_API_KEY || secrets.EVALOPS_OPENAI_API_KEY }} - PR_LENS_PROVIDER: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.provider || inputs.provider || 'anthropic' }} - PR_LENS_MODEL: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.model || inputs.model || 'claude-opus-4-7' }} - PR_LENS_MAX_DIFF_BYTES: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.max_diff_bytes || inputs.max_diff_bytes || '180000' }} + PR_LENS_PROVIDER_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.provider || github.event_name == 'workflow_dispatch' && inputs.provider || '' }} + PR_LENS_MODEL_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.model || github.event_name == 'workflow_dispatch' && inputs.model || '' }} + PR_LENS_MAX_DIFF_BYTES_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.max_diff_bytes || github.event_name == 'workflow_dispatch' && inputs.max_diff_bytes || '' }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} steps: - name: Checkout org review helpers @@ -108,6 +131,28 @@ jobs: with: path: org-defaults + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + 
--repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + echo "REVIEW_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + - name: Require cross-repo write token shell: bash run: | @@ -121,12 +166,8 @@ jobs: shell: bash run: | set -euo pipefail - if [ "${PR_LENS_PROVIDER}" = "anthropic" ] && [ -z "${ANTHROPIC_API_KEY}" ]; then - echo "::error::Set ANTHROPIC_API_KEY or EVALOPS_ANTHROPIC_API_KEY for Opus lens review." - exit 2 - fi - if [ "${PR_LENS_PROVIDER}" = "openai" ] && [ -z "${OPENAI_API_KEY}" ]; then - echo "::error::Set OPENAI_API_KEY or EVALOPS_OPENAI_API_KEY for OpenAI lens review." + if [ -z "${ANTHROPIC_API_KEY}" ] && [ -z "${OPENAI_API_KEY}" ]; then + echo "::error::Set ANTHROPIC_API_KEY/EVALOPS_ANTHROPIC_API_KEY or OPENAI_API_KEY/EVALOPS_OPENAI_API_KEY for PR lens review." exit 2 fi @@ -174,17 +215,27 @@ jobs: HEAD_SHA: ${{ steps.refs.outputs.head_sha }} run: | set -euo pipefail - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb run-lens \ + args=( + run-lens --repo "${TARGET_REPO}" \ --pr "${PR_NUMBER}" \ --lens "${LENS}" \ --workspace target \ --base-sha "${BASE_SHA}" \ --head-sha "${HEAD_SHA}" \ - --output lens-review.json \ - --provider "${PR_LENS_PROVIDER}" \ - --model "${PR_LENS_MODEL}" \ - --max-diff-bytes "${PR_LENS_MAX_DIFF_BYTES}" + --output lens-review.json + --routing-config org-defaults/.github/pr-lens-routing.yml + ) + if [ -n "${PR_LENS_PROVIDER_OVERRIDE}" ]; then + args+=(--provider "${PR_LENS_PROVIDER_OVERRIDE}") + fi + if [ -n "${PR_LENS_MODEL_OVERRIDE}" ]; then + args+=(--model "${PR_LENS_MODEL_OVERRIDE}") + fi + if [ -n "${PR_LENS_MAX_DIFF_BYTES_OVERRIDE}" ]; then + args+=(--max-diff-bytes "${PR_LENS_MAX_DIFF_BYTES_OVERRIDE}") + fi + ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb "${args[@]}" - name: Complete lens status if: ${{ success() && steps.refs.outputs.skip != 'true' }} @@ -251,11 +302,33 @@ jobs: env: GH_TOKEN: ${{ 
secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} PR_LENS_MIN_CONFIDENCE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.min_confidence || inputs.min_confidence || '0.82' }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} steps: - name: Checkout org review helpers uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + --repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + - name: Download lens artifacts uses: actions/download-artifact@v4 with: @@ -277,7 +350,8 @@ jobs: ruby .github/scripts/evalops-pr-lens-review.rb meta-review \ --artifact-root artifacts \ --min-confidence "${PR_LENS_MIN_CONFIDENCE}" \ - --output meta-review.json + --output meta-review.json \ + --markdown-output "${GITHUB_STEP_SUMMARY}" - name: Upload meta review ledger uses: actions/upload-artifact@v4 diff --git a/.github/workflows/evalopsbot-review-canary.yml b/.github/workflows/evalopsbot-review-canary.yml new file mode 100644 index 0000000..d2de220 --- /dev/null +++ b/.github/workflows/evalopsbot-review-canary.yml @@ -0,0 +1,149 @@ +name: EvalOpsBot review request canary + +on: + schedule: + - cron: "37 15 * * *" + 
workflow_dispatch: + inputs: + cleanup: + description: "Close the canary PR after the deep review is observed" + required: false + default: "true" + +permissions: + contents: read + +concurrency: + group: evalopsbot-review-canary + cancel-in-progress: false + +jobs: + canary: + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} + CANARY_BRANCH: evalopsbot-review-canary + CANARY_REPO: evalops/.github + CLEANUP: ${{ github.event_name == 'workflow_dispatch' && inputs.cleanup || 'true' }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + --repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + + - name: Create canary review request + id: canary + shell: bash + run: | + set -euo pipefail + git config user.name "EvalOpsBot canary" + git config user.email "evalopsbot-canary@users.noreply.github.com" + git fetch origin main --depth=1 + git checkout -B "${CANARY_BRANCH}" origin/main + mkdir -p .github/workflows + cat > .github/workflows/evalopsbot-review-canary-fixture.yml </dev/null + head_sha="$(git rev-parse HEAD)" + { + echo "pr_number=${pr_number}" + echo "head_sha=${head_sha}" + } >> 
"${GITHUB_OUTPUT}" + + - name: Wait for deep review signal + shell: bash + env: + PR_NUMBER: ${{ steps.canary.outputs.pr_number }} + HEAD_SHA: ${{ steps.canary.outputs.head_sha }} + run: | + set -euo pipefail + ruby -rjson -ropen3 -e ' + repo = ENV.fetch("CANARY_REPO") + sha = ENV.fetch("HEAD_SHA") + context = "evalops-pr-lens/meta-review" + 12.times do |attempt| + status_json, status_err, status = Open3.capture3("gh", "api", "repos/#{repo}/commits/#{sha}/status") + check_json, check_err, check_status = Open3.capture3("gh", "api", "repos/#{repo}/commits/#{sha}/check-runs?check_name=#{context}&per_page=100") + statuses = status.success? ? JSON.parse(status_json).fetch("statuses", []) : [] + checks = check_status.success? ? JSON.parse(check_json).fetch("check_runs", []) : [] + status_match = statuses.find { |row| row["context"] == context && row["state"] != "pending" } + check_match = checks.find { |row| row["name"] == context && row["status"] == "completed" } + if status_match || check_match + puts "observed #{context} for #{repo}@#{sha}" + exit 0 + end + warn "attempt #{attempt + 1}/12: no completed #{context} yet" + sleep 45 + end + abort "timed out waiting for #{context} on #{repo}@#{sha}" + ' + + - name: Close canary PR + if: ${{ always() && env.CLEANUP == 'true' && steps.canary.outputs.pr_number != '' }} + shell: bash + env: + PR_NUMBER: ${{ steps.canary.outputs.pr_number }} + run: | + set -euo pipefail + gh pr close "${PR_NUMBER}" \ + --repo "${CANARY_REPO}" \ + --comment "EvalOpsBot requested-review canary complete for ${GITHUB_RUN_ID}." 
\ + --delete-branch || true diff --git a/.github/workflows/evalopsbot-review-request-dispatch.yml b/.github/workflows/evalopsbot-review-request-dispatch.yml index dd12ee2..d0de6e4 100644 --- a/.github/workflows/evalopsbot-review-request-dispatch.yml +++ b/.github/workflows/evalopsbot-review-request-dispatch.yml @@ -30,9 +30,31 @@ jobs: REVIEWER: ${{ github.event_name == 'workflow_dispatch' && inputs.reviewer || 'EvalOpsBot' }} DRY_RUN: ${{ github.event_name == 'workflow_dispatch' && inputs.dry_run || 'false' }} RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + --repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + - name: Require cross-repo review token shell: bash run: | diff --git a/.github/workflows/evalopsbot-review-setup-audit.yml b/.github/workflows/evalopsbot-review-setup-audit.yml new file mode 100644 index 0000000..ac4105b --- /dev/null +++ b/.github/workflows/evalopsbot-review-setup-audit.yml @@ -0,0 +1,61 @@ +name: EvalOpsBot review setup audit + +on: + schedule: + - cron: "19 16 * * 1" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: 
evalopsbot-review-setup-audit + cancel-in-progress: false + +jobs: + audit: + runs-on: ubuntu-latest + timeout-minutes: 10 + env: + GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} + PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + + - name: Configure GitHub App token + shell: bash + env: + APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} + APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} + run: | + set -euo pipefail + if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then + token="$( + EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ + EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ + EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ + ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ + --owner evalops \ + --repositories "${PR_LENS_APP_REPOSITORIES}" + )" + echo "::add-mask::${token}" + echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" + fi + + - name: Verify review request setup + shell: bash + run: | + set -euo pipefail + ruby .github/scripts/verify-evalopsbot-review-setup.rb \ + --output evalopsbot-review-setup-audit.json \ + --markdown-output "${GITHUB_STEP_SUMMARY}" + + - name: Upload audit ledger + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: evalopsbot-review-setup-audit + path: evalopsbot-review-setup-audit.json + if-no-files-found: error + retention-days: 30 diff --git a/README.md b/README.md index 8f79fc9..b310862 100644 --- a/README.md +++ b/README.md @@ -43,17 +43,17 @@ operational assumptions. ### EvalOpsBot Review Requests -Use `.github/workflows/evalopsbot-review-request-dispatch.yml` as the org-wide -bridge from GitHub review requests to the deep PR lens workflow. 
Every five -minutes it searches open EvalOps PRs with `review-requested:EvalOpsBot`, skips -head SHAs that already have an `evalops-pr-lens/meta-review` status, marks new -matches as pending, and dispatches `.github/workflows/evalops-pr-lens-review.yml` -for that exact `repo#PR`. - -This keeps the request path inside GitHub Actions and existing review secrets -instead of adding a standalone webhook relay. If lower latency becomes important, -the same `evalopsbot-review-requested` `repository_dispatch` contract can be -called by an org webhook relay. +Use the EvalOpsBot webhook relay as the primary bridge from GitHub review +requests to the deep PR lens workflow. The relay receives +`pull_request.review_requested`, filters for `requested_reviewer.login == +EvalOpsBot`, and dispatches `.github/workflows/evalops-pr-lens-review.yml` for +that exact `repo#PR`. + +`.github/workflows/evalopsbot-review-request-dispatch.yml` remains as the +hourly fallback. It searches open EvalOps PRs with +`review-requested:EvalOpsBot`, skips head SHAs that already have an +`evalops-pr-lens/meta-review` signal, marks new matches as pending, and +dispatches the same workflow contract. ### Codex Workflow Templates @@ -331,15 +331,19 @@ reviewer per lens: - generated SDK delta - eval regression risk -Each lens writes a stable commit status context named +Each lens writes a stable commit status context and best-effort Check Run named `evalops-pr-lens/<lens>`. The meta-review step ranks findings by confidence, -updates `evalops-pr-lens/meta-review`, and only posts a PR comment when findings -clear the configured high-confidence threshold. +updates `evalops-pr-lens/meta-review`, writes an operator summary to the workflow +run, and only posts a PR comment when findings clear the configured +high-confidence threshold. Required secrets in `evalops/.github`: - `EVALOPS_PR_LENS_TOKEN`: GitHub token with read/write access to the target - repos for statuses and PR comments.
+ repos for statuses and PR comments. This is the fallback path. +- `EVALOPS_PR_LENS_APP_ID`, `EVALOPS_PR_LENS_APP_PRIVATE_KEY`, and + `EVALOPS_PR_LENS_APP_INSTALLATION_ID`: preferred GitHub App auth path for + dispatch, comments, statuses, and Checks. - `ANTHROPIC_API_KEY` or `EVALOPS_ANTHROPIC_API_KEY`: Anthropic key for Opus lens reviewers. - `OPENAI_API_KEY` or `EVALOPS_OPENAI_API_KEY`: optional fallback when manually @@ -378,6 +382,12 @@ The workflow also accepts `target_prs`, `target_repos`, `provider`, `model`, `max_diff_bytes`, and `min_confidence` in `client_payload` for controlled operator overrides. Keep the relay token scoped to dispatching workflows in `evalops/.github`; the review workflow itself owns the cross-repo read/write -token and model-provider credentials. The scheduled -`evalopsbot-review-request-dispatch.yml` workflow remains as an hourly fallback -for missed webhook deliveries, not the primary trigger path. +token and model-provider credentials. Lens-specific routing defaults live in +`.github/pr-lens-routing.yml`. + +`.github/workflows/evalopsbot-review-canary.yml` creates a harmless canary PR, +requests review from `EvalOpsBot`, waits for the deep-review meta signal, and +then closes the canary PR. `.github/workflows/evalopsbot-review-setup-audit.yml` +checks the configured target repository list, fallback workflows, and selected +review secret coverage so onboarding drift is visible before a real review +request is missed. 
diff --git a/test/evalops_pr_lens_review_test.rb b/test/evalops_pr_lens_review_test.rb index c0580a8..1ce340f 100644 --- a/test/evalops_pr_lens_review_test.rb +++ b/test/evalops_pr_lens_review_test.rb @@ -146,6 +146,104 @@ def test_high_confidence_findings_filters_and_ranks_by_confidence assert_equal %w[Higher Lower], ranked.map { |finding| finding.fetch("title") } end + def test_dedupe_and_rank_merges_same_defect_across_nearby_lens_findings + findings = [ + finding("Workflow token can write every repo", 0.91, 1, ".github/workflows/release.yml", 22).merge( + "repo" => "evalops/deploy", + "pr" => 10, + "lens" => "iam-blast-radius", + "head_sha" => "abc123", + "check_id" => "evalops-pr-lens/iam-blast-radius" + ), + finding("Release workflow token writes every repository", 0.96, 1, ".github/workflows/release.yml", 24).merge( + "repo" => "evalops/deploy", + "pr" => 10, + "lens" => "migration-safety", + "head_sha" => "abc123", + "check_id" => "evalops-pr-lens/migration-safety" + ) + ] + + ranked = EvalOpsPrLensReview.dedupe_and_rank(findings) + + assert_equal 1, ranked.length + assert_equal "Release workflow token writes every repository", ranked.fetch(0).fetch("title") + end + + def test_post_status_also_attempts_check_run_without_breaking_status_publication + calls = [] + ok = Object.new + ok.define_singleton_method(:success?) { true } + capture = lambda do |_env, *command, stdin_data: nil| + calls << { command: command, stdin_data: stdin_data } + if command.include?("check-runs?check_name=evalops-pr-lens%2Fmeta-review&per_page=100") + [JSON.generate("check_runs" => []), "", ok] + else + ["{}", "", ok] + end + end + + Open3.stub(:capture3, capture) do + EvalOpsPrLensReview.post_status( + repo: "evalops/deploy", + sha: "abc123", + context: "evalops-pr-lens/meta-review", + state: "success", + description: "No high-confidence PR lens findings", + target_url: "https://github.com/evalops/.github/actions/runs/1" + ) + end + + assert calls.any? 
{ |call| call.fetch(:command).include?("repos/evalops/deploy/statuses/abc123") } + check_create = calls.find { |call| call.fetch(:command).include?("repos/evalops/deploy/check-runs") } + assert check_create + body = JSON.parse(check_create.fetch(:stdin_data)) + assert_equal "evalops-pr-lens/meta-review", body.fetch("name") + assert_equal "completed", body.fetch("status") + assert_equal "success", body.fetch("conclusion") + end + + def test_github_app_jwt_uses_app_id_as_issuer + key = OpenSSL::PKey::RSA.generate(2048) + jwt = EvalOpsPrLensReview.github_app_jwt(app_id: "12345", private_key: key.to_pem, now: Time.utc(2026, 5, 20, 1, 2, 3)) + _header, payload, _signature = jwt.split(".") + decoded = JSON.parse(Base64.urlsafe_decode64(payload + ("=" * ((4 - payload.length % 4) % 4)))) + + assert_equal "12345", decoded.fetch("iss") + assert_equal Time.utc(2026, 5, 20, 1, 1, 3).to_i, decoded.fetch("iat") + end + + def test_lens_routing_config_overrides_default_review_options + Dir.mktmpdir do |dir| + config = File.join(dir, "routing.yml") + File.write( + config, + <<~YAML + defaults: + provider: anthropic + model: claude-opus-4-7 + max_diff_bytes: 180000 + lenses: + generated-sdk-delta: + model: claude-opus-4-7-generated + max_diff_bytes: 260000 + YAML + ) + + options = EvalOpsPrLensReview.effective_review_options( + lens: "generated-sdk-delta", + provider: "openai", + model: "gpt-5.2", + max_diff_bytes: 1000, + routing_config: config + ) + + assert_equal "anthropic", options.fetch(:provider) + assert_equal "claude-opus-4-7-generated", options.fetch(:model) + assert_equal 260000, options.fetch(:max_diff_bytes) + end + end + def test_comment_body_contains_only_ranked_findings findings = [ finding("Unsafe IAM expansion", 0.94, 1, "infra/main.tf", 22).merge( diff --git a/test/verify_evalopsbot_review_setup_test.rb b/test/verify_evalopsbot_review_setup_test.rb new file mode 100644 index 0000000..85597f4 --- /dev/null +++ b/test/verify_evalopsbot_review_setup_test.rb @@ -0,0 
+1,42 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require_relative "../.github/scripts/verify-evalopsbot-review-setup" + +class VerifyEvalOpsBotReviewSetupTest < Minitest::Test + def test_contract_passes_offline + contract = EvalOpsBotReviewSetup.load_contract(".github/evalopsbot-review-targets.yml") + report = EvalOpsBotReviewSetup.verify(contract, live: false, generated_at: Time.utc(2026, 5, 20, 12, 0, 0)) + + assert_equal "pass", report.fetch("status") + assert_equal "EvalOpsBot", report.fetch("reviewer") + assert_equal 8, report.fetch("target_repository_count") + assert_includes report.fetch("central_workflows"), ".github/workflows/evalopsbot-review-canary.yml" + end + + def test_contract_requires_evalopsbot_reviewer + contract = EvalOpsBotReviewSetup.load_contract(".github/evalopsbot-review-targets.yml") + contract["reviewer"] = "someone-else" + + report = EvalOpsBotReviewSetup.verify(contract, live: false) + + assert_equal "fail", report.fetch("status") + assert_includes report.fetch("errors"), "reviewer must be EvalOpsBot" + end + + def test_markdown_report_surfaces_errors + report = { + "status" => "fail", + "reviewer" => "EvalOpsBot", + "central_repo" => "evalops/.github", + "target_repository_count" => 8, + "errors" => ["evalops/deploy missing"], + "warnings" => [] + } + + markdown = EvalOpsBotReviewSetup.markdown_report(report) + + assert_includes markdown, "Status: `fail`" + assert_includes markdown, "evalops/deploy missing" + end +end