diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..d8a79b2 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,8 @@ +# EvalOps org-default changes are broad by definition. +* @haasonsaas + +# Workflow, contract, and helper-script changes deserve extra operator attention. +.github/contracts/ @haasonsaas +.github/scripts/ @haasonsaas +.github/workflows/ @haasonsaas +services.yaml @haasonsaas diff --git a/.github/contracts/engineering-practices.yml b/.github/contracts/engineering-practices.yml new file mode 100644 index 0000000..28f1c4f --- /dev/null +++ b/.github/contracts/engineering-practices.yml @@ -0,0 +1,201 @@ +schema_version: evalops.engineering_practices.v1 +contract_id: evalops.github.engineering-practices +owner_repo: evalops/.github +status: proposed +workflow: + name: engineering-practices-control-plane + correctness_model: > + EvalOps engineering practices are correct when they are backed by live + GitHub evidence, scoped by repository tier, and connected to a runnable + check or adoption ledger instead of living only as prose. + threat_model: > + The highest-risk failure mode is high-throughput agent-assisted change + landing without a durable review, release, security, or evidence contract. + Audits must degrade to non-mutating reports when credentials are missing + and must never publish a green report from partial or empty data. +source_records: + - id: evalops.github.engineering-practices.source.org-contract + path: profile/ENGINEERING_PRACTICES.md + digest: sha256 + - id: evalops.github.engineering-practices.source.service-catalog + path: services.yaml + digest: sha256 + - id: evalops.github.engineering-practices.source.control-plane-readme + path: README.md + digest: sha256 +repo_tiers: + critical: + description: "Product, runtime, deployment, and org-control-plane repos that should block merges on practice drift." 
+ repos: + - evalops/platform + - evalops/deploy + - evalops/ensemble + - evalops/maestro-internal + - evalops/maestro + - evalops/cerebro + - evalops/chat + - evalops/.github + required_controls: + - org-rulesets + - agent-review-lane + - backlog-lifecycle + - release-train-state + - security-slo + - operating-rails + - evidence-first-done + standard: + description: "Actively maintained product, SDK, data, and infrastructure repos that should report drift before enforcement." + repos: + - evalops/hopper + - evalops/nimbus + - evalops/kestrel + - evalops/diffscope + - evalops/conductor + - evalops/console + - evalops/eval2otel + - evalops/agent-pm + required_controls: + - backlog-lifecycle + - security-slo + - operating-rails + - evidence-first-done + experimental: + description: "Research and spike repos where lightweight reporting is preferred over blocking policy." + repos: [] + required_controls: + - operating-rails +practices: + - id: org-rulesets + title: "GitHub-native rulesets for repo tiers" + why: "Branch protection is currently repo-local and uneven; org rulesets give EvalOps a central merge-safety contract." + adoption: "Start in evaluate mode for critical repos, then promote required checks once each repo has the matching workflows." + source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Org Rulesets" + checked_by: + - .github/scripts/audit-engineering-practices.rb + - .github/workflows/engineering-practices-audit.yml + signals: + - org_ruleset_count + - protected_critical_repos + - id: backlog-lifecycle + title: "Generated backlog lifecycle" + why: "Guardrail and conformance issues are useful only when fingerprints, ownership, and close conditions stay machine-readable." + adoption: "Require generated backlog issues to carry a class key, source fingerprints, last-seen window, and explicit close evidence." 
+ source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Backlog Lifecycle" + checked_by: + - .github/scripts/audit-engineering-practices.rb + - .github/scripts/sweep-recent-review-feedback.rb + signals: + - open_guardrail_backlog_issues + - stale_closing_comments + - id: release-train-state + title: "Release-train state machine" + why: "Repeated hold and image-sync PRs should converge on a single desired-state record instead of multiplying operational PRs." + adoption: "Track one active train record per environment with owner, TTL, receipt, rollback receipt, and idempotent PR updates." + source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Release Trains" + checked_by: + - .github/scripts/audit-engineering-practices.rb + signals: + - deploy_release_train_duplicate_prs + - deploy_image_sync_prs + - id: agent-review-lane + title: "Required agent review lane" + why: "Agent-assisted throughput is high enough that review-thread closure, EvalOpsBot review, and CODEOWNERS need to be standard rails." + adoption: "Critical repos require EvalOpsBot review request plumbing, review-thread guard, CODEOWNERS, and stable check contexts." + source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Agent Review" + checked_by: + - .github/scripts/audit-engineering-practices.rb + - .github/scripts/verify-evalopsbot-review-setup.rb + signals: + - evalopsbot_workflow_adoption + - review_thread_guard_adoption + - codeowners_adoption + - id: security-slo + title: "Security remediation SLOs" + why: "Security defaults exist, but open alerts need explicit tiered owners, burn-down windows, and suppression evidence without enabling expensive default scanners." + adoption: "Critical repos should track critical/high Dependabot and secret-scanning alerts against age-based SLOs. CodeQL and GitHub default code scanning are explicitly not part of this baseline." 
+ source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Security SLO" + checked_by: + - .github/scripts/audit-engineering-practices.rb + signals: + - dependabot_open_alerts + - secret_scanning_open_alerts + - id: operating-rails + title: "Repo operating rails by class" + why: "AGENTS.md, CODEOWNERS, dependency policy, Codex rails, Pysa, and runner-label config should be applied by repo class, not memory." + adoption: "Critical repos get the full rail set; standard repos report missing rails until promoted." + source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Operating Rails" + checked_by: + - .github/scripts/audit-engineering-practices.rb + - .github/workflows/codex-rails-check.yml + signals: + - agents_adoption + - codex_rails_adoption + - dependency_policy_adoption + - runner_label_config_adoption + - id: evidence-first-done + title: "Evidence-first definition of done" + why: "EvalOps sells governance and operational proof; engineering changes should leave smoke evidence, artifact receipts, and withheld-data notes." + adoption: "Every critical repo PR should connect user-visible changes to smoke fixtures, artifact receipts, telemetry, and rollback evidence." 
+ source: + path: profile/ENGINEERING_PRACTICES.md + heading: "Evidence First" + checked_by: + - .github/scripts/audit-engineering-practices.rb + - .github/pull_request_template.md + signals: + - pr_template_evidence_checklist + - runtime_smoke_guardrail_backlog +live_audit: + owner: evalops + sampled_repos: + - evalops/platform + - evalops/deploy + - evalops/ensemble + - evalops/maestro-internal + - evalops/maestro + - evalops/cerebro + - evalops/chat + - evalops/.github + - evalops/hopper + - evalops/nimbus + - evalops/kestrel + required_files: + critical: + - AGENTS.md + - .github/CODEOWNERS + - .github/workflows/review-thread-guard.yml + - .github/workflows/evalopsbot-review-request.yml + - .github/workflows/codex-rails-check.yml + standard: + - AGENTS.md + issue_queries: + guardrail_candidate: 'org:evalops is:issue is:open archived:false "Guardrail candidate" in:title' + acceptance_harness: 'org:evalops is:issue is:open archived:false "Add a research-backed acceptance harness" in:title' + conformance_contract: 'org:evalops is:issue is:open archived:false "Promote latent specs into a documented conformance contract" in:title' + provenance_evidence: 'org:evalops is:issue is:open archived:false "Make provenance and evidence traceability first-class" in:title' + telemetry_slo: 'org:evalops is:issue is:open archived:false "Expose operational telemetry and SLO gates" in:title' + release_train_queries: + deploy_hold_prs: 'repo:evalops/deploy is:pr is:merged merged:>=2026-05-06 "Hold prod-continuous release train" in:title' + deploy_image_sync_prs: 'repo:evalops/deploy is:pr is:merged merged:>=2026-05-06 "sync" "image" in:title' + security_alert_slo: + critical_days: 1 + high_days: 7 + medium_days: 30 + excluded_scanners: + - codeql + - github-code-scanning-default-setup +commands: + local_contract_check: "ruby .github/scripts/audit-engineering-practices.rb --contract-only" + live_report: "ruby .github/scripts/audit-engineering-practices.rb --json-output 
# Validates the EvalOps engineering-practices contract and, unless run in
# contract-only mode, collects read-only GitHub evidence through the `gh` CLI.
#
# Design rules followed throughout this module:
# * Malformed contract input is reported as validation errors, never raised.
# * Every failed or partial GitHub read appends a message to the `warnings`
#   array so a report built from partial data is visibly marked as such.
# * A "runner" is any callable taking an Array of `gh` arguments and returning
#   `[stdout, stderr, success]`; tests inject fakes, production uses gh_runner.
module EvalOpsEngineeringPracticesAudit
  SCHEMA_VERSION = "evalops.engineering_practices.v1"
  REPORT_SCHEMA_VERSION = "evalops.engineering_practices_audit.v1"
  REQUIRED_TOP_LEVEL = %w[
    schema_version
    contract_id
    owner_repo
    workflow
    source_records
    repo_tiers
    practices
    live_audit
  ].freeze
  REQUIRED_PRACTICES = %w[
    org-rulesets
    backlog-lifecycle
    release-train-state
    agent-review-lane
    security-slo
    operating-rails
    evidence-first-done
  ].freeze
  SEARCH_TOTAL_FALLBACK = {
    "total_count" => 0,
    "incomplete_results" => false
  }.freeze
  # Matches the stderr `gh api` emits for a missing resource, e.g.
  # "gh: Not Found (HTTP 404)". Anything else is treated as a failed read.
  NOT_FOUND_PATTERN = /HTTP 404|Not Found/i.freeze

  module_function

  # Parses the contract YAML at +path+ with aliases and custom classes disabled.
  # Raises SystemCallError when the file is unreadable and Psych::Exception
  # when the YAML is invalid; +run+ converts both into a clean exit.
  def load_contract(path)
    YAML.safe_load(File.read(path), permitted_classes: [], aliases: false)
  end

  # Returns +value+ when it is a Hash, otherwise an empty Hash, so callers can
  # read optional contract sections without nil/type checks.
  def as_hash(value)
    value.is_a?(Hash) ? value : {}
  end

  # Resolves +path+ against +root+. Despite the name the result is absolute.
  def relative_path(root, path)
    File.expand_path(path, root)
  end

  # Returns the SHA-256 hex digest of +path+ under +root+, or nil when the
  # path is blank or does not point at a regular file.
  def file_digest(root, path)
    return nil if path.to_s.empty?

    absolute = relative_path(root, path.to_s)
    return nil unless File.file?(absolute)

    Digest::SHA256.file(absolute).hexdigest
  end

  # Returns the repository part of an "owner/name" slug.
  def repo_name(repo)
    repo.to_s.split("/", 2).last
  end

  # Returns true when +path+ exists as a file under +root+. Otherwise records
  # "<path> does not exist" in +errors+ (or +warnings+ when +required+ is
  # false) and returns false.
  def check_path(root, path, errors, warnings, required: true)
    return true if File.file?(relative_path(root, path.to_s))

    (required ? errors : warnings) << "#{path} does not exist"
    false
  end

  # Returns the values that occur more than once, in the order each value is
  # first seen repeated.
  def duplicates(values)
    seen = Set.new
    values.each_with_object(Set.new) do |value, repeated|
      repeated << value if seen.include?(value)
      seen << value
    end.to_a
  end

  # Builds the hash returned by validate_contract.
  def validation_result(errors, warnings)
    {
      "status" => errors.empty? ? "pass" : "fail",
      "errors" => errors,
      "warnings" => warnings
    }
  end

  # Statically validates the parsed +contract+ against the expected shape and
  # checks that every referenced file exists under +root+.
  #
  # Never raises on malformed input: a non-mapping document, section, or list
  # entry is reported as an error instead.
  #
  # Returns { "status" => "pass"|"fail", "errors" => [...], "warnings" => [...] }.
  def validate_contract(contract, root: Dir.pwd)
    errors = []
    warnings = []
    unless contract.is_a?(Hash)
      errors << "contract must be a YAML mapping"
      return validation_result(errors, warnings)
    end

    REQUIRED_TOP_LEVEL.each { |key| errors << "#{key} is required" unless contract.key?(key) }
    errors << "schema_version must be #{SCHEMA_VERSION}" unless contract["schema_version"] == SCHEMA_VERSION
    workflow = as_hash(contract["workflow"])
    %w[name correctness_model threat_model].each do |field|
      errors << "workflow.#{field} is required" if workflow[field].to_s.empty?
    end

    Array(contract["source_records"]).each do |record|
      unless record.is_a?(Hash)
        errors << "source_records entries must be mappings"
        next
      end
      errors << "source_records.id is required" if record["id"].to_s.empty?
      path = record["path"].to_s
      if path.empty?
        errors << "#{record["id"]}: path is required"
      else
        check_path(root, path, errors, warnings)
      end
    end

    if contract.key?("repo_tiers") && !contract["repo_tiers"].is_a?(Hash)
      errors << "repo_tiers must be a mapping"
    end
    tier_controls = Set.new
    repos_by_tier = as_hash(contract["repo_tiers"]).flat_map do |tier, data|
      unless data.is_a?(Hash)
        errors << "repo_tiers.#{tier} must be a mapping"
        next []
      end
      errors << "repo_tiers.#{tier}.repos must not be empty" if Array(data["repos"]).empty? && tier != "experimental"
      Array(data["required_controls"]).each { |control| tier_controls << control.to_s }
      Array(data["repos"]).map { |repo| [tier, repo] }
    end
    duplicate_repos = duplicates(repos_by_tier.map(&:last))
    errors << "repo listed in more than one tier: #{duplicate_repos.join(", ")}" unless duplicate_repos.empty?

    practices, malformed_practices = Array(contract["practices"]).partition { |practice| practice.is_a?(Hash) }
    errors << "practices entries must be mappings" unless malformed_practices.empty?
    practice_ids = practices.map { |practice| practice["id"].to_s }
    duplicate_practices = duplicates(practice_ids)
    errors << "duplicate practice ids: #{duplicate_practices.join(", ")}" unless duplicate_practices.empty?
    missing_practices = REQUIRED_PRACTICES - practice_ids
    errors << "missing required practices: #{missing_practices.join(", ")}" unless missing_practices.empty?
    unknown_controls = tier_controls - Set.new(practice_ids)
    errors << "repo tier references unknown practice controls: #{unknown_controls.to_a.join(", ")}" unless unknown_controls.empty?

    practices.each do |practice|
      id = practice["id"].to_s
      %w[title why adoption].each do |field|
        errors << "#{id}: #{field} is required" if practice[field].to_s.strip.empty?
      end
      source_path = as_hash(practice["source"])["path"].to_s
      if source_path.empty?
        errors << "#{id}: source.path is required"
      else
        check_path(root, source_path, errors, warnings)
      end
      checked_by = Array(practice["checked_by"])
      errors << "#{id}: checked_by is required" if checked_by.empty?
      checked_by.each { |path| check_path(root, path, errors, warnings) }
      errors << "#{id}: at least one signal is required" if Array(practice["signals"]).empty?
    end

    live = as_hash(contract["live_audit"])
    required_files = as_hash(live["required_files"])
    %w[critical standard].each do |tier|
      errors << "live_audit.required_files.#{tier} is required" unless required_files.key?(tier)
    end
    errors << "live_audit.owner is required" if live["owner"].to_s.empty?
    errors << "live_audit.sampled_repos must not be empty" if Array(live["sampled_repos"]).empty?

    validation_result(errors, warnings)
  end

  # Returns one evidence entry (source id, path, SHA-256 or nil) per
  # well-formed source record. Malformed records are skipped here because
  # validate_contract already reports them.
  def evidence(contract, root)
    records = Array(as_hash(contract)["source_records"]).select { |record| record.is_a?(Hash) }
    records.map do |record|
      {
        "source_id" => record["id"],
        "path" => record["path"],
        "sha256" => file_digest(root, record["path"])
      }
    end
  end

  # Returns the production runner: a lambda that executes `gh` with the given
  # arguments and yields [stdout, stderr, success].
  #
  # A missing or non-executable `gh` binary is reported as a failed call
  # rather than raised, so the audit degrades to warnings.
  def gh_runner
    lambda do |args|
      stdout, stderr, status = Open3.capture3("gh", *args)
      [stdout, stderr, status.success?]
    rescue SystemCallError => e
      ["", "gh could not be executed: #{e.message}", false]
    end
  end

  # Parses +stdout+ as JSON and returns nil when it is not valid JSON.
  def parse_json(stdout)
    JSON.parse(stdout.to_s)
  rescue JSON::ParserError
    nil
  end

  # Runs `gh` with +args+ and returns the parsed JSON payload. On a failed
  # call or non-JSON output a warning is appended and +fallback+ is returned.
  def run_gh(args, runner, warnings, fallback)
    stdout, stderr, success = runner.call(args)
    unless success
      warnings << "gh #{args.join(" ")} failed: #{stderr.to_s.strip}"
      return fallback
    end
    parsed = parse_json(stdout)
    return parsed unless parsed.nil?

    warnings << "gh #{args.join(" ")} returned non-JSON output"
    fallback
  end

  # Returns the `total_count` of the GitHub issue search for +query+.
  # Warns when GitHub flags the result as incomplete, since the count may be
  # an undercount in that case.
  def search_count(query, runner, warnings)
    payload = run_gh(
      ["api", "-X", "GET", "/search/issues", "-f", "q=#{query}", "-f", "per_page=1"],
      runner,
      warnings,
      SEARCH_TOTAL_FALLBACK
    )
    return 0 unless payload.is_a?(Hash)

    warnings << "search results are incomplete for query: #{query}" if payload["incomplete_results"] == true
    payload.fetch("total_count", 0)
  end

  # Returns id/name/target/enforcement for each org ruleset of +owner+.
  # An unexpected (non-list) payload yields an empty list.
  def org_rulesets(owner, runner, warnings)
    payload = run_gh(
      ["api", "-X", "GET", "/orgs/#{owner}/rulesets"],
      runner,
      warnings,
      []
    )
    rulesets = payload.is_a?(Array) ? payload.select { |ruleset| ruleset.is_a?(Hash) } : []
    rulesets.map do |ruleset|
      {
        "id" => ruleset["id"],
        "name" => ruleset["name"],
        "target" => ruleset["target"],
        "enforcement" => ruleset["enforcement"]
      }
    end
  end

  # Summarizes branch protection on +repo+'s `main` branch. Required check
  # names are merged from the `contexts` and `checks` fields of the payload.
  def branch_protection(repo, runner, warnings)
    payload = as_hash(
      run_gh(
        ["api", "-X", "GET", "/repos/#{repo}/branches/main/protection"],
        runner,
        warnings,
        {}
      )
    )
    status_checks = as_hash(payload["required_status_checks"])
    check_contexts = Array(status_checks["checks"]).map { |check| as_hash(check)["context"] }.compact
    contexts = Array(status_checks["contexts"]) + check_contexts
    {
      "repo" => repo,
      "has_protection" => !payload.empty?,
      "required_status_checks" => contexts.uniq.sort,
      "requires_reviews" => payload.key?("required_pull_request_reviews"),
      "enforce_admins" => as_hash(payload["enforce_admins"])["enabled"] == true
    }
  end

  # Returns true when +path+ exists in +repo+ according to the contents API.
  #
  # A failed call still returns false. When +warnings+ is given and stderr
  # shows a failure other than "not found" (auth, rate limit, ...), a warning
  # is appended so the "missing" result is not mistaken for verified absence.
  # Failures with empty stderr stay silent.
  def file_exists?(repo, path, runner, warnings = nil)
    _stdout, stderr, success = runner.call(["api", "-X", "GET", "/repos/#{repo}/contents/#{path}"])
    return true if success

    detail = stderr.to_s.strip
    if warnings && !detail.empty? && !detail.match?(NOT_FOUND_PATTERN)
      warnings << "could not verify #{path} in #{repo}: #{detail}"
    end
    false
  end

  # For each repo, reports which files required for its tier are present.
  # The owner repo is checked on the local filesystem under +root+; all other
  # repos are checked through the API. +warnings+ (optional) receives messages
  # for API checks that failed for reasons other than a missing file.
  def repo_file_adoption(repos, required_by_tier, tiers, runner, owner_repo:, root:, warnings: nil)
    repos.map do |repo|
      tier = tiers.fetch(repo, "unknown")
      checks = Array(required_by_tier[tier]).to_h do |path|
        present = if repo == owner_repo
          File.file?(relative_path(root, path))
        else
          file_exists?(repo, path, runner, warnings)
        end
        [path, present]
      end
      {
        "repo" => repo,
        "tier" => tier,
        "required_files" => checks,
        "missing_required_files" => checks.reject { |_path, present| present }.keys
      }
    end
  end

  # Decodes alert objects from `gh api --paginate --jq '.[]'` output.
  # Accepts one JSON object per line or a single JSON array. Records that are
  # not objects are dropped with a warning (tagged with +label+) rather than
  # counted or crashed on.
  def alert_records(stdout, label, warnings)
    lines = stdout.to_s.lines.map(&:strip).reject(&:empty?)
    records = lines.map { |line| parse_json(line) }
    return records if records.all? { |record| record.is_a?(Hash) }

    whole = parse_json(stdout)
    return whole.select { |record| record.is_a?(Hash) } if whole.is_a?(Array)

    usable = records.select { |record| record.is_a?(Hash) }
    warnings << "#{label}: skipped #{records.length - usable.length} unparseable alert record(s)"
    usable
  end

  # Returns total, per-severity, and per-repo counts of open Dependabot alerts
  # for +owner+. A failed fetch yields zero counts plus a warning.
  def dependabot_alerts(owner, runner, warnings)
    stdout, stderr, success = runner.call(
      ["api", "--paginate", "-X", "GET", "/orgs/#{owner}/dependabot/alerts", "-f", "state=open", "-f", "per_page=100", "--jq", ".[]"]
    )
    unless success
      warnings << "dependabot alert fetch failed: #{stderr.to_s.strip}"
      return { "total" => 0, "by_severity" => {}, "by_repo" => {} }
    end
    alerts = alert_records(stdout, "dependabot", warnings)
    {
      "total" => alerts.length,
      "by_severity" => alerts.group_by { |alert| as_hash(alert["security_vulnerability"])["severity"].to_s }.transform_values(&:length),
      "by_repo" => alerts.group_by { |alert| as_hash(alert["repository"])["full_name"].to_s }.transform_values(&:length)
    }
  end

  # Returns the number of open org-level alerts of +kind+
  # (e.g. "secret-scanning"). A failed fetch yields 0 plus a warning.
  def alert_count(owner, kind, runner, warnings)
    stdout, stderr, success = runner.call(
      ["api", "--paginate", "-X", "GET", "/orgs/#{owner}/#{kind}/alerts", "-f", "state=open", "-f", "per_page=100", "--jq", ".[]"]
    )
    unless success
      warnings << "#{kind} alert fetch failed: #{stderr.to_s.strip}"
      return 0
    end
    alert_records(stdout, kind, warnings).length
  end

  # Returns up to 100 open issues (number, title, updatedAt) for +repo+.
  def issue_list(repo, runner, warnings)
    payload = run_gh(
      ["issue", "list", "--repo", repo, "--state", "open", "--limit", "100", "--json", "number,title,updatedAt"],
      runner,
      warnings,
      []
    )
    payload.is_a?(Array) ? payload.select { |issue| issue.is_a?(Hash) } : []
  end

  # Returns true when the latest comment on the issue contains
  # "Closing because". A failed lookup returns false and, when +warnings+ is
  # given, is recorded there.
  def stale_closing_comment?(repo, number, runner, warnings = nil)
    stdout, stderr, success = runner.call(
      ["issue", "view", number.to_s, "--repo", repo, "--json", "comments", "--jq", ".comments[-1].body // \"\""]
    )
    unless success
      warnings << "could not read comments for #{repo}##{number}: #{stderr.to_s.strip}" if warnings
      return false
    end

    stdout.to_s.include?("Closing because")
  end

  # Lists open "[codex] Guardrail backlog:" issues in +repo+ and the subset
  # whose latest comment announces closing although the issue is still open.
  def backlog_hygiene(repo, runner, warnings)
    issues = issue_list(repo, runner, warnings).select do |issue|
      issue["title"].to_s.start_with?("[codex] Guardrail backlog:")
    end
    stale = issues.select { |issue| stale_closing_comment?(repo, issue["number"], runner, warnings) }
    {
      "repo" => repo,
      "open_guardrail_backlog_issues" => issues.map { |issue| issue.slice("number", "title", "updatedAt") },
      "stale_closing_comments" => stale.map { |issue| issue.slice("number", "title", "updatedAt") }
    }
  end

  # Derives practice-drift findings from the "live" section of +report+.
  # Each finding carries practice, severity, message, and optional context.
  def build_findings(report)
    findings = []
    rulesets = report.dig("live", "org_rulesets") || []
    if rulesets.empty?
      findings << {
        "practice" => "org-rulesets",
        "severity" => "high",
        "message" => "No EvalOps org rulesets are configured; repo-local branch protection is carrying all merge policy."
      }
    end

    Array(report.dig("live", "branch_protection")).each do |item|
      next unless item["tier"] == "critical"
      next unless Array(item["required_status_checks"]).empty?

      findings << {
        "practice" => "org-rulesets",
        "severity" => "medium",
        "repo" => item["repo"],
        "message" => "Critical repo has no required status checks in branch protection."
      }
    end

    Array(report.dig("live", "repo_rails")).each do |item|
      missing = Array(item["missing_required_files"])
      next if missing.empty?

      findings << {
        "practice" => "operating-rails",
        "severity" => item["tier"] == "critical" ? "high" : "medium",
        "repo" => item["repo"],
        "message" => "Missing required rails: #{missing.join(", ")}"
      }
    end

    stale = Array(report.dig("live", "backlog_hygiene", "stale_closing_comments"))
    unless stale.empty?
      findings << {
        "practice" => "backlog-lifecycle",
        "severity" => "medium",
        "message" => "#{stale.length} guardrail backlog issue(s) have closing comments but remain open.",
        "issues" => stale
      }
    end

    security = report.dig("live", "security_alerts") || {}
    critical = security.dig("dependabot", "by_severity", "critical").to_i
    high = security.dig("dependabot", "by_severity", "high").to_i
    if critical.positive? || high.positive?
      findings << {
        "practice" => "security-slo",
        "severity" => critical.positive? ? "high" : "medium",
        "message" => "Open Dependabot alerts exceed zero for critical/high severities.",
        "critical" => critical,
        "high" => high
      }
    end

    Array(report.dig("live", "release_train_queries")).each do |query|
      next unless query["total_count"].to_i.positive?

      findings << {
        "practice" => "release-train-state",
        "severity" => "medium",
        "message" => "#{query["key"]} matched #{query["total_count"]} merged PR(s) in the audit window."
      }
    end

    findings
  end

  # Maps each repo slug listed under repo_tiers to its tier name.
  def tier_index(contract)
    as_hash(contract["repo_tiers"]).each_with_object({}) do |(tier, data), memo|
      Array(as_hash(data)["repos"]).each { |repo| memo[repo] = tier }
    end
  end

  # Report fields shared by contract-only and live reports, in output order.
  def base_report(contract, static, root, generated_at)
    {
      "schema_version" => REPORT_SCHEMA_VERSION,
      "contract_schema_version" => contract["schema_version"],
      "contract_id" => contract["contract_id"],
      "owner_repo" => contract["owner_repo"],
      "generated_at" => generated_at.utc.iso8601,
      "status" => static.fetch("status"),
      "static_validation" => static,
      "evidence" => evidence(contract, root)
    }
  end

  # Runs the read-only live audit and returns the full report hash.
  #
  # Status starts as the static validation status and becomes "attention"
  # when validation passes but findings exist. A malformed contract does not
  # raise; missing sections are treated as empty and surface through
  # static_validation and warnings.
  def live_audit(contract, runner: gh_runner, root: Dir.pwd, generated_at: Time.now.utc)
    static = validate_contract(contract, root: root)
    contract = as_hash(contract)
    audit_config = as_hash(contract["live_audit"])
    warnings = []
    owner = audit_config["owner"]
    owner_repo = contract["owner_repo"].to_s
    sampled_repos = Array(audit_config["sampled_repos"])
    tiers = tier_index(contract)
    required_files = as_hash(audit_config["required_files"])

    # Call order is kept stable so warnings appear in a predictable order.
    branch = sampled_repos.map do |repo|
      branch_protection(repo, runner, warnings).merge("tier" => tiers.fetch(repo, "unknown"))
    end
    issue_queries = as_hash(audit_config["issue_queries"]).map do |key, query|
      { "key" => key, "query" => query, "total_count" => search_count(query, runner, warnings) }
    end
    release_queries = as_hash(audit_config["release_train_queries"]).map do |key, query|
      { "key" => key, "query" => query, "total_count" => search_count(query, runner, warnings) }
    end
    backlog = backlog_hygiene(owner_repo, runner, warnings)
    live = {
      "owner" => owner,
      "org_rulesets" => org_rulesets(owner, runner, warnings),
      "branch_protection" => branch,
      "repo_rails" => repo_file_adoption(
        sampled_repos,
        required_files,
        tiers,
        runner,
        owner_repo: owner_repo,
        root: root,
        warnings: warnings
      ),
      "issue_queries" => issue_queries,
      "release_train_queries" => release_queries,
      "backlog_hygiene" => backlog,
      "security_alerts" => {
        "dependabot" => dependabot_alerts(owner, runner, warnings),
        "secret_scanning_open" => alert_count(owner, "secret-scanning", runner, warnings),
        "excluded_scanners" => Array(as_hash(audit_config["security_alert_slo"])["excluded_scanners"])
      }
    }

    report = base_report(contract, static, root, generated_at).merge(
      "live" => live,
      "warnings" => warnings
    )
    findings = build_findings(report)
    report["findings"] = findings
    report["status"] = "attention" if report["status"] == "pass" && findings.any?
    report
  end

  # Renders +report+ as a Markdown summary (findings, live signals, missing
  # rails, warnings) and returns it as a single string.
  def markdown_report(report)
    lines = [
      "# Engineering Practices Audit",
      "",
      "- Contract: `#{report["contract_id"]}`",
      "- Owner: `#{report["owner_repo"]}`",
      "- Generated at: `#{report["generated_at"]}`",
      "- Status: `#{report["status"]}`",
      "",
      "## Findings"
    ]
    findings = Array(report["findings"])
    if findings.empty?
      lines << "No practice drift findings."
    else
      findings.each do |finding|
        prefix = finding["repo"] ? "`#{finding["repo"]}` " : ""
        lines << "- `#{finding["severity"]}` `#{finding["practice"]}` #{prefix}#{finding["message"]}"
      end
    end

    lines << ""
    lines << "## Live Signals"
    rulesets = Array(report.dig("live", "org_rulesets"))
    lines << "- Org rulesets: `#{rulesets.length}`"
    security = report.dig("live", "security_alerts") || {}
    lines << "- Dependabot open alerts: `#{security.dig("dependabot", "total") || 0}`"
    lines << "- Secret scanning open alerts: `#{security["secret_scanning_open"] || 0}`"
    unless Array(security["excluded_scanners"]).empty?
      lines << "- Excluded scanners: `#{security["excluded_scanners"].join(", ")}`"
    end
    Array(report.dig("live", "issue_queries")).each do |query|
      lines << "- #{query["key"]}: `#{query["total_count"]}`"
    end
    Array(report.dig("live", "release_train_queries")).each do |query|
      lines << "- #{query["key"]}: `#{query["total_count"]}`"
    end

    lines << ""
    lines << "## Missing Repo Rails"
    missing = Array(report.dig("live", "repo_rails")).select { |item| Array(item["missing_required_files"]).any? }
    if missing.empty?
      lines << "No sampled repo rail gaps."
    else
      missing.each do |item|
        lines << "- `#{item["repo"]}` (#{item["tier"]}): #{item["missing_required_files"].join(", ")}"
      end
    end

    unless Array(report["warnings"]).empty?
      lines << ""
      lines << "## Warnings"
      report["warnings"].each { |warning| lines << "- #{warning}" }
    end

    lines.join("\n")
  end

  # Writes the JSON report to +json_output+ (or stdout when nil) and, when
  # +markdown_output+ is given, the Markdown report. Paths resolve under +root+.
  def write_report(report, json_output, markdown_output, root)
    json = JSON.pretty_generate(report)
    if json_output
      File.write(relative_path(root, json_output), "#{json}\n")
    else
      puts json
    end
    File.write(relative_path(root, markdown_output), "#{markdown_report(report)}\n") if markdown_output
  end

  # CLI entry point. Returns the process exit status:
  # * 0 - report written and no blocking condition
  # * 1 - static validation failed, findings exist with --fail-on-findings,
  #       or the invocation itself failed (bad flag, unreadable or invalid
  #       contract file, unwritable output). Invocation failures print a
  #       one-line message on stderr instead of a stack trace.
  def run(argv)
    options = {
      contract: ".github/contracts/engineering-practices.yml",
      json_output: nil,
      markdown_output: nil,
      contract_only: false,
      fail_on_findings: false
    }
    OptionParser.new do |parser|
      parser.on("--contract PATH", "Contract YAML path") { |value| options[:contract] = value }
      parser.on("--json-output PATH", "Write JSON report") { |value| options[:json_output] = value }
      parser.on("--markdown-output PATH", "Write Markdown report") { |value| options[:markdown_output] = value }
      parser.on("--contract-only", "Validate the static contract without GitHub API calls") { options[:contract_only] = true }
      parser.on("--fail-on-findings", "Exit non-zero when live practice drift is found") { options[:fail_on_findings] = true }
    end.parse!(argv)

    root = Dir.pwd
    contract = load_contract(relative_path(root, options.fetch(:contract)))
    report = if options[:contract_only]
      static = validate_contract(contract, root: root)
      base_report(as_hash(contract), static, root, Time.now.utc).merge(
        "findings" => [],
        "warnings" => static.fetch("warnings")
      )
    else
      live_audit(contract, root: root)
    end

    write_report(report, options[:json_output], options[:markdown_output], root)
    return 1 if report["static_validation"].fetch("status") == "fail"
    return 1 if options[:fail_on_findings] && Array(report["findings"]).any?

    0
  rescue OptionParser::ParseError, SystemCallError, Psych::Exception => e
    warn "audit-engineering-practices: #{e.message}"
    1
  end
end
+ exit 0 + fi + + ruby .github/scripts/audit-engineering-practices.rb \ + --contract-only \ + --json-output engineering-practices-contract-report.json \ + --markdown-output engineering-practices-contract-report.md + cat engineering-practices-contract-report.md >> "${GITHUB_STEP_SUMMARY}" + - name: Validate canonical labels shell: bash run: | diff --git a/.github/workflows/engineering-practices-audit.yml b/.github/workflows/engineering-practices-audit.yml new file mode 100644 index 0000000..2fe73d3 --- /dev/null +++ b/.github/workflows/engineering-practices-audit.yml @@ -0,0 +1,88 @@ +name: Engineering Practices Audit + +on: + pull_request: + paths: + - ".github/contracts/engineering-practices.yml" + - ".github/scripts/audit-engineering-practices.rb" + - ".github/workflows/engineering-practices-audit.yml" + - "profile/ENGINEERING_PRACTICES.md" + - "test/audit_engineering_practices_test.rb" + schedule: + - cron: "31 16 * * 1" + workflow_dispatch: + inputs: + fail_on_findings: + description: "Fail the run when live practice drift is found" + required: false + default: "false" + +permissions: + contents: read + +jobs: + contract: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v5 + + - name: Validate engineering practices contract + run: | + set -euo pipefail + ruby .github/scripts/audit-engineering-practices.rb \ + --contract-only \ + --json-output engineering-practices-contract.json \ + --markdown-output engineering-practices-contract.md + cat engineering-practices-contract.md >> "${GITHUB_STEP_SUMMARY}" + + - name: Upload contract report + uses: actions/upload-artifact@v4 + with: + name: engineering-practices-contract + path: | + engineering-practices-contract.json + engineering-practices-contract.md + if-no-files-found: error + retention-days: 30 + + live-audit: + if: ${{ github.event_name != 'pull_request' }} + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + GH_TOKEN: ${{ secrets.EVALOPS_ORG_READ_TOKEN }} + 
FAIL_ON_FINDINGS: ${{ inputs.fail_on_findings || 'false' }} + steps: + - uses: actions/checkout@v5 + + - name: Require org read token + run: | + set -euo pipefail + if [ -z "${GH_TOKEN}" ]; then + echo "::error::Set secrets.EVALOPS_ORG_READ_TOKEN with org-wide repo, issue, and security read access before running the live engineering practices audit." + exit 2 + fi + + - name: Audit live engineering practice drift + run: | + set -euo pipefail + args=( + --json-output engineering-practices-audit.json + --markdown-output engineering-practices-audit.md + ) + if [ "${FAIL_ON_FINDINGS}" = "true" ]; then + args+=(--fail-on-findings) + fi + ruby .github/scripts/audit-engineering-practices.rb "${args[@]}" + cat engineering-practices-audit.md >> "${GITHUB_STEP_SUMMARY}" + + - name: Upload live audit report + uses: actions/upload-artifact@v4 + with: + name: engineering-practices-audit + path: | + engineering-practices-audit.json + engineering-practices-audit.md + if-no-files-found: error + retention-days: 30 diff --git a/.github/workflows/evalopsbot-review-request.yml b/.github/workflows/evalopsbot-review-request.yml new file mode 100644 index 0000000..0c207f4 --- /dev/null +++ b/.github/workflows/evalopsbot-review-request.yml @@ -0,0 +1,71 @@ +name: EvalOpsBot requested review + +on: + pull_request_target: + types: [review_requested] + +permissions: + contents: read + +jobs: + dispatch: + if: ${{ github.event.requested_reviewer.login == 'EvalOpsBot' }} + runs-on: ubuntu-latest + timeout-minutes: 5 + env: + GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN }} + TARGET_REPO: ${{ github.repository }} + TARGET_PR_NUMBER: ${{ github.event.pull_request.number }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + REQUESTED_REVIEWER: ${{ github.event.requested_reviewer.login }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + steps: + - name: Skip when dispatch token is unavailable + if: ${{ env.GH_TOKEN == '' }} + run: | + echo 
"EVALOPS_PR_LENS_TOKEN is unavailable; scheduled dispatcher remains the fallback." + + - name: Dispatch deep review + if: ${{ env.GH_TOKEN != '' }} + shell: bash + run: | + set -euo pipefail + dispatch_payload="$( + jq -n \ + --arg target_repo "${TARGET_REPO}" \ + --arg target_pr "${TARGET_REPO}#${TARGET_PR_NUMBER}" \ + --arg requested_reviewer "${REQUESTED_REVIEWER}" \ + --arg source "repo-review-request-workflow" \ + --arg requester "${GITHUB_ACTOR}" \ + '{ + event_type: "evalopsbot-review-requested", + client_payload: { + target_repo: $target_repo, + target_pr: $target_pr, + requested_reviewer: $requested_reviewer, + source: $source, + requester: $requester + } + }' + )" + gh api --method POST repos/evalops/.github/dispatches --input - <<<"${dispatch_payload}" + + - name: Mark deep review queued + if: ${{ env.GH_TOKEN != '' }} + shell: bash + run: | + set -euo pipefail + status_payload="$( + jq -n \ + --arg state "pending" \ + --arg context "evalops-pr-lens/meta-review" \ + --arg description "Queued EvalOpsBot requested deep review" \ + --arg target_url "${RUN_URL}" \ + '{ + state: $state, + context: $context, + description: $description, + target_url: $target_url + }' + )" + gh api --method POST "repos/${TARGET_REPO}/statuses/${HEAD_SHA}" --input - <<<"${status_payload}" diff --git a/README.md b/README.md index b310862..4ddba35 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ rails: - workflow and workflow-template YAML - workflow template metadata - org control-plane contract shape and evidence chain +- engineering-practices contract shape and live-audit entrypoint - canonical `labels.yml` shape - `AGENTS.md` presence and non-empty content - skill frontmatter @@ -243,6 +244,31 @@ inputs and decisions behind an org-default change. See `profile/ORG_CONTROL_PLANE_CONTRACT.md` for the design note. 
+### Engineering Practices Audit + +`.github/contracts/engineering-practices.yml` turns the current EvalOps +engineering-practice standard into an auditable contract. It covers org +rulesets, generated backlog lifecycle, release-train state, agent review, +security SLOs, repo operating rails, and evidence-first completion. + +Validate only the contract shape locally: + +```bash +ruby .github/scripts/audit-engineering-practices.rb --contract-only +``` + +Run the live audit with `gh` authenticated to EvalOps: + +```bash +ruby .github/scripts/audit-engineering-practices.rb \ + --json-output engineering-practices-audit.json \ + --markdown-output engineering-practices-audit.md +``` + +`.github/workflows/engineering-practices-audit.yml` validates the contract on +PRs and runs the live, non-mutating audit on schedule or manual dispatch with +`EVALOPS_ORG_READ_TOKEN`. + ### Label Taxonomy Sync `labels.yml` is the canonical EvalOps label set, seeded from diff --git a/profile/ENGINEERING_PRACTICES.md b/profile/ENGINEERING_PRACTICES.md new file mode 100644 index 0000000..a52d0f3 --- /dev/null +++ b/profile/ENGINEERING_PRACTICES.md @@ -0,0 +1,135 @@ +# EvalOps Engineering Practices + +EvalOps should run engineering like an agent-native control system: high +throughput is welcome, but every repeated decision needs a durable contract, +live evidence, and a clear close condition. + +The contract lives in `.github/contracts/engineering-practices.yml`. The audit +entrypoint is `.github/scripts/audit-engineering-practices.rb`. + +## Org Rulesets + +Use GitHub-native org rulesets as the central merge-safety layer. 
Repo-local +branch protection can stay for special cases, but the baseline should be tiered +from `services.yaml` and this contract: + +- critical repos: evaluate rulesets first, then require the matching checks once + each repo has adopted them +- standard repos: report missing rails and promote to enforcement after a clean + adoption window +- experimental repos: keep lightweight reporting unless they become customer or + production paths + +The first rule should be boring: protect default branches from deletion and +non-fast-forward updates, require PRs for critical repos, and only add required +status checks after their workflows are present. + +## Backlog Lifecycle + +Generated issues are operational data, not a parking lot. Every generated +guardrail, conformance, telemetry, or evidence issue should carry: + +- stable class key +- source fingerprints or representative feedback URLs +- owner repo +- smallest guardrail location +- last-seen window +- close condition with merged PR or audit evidence + +If a bot comments that an issue is closing, the issue should actually close in +the same mutation. A "closing" comment on an open issue is stale state. + +## Release Trains + +Deploy PRs should change desired state, not serve as the state machine. Release +train holds, image syncs, rollback requests, and gate decisions should converge +on one active train record per environment. + +Each active train record should include: + +- environment and train id +- owner and TTL +- current desired image or artifact revision +- hold reason and unblock condition +- release receipt +- rollback receipt or explicit no-rollback note +- latest PR number that mutated the record + +Automation should update the existing train record when possible instead of +opening repeated hold PRs with the same intent. 
+ +## Agent Review + +Agent-assisted work should have an agent-native review lane: + +- `EvalOpsBot` review requests route to the PR lens workflow +- review-thread guard blocks unresolved high-severity feedback +- CODEOWNERS names risky surfaces +- stable check contexts make the result queryable + +The point is not more comments. The point is fewer missed regressions, faster +review-thread closure, and a durable query surface for follow-through. + +## Security SLO + +Security alerts need owners and burn-down windows, but the baseline must stay +cheap and targeted. Do not enable CodeQL, GitHub default code scanning, or any +other long-running blanket scanner as part of this practice. Use existing alert +state, Dependabot, secret scanning, and tuned lightweight analyzers only when +they have a clear owner and runtime budget. + +Default SLO: + +- critical: 1 day +- high: 7 days +- medium: 30 days + +Suppressions need a reason, expiry, and artifact link. If an expensive scanner +is already producing bad signal, the practice is to disable or replace it with a +bounded check, not to make it required. + +## Operating Rails + +Repos should adopt rails by class rather than by memory: + +- `AGENTS.md` for local agent behavior +- CODEOWNERS for risky surfaces +- dependency policy through Dependabot or Renovate +- Codex rails check for org-default contracts +- review-thread guard where review feedback should block +- EvalOpsBot review request workflow on high-churn repos +- shared runner-label/actionlint config for custom runner labels +- Pysa on active Python repos, or a documented exception + +Critical repos should have all of the above unless an exception is captured in +the audit output. + +## Evidence First + +Done means the operator can see why the change is safe. 
For critical repos, +user-visible or production-visible PRs should carry at least one of: + +- runtime smoke fixture +- artifact or release receipt +- telemetry or SLO gate +- rollback evidence +- explicit withheld-data note when customer data cannot be included + +This makes EvalOps engineering practice reinforce the product promise: governed +work, with evidence, across human and agent operators. + +## Local Audit + +Validate the static contract without GitHub access: + +```bash +ruby .github/scripts/audit-engineering-practices.rb --contract-only +``` + +Run the live audit with an authenticated `gh` session: + +```bash +ruby .github/scripts/audit-engineering-practices.rb \ + --json-output engineering-practices-audit.json \ + --markdown-output engineering-practices-audit.md +``` diff --git a/profile/README.md b/profile/README.md index 37a2cf6..05c839e 100644 --- a/profile/README.md +++ b/profile/README.md @@ -6,6 +6,7 @@ The organizational operating system for AI agent workforces — evaluation, gove - [Agent authorship attribution](AGENT_AUTHORSHIP.md) — git trailers, PR labels, and audit indexing for Maestro-authored code. - [GitHub Actions quota hygiene](GITHUB_ACTIONS_QUOTA.md) — CodeQL scoping, artifact retention, and quota-safe diagnostics. +- [Engineering practices](ENGINEERING_PRACTICES.md) — tiered merge policy, backlog lifecycle, release trains, security SLOs, and evidence-first completion. 
## Platform Services diff --git a/test/audit_engineering_practices_test.rb b/test/audit_engineering_practices_test.rb new file mode 100644 index 0000000..0d9c143 --- /dev/null +++ b/test/audit_engineering_practices_test.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "time" +require_relative "../.github/scripts/audit-engineering-practices" + +class AuditEngineeringPracticesTest < Minitest::Test + def test_static_contract_passes_and_emits_source_evidence + contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") + validation = EvalOpsEngineeringPracticesAudit.validate_contract(contract, root: Dir.pwd) + + assert_equal "pass", validation.fetch("status"), validation.fetch("errors").join("\n") + evidence = EvalOpsEngineeringPracticesAudit.evidence(contract, Dir.pwd) + assert evidence.all? { |item| item.fetch("sha256").match?(/\A[0-9a-f]{64}\z/) } + end + + def test_missing_required_practice_fails_static_validation + contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") + contract["practices"].reject! { |practice| practice["id"] == "security-slo" } + + validation = EvalOpsEngineeringPracticesAudit.validate_contract(contract, root: Dir.pwd) + + assert_equal "fail", validation.fetch("status") + assert validation.fetch("errors").any? { |error| error.include?("missing required practices: security-slo") } + end + + def test_live_audit_reports_ruleset_rail_backlog_security_and_release_findings + contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") + runner = FakeGhRunner.new + + report = EvalOpsEngineeringPracticesAudit.live_audit( + contract, + runner: runner, + root: Dir.pwd, + generated_at: Time.utc(2026, 5, 20, 4, 0, 0) + ) + + assert_equal "attention", report.fetch("status") + findings = report.fetch("findings") + assert findings.any? 
{ |finding| finding.fetch("practice") == "org-rulesets" } + assert findings.any? { |finding| finding.fetch("practice") == "operating-rails" && finding.fetch("repo") == "evalops/platform" } + assert findings.any? { |finding| finding.fetch("practice") == "backlog-lifecycle" } + assert findings.any? { |finding| finding.fetch("practice") == "security-slo" } + assert findings.any? { |finding| finding.fetch("practice") == "release-train-state" } + + markdown = EvalOpsEngineeringPracticesAudit.markdown_report(report) + assert_includes markdown, "Engineering Practices Audit" + assert_includes markdown, "Missing Repo Rails" + JSON.parse(JSON.pretty_generate(report)) + end + + class FakeGhRunner + def initialize + @files = { + "evalops/platform" => { + "AGENTS.md" => true, + ".github/workflows/review-thread-guard.yml" => true + }, + "evalops/deploy" => { + "AGENTS.md" => true, + ".github/CODEOWNERS" => true, + ".github/workflows/review-thread-guard.yml" => true, + ".github/workflows/evalopsbot-review-request.yml" => true, + ".github/workflows/codex-rails-check.yml" => true + } + } + end + + def call(args) + command = args.join(" ") + return json([]) if command == "api -X GET /orgs/evalops/rulesets" + return json(branch_protection(args)) if command.include?("/branches/main/protection") + return content_response(args) if command.include?("/contents/") + return search_response(args) if command.start_with?("api -X GET /search/issues") + return issue_list_response if command.start_with?("issue list") + return ["Closing because the sentinel no longer ranks this class.\n", "", true] if command.start_with?("issue view 69") + return [JSON.generate(dependabot_alert) + "\n", "", true] if command.include?("/dependabot/alerts") + return ["{}\n{}\n", "", true] if command.include?("/secret-scanning/alerts") + raise "audit must not fetch code scanning alerts" if command.include?("/code-scanning/alerts") + + json({}) + end + + private + + def json(value) + [JSON.generate(value), "", true] + 
end + + def branch_protection(args) + repo = args.find { |arg| arg.start_with?("/repos/") }.split("/")[2, 2].join("/") + return {} if repo == "evalops/platform" + + { + "required_status_checks" => { + "contexts" => ["ci"] + }, + "required_pull_request_reviews" => {}, + "enforce_admins" => { + "enabled" => true + } + } + end + + def content_response(args) + path = args.find { |arg| arg.start_with?("/repos/") } + parts = path.split("/") + repo = parts[2, 2].join("/") + file = parts[5, parts.length].join("/") + present = @files.fetch(repo, {}).fetch(file, false) + present ? json({ "path" => file }) : ["", "not found", false] + end + + def search_response(args) + query_arg = args.find { |arg| arg.start_with?("q=") }.to_s + count = if query_arg.include?("Hold prod-continuous") + 4 + elsif query_arg.include?("Guardrail candidate") + 2 + else + 0 + end + json({ "total_count" => count, "incomplete_results" => false }) + end + + def issue_list_response + json( + [ + { + "number" => 69, + "title" => "[codex] Guardrail backlog: Workflow shell footgun (workflow-shell-footgun)", + "updatedAt" => "2026-05-20T01:22:06Z" + } + ] + ) + end + + def dependabot_alert + { + "repository" => { + "full_name" => "evalops/platform" + }, + "security_vulnerability" => { + "severity" => "high" + } + } + end + end +end diff --git a/test/verify_org_control_plane_contract_test.rb b/test/verify_org_control_plane_contract_test.rb index c54e7a2..aaa0768 100644 --- a/test/verify_org_control_plane_contract_test.rb +++ b/test/verify_org_control_plane_contract_test.rb @@ -62,9 +62,13 @@ def write_minimal_repo(root) ".github/scripts/verify-org-control-plane-contract.rb", ".github/scripts/validate-services-catalog.rb", ".github/scripts/sweep-recent-review-feedback.rb", + ".github/scripts/audit-engineering-practices.rb", ".github/workflows/codex-rails-check.yml", + ".github/workflows/engineering-practices-audit.yml", ".github/workflows/review-feedback-sentinel.yml", + "profile/ENGINEERING_PRACTICES.md", 
"test/verify_org_control_plane_contract_test.rb", + "test/audit_engineering_practices_test.rb", "test/validate_services_catalog_test.rb", "test/sweep_recent_review_feedback_test.rb", "test/evalops_pr_lens_review_test.rb"