-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
200 lines (150 loc) · 9.25 KB
/
Makefile
File metadata and controls
200 lines (150 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Every target in this file names a command, not a file. Declaring them phony
# keeps a same-named file or directory from masking the target and makes the
# recipe run on every invocation. `test-e2e` was previously missing here.
.PHONY: run run-sandbox run-sandbox-container verify-work-run-resume-docker \
	test test-e2e lint format migrate migrate-new \
	dev-web build-web sync-types dev install db db-stop stop restart \
	k8s-validate k8s-preview-plan k8s-preview-install \
	validate validate-full validate-ci validate-release \
	validate-release-live-backend validate-release-scan verify-api \
	eval-core eval-smoke eval-live eval-agent-scenarios eval-dogfooding-core \
	eval-customer-signal-triage eval-customer-delivery-risk eval-report \
	eval-generate-benchmark eval-live-benchmark eval-sample-workspaces \
	eval-benchmark-samples eval-benchmark-sample-report eval-benchmark-trends \
	eval-benchmark-suite \
	kb-check kb-check-strict kb-check-all kb-snapshot kb-new-answer kb-new-audit
# === Backend ===
# Serve hive.main:app with uvicorn from server/, auto-reloading, on port 8080.
run:
	cd server && \
		uv run uvicorn hive.main:app --reload --port 8080

# Serve the sandbox entrypoint app with uvicorn from server/, auto-reloading,
# on port 8091.
run-sandbox:
	cd server && \
		uv run uvicorn hive.container.sandbox_entrypoint:app --reload --port 8091

# Start the `sandbox` compose service (compose profile `sandbox`), detached.
run-sandbox-container:
	docker compose --profile sandbox up -d sandbox
# Recreate the sandbox container with HIVE_SANDBOX_CODING_BACKEND set to
# deterministic_proof, then run the work-run resume verification script.
#
# The first three lines are best-effort cleanup: the leading `-` tells make to
# carry on even when there is nothing to stop or remove.
# Note that the environment variable is set only on the docker compose
# commands, not on the final Python invocation.
verify-work-run-resume-docker:
	-HIVE_SANDBOX_CODING_BACKEND=deterministic_proof \
		docker compose --profile sandbox stop sandbox
	-HIVE_SANDBOX_CODING_BACKEND=deterministic_proof \
		docker compose --profile sandbox rm -f sandbox
	-docker rm -f openhive-sandbox-1
	HIVE_SANDBOX_CODING_BACKEND=deterministic_proof \
		docker compose --profile sandbox up -d --force-recreate sandbox
	server/.venv/bin/python scripts/verify/work_run_resume_docker_live.py
# Run the server test suite, deselecting tests marked `integration`.
test:
	cd server && \
		uv run pytest tests/ -v --tb=short -m "not integration"

# Run only the tests under tests/test_e2e/ that are marked `integration`.
test-e2e:
	cd server && \
		uv run pytest tests/test_e2e/ -v --tb=short -m integration
# Lint the server (ruff) and then the web sources (eslint). The recipe lines
# run in order, so eslint is skipped when ruff reports a failure.
lint:
	cd server && \
		uv run ruff check hive/ tests/
	cd web && \
		npx eslint src/

# Reformat the server package and its tests in place with ruff.
format:
	cd server && \
		uv run ruff format hive/ tests/
# Apply all pending alembic migrations (upgrade to `head`).
migrate:
	cd server && \
		uv run alembic upgrade head
# Autogenerate a new alembic revision.
#
# Usage: make migrate-new msg="<description>"
#
# `msg` is required. Without the guard, an omitted msg silently produced a
# revision with an empty message. The guard follows the same `test -n`
# convention the other parameterised targets in this file use.
migrate-new:
	@test -n "$(msg)" || (echo "Usage: make migrate-new msg=<message>" && exit 1)
	cd server && uv run alembic revision --autogenerate -m "$(msg)"
# === Database (local dev) ===
# Start the `postgres` compose service in the background.
db:
	docker compose up --detach postgres

# Stop the `postgres` compose service (the container is stopped, not removed).
db-stop:
	docker compose stop postgres
# === Frontend ===
# Run the web package's `dev` npm script.
dev-web:
	cd web && \
		npm run dev

# Run the web package's `build` npm script.
build-web:
	cd web && \
		npm run build
# === Shared types ===
# Run the dashboard API type generator with the server virtualenv's Python.
# NOTE(review): unlike the other server targets this calls .venv/bin/python
# directly instead of `uv run`, so it needs server/.venv to exist already
# (presumably created by `make install` - confirm).
sync-types:
	cd server && \
		.venv/bin/python scripts/generate_dashboard_api_types.py
# === Full stack ===
# Stop the dev servers: any process whose command line matches
# `uvicorn hive.main:app` (the command `make run` launches) or `next dev`
# (presumably what `npm run dev` starts in web/ - confirm).
#
# The first letter of each pattern is bracketed on purpose. `pkill -f` matches
# against full command lines, and the recipe's own `sh -c '...'` command line
# contains the pattern text. procps pkill excludes only itself, not its parent
# shell, so an unbracketed pattern kills the recipe shell as well. `[u]vicorn`
# still matches "uvicorn", but not the literal text "[u]vicorn".
#
# `|| true` keeps the target successful when nothing is running (pkill exits
# non-zero on no match). The short sleep gives the processes time to exit
# before a following target starts new ones.
stop:
	@echo "Stopping backend and frontend..."
	@pkill -f '[u]vicorn hive.main:app' 2>/dev/null || true
	@pkill -f '[n]ext dev' 2>/dev/null || true
	@sleep 1
# Stop any running dev servers, then start the full stack again.
#
# `dev` is launched from the recipe instead of being listed as a second
# prerequisite. Sibling prerequisites have no guaranteed order under
# `make -j`, so `stop` could otherwise kill the servers `dev` had just
# started. A recipe only runs after its prerequisites have finished.
restart: stop
	$(MAKE) dev
# Run the full stack from one terminal: `run` in the background with
# DEBUG_PROMPT=1 in its environment, `dev-web` in the foreground.
# NOTE(review): nothing in this recipe stops the backgrounded backend when the
# frontend exits; `make stop` looks like the intended cleanup - confirm.
dev:
	DEBUG_PROMPT=1 $(MAKE) run & \
		$(MAKE) dev-web
# Install dependencies for both halves of the repo: the server environment via
# `uv sync` (all extras included), then the web packages via npm.
install:
	cd server && \
		uv sync --all-extras
	cd web && \
		npm install
# === Validation ===
# All of these delegate to scripts/validate.py; they differ only in the mode
# (`quick` / `full`) and the flags passed.

# Quick mode, default profile.
validate:
	python3 scripts/validate.py quick

# Full mode, default profile.
validate-full:
	python3 scripts/validate.py full

# Quick mode with the `ci` profile.
validate-ci:
	python3 scripts/validate.py quick --profile ci

# Full mode with the `release` profile, scoped to the frontend.
validate-release:
	python3 scripts/validate.py full --profile release --scope frontend

# Full mode with the `release` profile, passing --include-live-backend.
validate-release-live-backend:
	python3 scripts/validate.py full --profile release --include-live-backend

# Same as validate-release, optionally adding a trust-boundary scan path.
# Usage: make validate-release-scan [scan_path=<path>]
validate-release-scan:
	python3 scripts/validate.py full --profile release --scope frontend \
		$(if $(scan_path),--trust-boundary-scan-path "$(scan_path)")
# Run the API smoke verification script.
verify-api:
	python3 scripts/verify/api_smoke.py
# === Evals: fixed packs ===
# Each target runs one pack file from evals/packs/. All use scripts/evals/run.py
# except eval-live, which uses scripts/evals/run_live_pack.py.

# Pack: openhive-core-offline (no label).
eval-core:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-core-offline.json

# Pack: openhive-core-smoke (no label).
eval-smoke:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-core-smoke.json

# Pack: openhive-live-operator, run through the live-pack runner.
eval-live:
	python3 scripts/evals/run_live_pack.py \
		--pack evals/packs/openhive-live-operator.json \
		--label live

# Pack: openhive-agent-scenarios.
eval-agent-scenarios:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-agent-scenarios.json \
		--label scenarios

# Pack: openhive-dogfooding-core.
eval-dogfooding-core:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-dogfooding-core.json \
		--label dogfooding-core

# Pack: openhive-customer-signal-triage.
eval-customer-signal-triage:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-customer-signal-triage.json \
		--label customer-signal-triage

# Pack: openhive-customer-delivery-risk.
eval-customer-delivery-risk:
	python3 scripts/evals/run.py \
		--pack evals/packs/openhive-customer-delivery-risk.json \
		--label customer-delivery-risk
# Produce the report for one eval pack.
# Usage: make eval-report pack=<pack-id>
# Fails with a usage message when `pack` is empty.
eval-report:
	@if [ -z "$(pack)" ]; then echo "Usage: make eval-report pack=<pack-id>"; exit 1; fi
	python3 scripts/evals/report.py --pack-id "$(pack)"
# Generate an eval pack from a benchmark source.
# Required: benchmark=<name> source=<path> profile=<path>
# Optional: pack_id=<id>
# Each required variable is checked in turn; the first empty one aborts the
# target with its message.
eval-generate-benchmark:
	@if [ -z "$(benchmark)" ]; then echo "Usage: make eval-generate-benchmark benchmark=<gaia|terminal-bench|swe-bench-verified> source=<path> profile=<path> [pack_id=<id>]"; exit 1; fi
	@if [ -z "$(source)" ]; then echo "Missing required source=<path>"; exit 1; fi
	@if [ -z "$(profile)" ]; then echo "Missing required profile=<path>"; exit 1; fi
	python3 scripts/evals/generate_benchmark_pack.py \
		--benchmark "$(benchmark)" \
		--source "$(source)" \
		--profile "$(profile)" \
		$(if $(pack_id),--pack-id "$(pack_id)")
# Run a benchmark source through the live benchmark runner.
# Required: benchmark=<name> source=<path> project_id=<project-id>
# Optional: pack_id, profile, workspace_root, workspace_map, room_id, room_name
# An optional flag is passed only when its variable is non-empty.
eval-live-benchmark:
	@if [ -z "$(benchmark)" ]; then echo "Usage: make eval-live-benchmark benchmark=<gaia|terminal-bench|swe-bench-verified> source=<path> project_id=<project-id> [pack_id=<id>]"; exit 1; fi
	@if [ -z "$(source)" ]; then echo "Missing required source=<path>"; exit 1; fi
	@if [ -z "$(project_id)" ]; then echo "Missing required project_id=<project-id>"; exit 1; fi
	python3 scripts/evals/run_live_benchmark.py \
		--benchmark "$(benchmark)" \
		--source "$(source)" \
		--project-id "$(project_id)" \
		$(if $(pack_id),--pack-id "$(pack_id)") \
		$(if $(profile),--profile "$(profile)") \
		$(if $(workspace_root),--workspace-root "$(workspace_root)") \
		$(if $(workspace_map),--workspace-map "$(workspace_map)") \
		$(if $(room_id),--room-id "$(room_id)") \
		$(if $(room_name),--room-name "$(room_name)")
# Run the sample-workspace materialisation script.
eval-sample-workspaces:
	python3 scripts/evals/materialize_sample_workspaces.py
# Run the three bundled smoke benchmark sources (gaia, terminal-bench,
# swe-bench-verified) against one project, all labelled `samples`.
# Usage: make eval-benchmark-samples project_id=<project-id>
#
# The sample workspaces are materialised first. The terminal-bench and
# swe-bench-verified runs are pointed at a workspace map under
# .artifacts/evals/sample-workspaces/ (presumably written by that step - confirm).
eval-benchmark-samples:
	@if [ -z "$(project_id)" ]; then echo "Usage: make eval-benchmark-samples project_id=<project-id>"; exit 1; fi
	python3 scripts/evals/materialize_sample_workspaces.py
	python3 scripts/evals/run_live_benchmark.py \
		--benchmark gaia \
		--source evals/samples/benchmark-sources/gaia-smoke.jsonl \
		--project-id "$(project_id)" \
		--pack-id gaia-smoke-samples \
		--label samples
	python3 scripts/evals/run_live_benchmark.py \
		--benchmark terminal-bench \
		--source evals/samples/benchmark-sources/terminal-bench-smoke.json \
		--project-id "$(project_id)" \
		--workspace-map .artifacts/evals/sample-workspaces/workspace-map.json \
		--pack-id terminal-bench-smoke-samples \
		--label samples
	python3 scripts/evals/run_live_benchmark.py \
		--benchmark swe-bench-verified \
		--source evals/samples/benchmark-sources/swebench-verified-smoke.json \
		--project-id "$(project_id)" \
		--workspace-map .artifacts/evals/sample-workspaces/workspace-map.json \
		--pack-id swebench-verified-smoke-samples \
		--label samples
# Run the sample-benchmark report script.
eval-benchmark-sample-report:
	python3 scripts/evals/report_sample_benchmarks.py

# Run the benchmark trend report script.
eval-benchmark-trends:
	python3 scripts/evals/report_benchmark_trends.py
# Run the benchmark suite driver.
# Usage: make eval-benchmark-suite project_id=<project-id>
#            [include_probes=1] [skip_live=1] [skip_offline=1]
#            [label=...] [api_base_url=...] [room_id=...] [room_name=...]
#
# At least one of project_id / skip_live must be set: the guard rejects the
# call only when both are empty, which is exactly when their concatenation is
# empty.
eval-benchmark-suite:
	@test -n "$(project_id)$(skip_live)" || (echo "Usage: make eval-benchmark-suite project_id=<project-id> [include_probes=1] [skip_live=1] [skip_offline=1]" && exit 1)
	python3 scripts/evals/run_benchmark_suite.py \
		$(if $(project_id),--project-id "$(project_id)") \
		$(if $(include_probes),--include-probes) \
		$(if $(skip_live),--skip-live) \
		$(if $(skip_offline),--skip-offline) \
		$(if $(label),--label "$(label)") \
		$(if $(api_base_url),--api-base-url "$(api_base_url)") \
		$(if $(room_id),--room-id "$(room_id)") \
		$(if $(room_name),--room-name "$(room_name)")
# === Knowledge base ===
# These KB targets use repo-local wrappers in `scripts/`, while the reusable
# implementation lives in the shared `llm-knowledge-base` Codex skill.
# OpenHive-specific defaults:
# - repo root: this repository
# - KB root resolution order:
#     1. $OPENHIVE_KB_ROOT
#     2. $OPENHIVE_INTERNAL_DOCS_ROOT/kb
#     3. ../openhive-internal-docs/kb
# Prerequisite (optional):
# - install the shared skill under `$CODEX_HOME/skills/llm-knowledge-base`
#   or `~/.codex/skills/llm-knowledge-base`
# - if it is not installed, these wrappers fall back to `tools/vendor/llm-knowledge-base`
# Run the KB status report.
kb-check:
	python3 scripts/kb_status_report.py

# Run the KB status report with --strict.
kb-check-strict:
	python3 scripts/kb_status_report.py --strict

# Run the KB snapshot script with --write.
kb-snapshot:
	python3 scripts/kb_snapshot.py --write
# === Kubernetes ===
# Check that every kustomize entry point renders, then run the bootstrap
# verification script. The rendered manifests are discarded; only kubectl's
# exit status matters, and the first failure stops the target.
k8s-validate:
	kubectl kustomize deploy/k8s/base > /dev/null
	kubectl kustomize deploy/k8s/jobs/external-postgres-migration > /dev/null
	kubectl kustomize deploy/k8s/overlays/full-gateway-runtime > /dev/null
	kubectl kustomize deploy/k8s/overlays/full-platform-runtime > /dev/null
	kubectl kustomize deploy/k8s/overlays/sandbox-whitelist-example > /dev/null
	sh scripts/k8s/verify-bootstrap.sh
# Run the preview installer's `plan` subcommand against an env file.
# Usage: make k8s-preview-plan env_file=<path>
k8s-preview-plan:
	@if [ -z "$(env_file)" ]; then echo "Usage: make k8s-preview-plan env_file=deploy/k8s/preview-installer/values.env"; exit 1; fi
	python3 scripts/k8s/preview_installer.py plan \
		--env-file "$(env_file)"

# Run the preview installer's `install` subcommand against an env file.
# Usage: make k8s-preview-install env_file=<path>
k8s-preview-install:
	@if [ -z "$(env_file)" ]; then echo "Usage: make k8s-preview-install env_file=deploy/k8s/preview-installer/values.env"; exit 1; fi
	python3 scripts/k8s/preview_installer.py install \
		--env-file "$(env_file)"
# Run the KB status report twice: first in its default mode and, only if that
# succeeds, again with --strict.
kb-check-all:
	python3 scripts/kb_status_report.py && \
		python3 scripts/kb_status_report.py --strict
# Create a new KB `answer` output.
# Required: slug=<slug> title=<title>
# Optional: question=<question> scope=<scope>
kb-new-answer:
	@if [ -z "$(slug)" ]; then echo "Usage: make kb-new-answer slug=<slug> title=<title> [question=<question>] [scope=<scope>]"; exit 1; fi
	@if [ -z "$(title)" ]; then echo "Missing required title=<title>"; exit 1; fi
	python3 scripts/kb_create_output.py answer "$(slug)" \
		--title "$(title)" \
		$(if $(question),--question "$(question)") \
		$(if $(scope),--scope "$(scope)")

# Create a new KB `audit` output.
# Required: slug=<slug> title=<title>
# Optional: scope=<scope>
kb-new-audit:
	@if [ -z "$(slug)" ]; then echo "Usage: make kb-new-audit slug=<slug> title=<title> [scope=<scope>]"; exit 1; fi
	@if [ -z "$(title)" ]; then echo "Missing required title=<title>"; exit 1; fi
	python3 scripts/kb_create_output.py audit "$(slug)" \
		--title "$(title)" \
		$(if $(scope),--scope "$(scope)")