# ci.yml
name: CI

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Workflow-wide GITHUB_TOKEN scope: read-only repository contents.
# A job-level `permissions` block replaces this default for that job.
permissions:
  contents: read

# One active run per ref: a newer run in the same group cancels the run
# that is still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Static-analysis gate: Ruff lint, Ruff format check, and mypy.
  # schema-check, test-unit, test-integration and helm-schema-live all wait
  # for this job via `needs: lint`.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install -e ".[dev]"
      - name: Ruff check
        run: ruff check src/ tests/ scripts/
      - name: Ruff format check
        run: ruff format --check src/ tests/ scripts/
      - name: Type check
        run: mypy src/ --ignore-missing-imports

  # Runs scripts/check_schema_evolution.py against the checked-out tree.
  schema-check:
    runs-on: ubuntu-latest
    needs: lint
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          # Fetch the tip commit plus one ancestor.
          # NOTE(review): presumably the script diffs against the previous
          # commit - confirm against scripts/check_schema_evolution.py.
          fetch-depth: 2
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install -e ".[dev]"
      - name: Check schema evolution
        run: python scripts/check_schema_evolution.py

  # Unit + property tests with a global coverage floor, followed by
  # per-module coverage gates, then a Codecov upload.
  test-unit:
    runs-on: ubuntu-latest
    needs: lint
    # Job-level permissions replace the workflow default, so `contents: read`
    # is restated. `id-token: write` lets the Codecov step request an OIDC
    # token (`use_oidc: true` below).
    permissions:
      contents: read
      id-token: write
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          pip install -e ".[dev,cloud]"
          pip install -e "./sdk"
          pip install -e "./integrations[mcp]"
      # NOTE(review): .tmp is presumably the basetemp configured for pytest
      # in the project settings - confirm.
      - name: Prepare pytest temp directory
        run: mkdir -p .tmp
      - name: Run unit and property tests with coverage
        run: |
          # Full src/sdk baseline floor; changed-code coverage stays at 80% via Codecov patch status.
          # --cov-branch turns the floor into a combined line+branch metric — local baseline
          # 2026-05-25 is 62% (7716 lines / 2010 branches measured on HEAD `22b1be9`), so the
          # 60% gate stays passing with a 2pp cushion. Raise the gate once that cushion grows.
          python -m pytest tests/unit/ tests/property/ -v --tb=short --cov=src --cov=sdk --cov-branch --cov-report=xml --cov-report=term-missing --cov-fail-under=60
      - name: Run quality validators coverage gate
        run: |
          python -m pytest tests/unit/test_validators.py -v --tb=short --cov=src.quality.validators --cov-report=term-missing --cov-fail-under=90
      - name: Run freshness monitor coverage gate
        run: |
          python -m pytest tests/unit/test_freshness_monitor.py -v --tb=short --cov=src.quality.monitors.freshness_monitor --cov-report=term-missing --cov-fail-under=90
      - name: Run event producer coverage gate
        run: |
          python -m pytest tests/unit/test_event_producer.py -v --tb=short --cov=src.ingestion.producers.event_producer --cov-report=term-missing --cov-fail-under=90
      - name: Run SQL guard coverage gate
        run: |
          # Security-critical NL->SQL allowlist/denylist guard; local module
          # coverage is 100%, so the 90% gate keeps a 10pp regression cushion.
          python -m pytest tests/unit/test_sql_guard.py -v --tb=short --cov=src.serving.semantic_layer.sql_guard --cov-report=term-missing --cov-fail-under=90
      - name: Run PII masking coverage gate
        run: |
          # Security-critical PII masker (email/phone/address/name + query-result
          # masking); local module coverage is 99%, so the 90% gate keeps a ~9pp
          # regression cushion on a mutmut target.
          python -m pytest tests/unit/test_masking.py -v --tb=short --cov=src.serving.masking --cov-report=term-missing --cov-fail-under=90
      - name: Run rate limiter coverage gate
        run: |
          # Security-critical sliding-window rate limiter (Redis + in-memory
          # fail-open fallback); local module coverage is 98% (only the optional
          # redis auto-construct line is env-gated), so the 90% gate keeps a
          # cushion on a mutmut target.
          python -m pytest tests/unit/test_rate_limiter.py -v --tb=short --cov=src.serving.api.rate_limiter --cov-report=term-missing --cov-fail-under=90
      - name: Run auth manager coverage gate
        run: |
          # Security-critical auth manager (key match/verify, tenant isolation,
          # rate-limit/failed-auth windows, rotation grace) and a mutmut target;
          # the gate runs its dedicated unit files. Module coverage is 94% so the
          # 90% gate keeps a cushion; the remaining gap is the platform-divergent
          # SIGHUP handler and bcrypt rotation paths the integration/e2e auth
          # suites cover.
          #
          # NOTE: unlike the other per-module gates this uses `coverage run` +
          # `coverage report --include`, NOT `pytest --cov=<module>`. The auth
          # manager pulls in duckdb (usage table), and pytest-cov's source
          # instrumentation of a duckdb-importing module trips duckdb's lazy
          # `_duckdb._sqltypes` import at COLLECTION time, both locally and on CI
          # runners. `coverage run` imports duckdb normally and avoids the break.
          python -m coverage run -m pytest tests/unit/test_auth.py tests/unit/test_auth_manager_pure_logic.py tests/unit/test_auth_manager_memory_bounds.py tests/unit/test_auth_hashed_key_guidance.py tests/unit/test_auth_argon2_lookup.py -p no:schemathesis
          python -m coverage report --include="*/serving/api/auth/manager.py" --show-missing --fail-under=90
      - name: Run key rotation coverage gate
        run: |
          # Security-critical key-rotation lifecycle (create/rotate/revoke,
          # grace-period scheduling, rotation status) and a mutmut target. Like
          # the auth manager gate it pulls in duckdb, so it uses coverage run +
          # coverage report --include (not pytest --cov) to avoid the
          # duckdb _duckdb._sqltypes collection break. Module coverage is 93%.
          python -m coverage run -m pytest tests/unit/test_key_rotation.py -p no:schemathesis
          python -m coverage report --include="*/serving/api/auth/key_rotation.py" --show-missing --fail-under=90
      - name: Run outbox coverage gate
        run: |
          # Security/reliability-critical at-least-once outbox dispatch loop
          # (delivery, retry/backoff, poison-to-failed, mark-sent transactions)
          # and a mutmut target. Imports duckdb, so it uses coverage run +
          # coverage report --include like the auth gates. Module coverage is
          # 92% across the two dedicated unit files.
          python -m coverage run -m pytest tests/unit/test_outbox_processor.py tests/unit/test_outbox_connection_guard.py -p no:schemathesis
          python -m coverage report --include="*/processing/outbox.py" --show-missing --fail-under=90
      - name: Run query package coverage gate
        run: |
          # The NL->SQL orchestration surface (engine, entity/metric/NL query
          # mixins, SQL builder) and a mutmut target set; the old single-file
          # query_engine.py is a re-export shim, so the gate spans the whole
          # query package. The engine imports duckdb, so it uses coverage run +
          # coverage report --include like the auth/outbox gates. Package
          # coverage is 97% across the five dedicated unit files; the gap is
          # the OTel span-recording branches the integration suites cover.
          python -m coverage run -m pytest tests/unit/test_query_engine.py tests/unit/test_query_engine_injection.py tests/unit/test_query_engine_mixin_contracts.py tests/unit/test_paginated_nl_query.py tests/unit/test_query_package_logic.py -p no:schemathesis
          python -m coverage report --include="*/serving/semantic_layer/query/*" --show-missing --fail-under=90
      # Uploads the XML report requested by `--cov-report=xml` in the first
      # coverage step. An upload failure does not fail the job.
      - name: Upload coverage
        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f  # v7.0.0
        with:
          files: coverage.xml
          use_oidc: true
          fail_ci_if_error: false

  # Integration tests against live Kafka and ClickHouse service containers.
  test-integration:
    runs-on: ubuntu-latest
    needs: lint
    services:
      # Single Kafka node acting as both broker and controller.
      kafka:
        image: confluentinc/cp-kafka:7.7.0
        ports:
          - "9092:9092"
        env:
          # Environment values are strings; quoted so the numeric ones are
          # not typed as integers by the YAML loader.
          KAFKA_NODE_ID: "1"
          KAFKA_PROCESS_ROLES: broker,controller
          KAFKA_CONTROLLER_QUORUM_VOTERS: 1@localhost:29093
          KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:29093
          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
          KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
          KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT
          KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
          CLUSTER_ID: "CITestCluster01"
      clickhouse:
        # Live coverage for the ClickHouse serving backend's sqlglot
        # transpile path (H-C2); test_clickhouse_backend_live.py skips
        # itself when CLICKHOUSE_LIVE_HOST is absent.
        image: clickhouse/clickhouse-server:25.3
        ports:
          - "8123:8123"
        env:
          CLICKHOUSE_USER: agentflow
          CLICKHOUSE_PASSWORD: agentflow
          CLICKHOUSE_DB: agentflow
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          pip install -e ".[dev,cloud]"
          pip install -e "./sdk"
      - name: Prepare pytest temp directory
        run: mkdir -p .tmp
      # Poll the broker's TCP port once per second, for at most 30 seconds.
      - name: Wait for Kafka
        run: |
          timeout 30 bash -c 'until nc -z localhost 9092; do sleep 1; done'
      # Poll ClickHouse's HTTP /ping endpoint once per second, for at most
      # 60 seconds.
      - name: Wait for ClickHouse
        run: |
          timeout 60 bash -c 'until curl -sf http://localhost:8123/ping; do sleep 1; done'
      - name: Run integration tests
        # Connection settings for the ClickHouse service container above;
        # user, password and database match its CLICKHOUSE_* values.
        env:
          CLICKHOUSE_LIVE_HOST: localhost
          CLICKHOUSE_LIVE_PORT: "8123"
          CLICKHOUSE_LIVE_USER: agentflow
          CLICKHOUSE_LIVE_PASSWORD: agentflow
          CLICKHOUSE_LIVE_DATABASE: agentflow
        run: pytest tests/integration/ -v --tb=short

  # Runs the Helm values live-validation test with the helm and kind
  # binaries available on the runner.
  helm-schema-live:
    runs-on: ubuntu-latest
    needs: lint
    timeout-minutes: 8
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install -e ".[dev]"
      - uses: azure/setup-helm@9bc31f4ebc9c6b171d7bfbaa5d006ae7abdb4310  # v5.0.1
      - uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc  # v1.14.0
        with:
          # Install the kind binary only; the action does not create a cluster.
          install_only: true
      - name: Prepare pytest temp directory
        run: mkdir -p .tmp
      - name: Run Helm schema live validation
        run: python -m pytest tests/integration/test_helm_values_live_validation.py -v -m integration --tb=short

  # Performance regression gate; starts only after both test jobs succeed.
  # Runs the benchmark, converts the Markdown results table in
  # docs/benchmark.md into /tmp/current.json, and compares that file with
  # docs/benchmark-baseline.json.
  perf-check:
    runs-on: ubuntu-latest
    needs:
      - test-unit
      - test-integration
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install -e ".[dev,load,cloud]"
      # NOTE(review): presumably this script writes docs/benchmark.md, which
      # the next step reads - confirm against scripts/run_benchmark.py.
      - name: Run benchmark
        run: python scripts/run_benchmark.py
      - name: Convert benchmark report to JSON
        run: |
          python - <<'PY'
          import json
          import re
          from pathlib import Path
          report_path = Path("docs/benchmark.md")
          report = report_path.read_text(encoding="utf-8")
          # Keep only lines that start with a pipe, i.e. Markdown table rows.
          lines = [line.strip() for line in report.splitlines() if line.startswith("|")]
          if len(lines) < 3:
              raise SystemExit("Benchmark results table not found in docs/benchmark.md")
          generated_at_match = re.search(r"Generated: `([^`]+)`", report)
          endpoints = {}
          # Skip the first two table lines (header row and separator row).
          for line in lines[2:]:
              columns = [column.strip() for column in line.strip("|").split("|")]
              # Rows that do not have exactly eight cells are ignored.
              if len(columns) != 8:
                  continue
              endpoint, requests, failures, failure_rate, rps, p50, p95, p99 = columns
              endpoints[endpoint] = {
                  "request_count": int(requests),
                  "failure_count": int(failures),
                  "failure_rate_percent": float(failure_rate.removesuffix("%")),
                  "requests_per_second": float(rps),
                  "p50_latency_ms": float(p50.removesuffix(" ms")),
                  "p95_latency_ms": float(p95.removesuffix(" ms")),
                  "p99_latency_ms": float(p99.removesuffix(" ms")),
              }
          # The "ALL" row is reported separately from the per-endpoint rows.
          aggregate = endpoints.pop("ALL", None)
          if aggregate is None:
              raise SystemExit("Missing ALL aggregate row in benchmark report.")
          current_report = {
              "generated_at": generated_at_match.group(1) if generated_at_match else None,
              "source": str(report_path),
              "aggregate": aggregate,
              "endpoints": endpoints,
          }
          Path("/tmp/current.json").write_text(
              json.dumps(current_report, indent=2) + "\n",
              encoding="utf-8",
          )
          PY
      - name: Compare to baseline
        run: python scripts/check_performance.py docs/benchmark-baseline.json /tmp/current.json

  # Terraform formatting and validation for infrastructure/terraform.
  # Declares no `needs`, so it does not wait for any other job.
  terraform-validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      - uses: hashicorp/setup-terraform@dfe3c3f87815947d99a8997f908cb6525fc44e9e  # v4.0.1
        with:
          terraform_version: "1.8.0"
      - name: Terraform fmt check
        run: terraform fmt -check -recursive infrastructure/terraform/
      # `-backend=false` initializes without configuring the state backend.
      - name: Terraform init
        run: |
          cd infrastructure/terraform
          terraform init -backend=false
      - name: Terraform validate
        run: |
          cd infrastructure/terraform
          terraform validate