Skip to content

Commit d95884f

Browse files
committed
chore: add benchmark + stress test guardrail sections (25-26)
1 parent 50d4edc commit d95884f

1 file changed

Lines changed: 45 additions & 0 deletions

File tree

scripts/check-canonical-sister.sh

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,51 @@ PAPER_I_DIR="$(ls -d paper/paper-i-* 2>/dev/null | head -1)"
350350
ls "$PAPER_I_DIR"/*.tex >/dev/null 2>&1 || fail "Missing .tex file in $PAPER_I_DIR"
351351
assert_file "$PAPER_I_DIR/references.bib"
352352

353+
# ── 25. Benchmark suite for paper quality ──────────────────────────────────
354+
# Every sister with a research paper must have a Criterion benchmark suite
355+
# that produces real measured data. No estimates allowed in papers.
356+
# SPEC-RESEARCH-PAPER-CANONICAL.md v2.0 mandates: benchmarks BEFORE paper.
# Candidate directories are tried in order; only the first one that exists
# and contains at least one *.rs file is validated (see the `break` below).
# NOTE(review): `find_fixed` and `fail` are defined outside this section —
# presumably a fixed-string search helper and an abort-with-message helper;
# confirm against their definitions.
357+
358+
# Stays 0 unless some candidate directory holds at least one .rs file.
BENCH_FOUND=0
359+
for bench_dir in "crates/agentic-${SISTER_KEY}/benches" "crates/${SISTER_KEY}/benches" "benches"; do
360+
if [ -d "$bench_dir" ] && ls "$bench_dir"/*.rs >/dev/null 2>&1; then
361+
BENCH_FOUND=1
# Accept either spelling of the framework name before reporting a failure.
362+
find_fixed "criterion" "$bench_dir" >/dev/null 2>&1 \
363+
|| find_fixed "Criterion" "$bench_dir" >/dev/null 2>&1 \
364+
|| fail "Benchmark files in $bench_dir must use Criterion framework"
365+
# Minimum benchmark count: at least 5 lines matching a benchmark pattern.
# This counts matching lines (references), not distinct benchmark functions.
# `grep -rc` prints one "file:count" line per file; awk sums the last
# colon-separated field. `|| true` keeps a no-match grep from failing the
# command substitution, so the total falls back to 0.
366+
BENCH_COUNT=$({ grep -rcE 'criterion_group!|fn bench_|\.bench_function|\.bench_with_input|BenchmarkId' "$bench_dir" 2>/dev/null || true; } | awk -F: '{sum+=$NF} END {print sum+0}')
367+
[ "$BENCH_COUNT" -ge 5 ] || fail "Benchmark suite needs ≥5 benchmark references (found ${BENCH_COUNT})"
# Stop at the first directory that has .rs files; later candidates are not checked.
368+
break
369+
fi
370+
done
371+
[ "$BENCH_FOUND" -eq 1 ] || fail "Missing benchmark suite (benches/ directory with Criterion benchmarks required for paper data)"
372+
373+
# ── 26. Stress / edge-case test suite ──────────────────────────────────────
374+
# Every sister must have stress tests or edge-case tests that cover boundary
375+
# conditions, heavy loads, and error paths. Without these, the paper's
376+
# "robustness" claims have no backing.
# Candidate directories are tried in order; a directory with no keyword hits
# is passed over and the next candidate is tried.
# NOTE(review): `find_fixed` and `fail` are defined outside this section —
# presumably a fixed-string search helper and an abort-with-message helper;
# confirm against their definitions.
377+
378+
# Stays 0 unless some candidate directory yields at least one keyword hit.
STRESS_FOUND=0
379+
for test_dir in "crates/agentic-${SISTER_KEY}/tests" "crates/${SISTER_KEY}/tests" "tests"; do
380+
if [ -d "$test_dir" ]; then
381+
# Sum the hits for the stress/edge/boundary/heavy keywords in this directory.
# Each hit is one output line from find_fixed (whether that is a matching
# line or a file name depends on the helper — TODO confirm).
382+
STRESS_HITS=0
383+
for keyword in stress edge_ boundary heavy; do
384+
hits=$({ find_fixed "$keyword" "$test_dir" 2>/dev/null || true; } | wc -l | tr -d ' ')
385+
STRESS_HITS=$((STRESS_HITS + hits))
386+
done
387+
if [ "$STRESS_HITS" -gt 0 ]; then
388+
STRESS_FOUND=1
389+
# Count every line containing the literal `#[test]` in the test directory.
# This covers all tests there, not only those in files that matched a
# keyword above. Other test attributes (e.g. `#[tokio::test]`) do not
# match this pattern and are not counted.
# `grep -rc` prints one "file:count" line per file; awk sums the last
# colon-separated field.
390+
STRESS_TEST_COUNT=$({ grep -rcE '#\[test\]' "$test_dir" 2>/dev/null || true; } | awk -F: '{sum+=$NF} END {print sum+0}')
391+
[ "$STRESS_TEST_COUNT" -ge 10 ] || fail "Stress test suite needs ≥10 test functions (found ${STRESS_TEST_COUNT})"
# Stop at the first directory that has keyword hits; later candidates are not checked.
392+
break
393+
fi
394+
fi
395+
done
396+
[ "$STRESS_FOUND" -eq 1 ] || fail "Missing stress/edge-case test suite (tests/ directory with stress or edge-case tests required)"
397+
353398
# ── Done ────────────────────────────────────────────────────────────────────
# Final success message.
# NOTE(review): presumably reached only when no earlier check called `fail`;
# confirm that `fail` terminates the script.
354399

355400
echo "Canonical sister guardrails passed."

0 commit comments

Comments
 (0)