@@ -350,6 +350,51 @@ PAPER_I_DIR="$(ls -d paper/paper-i-* 2>/dev/null | head -1)"
# The paper directory must contain at least one .tex source and a bibliography.
# `ls` is used only for its exit status (does the glob match anything?); its
# output is discarded rather than parsed.
# NOTE(review): PAPER_I_DIR is assigned earlier in the file; if it can be
# empty, the glob below would probe "/*.tex" — confirm it is validated upstream.
ls "$PAPER_I_DIR"/*.tex >/dev/null 2>&1 || fail "Missing .tex file in $PAPER_I_DIR"
assert_file "$PAPER_I_DIR/references.bib"
352352
# ── 25. Benchmark suite for paper quality ──────────────────────────────────
# Every sister with a research paper must have a Criterion benchmark suite
# that produces real measured data. No estimates allowed in papers.
# SPEC-RESEARCH-PAPER-CANONICAL.md v2.0 mandates: benchmarks BEFORE paper.
#
# Candidate directories are probed in order; the first one that exists and
# holds at least one *.rs file is validated, then the search stops.
# Writes: BENCH_FOUND (0/1), BENCH_COUNT (summed matching-line count).
# Relies on `fail` and `find_fixed`, which are defined elsewhere in this file.

BENCH_FOUND=0
for bench_dir in "crates/agentic-${SISTER_KEY}/benches" "crates/${SISTER_KEY}/benches" "benches"; do
  # `ls` is used only for its exit status (does the glob match anything?).
  if [ -d "$bench_dir" ] && ls "$bench_dir"/*.rs >/dev/null 2>&1; then
    BENCH_FOUND=1
    # Accept either capitalisation of the framework name.
    # NOTE(review): presumably find_fixed is a fixed-string search that exits
    # non-zero when nothing matches — confirm against its definition.
    find_fixed "criterion" "$bench_dir" >/dev/null 2>&1 \
      || find_fixed "Criterion" "$bench_dir" >/dev/null 2>&1 \
      || fail "Benchmark files in $bench_dir must use Criterion framework"
    # Minimum benchmark count: at least 5 benchmark references.
    # grep -rc emits one "path:count" line per file (count = matching lines,
    # not occurrences); awk sums the last ':'-separated field. `|| true`
    # swallows grep's non-zero status when no line matches, and `sum+0`
    # guarantees a numeric result even with no input.
    BENCH_COUNT=$({ grep -rcE 'criterion_group!|fn bench_|\.bench_function|\.bench_with_input|BenchmarkId' "$bench_dir" 2>/dev/null || true; } | awk -F: '{sum+=$NF} END {print sum+0}')
    [ "$BENCH_COUNT" -ge 5 ] || fail "Benchmark suite needs ≥5 benchmark references (found ${BENCH_COUNT})"
    break
  fi
done
[ "$BENCH_FOUND" -eq 1 ] || fail "Missing benchmark suite (benches/ directory with Criterion benchmarks required for paper data)"
372+
# ── 26. Stress / edge-case test suite ──────────────────────────────────────
# Every sister must have stress tests or edge-case tests that cover boundary
# conditions, heavy loads, and error paths. Without these, the paper's
# "robustness" claims have no backing.
#
# Candidate test directories are probed in order; the first one that contains
# any stress-related keyword is validated, then the search stops. A directory
# that exists but has no keyword hits is skipped in favour of the next one.
# Writes: STRESS_FOUND (0/1), STRESS_HITS, hits, STRESS_TEST_COUNT.
# Relies on `fail` and `find_fixed`, which are defined elsewhere in this file.

STRESS_FOUND=0
for test_dir in "crates/agentic-${SISTER_KEY}/tests" "crates/${SISTER_KEY}/tests" "tests"; do
  if [ -d "$test_dir" ]; then
    # Look for files containing stress/edge/boundary test patterns.
    # NOTE(review): the hit count is the number of output lines find_fixed
    # prints per keyword — confirm its output format against its definition.
    STRESS_HITS=0
    for keyword in stress edge_ boundary heavy; do
      # `|| true` swallows a non-zero status from find_fixed (e.g. no match);
      # tr strips the padding some wc implementations emit around the number.
      hits=$({ find_fixed "$keyword" "$test_dir" 2>/dev/null || true; } | wc -l | tr -d ' ')
      STRESS_HITS=$((STRESS_HITS + hits))
    done
    if [ "$STRESS_HITS" -gt 0 ]; then
      STRESS_FOUND=1
      # Count actual test functions in the test directory: every line carrying
      # a literal #[test] attribute, across all files (not only the files that
      # matched a keyword). grep -rc emits "path:count" per file; awk sums them.
      # NOTE(review): attributes such as #[tokio::test] are not matched by this
      # pattern — confirm whether async tests should count toward the minimum.
      STRESS_TEST_COUNT=$({ grep -rcE '#\[test\]' "$test_dir" 2>/dev/null || true; } | awk -F: '{sum+=$NF} END {print sum+0}')
      [ "$STRESS_TEST_COUNT" -ge 10 ] || fail "Stress test suite needs ≥10 test functions (found ${STRESS_TEST_COUNT})"
      break
    fi
  fi
done
[ "$STRESS_FOUND" -eq 1 ] || fail "Missing stress/edge-case test suite (tests/ directory with stress or edge-case tests required)"
397+
# ── Done ────────────────────────────────────────────────────────────────────
# Reached only when execution falls through every check above.
# NOTE(review): this assumes `fail` terminates the script — confirm against
# its definition.

echo "Canonical sister guardrails passed."
0 commit comments