diff --git a/bin/way-match b/bin/way-match index 970d7b9..bcd02c1 100755 Binary files a/bin/way-match and b/bin/way-match differ diff --git a/skills/sync-upstream/SKILL.md b/skills/sync-upstream/SKILL.md new file mode 100644 index 0000000..4f78ee8 --- /dev/null +++ b/skills/sync-upstream/SKILL.md @@ -0,0 +1,108 @@ +--- +name: sync-upstream +description: Sync fork with upstream (aaronsb/claude-code-config). Fetches upstream/main, shows what's new, and merges into your current branch or main. Handles conflicts by preserving local fixes. Use when the user says "sync upstream", "pull upstream", "update from upstream", or invokes /sync-upstream. +allowed-tools: Bash, Read, Grep, Glob +--- + +# Sync Upstream + +Integrate changes from the upstream repo (aaronsb/claude-code-config) into this fork. + +## Assess First + +Run these in parallel to understand current state: + +```bash +git remote -v # Verify upstream is configured +git branch --show-current # What branch are we on? +git status --short # Any uncommitted work? +git log --oneline upstream/main..main # Local-only commits (our fixes) +``` + +### If upstream remote is missing + +```bash +git remote add upstream https://github.com/aaronsb/claude-code-config.git +``` + +## Flow + +### 1. Stash uncommitted work (if any) + +```bash +git stash push -m "sync-upstream: stash before merge" +``` + +### 2. Fetch upstream + +```bash +git fetch upstream +``` + +### 3. Show what's incoming + +```bash +git log --oneline main..upstream/main +``` + +If nothing new, report "already up to date" and stop. + +### 4. Show divergence + +```bash +git log --oneline upstream/main..main # Our local-only commits +git log --oneline main..upstream/main # Incoming from upstream +``` + +Report both sides so the user sees the full picture. + +### 5. 
Merge upstream into main + +If on a feature branch, switch to main first: + +```bash +git checkout main +git merge upstream/main +``` + +If conflicts arise: + +- **bin/way-match**: Keep ours (`git checkout --ours bin/way-match`). Upstream ships a Linux ELF; we need the arm64 macOS binary. After resolving, rebuild with `make -f tools/way-match/Makefile local` to ensure we have the latest source compiled natively. +- **tools/way-match/test-harness.sh** or **test-integration.sh**: Inspect carefully. If upstream added new test cases, incorporate them into our bash 3.2 compatible version. Don't accept upstream's `declare -A` or `mapfile` patterns. +- **Other files**: Accept upstream's version unless we have intentional local changes. + +After resolving all conflicts: + +```bash +git add <resolved-files> +git commit # Accept or adjust the merge commit message +``` + +### 6. Push to origin + +```bash +git push origin main +``` + +### 7. Rebase feature branch (if we were on one) + +If the user was on a feature branch before sync: + +```bash +git checkout <feature-branch> +git rebase main +``` + +### 8. 
Restore stashed work (if any) + +```bash +git stash pop +``` + +## Key Principles + +- **Show the diff summary before merging** — let the user see what's incoming +- **Preserve local platform fixes** — our arm64 binary and bash 3.2 compat are intentional divergences +- **If upstream changed way-match.c source**, rebuild locally after merge: `make -f tools/way-match/Makefile local` +- **Don't force-push main** — always fast-forward or merge +- **Report what happened** — summarize commits integrated, conflicts resolved, and current state diff --git a/tools/way-match/test-harness.sh b/tools/way-match/test-harness.sh index ec2ebcc..01ac0f9 100755 --- a/tools/way-match/test-harness.sh +++ b/tools/way-match/test-harness.sh @@ -6,6 +6,8 @@ # - Per-test pass/fail # - Match matrix (TP, FP, TN, FN per scorer) # - Head-to-head comparison (BM25 wins, NCD wins, ties) +# +# Compatible with bash 3.2+ (macOS default) set -euo pipefail @@ -14,89 +16,131 @@ FIXTURES="$SCRIPT_DIR/test-fixtures.jsonl" NCD_SCRIPT="$SCRIPT_DIR/../../hooks/ways/semantic-match.sh" BM25_BINARY="$SCRIPT_DIR/../../bin/way-match" -# Way corpus: id|description|vocabulary|threshold -declare -A WAY_DESC WAY_VOCAB WAY_THRESH -WAY_DESC[softwaredev-code-testing]="writing unit tests, test coverage, mocking dependencies, test-driven development" -WAY_VOCAB[softwaredev-code-testing]="unittest coverage mock tdd assertion jest pytest rspec testcase spec fixture describe expect verify" -WAY_THRESH[softwaredev-code-testing]="2.0" - -WAY_DESC[softwaredev-docs-api]="designing REST APIs, HTTP endpoints, API versioning, request response structure" -WAY_VOCAB[softwaredev-docs-api]="endpoint api rest route http status pagination versioning graphql request response header payload crud webhook" -WAY_THRESH[softwaredev-docs-api]="2.0" - -WAY_DESC[softwaredev-environment-debugging]="debugging, troubleshooting failures, investigating broken behavior" -WAY_VOCAB[softwaredev-environment-debugging]="debug breakpoint stacktrace 
investigate troubleshoot regression bisect crash crashes crashing error fail bug log trace exception segfault hang timeout step broken" -WAY_THRESH[softwaredev-environment-debugging]="2.0" - -WAY_DESC[softwaredev-code-security]="security, authentication, secrets management, input validation" -WAY_VOCAB[softwaredev-code-security]="authentication secrets password credentials owasp injection xss sql sanitize vulnerability bcrypt hash encrypt token cert ssl tls csrf cors rotate login expose exposed harden" -WAY_THRESH[softwaredev-code-security]="2.0" - -WAY_DESC[softwaredev-architecture-design]="software system design, architecture patterns, database schema, component modeling, proposals, RFCs, design deliberation" -WAY_VOCAB[softwaredev-architecture-design]="architecture pattern database schema modeling interface component modules factory observer strategy monolith microservice microservices domain layer coupling cohesion abstraction singleton proposal rfc sketch deliberation whiteboard" -WAY_THRESH[softwaredev-architecture-design]="2.0" - -WAY_DESC[softwaredev-environment-config]="configuration, environment variables, dotenv files, connection settings" -WAY_VOCAB[softwaredev-environment-config]="dotenv environment configuration envvar config.json config.yaml connection port host url setting variable string" -WAY_THRESH[softwaredev-environment-config]="2.0" - -WAY_DESC[softwaredev-architecture-adr-context]="planning how to implement a feature, deciding an approach, understanding existing project decisions, starting work on an item, investigating why something was built a certain way" -WAY_VOCAB[softwaredev-architecture-adr-context]="plan approach debate implement build work pick understand investigate why how decision context tradeoff evaluate option consider scope" -WAY_THRESH[softwaredev-architecture-adr-context]="2.0" - -WAY_DESC[softwaredev-delivery-commits]="git commit messages, branch naming, conventional commits, atomic changes" 
-WAY_VOCAB[softwaredev-delivery-commits]="commit message branch conventional feat fix refactor scope atomic squash amend stash rebase cherry" -WAY_THRESH[softwaredev-delivery-commits]="2.0" - -WAY_DESC[softwaredev-delivery-github]="GitHub pull requests, issues, code review, CI checks, repository management" -WAY_VOCAB[softwaredev-delivery-github]="pr pullrequest issue review checks ci label milestone fork repository upstream draft" -WAY_THRESH[softwaredev-delivery-github]="2.0" - -WAY_DESC[softwaredev-delivery-patches]="creating and applying patch files, git diff generation, patch series management" -WAY_VOCAB[softwaredev-delivery-patches]="patch diff apply hunk unified series format-patch" -WAY_THRESH[softwaredev-delivery-patches]="2.0" - -WAY_DESC[softwaredev-delivery-release]="software releases, changelog generation, version bumping, semantic versioning, tagging" -WAY_VOCAB[softwaredev-delivery-release]="release changelog version bump semver tag publish ship major minor breaking" -WAY_THRESH[softwaredev-delivery-release]="2.0" - -WAY_DESC[softwaredev-delivery-migrations]="database migrations, schema changes, table alterations, rollback procedures" -WAY_VOCAB[softwaredev-delivery-migrations]="migration schema alter table column index rollback seed ddl prisma alembic knex flyway" -WAY_THRESH[softwaredev-delivery-migrations]="2.0" - -WAY_DESC[softwaredev-code-errors]="error handling patterns, exception management, try-catch boundaries, error wrapping and propagation" -WAY_VOCAB[softwaredev-code-errors]="exception handling catch throw boundary wrap rethrow fallback graceful recovery propagate unhandled" -WAY_THRESH[softwaredev-code-errors]="2.0" - -WAY_DESC[softwaredev-code-quality]="code quality, refactoring, SOLID principles, code review standards, technical debt, maintainability" -WAY_VOCAB[softwaredev-code-quality]="refactor quality solid principle decompose extract method responsibility coupling cohesion maintainability readability" 
-WAY_THRESH[softwaredev-code-quality]="2.0" - -WAY_DESC[softwaredev-code-performance]="performance optimization, profiling, benchmarking, latency" -WAY_VOCAB[softwaredev-code-performance]="optimize profile benchmark latency throughput memory cache bottleneck flamegraph allocation heap speed slow" -WAY_THRESH[softwaredev-code-performance]="2.0" - -WAY_DESC[softwaredev-environment-deps]="dependency management, package installation, library evaluation, security auditing of third-party code" -WAY_VOCAB[softwaredev-environment-deps]="dependency package library install upgrade outdated audit vulnerability license bundle npm pip cargo" -WAY_THRESH[softwaredev-environment-deps]="2.0" - -WAY_DESC[softwaredev-environment-ssh]="SSH remote access, key management, secure file transfer, non-interactive authentication" -WAY_VOCAB[softwaredev-environment-ssh]="ssh remote key agent scp rsync bastion jumphost tunnel forwarding batchmode noninteractive" -WAY_THRESH[softwaredev-environment-ssh]="2.0" - -WAY_DESC[softwaredev-docs]="README authoring, docstrings, technical prose, Mermaid diagrams, project guides" -WAY_VOCAB[softwaredev-docs]="readme docstring technical writing mermaid diagram flowchart sequence onboarding" -WAY_THRESH[softwaredev-docs]="2.0" - -WAY_DESC[softwaredev-architecture-threat-modeling]="threat modeling, STRIDE analysis, trust boundaries, attack surface assessment, security design review" -WAY_VOCAB[softwaredev-architecture-threat-modeling]="threat model stride attack surface trust boundary mitigation adversary dread spoofing tampering repudiation elevation" -WAY_THRESH[softwaredev-architecture-threat-modeling]="2.0" +# Way corpus: parallel arrays (bash 3.2 compatible — no associative arrays) +WAY_IDS=() +WAY_DESCS=() +WAY_VOCABS=() +WAY_THRESHS=() + +add_way() { + WAY_IDS+=("$1") + WAY_DESCS+=("$2") + WAY_VOCABS+=("$3") + WAY_THRESHS+=("$4") +} -WAY_DESC[softwaredev-docs-standards]="establishing team norms, coding conventions, testing philosophy, dependency 
policy, accessibility requirements" -WAY_VOCAB[softwaredev-docs-standards]="convention norm guideline accessibility style guide linting rule agreement philosophy" -WAY_THRESH[softwaredev-docs-standards]="2.0" +# Lookup index by way ID; returns via global __idx (-1 if not found) +__idx=-1 +way_index() { + local target="$1" + local i + for (( i=0; i<${#WAY_IDS[@]}; i++ )); do + if [[ "${WAY_IDS[$i]}" == "$target" ]]; then + __idx=$i; return 0 + fi + done + __idx=-1; return 1 +} -WAY_IDS=(softwaredev-code-testing softwaredev-docs-api softwaredev-environment-debugging softwaredev-code-security softwaredev-architecture-design softwaredev-environment-config softwaredev-architecture-adr-context softwaredev-delivery-commits softwaredev-delivery-github softwaredev-delivery-patches softwaredev-delivery-release softwaredev-delivery-migrations softwaredev-code-errors softwaredev-code-quality softwaredev-code-performance softwaredev-environment-deps softwaredev-environment-ssh softwaredev-docs softwaredev-architecture-threat-modeling softwaredev-docs-standards) +add_way "softwaredev-code-testing" \ + "writing unit tests, test coverage, mocking dependencies, test-driven development" \ + "unittest coverage mock tdd assertion jest pytest rspec testcase spec fixture describe expect verify" \ + "2.0" + +add_way "softwaredev-docs-api" \ + "designing REST APIs, HTTP endpoints, API versioning, request response structure" \ + "endpoint api rest route http status pagination versioning graphql request response header payload crud webhook" \ + "2.0" + +add_way "softwaredev-environment-debugging" \ + "debugging, troubleshooting failures, investigating broken behavior" \ + "debug breakpoint stacktrace investigate troubleshoot regression bisect crash crashes crashing error fail bug log trace exception segfault hang timeout step broken" \ + "2.0" + +add_way "softwaredev-code-security" \ + "security, authentication, secrets management, input validation" \ + "authentication secrets password 
credentials owasp injection xss sql sanitize vulnerability bcrypt hash encrypt token cert ssl tls csrf cors rotate login expose exposed harden" \ + "2.0" + +add_way "softwaredev-architecture-design" \ + "software system design, architecture patterns, database schema, component modeling, proposals, RFCs, design deliberation" \ + "architecture pattern database schema modeling interface component modules factory observer strategy monolith microservice microservices domain layer coupling cohesion abstraction singleton proposal rfc sketch deliberation whiteboard" \ + "2.0" + +add_way "softwaredev-environment-config" \ + "configuration, environment variables, dotenv files, connection settings" \ + "dotenv environment configuration envvar config.json config.yaml connection port host url setting variable string" \ + "2.0" + +add_way "softwaredev-architecture-adr-context" \ + "planning how to implement a feature, deciding an approach, understanding existing project decisions, starting work on an item, investigating why something was built a certain way" \ + "plan approach debate implement build work pick understand investigate why how decision context tradeoff evaluate option consider scope" \ + "2.0" + +add_way "softwaredev-delivery-commits" \ + "git commit messages, branch naming, conventional commits, atomic changes" \ + "commit message branch conventional feat fix refactor scope atomic squash amend stash rebase cherry" \ + "2.0" + +add_way "softwaredev-delivery-github" \ + "GitHub pull requests, issues, code review, CI checks, repository management" \ + "pr pullrequest issue review checks ci label milestone fork repository upstream draft" \ + "2.0" + +add_way "softwaredev-delivery-patches" \ + "creating and applying patch files, git diff generation, patch series management" \ + "patch diff apply hunk unified series format-patch" \ + "2.0" + +add_way "softwaredev-delivery-release" \ + "software releases, changelog generation, version bumping, semantic versioning, 
tagging" \ + "release changelog version bump semver tag publish ship major minor breaking" \ + "2.0" + +add_way "softwaredev-delivery-migrations" \ + "database migrations, schema changes, table alterations, rollback procedures" \ + "migration schema alter table column index rollback seed ddl prisma alembic knex flyway" \ + "2.0" + +add_way "softwaredev-code-errors" \ + "error handling patterns, exception management, try-catch boundaries, error wrapping and propagation" \ + "exception handling catch throw boundary wrap rethrow fallback graceful recovery propagate unhandled" \ + "2.0" + +add_way "softwaredev-code-quality" \ + "code quality, refactoring, SOLID principles, code review standards, technical debt, maintainability" \ + "refactor quality solid principle decompose extract method responsibility coupling cohesion maintainability readability" \ + "2.0" + +add_way "softwaredev-code-performance" \ + "performance optimization, profiling, benchmarking, latency" \ + "optimize profile benchmark latency throughput memory cache bottleneck flamegraph allocation heap speed slow" \ + "2.0" + +add_way "softwaredev-environment-deps" \ + "dependency management, package installation, library evaluation, security auditing of third-party code" \ + "dependency package library install upgrade outdated audit vulnerability license bundle npm pip cargo" \ + "2.0" + +add_way "softwaredev-environment-ssh" \ + "SSH remote access, key management, secure file transfer, non-interactive authentication" \ + "ssh remote key agent scp rsync bastion jumphost tunnel forwarding batchmode noninteractive" \ + "2.0" + +add_way "softwaredev-docs" \ + "README authoring, docstrings, technical prose, Mermaid diagrams, project guides" \ + "readme docstring technical writing mermaid diagram flowchart sequence onboarding" \ + "2.0" + +add_way "softwaredev-architecture-threat-modeling" \ + "threat modeling, STRIDE analysis, trust boundaries, attack surface assessment, security design review" \ + "threat 
model stride attack surface trust boundary mitigation adversary dread spoofing tampering repudiation elevation" \ + "2.0" + +add_way "softwaredev-docs-standards" \ + "establishing team norms, coding conventions, testing philosophy, dependency policy, accessibility requirements" \ + "convention norm guideline accessibility style guide linting rule agreement philosophy" \ + "2.0" # --- Options --- RUN_NCD=true @@ -131,10 +175,9 @@ total=0 # --- NCD scorer --- ncd_matches_way() { local prompt="$1" way_id="$2" - local desc="${WAY_DESC[$way_id]}" - local vocab="${WAY_VOCAB[$way_id]}" - # NCD uses distance metric (0-1), not BM25 score threshold - # Must match check-prompt.sh hardcoded value (0.58) + way_index "$way_id" || return 1 + local desc="${WAY_DESCS[$__idx]}" + local vocab="${WAY_VOCABS[$__idx]}" local ncd_thresh="0.58" if bash "$NCD_SCRIPT" "$prompt" "$desc" "$vocab" "$ncd_thresh" 2>/dev/null; then @@ -147,9 +190,10 @@ ncd_matches_way() { # --- BM25 scorer --- bm25_matches_way() { local prompt="$1" way_id="$2" - local desc="${WAY_DESC[$way_id]}" - local vocab="${WAY_VOCAB[$way_id]}" - local thresh="${WAY_THRESH[$way_id]}" + way_index "$way_id" || return 1 + local desc="${WAY_DESCS[$__idx]}" + local vocab="${WAY_VOCABS[$__idx]}" + local thresh="${WAY_THRESHS[$__idx]}" if "$BM25_BINARY" pair \ --description "$desc" \ @@ -162,52 +206,6 @@ bm25_matches_way() { fi } -# --- Score a prompt against all ways, return best match --- -# For BM25: scores all ways, returns highest-scoring match. -# For NCD: binary scorer (no score output), returns first match. 
-find_best_match() { - local scorer="$1" prompt="$2" - - if [[ "$scorer" == "bm25" ]]; then - local best_way="none" best_score="0" - for way_id in "${WAY_IDS[@]}"; do - local stderr_out - stderr_out=$("$BM25_BINARY" pair \ - --description "${WAY_DESC[$way_id]}" \ - --vocabulary "${WAY_VOCAB[$way_id]}" \ - --query "$prompt" \ - --threshold "0" 2>&1 >/dev/null) - local score - score=$(echo "$stderr_out" | sed -n 's/match: score=\([0-9.]*\).*/\1/p') - if [[ -n "$score" ]] && command -v bc >/dev/null 2>&1; then - if (( $(echo "$score > $best_score" | bc -l) )); then - best_score="$score" - best_way="$way_id" - fi - fi - done - # Verify best actually meets its threshold - if [[ "$best_way" != "none" ]]; then - local thresh="${WAY_THRESH[$best_way]}" - if command -v bc >/dev/null 2>&1 && (( $(echo "$best_score < $thresh" | bc -l) )); then - best_way="none" - fi - fi - echo "$best_way" - return 0 - fi - - # NCD fallback: binary match, return first - for way_id in "${WAY_IDS[@]}"; do - if "${scorer}_matches_way" "$prompt" "$way_id"; then - echo "$way_id" - return 0 - fi - done - echo "none" - return 0 -} - # --- Colors --- RED='\033[0;31m' GREEN='\033[0;32m' @@ -226,7 +224,7 @@ while IFS= read -r line; do category=$(echo "$line" | jq -r '.category') note=$(echo "$line" | jq -r '.note // ""') - # Parse expected: null → negative, string → single, array → co-activation + # Parse expected: null -> negative, string -> single, array -> co-activation expected_type=$(echo "$line" | jq -r '.expected | type') expected_list=() is_negative=false @@ -235,8 +233,12 @@ while IFS= read -r line; do case "$expected_type" in null) is_negative=true ;; string) expected_list=("$(echo "$line" | jq -r '.expected')") ;; - array) mapfile -t expected_list < <(echo "$line" | jq -r '.expected[]') - [[ ${#expected_list[@]} -gt 1 ]] && is_coact=true ;; + array) + while IFS= read -r item; do + expected_list+=("$item") + done < <(echo "$line" | jq -r '.expected[]') + [[ ${#expected_list[@]} -gt 1 ]] && 
is_coact=true + ;; esac total=$((total + 1)) @@ -246,8 +248,6 @@ while IFS= read -r line; do bm25_result="skip" # --- Scorer evaluation function --- - # Usage: eval_scorer - # Sets: ${scorer}_result variable eval_scorer() { local scorer="$1" prompt="$2" shift 2 @@ -255,12 +255,12 @@ while IFS= read -r line; do local result="" if $is_negative; then - # Negative test: check no way matches local any_match=false - for way_id in "${WAY_IDS[@]}"; do - if "${scorer}_matches_way" "$prompt" "$way_id"; then + local i + for (( i=0; i<${#WAY_IDS[@]}; i++ )); do + if "${scorer}_matches_way" "$prompt" "${WAY_IDS[$i]}"; then any_match=true - result="FP:$way_id" + result="FP:${WAY_IDS[$i]}" break fi done @@ -268,9 +268,9 @@ while IFS= read -r line; do result="TN" fi elif $is_coact; then - # Co-activation: check ALL expected ways match local matched=0 local missed="" + local exp for exp in "${exp_list[@]}"; do if "${scorer}_matches_way" "$prompt" "$exp"; then matched=$((matched + 1)) @@ -286,7 +286,6 @@ while IFS= read -r line; do result="MISS" fi else - # Single-expected: check the one expected way matches if "${scorer}_matches_way" "$prompt" "${exp_list[0]}"; then result="TP" else diff --git a/tools/way-match/test-integration.sh b/tools/way-match/test-integration.sh index 23f2f85..6c9e1f1 100755 --- a/tools/way-match/test-integration.sh +++ b/tools/way-match/test-integration.sh @@ -2,7 +2,9 @@ # Integration test: run way-match against actual way.md files # Reads frontmatter from real semantic ways and scores test prompts # -# This tests the real pipeline: way files → frontmatter extraction → BM25 scoring +# This tests the real pipeline: way files -> frontmatter extraction -> BM25 scoring +# +# Compatible with bash 3.2+ (macOS default) set -euo pipefail @@ -24,8 +26,25 @@ CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m' -# --- Extract frontmatter from actual way files --- -declare -A WAY_DESC WAY_VOCAB WAY_THRESH WAY_PATH +# --- Parallel arrays for way data (bash 3.2 compatible) --- 
+WAY_IDS=() +WAY_DESCS=() +WAY_VOCABS=() +WAY_THRESHS=() +WAY_PATHS=() + +# Lookup index by way ID; returns via global __idx (-1 if not found) +__idx=-1 +way_index() { + local target="$1" + local i + for (( i=0; i<${#WAY_IDS[@]}; i++ )); do + if [[ "${WAY_IDS[$i]}" == "$target" ]]; then + __idx=$i; return 0 + fi + done + __idx=-1; return 1 +} echo -e "${BOLD}=== Integration Test: Real Way Files ===${NC}" echo "" @@ -46,23 +65,24 @@ while IFS= read -r wayfile; do # Skip ways without semantic matching fields [[ -z "$desc" || -z "$vocab" ]] && continue - WAY_DESC[$way_id]="$desc" - WAY_VOCAB[$way_id]="$vocab" - WAY_THRESH[$way_id]="${thresh:-2.0}" - WAY_PATH[$way_id]="$wayfile" + WAY_IDS+=("$way_id") + WAY_DESCS+=("$desc") + WAY_VOCABS+=("$vocab") + WAY_THRESHS+=("${thresh:-2.0}") + WAY_PATHS+=("$wayfile") printf " %-30s thresh=%-5s %s\n" "$way_id" "${thresh:-2.0}" "$(echo "$desc" | cut -c1-60)" done < <(find "$WAYS_DIR" -name "way.md" -type f | sort) echo "" -echo "Found ${#WAY_DESC[@]} semantic ways" +echo "Found ${#WAY_IDS[@]} semantic ways" echo "" # --- Test prompts with expected matches --- # Format: "expected_way_id|prompt" # Use "NONE" for prompts that shouldn't match anything TEST_CASES=( - # Direct matches — vocabulary terms present + # Direct matches -- vocabulary terms present "softwaredev-code-testing|write some unit tests for this module" "softwaredev-code-testing|run pytest with coverage" "softwaredev-code-testing|mock the database connection in tests" @@ -84,7 +104,7 @@ TEST_CASES=( "softwaredev-architecture-adr-context|plan how to build the notification system" "softwaredev-architecture-adr-context|why was this feature designed this way" "softwaredev-architecture-adr-context|pick up work on the auth implementation" - # Negative cases — should not trigger any semantic way + # Negative cases -- should not trigger any semantic way "NONE|what is the capital of France" "NONE|tell me about photosynthesis" "NONE|how tall is Mount Everest" @@ -96,7 +116,7 @@ 
TEST_CASES=( "softwaredev-code-security|are our API keys exposed anywhere" "softwaredev-architecture-design|should we use a monolith or microservices architecture" "softwaredev-environment-config|the database connection string needs updating" - # Co-activation cases — comma-separated expected ways + # Co-activation cases -- comma-separated expected ways "softwaredev-environment-debugging,softwaredev-code-errors|debug the unhandled exception and add proper error handling" "softwaredev-environment-deps,softwaredev-code-security|audit our dependencies for security vulnerabilities" "softwaredev-architecture-design,softwaredev-delivery-migrations|design the database schema for the new microservice" @@ -118,16 +138,16 @@ for test_case in "${TEST_CASES[@]}"; do # Score against all ways with BM25 bm25_matches=() bm25_scores="" - for way_id in "${!WAY_DESC[@]}"; do + for (( i=0; i<${#WAY_IDS[@]}; i++ )); do + way_id="${WAY_IDS[$i]}" score=$("$BM25_BINARY" pair \ - --description "${WAY_DESC[$way_id]}" \ - --vocabulary "${WAY_VOCAB[$way_id]}" \ + --description "${WAY_DESCS[$i]}" \ + --vocabulary "${WAY_VOCABS[$i]}" \ --query "$prompt" \ - --threshold 0.0 2>&1 | grep -oP 'score=\K[0-9.]+') - if (( $(echo "$score > 0" | bc -l 2>/dev/null || echo 0) )); then + --threshold 0.0 2>&1 | sed -n 's/.*score=\([0-9.]*\).*/\1/p') + if [[ -n "$score" ]] && (( $(echo "$score > 0" | bc -l 2>/dev/null || echo 0) )); then bm25_scores="$bm25_scores $way_id=$score" - # Check against per-way threshold from way.md - thresh="${WAY_THRESH[$way_id]}" + thresh="${WAY_THRESHS[$i]}" if (( $(echo "$score >= $thresh" | bc -l 2>/dev/null || echo 0) )); then bm25_matches+=("$way_id") fi @@ -136,8 +156,9 @@ for test_case in "${TEST_CASES[@]}"; do # Score against all ways with NCD (uses fixed NCD threshold, not BM25 threshold) ncd_matches=() - for way_id in "${!WAY_DESC[@]}"; do - if bash "$NCD_SCRIPT" "$prompt" "${WAY_DESC[$way_id]}" "${WAY_VOCAB[$way_id]}" "0.55" 2>/dev/null; then + for (( i=0; 
i<${#WAY_IDS[@]}; i++ )); do + way_id="${WAY_IDS[$i]}" + if bash "$NCD_SCRIPT" "$prompt" "${WAY_DESCS[$i]}" "${WAY_VOCABS[$i]}" "0.55" 2>/dev/null; then ncd_matches+=("$way_id") fi done @@ -159,7 +180,7 @@ for test_case in "${TEST_CASES[@]}"; do all_found=true for exp in "${expected_list[@]}"; do found=false - for m in "${bm25_matches[@]}"; do + for m in "${bm25_matches[@]+"${bm25_matches[@]}"}"; do [[ "$m" == "$exp" ]] && found=true && break done [[ "$found" == false ]] && all_found=false @@ -183,7 +204,7 @@ for test_case in "${TEST_CASES[@]}"; do all_found=true for exp in "${expected_list[@]}"; do found=false - for m in "${ncd_matches[@]}"; do + for m in "${ncd_matches[@]+"${ncd_matches[@]}"}"; do [[ "$m" == "$exp" ]] && found=true && break done [[ "$found" == false ]] && all_found=false