Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 72 additions & 2 deletions internal/contractlint/structural_checks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
// userSkills lists the published user-skill surface: each listed skill owns a
// SKILL.md the host discovers. The test-only `integration` package is
// deliberately left out of the list.
var userSkills = []string{
"commission", "debrief", "refit", "ensign",
"commission", "debrief", "refit", "survey", "ensign",
"first-officer", "using-claude-team", "present-gate", "feedback-rejection-flow",
}

Expand Down Expand Up @@ -82,6 +82,75 @@ func frontmatterHasKey(fm, key string) bool {
return false
}

// frontmatterField looks key up in a flat frontmatter block and returns the
// scalar that follows `key:` on the first matching line, with surrounding
// whitespace removed. Each line is trimmed before it is compared, so leading
// indentation does not prevent a match. The result is "" when no line carries
// the key (or when the key is present with an empty value).
func frontmatterField(fm, key string) string {
	marker := key + ":"
	rows := strings.Split(fm, "\n")
	for i := range rows {
		row := strings.TrimSpace(rows[i])
		if !strings.HasPrefix(row, marker) {
			continue
		}
		// Drop the `key:` marker and any padding around the value.
		return strings.TrimSpace(row[len(marker):])
	}
	return ""
}

// discoverUserInvocableSkills mirrors host discovery over the shipped skills
// tree: every subdirectory with a SKILL.md whose frontmatter declares
// `user-invocable: true` is exposed as `/spacedock:<name>`.
//
// It walks the immediate entries of skillsRoot, ignoring non-directories and
// the `integration` directory. The returned map is keyed by the frontmatter
// `name` and holds the directory the skill was read from. Directories without a
// readable SKILL.md, without frontmatter, or without `user-invocable: true` are
// passed over silently; a user-invocable skill that has no `name` is reported
// through t.Errorf and left out of the result.
func discoverUserInvocableSkills(t *testing.T) map[string]string {
	t.Helper()
	root := skillsRoot(t)
	dirs, err := os.ReadDir(root)
	if err != nil {
		t.Fatalf("read skills root %s: %v", root, err)
	}
	found := map[string]string{}
	for _, d := range dirs {
		dirName := d.Name()
		if dirName == "integration" || !d.IsDir() {
			continue
		}
		raw, readErr := os.ReadFile(filepath.Join(root, dirName, "SKILL.md"))
		if readErr != nil {
			// No readable SKILL.md here, so this directory is not a skill.
			continue
		}
		fm, hasFrontmatter := frontmatter(string(raw))
		if !hasFrontmatter {
			continue
		}
		if frontmatterField(fm, "user-invocable") != "true" {
			continue
		}
		skillName := frontmatterField(fm, "name")
		if skillName == "" {
			t.Errorf("user-invocable skill dir %q has no name field", dirName)
			continue
		}
		found[skillName] = dirName
	}
	return found
}

// TestSurveyIsDiscoverableUserCommand is a structural frontmatter/discovery
// check kept inside the instruction-read quarantine. It only guards that the
// host can discover the `/spacedock:survey` command from the shipped skill tree,
// and that the command comes from the `survey` directory; the behavior proof
// for survey's scan lives in skills/integration.
func TestSurveyIsDiscoverableUserCommand(t *testing.T) {
	commands := discoverUserInvocableSkills(t)
	sourceDir, found := commands["survey"]
	switch {
	case !found:
		t.Fatalf("survey is not discoverable as /spacedock:survey; discovered user commands: %v", sortedUniqueKeys(commands))
	case sourceDir != "survey":
		t.Errorf("survey command resolves from dir %q, want skills/survey", sourceDir)
	}
}

// sortedUniqueKeys gathers the keys of m into a slice and passes that slice
// through sortedUnique, returning its result.
func sortedUniqueKeys(m map[string]string) []string {
	names := make([]string, len(m))
	next := 0
	for name := range m {
		names[next] = name
		next++
	}
	return sortedUnique(names)
}

// referenceRe matches the two reference-include forms a SKILL.md uses: an
// `@references/foo.md` directive and a bare `references/foo.md` read path.
// The leading `@` is optional and sits outside the capture group, so group 1
// is always the `references/....md` path without the `@`.
var referenceRe = regexp.MustCompile(`@?(references/[A-Za-z0-9_./-]+\.md)`)
Expand Down Expand Up @@ -286,7 +355,8 @@ func isClaudeAdapter(path string) bool {
if strings.HasPrefix(base, "claude-") && strings.HasSuffix(base, "-runtime.md") {
return true
}
return strings.Contains(path, filepath.Join("using-claude-team", "SKILL.md"))
return strings.Contains(path, filepath.Join("using-claude-team", "SKILL.md")) ||
strings.Contains(path, filepath.Join("survey", "SKILL.md"))
}

// TestShippedSurfaceHasNoHiddenMachineDependency is a no-MACHINE-DEPENDENCY
Expand Down
151 changes: 151 additions & 0 deletions skills/integration/survey_extraction_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// ABOUTME: AC-2 Claude extraction proof — runs the survey scan artifact against
// ABOUTME: a committed agentsview-shaped fixture DB and asserts the Claude signals surface.
package integration

import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"testing"
)

// buildFixtureDB materializes the committed fixture-sessions.sql into a fresh
// sessions.db inside a per-test temp directory and returns the DB path. The SQL
// is piped to the sqlite3 CLI on stdin; the skill's scan artifact uses sqlite3,
// so sqlite3 is the faithful executor.
//
// When sqlite3 is not on PATH the calling test is skipped (not failed), so the
// suite stays runnable on a minimal box without claiming a false pass. An
// unreadable fixture file or a failing sqlite3 run is fatal.
func buildFixtureDB(t *testing.T) string {
	t.Helper()
	bin, lookErr := exec.LookPath("sqlite3")
	if lookErr != nil {
		t.Skip("sqlite3 not on PATH; survey extraction proof needs it to run the skill's inline queries")
	}

	fixture := filepath.Join("testdata", "survey", "fixture-sessions.sql")
	statements, readErr := os.ReadFile(fixture)
	if readErr != nil {
		t.Fatalf("read fixture SQL %s: %v", fixture, readErr)
	}

	dbPath := filepath.Join(t.TempDir(), "sessions.db")
	load := exec.Command(bin, dbPath)
	load.Stdin = strings.NewReader(string(statements))
	output, runErr := load.CombinedOutput()
	if runErr != nil {
		t.Fatalf("build fixture DB: %v\n%s", runErr, output)
	}
	return dbPath
}

// runSurveyScan executes the survey scan artifact (skills/survey/bin/scan-project
// under the repo root) against db and returns its combined stdout/stderr.
//
// The command runs from a freshly created temp directory whose basename is
// projectKey, because the script derives PROJECT from the cwd basename. The DB
// environment variable is normally set by the sync step; here the test injects
// the fixture DB path directly. A failed mkdir or a failing script run is fatal.
func runSurveyScan(t *testing.T, db, projectKey string) string {
	t.Helper()
	scanner := filepath.Join(repoRoot(t), "skills", "survey", "bin", "scan-project")
	workDir := filepath.Join(t.TempDir(), projectKey)
	if mkErr := os.Mkdir(workDir, 0o755); mkErr != nil {
		t.Fatalf("mkdir project dir: %v", mkErr)
	}

	scan := exec.Command(scanner)
	scan.Dir = workDir
	scan.Env = append(os.Environ(), "DB="+db)

	output, runErr := scan.CombinedOutput()
	if runErr != nil {
		t.Fatalf("run survey scan artifact %s: %v\n%s", scanner, runErr, output)
	}
	return string(output)
}

// outputSection extracts one section of the survey step-2 run output: the
// lines that follow the first line equal to marker (compared after trimming
// whitespace), up to but not including the next line that starts with `## `.
// The skill echoes section markers like `## OVERVIEW`, so scoping an assertion
// to one section means a stray token in another section cannot satisfy it.
// Section lines are returned untrimmed and joined with "\n"; the result is ""
// when the marker is absent or nothing follows it.
func outputSection(out, marker string) string {
	var body []string
	inSection := false
	for _, raw := range strings.Split(out, "\n") {
		trimmed := strings.TrimSpace(raw)
		if !inSection {
			// Still looking for the marker line; everything before it is ignored.
			if trimmed == marker {
				inSection = true
			}
			continue
		}
		if strings.HasPrefix(trimmed, "## ") {
			// The next section header closes this section.
			break
		}
		body = append(body, raw)
	}
	return strings.Join(body, "\n")
}

// TestSurveyExtractionSurfacesClaudeSignals is the AC-2 extraction proof. It
// executes the survey scan artifact against a committed agentsview-shaped
// fixture DB and checks that the output surfaces the project's Claude decisions
// (the OPEN frontier plus a representative answered row) and the interruption
// counts, while EXCLUDING a sibling out-of-scope Codex session that shares the
// project key.
//
// This is behavior-fixture coverage, not a SKILL.md string-match. Every expected
// value (the AskUserQuestion decisions, the OPEN-vs-done status, the veto count,
// the codex step that must NOT surface) comes from the FIXTURE rows, a source
// independent of the skill text. The skill's bug was a project filter that
// returned "no history" on the real key; should a project-filter or agent-scope
// regression return, a known row would vanish (or the codex row would leak) and
// this test would go RED.
func TestSurveyExtractionSurfacesClaudeSignals(t *testing.T) {
	const (
		decisionsMarker     = "## DECISIONS (header :: status :: question; OPEN = still needs the human)"
		interruptionsMarker = "## INTERRUPTIONS (how often you had to step in)"
	)
	var (
		openLeads    = regexp.MustCompile(`^Test framework\s+::\s+OPEN`)
		openAnywhere = regexp.MustCompile(`Test framework\s+::\s+OPEN`)
		answeredDone = regexp.MustCompile(`Recent answered 20\s+::\s+done`)
	)

	fixtureDB := buildFixtureDB(t)
	got := runSurveyScan(t, fixtureDB, "survey_fixture_proj")
	t.Logf("survey scan output:\n%s", got)

	// 1. OVERVIEW must count only the two Claude sessions. The Codex sibling under
	// the same project key would inflate the count if the agent='claude' scope
	// were dropped.
	if overview := outputSection(got, "## OVERVIEW"); !strings.Contains(overview, "2 sessions") {
		t.Errorf("OVERVIEW should count exactly the 2 Claude sessions, got: %q", overview)
	}

	// 2. DECISIONS must carry both the OPEN frontier and an answered Claude
	// decision, OPEN first. The fixture holds 20 answered decisions newer than
	// the OPEN row, so losing ORDER BY status ASC would let the LIMIT truncate
	// the OPEN frontier.
	decisions := outputSection(got, decisionsMarker)
	if decisions == "" {
		t.Fatalf("no DECISIONS section in output:\n%s", got)
	}
	wantHeaders := [...]string{"Test framework", "Recent answered 20"}
	for i := range wantHeaders {
		if !strings.Contains(decisions, wantHeaders[i]) {
			t.Errorf("DECISIONS missing the AskUserQuestion header %q:\n%s", wantHeaders[i], decisions)
		}
	}
	// Only the first row of the (trimmed) section decides whether OPEN leads.
	firstRow := strings.SplitN(strings.TrimSpace(decisions), "\n", 2)[0]
	if !openLeads.MatchString(firstRow) {
		t.Errorf("the OPEN frontier should lead DECISIONS so the recency LIMIT cannot hide it:\n%s", decisions)
	}
	// The session-2 rejected decision is OPEN; the newer answered decision is done.
	if !openAnywhere.MatchString(decisions) {
		t.Errorf("the unanswered 'Test framework' decision should be OPEN:\n%s", decisions)
	}
	if !answeredDone.MatchString(decisions) {
		t.Errorf("the answered 'Recent answered 20' decision should be done:\n%s", decisions)
	}

	// 3. Interruption math: asks=22 (the OPEN decision + 21 answered
	// AskUserQuestion calls), vetoes=1 (one interrupt marker).
	interruptions := outputSection(got, interruptionsMarker)
	if !strings.Contains(interruptions, "asks=22") {
		t.Errorf("INTERRUPTIONS should count all fixture AskUserQuestion calls (asks=22):\n%s", interruptions)
	}
	if !strings.Contains(interruptions, "vetoes=1") {
		t.Errorf("INTERRUPTIONS should count the one veto marker (vetoes=1):\n%s", interruptions)
	}

	// 4. Regression guard for an over-broad query: the out-of-scope Codex
	// session's step must not appear in any section of the output.
	if strings.Contains(got, "A codex-only step that must not surface") {
		t.Errorf("the out-of-scope Codex session leaked into the Claude-scoped survey output:\n%s", got)
	}
}
79 changes: 79 additions & 0 deletions skills/integration/survey_scaffold_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// ABOUTME: AC-3 scaffold-classifier proof — runs the survey scaffold detector artifact
// ABOUTME: against committed fixture repos and asserts each resolves to its label.
package integration

import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)

// TestSurveyScaffoldClassifier is the AC-3 detection-half proof. For each
// committed fixture repo under testdata/survey/scaffolds it runs the survey
// scaffold detector artifact with that fixture as the working directory and
// asserts the emitted label matches the scaffold the fixture carries.
//
// The expected label for each case comes from the committed fixture file tree,
// an independent source. The classifier reads those files, so a regression in
// its detection logic (a dropped superpowers skill-name check, a swapped
// gsd/superpowers branch, a missing generic fallback) emits the wrong label and
// turns this test RED. The proof is the EXECUTION of the detector against known
// trees, never a substring over SKILL.md.
func TestSurveyScaffoldClassifier(t *testing.T) {
	scaffoldsRoot, err := filepath.Abs(filepath.Join("testdata", "survey", "scaffolds"))
	if err != nil {
		t.Fatal(err)
	}
	detector := filepath.Join(repoRoot(t), "skills", "survey", "bin", "detect-scaffold")

	type scaffoldCase struct {
		fixture  string
		wantLine string // the expected first non-marker output line, or a prefix for "similar:"
		exact    bool   // true: label must equal wantLine; false: label must start with it
	}
	cases := []scaffoldCase{
		{fixture: "superpowers", wantLine: "superpowers", exact: true},
		{fixture: "gsd", wantLine: "gsd", exact: true},
		{fixture: "similar", wantLine: "similar:", exact: false}, // generic fallback names the dirs
		{fixture: "none", wantLine: "none", exact: true},
	}

	for _, tc := range cases {
		t.Run(tc.fixture, func(t *testing.T) {
			fixtureDir := filepath.Join(scaffoldsRoot, tc.fixture)
			if _, statErr := os.Stat(fixtureDir); statErr != nil {
				t.Fatalf("missing scaffold fixture %s: %v", fixtureDir, statErr)
			}

			run := exec.Command(detector)
			run.Dir = fixtureDir
			raw, runErr := run.CombinedOutput()
			if runErr != nil {
				t.Fatalf("run scaffold detector %s in %s: %v\n%s", detector, tc.fixture, runErr, raw)
			}

			label := scaffoldLabel(string(raw))
			t.Logf("%s -> %q", tc.fixture, label)
			switch {
			case tc.exact && label != tc.wantLine:
				t.Errorf("fixture %s classified as %q, want %q", tc.fixture, label, tc.wantLine)
			case !tc.exact && !strings.HasPrefix(label, tc.wantLine):
				t.Errorf("fixture %s classified as %q, want a line beginning %q", tc.fixture, label, tc.wantLine)
			}
		})
	}
}

// scaffoldLabel returns the label the detection block emitted: the first line
// of out that, once trimmed of surrounding whitespace, is neither empty nor the
// `## SCAFFOLD` marker. The trimmed line is returned; "" means no such line.
func scaffoldLabel(out string) string {
	for _, raw := range strings.Split(out, "\n") {
		switch candidate := strings.TrimSpace(raw); candidate {
		case "", "## SCAFFOLD":
			// Blank line or the section marker: keep scanning.
		default:
			return candidate
		}
	}
	return ""
}
Loading
Loading