// userSkills is the published user-skill surface: each owns a SKILL.md the host
// discovers. The test-only `integration` package is deliberately absent.
var userSkills = []string{
	"commission", "debrief", "refit", "survey", "ensign",
	"first-officer", "using-claude-team", "present-gate", "feedback-rejection-flow",
}

// frontmatterField returns the scalar value of a top-level `key:` line in a flat
// frontmatter block, or "" when the block has no such line.
//
// Only lines that start with `key:` in column 0 are considered, so an indented
// continuation or nested line that merely contains `key:` cannot stand in for the
// real top-level key. The value is whitespace-trimmed (which also drops a trailing
// "\r") and one pair of matching surrounding quotes is removed, so `name: "survey"`
// and `name: survey` yield the same result. The first matching line wins.
func frontmatterField(fm, key string) string {
	prefix := key + ":"
	for _, line := range strings.Split(fm, "\n") {
		if !strings.HasPrefix(line, prefix) {
			continue // another key, or an indented (non-top-level) line
		}
		value := strings.TrimSpace(strings.TrimPrefix(line, prefix))
		// Drop one pair of matching quotes; a lone quote character is left alone.
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}
		return value
	}
	return ""
}

// discoverUserInvocableSkills scans the shipped skills tree the way the host does:
// every subdirectory with a SKILL.md whose frontmatter declares `user-invocable: true`
// is exposed as `/spacedock:<name>`.
+func discoverUserInvocableSkills(t *testing.T) map[string]string { + t.Helper() + root := skillsRoot(t) + entries, err := os.ReadDir(root) + if err != nil { + t.Fatalf("read skills root %s: %v", root, err) + } + out := map[string]string{} + for _, e := range entries { + if !e.IsDir() || e.Name() == "integration" { + continue + } + data, err := os.ReadFile(filepath.Join(root, e.Name(), "SKILL.md")) + if err != nil { + continue + } + fm, ok := frontmatter(string(data)) + if !ok || frontmatterField(fm, "user-invocable") != "true" { + continue + } + name := frontmatterField(fm, "name") + if name == "" { + t.Errorf("user-invocable skill dir %q has no name field", e.Name()) + continue + } + out[name] = e.Name() + } + return out +} + +// TestSurveyIsDiscoverableUserCommand is a structural frontmatter/discovery check +// kept inside the instruction-read quarantine. The behavior proof for survey's scan +// lives in skills/integration; this check only guards that the host can discover the +// `/spacedock:survey` command from the shipped skill tree. +func TestSurveyIsDiscoverableUserCommand(t *testing.T) { + discovered := discoverUserInvocableSkills(t) + dir, ok := discovered["survey"] + if !ok { + t.Fatalf("survey is not discoverable as /spacedock:survey; discovered user commands: %v", sortedUniqueKeys(discovered)) + } + if dir != "survey" { + t.Errorf("survey command resolves from dir %q, want skills/survey", dir) + } +} + +func sortedUniqueKeys(m map[string]string) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return sortedUnique(keys) +} + // referenceRe matches the two reference-include forms a SKILL.md uses: an // `@references/foo.md` directive and a bare `references/foo.md` read path. 
var referenceRe = regexp.MustCompile(`@?(references/[A-Za-z0-9_./-]+\.md)`) @@ -286,7 +355,8 @@ func isClaudeAdapter(path string) bool { if strings.HasPrefix(base, "claude-") && strings.HasSuffix(base, "-runtime.md") { return true } - return strings.Contains(path, filepath.Join("using-claude-team", "SKILL.md")) + return strings.Contains(path, filepath.Join("using-claude-team", "SKILL.md")) || + strings.Contains(path, filepath.Join("survey", "SKILL.md")) } // TestShippedSurfaceHasNoHiddenMachineDependency is a no-MACHINE-DEPENDENCY diff --git a/skills/integration/survey_extraction_test.go b/skills/integration/survey_extraction_test.go new file mode 100644 index 00000000..88898c75 --- /dev/null +++ b/skills/integration/survey_extraction_test.go @@ -0,0 +1,151 @@ +// ABOUTME: AC-2 Claude extraction proof — runs the survey scan artifact against +// ABOUTME: a committed agentsview-shaped fixture DB and asserts the Claude signals surface. +package integration + +import ( + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + "testing" +) + +// buildFixtureDB shells out to the sqlite3 CLI to materialize the committed +// fixture-sessions.sql into a temp sessions.db, returning its path. The skill's +// scan artifact uses sqlite3, so sqlite3 is the faithful executor; it is +// a standard POSIX tool present in CI, and the test skips (not fails) when it or bash +// is absent so the suite stays runnable on a minimal box without claiming a false pass. 
+func buildFixtureDB(t *testing.T) string { + t.Helper() + sqlite3, err := exec.LookPath("sqlite3") + if err != nil { + t.Skip("sqlite3 not on PATH; survey extraction proof needs it to run the skill's inline queries") + } + sqlPath := filepath.Join("testdata", "survey", "fixture-sessions.sql") + sql, err := os.ReadFile(sqlPath) + if err != nil { + t.Fatalf("read fixture SQL %s: %v", sqlPath, err) + } + db := filepath.Join(t.TempDir(), "sessions.db") + cmd := exec.Command(sqlite3, db) + cmd.Stdin = strings.NewReader(string(sql)) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("build fixture DB: %v\n%s", err, out) + } + return db +} + +// runSurveyScan runs the survey scan artifact against the given DB, from a working dir +// named for the project key (the script derives PROJECT from the cwd basename), and +// returns the combined output. DB is normally set by the sync step; the test injects the +// fixture DB directly. +func runSurveyScan(t *testing.T, db, projectKey string) string { + t.Helper() + script := filepath.Join(repoRoot(t), "skills", "survey", "bin", "scan-project") + projDir := filepath.Join(t.TempDir(), projectKey) + if err := os.Mkdir(projDir, 0o755); err != nil { + t.Fatalf("mkdir project dir: %v", err) + } + cmd := exec.Command(script) + cmd.Dir = projDir + cmd.Env = append(os.Environ(), "DB="+db) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("run survey scan artifact %s: %v\n%s", script, err, out) + } + return string(out) +} + +// outputSection returns the lines of the survey step-2 run output under the `## X` +// marker line (exact-match on the marker), up to but excluding the next `## ` line. +// The skill echoes section markers like `## OVERVIEW`, so this scopes an assertion to a +// single section's rows — a stray token in another section cannot satisfy the check. 
+func outputSection(out, marker string) string { + lines := strings.Split(out, "\n") + start := -1 + for i, line := range lines { + if strings.TrimSpace(line) == marker { + start = i + 1 + break + } + } + if start == -1 { + return "" + } + end := len(lines) + for i := start; i < len(lines); i++ { + if strings.HasPrefix(strings.TrimSpace(lines[i]), "## ") { + end = i + break + } + } + return strings.Join(lines[start:end], "\n") +} + +// TestSurveyExtractionSurfacesClaudeSignals is the AC-2 extraction proof. It runs the +// survey scan artifact against a committed agentsview-shaped fixture DB and asserts the +// produced output surfaces the project's Claude decisions +// (the OPEN frontier and a representative answered row), the interruption counts, and +// EXCLUDES a sibling out-of-scope Codex session under the same project key. +// +// This is behavior-fixture coverage, not a SKILL.md string-match: the expected values +// (the AskUserQuestion decisions, the OPEN-vs-done status, the veto count, the codex +// step that must NOT surface) come from the FIXTURE rows — an independent source that +// diverges from the skill text. The skill's bug was a project filter that returned "no +// history" on the real key; if a project-filter or agent-scope regression returned, a +// known row would vanish (or the codex row would leak) and this test would RED. The +// proof is the EXECUTION of the survey scan artifact against known rows, never a +// substring over the instruction file. +func TestSurveyExtractionSurfacesClaudeSignals(t *testing.T) { + db := buildFixtureDB(t) + got := runSurveyScan(t, db, "survey_fixture_proj") + t.Logf("survey scan output:\n%s", got) + + // 1. OVERVIEW counts only the two Claude sessions — the Codex sibling under the same + // project key must NOT inflate the count. A dropped agent='claude' scope would. 
+ overview := outputSection(got, "## OVERVIEW") + if !strings.Contains(overview, "2 sessions") { + t.Errorf("OVERVIEW should count exactly the 2 Claude sessions, got: %q", overview) + } + + // 2. The OPEN frontier and an answered Claude decision surface in DECISIONS, with + // OPEN first. The fixture has 20 answered decisions newer than the OPEN row, so + // dropping ORDER BY status ASC lets the LIMIT truncate the OPEN frontier. + decisions := outputSection(got, "## DECISIONS (header :: status :: question; OPEN = still needs the human)") + if decisions == "" { + t.Fatalf("no DECISIONS section in output:\n%s", got) + } + for _, header := range []string{"Test framework", "Recent answered 20"} { + if !strings.Contains(decisions, header) { + t.Errorf("DECISIONS missing the AskUserQuestion header %q:\n%s", header, decisions) + } + } + decisionLines := strings.Split(strings.TrimSpace(decisions), "\n") + if len(decisionLines) == 0 || !regexp.MustCompile(`^Test framework\s+::\s+OPEN`).MatchString(decisionLines[0]) { + t.Errorf("the OPEN frontier should lead DECISIONS so the recency LIMIT cannot hide it:\n%s", decisions) + } + // The session-2 rejected decision is OPEN; the newer answered decision is done. + if !regexp.MustCompile(`Test framework\s+::\s+OPEN`).MatchString(decisions) { + t.Errorf("the unanswered 'Test framework' decision should be OPEN:\n%s", decisions) + } + if !regexp.MustCompile(`Recent answered 20\s+::\s+done`).MatchString(decisions) { + t.Errorf("the answered 'Recent answered 20' decision should be done:\n%s", decisions) + } + + // 3. Interruption math: asks=22 (the OPEN decision + 21 answered AskUserQuestion + // calls), vetoes=1 (one interrupt marker). 
+ interruptions := outputSection(got, "## INTERRUPTIONS (how often you had to step in)") + if !strings.Contains(interruptions, "asks=22") { + t.Errorf("INTERRUPTIONS should count all fixture AskUserQuestion calls (asks=22):\n%s", interruptions) + } + if !strings.Contains(interruptions, "vetoes=1") { + t.Errorf("INTERRUPTIONS should count the one veto marker (vetoes=1):\n%s", interruptions) + } + + // 4. The out-of-scope Codex session's step must NOT leak into any section — Claude + // scope excludes it. This is the regression guard for an over-broad query. + if strings.Contains(got, "A codex-only step that must not surface") { + t.Errorf("the out-of-scope Codex session leaked into the Claude-scoped survey output:\n%s", got) + } +} diff --git a/skills/integration/survey_scaffold_test.go b/skills/integration/survey_scaffold_test.go new file mode 100644 index 00000000..ef7d70be --- /dev/null +++ b/skills/integration/survey_scaffold_test.go @@ -0,0 +1,79 @@ +// ABOUTME: AC-3 scaffold-classifier proof — runs the survey scaffold detector artifact +// ABOUTME: against committed fixture repos and asserts each resolves to its label. +package integration + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// TestSurveyScaffoldClassifier is the AC-3 detection-half proof. It runs the survey +// scaffold detector artifact in each committed fixture repo and asserts the +// emitted label matches the scaffold that fixture carries. +// +// The expected label for each case comes from the committed +// fixture file tree (testdata/survey/scaffolds/) — an independent source. The +// classifier reads those files; if its detection logic regresses (a dropped superpowers +// skill-name check, a swapped gsd/superpowers branch, a missing generic fallback), the +// run over the fixture emits the wrong label and this test REDs. The proof is the +// EXECUTION of the detector artifact against known trees, never a substring over SKILL.md. 
+func TestSurveyScaffoldClassifier(t *testing.T) { + scaffoldsRoot, err := filepath.Abs(filepath.Join("testdata", "survey", "scaffolds")) + if err != nil { + t.Fatal(err) + } + detector := filepath.Join(repoRoot(t), "skills", "survey", "bin", "detect-scaffold") + + cases := []struct { + fixture string + wantLine string // the expected first non-marker output line, or a prefix for "similar:" + exact bool + }{ + {fixture: "superpowers", wantLine: "superpowers", exact: true}, + {fixture: "gsd", wantLine: "gsd", exact: true}, + {fixture: "similar", wantLine: "similar:", exact: false}, // generic fallback names the dirs + {fixture: "none", wantLine: "none", exact: true}, + } + + for _, tc := range cases { + t.Run(tc.fixture, func(t *testing.T) { + dir := filepath.Join(scaffoldsRoot, tc.fixture) + if _, err := os.Stat(dir); err != nil { + t.Fatalf("missing scaffold fixture %s: %v", dir, err) + } + cmd := exec.Command(detector) + cmd.Dir = dir + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("run scaffold detector %s in %s: %v\n%s", detector, tc.fixture, err, out) + } + label := scaffoldLabel(string(out)) + t.Logf("%s -> %q", tc.fixture, label) + if tc.exact { + if label != tc.wantLine { + t.Errorf("fixture %s classified as %q, want %q", tc.fixture, label, tc.wantLine) + } + } else { + if !strings.HasPrefix(label, tc.wantLine) { + t.Errorf("fixture %s classified as %q, want a line beginning %q", tc.fixture, label, tc.wantLine) + } + } + }) + } +} + +// scaffoldLabel pulls the detection block's emitted label: the first output line that +// is not the `## SCAFFOLD` marker or blank. 
+func scaffoldLabel(out string) string { /* Walks out line by line with surrounding whitespace trimmed, skips blank lines and the exact "## SCAFFOLD" marker, and returns the first remaining line; returns "" when every line is blank or the marker. */ + for _, line := range strings.Split(out, "\n") { + l := strings.TrimSpace(line) + if l == "" || l == "## SCAFFOLD" { + continue + } + return l + } + return "" +} diff --git a/skills/integration/testdata/survey/fixture-sessions.sql b/skills/integration/testdata/survey/fixture-sessions.sql new file mode 100644 index 00000000..3a4efd3e --- /dev/null +++ b/skills/integration/testdata/survey/fixture-sessions.sql @@ -0,0 +1,171 @@ +-- ABOUTME: Committed agentsview-shaped sessions.db fixture for the survey skill's +-- ABOUTME: Claude extraction test — two Claude sessions under one project key, with decisions + a veto. +-- +-- These rows mirror what `agentsview sync` produces for this repo's Claude history: +-- the project key is the cwd basename with non-alphanumerics replaced by '_' +-- (survey_fixture_proj here). Survey filters by that `project` column AND agent='claude'. +-- The fixture seeds a known set of decision/interruption signals so the skill's step-2 +-- block, run verbatim, must surface them; a regression in the queries (wrong project +-- filter, dropped agent scope, dropped OPEN-first ordering, broken json_extract) drops +-- a known row and reds the test. +-- +-- CRITICAL — the decision results match the PRODUCTION shape. agentsview v0.32.1 emits a +-- NON-EMPTY result_content for EVERY decision: an answered one begins "Your questions have +-- been answered…"/"User has answered…", a rejected/abandoned one begins "The user doesn't +-- want to proceed… was rejected" (questions left "(No answer provided)"). OPEN-detection +-- must key on the ABSENCE of an answered-confirmation — a NULL-result OPEN row would be a +-- shape production never emits, making the test a tautology against a fiction. +-- +-- A sibling NON-Claude (codex) session under the SAME project key is included to prove +-- the Claude-only scope: it must NOT leak into the Claude counts.
(Surfacing non-Claude +-- agents' decision signals is a deferred follow-up — out of scope for now.) +-- +-- The 20 answered Claude decisions with ids newer than the OPEN row are intentional: +-- the shipped query's recency tail is `ORDER BY status ASC, t.id DESC LIMIT 20`. +-- If a regression removes `status ASC`, those 20 answered rows fill the LIMIT window +-- and truncate the older OPEN frontier row out of DECISIONS. +-- +-- The columns are the subset of agentsview's v1 schema the skill's queries touch. + +CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + project TEXT, + agent TEXT, + file_path TEXT, + started_at TEXT, + ended_at TEXT, + first_message TEXT, + message_count INTEGER, + user_message_count INTEGER +); + +CREATE TABLE tool_calls ( + id INTEGER PRIMARY KEY, + session_id TEXT, + tool_name TEXT, + input_json TEXT, + result_content TEXT +); + +CREATE TABLE messages ( + id INTEGER PRIMARY KEY, + session_id TEXT, + role TEXT, + content TEXT +); + +-- Claude session 1: one ANSWERED decision + a normal user prompt. The answered result +-- mirrors the PRODUCTION shape: real agentsview v0.32.1 records an answered decision with +-- result_content beginning "Your questions have been answered: …" (or "User has answered …"). 
+INSERT INTO sessions VALUES + ('claude:11111111-2222-3333-4444-555555555555', 'survey_fixture_proj', 'claude', + '/u/.claude/projects/-tmp-survey_fixture_proj/11111111.jsonl', + '2026-06-05', '2026-06-05', 'Pick up the parser refactor and ship it.', 8, 3); + +INSERT INTO tool_calls VALUES + (1, 'claude:11111111-2222-3333-4444-555555555555', 'AskUserQuestion', + '{"questions":[{"header":"Refactor scope","question":"Which scope should the parser refactor cover?"}]}', + 'Your questions have been answered: "Which scope should the parser refactor cover?"="tokenizer + entrypoint"'); + +INSERT INTO messages VALUES + (1, 'claude:11111111-2222-3333-4444-555555555555', 'user', 'Pick up the parser refactor and ship it.'); + +-- Claude session 2: one OPEN decision matching the PRODUCTION shape — a REJECTED decision. +-- Real agentsview NEVER emits a NULL result for a decision; a rejected/abandoned fork still +-- carries a non-empty result_content: "The user doesn't want to proceed… The tool use was +-- rejected", with the questions left "(No answer provided)". OPEN-detection MUST key on the +-- ABSENCE of an answered-confirmation, not on a NULL that production never produces. (Plus a +-- user-veto interrupt marker in the message stream.) +INSERT INTO sessions VALUES + ('claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'survey_fixture_proj', 'claude', + '/u/.claude/projects/-tmp-survey_fixture_proj/66666666.jsonl', + '2026-06-06', '2026-06-06', 'Now wire up the regression suite.', 6, 2); + +INSERT INTO tool_calls VALUES + (2, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Test framework","question":"Which test framework should the regression suite use?"}]}', + 'The user doesn''t want to proceed with this tool use. The tool use was rejected. Questions asked: +- "Which test framework should the regression suite use?" 
+ (No answer provided)'); + +INSERT INTO tool_calls VALUES + (4, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 01","question":"Which recent answered decision 01 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 01 should be kept?"="answer 01"'), + (5, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 02","question":"Which recent answered decision 02 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 02 should be kept?"="answer 02"'), + (6, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 03","question":"Which recent answered decision 03 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 03 should be kept?"="answer 03"'), + (7, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 04","question":"Which recent answered decision 04 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 04 should be kept?"="answer 04"'), + (8, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 05","question":"Which recent answered decision 05 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 05 should be kept?"="answer 05"'), + (9, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 06","question":"Which recent answered decision 06 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 06 should be kept?"="answer 06"'), + (10, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 07","question":"Which recent answered decision 07 should be 
kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 07 should be kept?"="answer 07"'), + (11, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 08","question":"Which recent answered decision 08 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 08 should be kept?"="answer 08"'), + (12, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 09","question":"Which recent answered decision 09 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 09 should be kept?"="answer 09"'), + (13, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 10","question":"Which recent answered decision 10 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 10 should be kept?"="answer 10"'), + (14, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 11","question":"Which recent answered decision 11 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 11 should be kept?"="answer 11"'), + (15, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 12","question":"Which recent answered decision 12 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 12 should be kept?"="answer 12"'), + (16, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 13","question":"Which recent answered decision 13 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 13 should be kept?"="answer 13"'), + (17, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 
14","question":"Which recent answered decision 14 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 14 should be kept?"="answer 14"'), + (18, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 15","question":"Which recent answered decision 15 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 15 should be kept?"="answer 15"'), + (19, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 16","question":"Which recent answered decision 16 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 16 should be kept?"="answer 16"'), + (20, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 17","question":"Which recent answered decision 17 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 17 should be kept?"="answer 17"'), + (21, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 18","question":"Which recent answered decision 18 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 18 should be kept?"="answer 18"'), + (22, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 19","question":"Which recent answered decision 19 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 19 should be kept?"="answer 19"'), + (23, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'AskUserQuestion', + '{"questions":[{"header":"Recent answered 20","question":"Which recent answered decision 20 should be kept?"}]}', + 'Your questions have been answered: "Which recent answered decision 20 should be kept?"="answer 20"'); + +INSERT INTO messages VALUES + (2, 
'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'user', 'Now wire up the regression suite.'), + (3, 'claude:66666666-7777-8888-9999-aaaaaaaaaaaa', 'user', '[Request interrupted by user]'); + +-- Out-of-scope sibling: a Codex session under the SAME project key. Survey's Claude +-- scope must EXCLUDE it — it must not appear in the session count, the decisions, or the +-- interruption totals. +INSERT INTO sessions VALUES + ('codex:cccccccc-dddd-eeee-ffff-000000000000', 'survey_fixture_proj', 'codex', + '/u/.codex/sessions/rollout-cccccccc.jsonl', + '2026-06-04', '2026-06-04', 'Out-of-scope codex session under the same project key.', 4, 1); + +INSERT INTO tool_calls VALUES + (3, 'codex:cccccccc-dddd-eeee-ffff-000000000000', 'update_plan', + '{"explanation":"x","plan":[{"step":"A codex-only step that must not surface","status":"in_progress"}]}', + NULL); + +INSERT INTO messages VALUES + (4, 'codex:cccccccc-dddd-eeee-ffff-000000000000', 'user', 'Out-of-scope codex session under the same project key.'); diff --git a/skills/integration/testdata/survey/scaffolds/gsd/.claude/skills/gsd/SKILL.md b/skills/integration/testdata/survey/scaffolds/gsd/.claude/skills/gsd/SKILL.md new file mode 100644 index 00000000..31129ce0 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/gsd/.claude/skills/gsd/SKILL.md @@ -0,0 +1,6 @@ +--- +name: gsd +description: Fixture marker for a get-shit-done scaffold. 
+--- + +# GSD Fixture diff --git a/skills/integration/testdata/survey/scaffolds/none/README.md b/skills/integration/testdata/survey/scaffolds/none/README.md new file mode 100644 index 00000000..53547949 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/none/README.md @@ -0,0 +1 @@ +# Just a readme diff --git a/skills/integration/testdata/survey/scaffolds/similar/.claude/commands/deploy.md b/skills/integration/testdata/survey/scaffolds/similar/.claude/commands/deploy.md new file mode 100644 index 00000000..6d6b87b2 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/similar/.claude/commands/deploy.md @@ -0,0 +1,3 @@ +# Deploy Fixture + +Fixture marker for a project-local command scaffold. diff --git a/skills/integration/testdata/survey/scaffolds/similar/.claude/skills/my-pipeline/SKILL.md b/skills/integration/testdata/survey/scaffolds/similar/.claude/skills/my-pipeline/SKILL.md new file mode 100644 index 00000000..63fca9b3 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/similar/.claude/skills/my-pipeline/SKILL.md @@ -0,0 +1,6 @@ +--- +name: my-pipeline +description: Fixture marker for a custom multi-phase agent discipline. +--- + +# Similar Scaffold Fixture diff --git a/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/brainstorming/SKILL.md b/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/brainstorming/SKILL.md new file mode 100644 index 00000000..49dab8d8 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/brainstorming/SKILL.md @@ -0,0 +1,6 @@ +--- +name: brainstorming +description: Fixture marker for the superpowers brainstorming discipline. 
+--- + +# Brainstorming Fixture diff --git a/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/superpowers/SKILL.md b/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/superpowers/SKILL.md new file mode 100644 index 00000000..d2a082c5 --- /dev/null +++ b/skills/integration/testdata/survey/scaffolds/superpowers/.claude/skills/superpowers/SKILL.md @@ -0,0 +1,6 @@ +--- +name: superpowers +description: Fixture marker for an installed superpowers scaffold. +--- + +# Superpowers Fixture diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md new file mode 100644 index 00000000..7332ab72 --- /dev/null +++ b/skills/survey/SKILL.md @@ -0,0 +1,162 @@ +--- +name: survey +description: Use when arriving at or returning to a project that already has AI-agent history and you want the lay of the land before doing anything else — "survey this project", "what have we been doing here", "catch me up", "orient me", "where did we leave off", "what's the state of this project", or picking up a brownfield repo with several in-flight agent tracks. Reads existing agent session history (read-only), reports the implicit workflow, the open decisions, and how often you had to step in, then offers to commission a spacedock workflow from it. +user-invocable: true +--- + +# Survey a Project + +## Overview + +Survey is the first thing you run on unfamiliar ground: it reconstructs what the AI agents in this project have implicitly been doing, from their session history. It reports the inferred workflow, the workstreams, the recent decisions, and — load-bearing — the OPEN decisions (the abandoned or unanswered forks) plus how often the human had to step in. Then it offers to commission a real spacedock workflow with explicit gates from what it found. + +It reads **agentsview**'s session DB and is strictly read-only — every query is shown inline so nothing is a black box. 
For now it surveys **Claude Code** history (the decision and interruption signals below are Claude's); agentsview also ingests Codex, Gemini, and more, and surfacing those agents' decision signals is a deferred follow-up. The closing move is the discovery → commission bridge: the OPEN decisions become candidate gates, the workstreams become candidate entities, the inferred loop becomes the stage list. + +Run the four steps in order: **check agentsview → scan → recognize scaffold → report and offer**. + +--- + +## 1. Check agentsview, then sync THIS project only (scoped) + +This skill may run in a sandboxed agent that **cannot read `~/.agentsview/` directly** (macOS TCC denies raw FS access to a limited-permission process, even though the `agentsview` binary itself reads it). So do NOT `sqlite3 ~/.agentsview/sessions.db` blindly. Instead, drive the read through the `agentsview` binary into a process-readable data directory under `AGENTSVIEW_DATA_DIR`, then query that copy: + +```bash +SURVEY_DB_DIR="${SPACEDOCK_SURVEY_DB_DIR:-${TMPDIR:-/tmp}/spacedock-survey}" +DB="$SURVEY_DB_DIR/sessions.db" + +if ! command -v agentsview >/dev/null; then echo "AGENTSVIEW MISSING"; fi +``` + +If it prints `AGENTSVIEW MISSING`: tell the user agentsview is needed (it ingests the agent logs this skill reads), **ask consent**, and only on a yes run the install (`brew install --cask agentsview`; fallback `curl -fsSL https://agentsview.io/install.sh | bash`). NEVER install without an explicit yes — stop at the consent prompt otherwise. + +With the binary present, sync — but **scope the sync to this project**. A bare `agentsview sync` enumerates the ENTIRE `~/.claude/projects` history (16k+ sessions, growing): on a real machine that walk exhausts any sane timeout, so the survey data dir ends up empty or partial. 
The fix is to narrow Claude's source root to just this repo's session directories before syncing, so the walk is bounded and this project's Claude sessions land in seconds: + +```bash +mkdir -p "$SURVEY_DB_DIR" + +# Claude Code stores each project's sessions in ~/.claude/projects/<dash-encoded-cwd>/, +# so this repo's sessions live under dirs that begin with the dash-encoded cwd. Point +# CLAUDE_PROJECTS_DIR at a symlink farm of just those dirs — the sync then walks only +# this project's Claude sessions (this cwd plus its worktrees), not the whole backlog. +CLAUDE_ROOT="${CLAUDE_PROJECTS_DIR:-$HOME/.claude/projects}" +DASH_CWD=$(pwd | sed 's#/#-#g') # ~/.claude/projects dir-name convention +NARROW="$SURVEY_DB_DIR/claude-narrow" +rm -rf "$NARROW"; mkdir -p "$NARROW" +shopt -s nullglob 2>/dev/null +for d in "$CLAUDE_ROOT/$DASH_CWD" "$CLAUDE_ROOT/$DASH_CWD"-*; do + [ -d "$d" ] && ln -s "$d" "$NARROW/$(basename "$d")" +done + +AGENTSVIEW_DATA_DIR="$SURVEY_DB_DIR" CLAUDE_PROJECTS_DIR="$NARROW" timeout 300 agentsview sync +``` + +The survey data dir persists between runs, so a re-survey of the same project is incremental (seconds). Do not pass `--full` — a full resync re-ingests everything and can fill the disk. If the symlink farm is empty (this project has no Claude sessions under `~/.claude/projects`), the synced DB has no history for it; step 2 reports "no agent history" and stops. + +If `agentsview sync` fails (network, disk, permissions), report the exact failure and stop — do not fall back to raw `~/.agentsview/` reads (they fail under TCC). + +## 2. Scan the project + +agentsview derives a `project` column for every session from that session's working directory — its **basename, with non-alphanumerics replaced by `_`** (so this repo's cwd `…/spacedock-v1` keys as `spacedock_v1`). Filter by that `project` column.
Do NOT filter by `file_path LIKE` a dash-mangled cwd: the project key is the stable, agentsview-computed key, and `file_path` matching is brittle across agentsview versions and source layouts. + +The runnable scan surface is `bin/scan-project`, resolved relative to this skill directory (in this repo: `skills/survey/bin/scan-project`). It contains the explicit sqlite queries and their comments; keep it paired with this intent list as the source of truth: + +- `OVERVIEW` counts this project's top-level Claude sessions by agentsview `project`; +- `INTERRUPTIONS` counts AskUserQuestion/ExitPlanMode stops, hard-veto markers, and user turns; +- `DECISIONS` lists AskUserQuestion/ExitPlanMode decisions, marks only answered-confirmation results as `done`, marks every rejection/error/prompt-echo as `OPEN`, and sorts OPEN before done so the frontier cannot be truncated by the recency `LIMIT`; +- `RECENT PROMPTS` provides secondary workstream signal. + +Run the artifact, then read the labelled output: + +```bash +SURVEY_SKILL_DIR="${SPACEDOCK_SURVEY_SKILL_DIR:-skills/survey}" +"$SURVEY_SKILL_DIR/bin/scan-project" +``` + +`OVERVIEW` is empty / `0 sessions` → there is no Claude agent history for this project; say so and stop. Nothing to discover. (Survey reads Claude history only for now; a project whose only agent history is Codex/Gemini will report "no agent history" here — surfacing those agents is a deferred follow-up.) + +**Honest signal accounting.** The DECISIONS section lists the human-decision points; `OPEN` = still needs the human, and you lead the report with those. The interruption total is `asks + plans + vetoes` (the AskUserQuestion / ExitPlanMode decision tools plus the hard-veto markers Claude sessions retain); `pct = total*100/user_turns`. Never dress an empty section up as "no decisions" — if a section is empty, say the run found none of that signal. + +## 3. 
Recognize an incumbent scaffold + +Before the report, detect whether the project already runs a common agent scaffold — by reading project FILES (not the session DB). The runnable classifier is `bin/detect-scaffold`, resolved relative to this skill directory (in this repo: `skills/survey/bin/detect-scaffold`). It checks: + +- superpowers via `.claude/skills/superpowers`, marketplace/plugin config, or superpowers discipline skill names; +- gsd/get-shit-done via a gsd skill/command dir or gsd config; +- similar scaffolds via any other `.claude/skills` or `.claude/commands` tree; +- none when no scaffold is present. + +```bash +SURVEY_SKILL_DIR="${SPACEDOCK_SURVEY_SKILL_DIR:-skills/survey}" +"$SURVEY_SKILL_DIR/bin/detect-scaffold" +``` + +The detected scaffold name drives the comparative benefit in the report (step 4). The detection reads files; the comparison's *numbers* come from the scan (step 2). + +## 4. Confirm, then report and offer + +Every `{slot}` below is a FILL slot: substitute the real value from the step-2 scan before you show the user. A literal `{slot}` (or a `<…>` angle token) left in what you present is a bug — never show the user an unfilled slot. If a slot has no data (e.g. zero OPEN decisions), drop that line rather than printing an empty slot. + +Tell the user what you found and wait for a yes: + +> Found **{N} sessions** in `{project}` (`{date range}`), with **{D} decision points** and **{V} interruptions**. Want me to lay it out? 
+ +Then synthesize this, one screen: + +``` +PROJECT: {basename} {sessions} Claude sessions · {date range} + +INFERRED WORKFLOW + {the implicit loop across the decisions + prompts, as an arrow chain} — {one honest line} + +WORKSTREAMS + {cluster the decisions + prompts into tracks; one line each, status glyph} + +NEEDS YOU (only if any decision is OPEN) + ⚠ {the OPEN forks — abandoned/unanswered decision questions; lead with them} + +RECENT DECISIONS (answered) + {the rest: header — short question} + +INTERRUPTIONS (where spacedock can help) + {total} times you stepped in across {sessions} sessions — {decisions} decision points + + {vetoes} course-corrections, {pct}% of your turns. +``` + +### The discovery → commission bridge (close every report with this) + +After the synthesis, recognize the scaffold (step 3) and offer a COMPARABLE spacedock workflow, with a benefit stated **concretely and comparatively**, anchored to the actual scan numbers — never a placeholder, never a generic pitch. As in the synthesis above, every `{slot}` is a FILL slot: substitute the real step-2 number/forks before you show the user; a literal `{slot}` in your output is a bug. Use the per-scaffold framing: + +- **superpowers** is a library of disciplines an agent invokes (brainstorming → writing-plans → executing-plans → subagent-driven-development), with human interruption left implicit — *the human decides when to step in.* Offer a spacedock workflow that maps those disciplines to stages (ideation → implementation → validation) and makes the interruption points EXPLICIT gates. State it tied to the scan's interruption count: + > superpowers gives your agent the *plays* but leaves *when you step in* up to you — this scan counted **{V} interruptions across {N} sessions** where you had to. A spacedock workflow turns those into explicit approval gates, so the agent advances on its own between your calls and only stops where you marked a gate. 
+ +- **gsd / get-shit-done** runs a fixed phase sequence per task, one task at a time. Offer a spacedock workflow that maps the gsd phases to stages and adds gates + durable entity state, so multiple work items move through the same phases concurrently and pause only at gates. State it tied to the OPEN forks: + > gsd drives one task through its phases; spacedock tracks every work item through the same stages as durable on-disk state, gates the steps you flagged as needing you (this scan found these OPEN forks: **{the actual OPEN decisions}**), and lets several run in parallel without you re-driving each. + +- **similar / unknown scaffold** — name it (use the names the step-3 detection emitted), then offer the generic spacedock benefit (gates from the interruption count, entity state, parallelism) without inventing a false-specific comparison. + +- **none** — offer the generic spacedock benefit anchored to the interruption count and OPEN forks. + +The two comparisons MUST differ — superpowers-vs-spacedock (implicit-interruption → explicit-gates) is a different claim from gsd-vs-spacedock (single-task-phases → parallel-gated-entities). Each must cite a real scan number (the filled `{V}`/`{N}` or the filled OPEN forks), not a placeholder. + +Then make the offer: + +> Want me to commission a spacedock workflow from this? + +On a **yes**, invoke commission in batch mode, supplying inputs derived from the scan (commission already accepts batch design inputs in its first message — see its Batch Mode). Assemble: + +- **stages** ← the inferred workflow loop (for a detected scaffold, the comparable-workflow stage mapping above is the proposed stage list); +- **seed entities** ← the workstreams; +- **approval gates** ← the OPEN forks (each OPEN decision is a candidate gate); +- **mission / entity** ← inferred from the workstreams and the project. + +Hand off by invoking `commission` with those assembled inputs. 
Survey does NOT generate workflow files itself — file generation stays commission's job; survey only assembles the invocation and hands off. + +On a **no**, stop — the survey stands on its own as an orientation. + +## Synthesis guidance + +- **Project name** = path basename. +- **Workflow + workstreams: infer them**, primarily from the decisions (the `RECENT PROMPTS` section is sparse/noisy — a secondary signal). Be honest when a track is one-off or stalled. +- **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead the report with those. Don't claim a decision was implemented — the scan doesn't know that. +- **Fill every slot, never invent.** Every `{slot}` in the report and the comparison comes from the step-2 numbers; a literal `{slot}` shown to the user is a bug. If a section's signal is empty (no OPEN decisions, no interruptions), say the run found none — never dress an empty section up as "no decisions." +- **Claude-only for now.** Survey reads Claude history; Codex/Gemini decision signals are a deferred follow-up. In user-facing wording, don't claim more than step 2 reports: a Codex/Gemini-only project has no *Claude* history, which is not the same as having no agent history at all. 
diff --git a/skills/survey/bin/detect-scaffold b/skills/survey/bin/detect-scaffold new file mode 100755 index 00000000..07967496 --- /dev/null +++ b/skills/survey/bin/detect-scaffold @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "## SCAFFOLD" + +shopt -s nullglob 2>/dev/null || true + +has_superpowers_skill_name() { + local name + for path in .claude/skills/*; do + [ -e "$path" ] || continue + name=$(basename "$path") + case "$name" in + brainstorming|writing-plans|executing-plans|subagent-driven-development) + return 0 + ;; + esac + done + return 1 +} + +similar_names() { + local path name count=0 out="" + for path in .claude/skills/* .claude/commands/*; do + [ -e "$path" ] || continue + name=$(basename "$path") + out="${out:+$out,}$name" + count=$((count + 1)) + [ "$count" -ge 5 ] && break + done + printf "%s" "$out" +} + +if [ -d .claude/skills/superpowers ] || grep -rqs "superpowers" .claude-plugin/ 2>/dev/null || has_superpowers_skill_name; then + echo "superpowers" +elif [ -d .claude/skills/gsd ] || [ -d .claude/skills/get-shit-done ] || [ -d .claude/commands/gsd ] || ls GSD.md gsd.md .gsd >/dev/null 2>&1; then + echo "gsd" +elif [ -d .claude/skills ] || [ -d .claude/commands ]; then + echo "similar: $(similar_names)" +else + echo "none" +fi diff --git a/skills/survey/bin/scan-project b/skills/survey/bin/scan-project new file mode 100755 index 00000000..c39bd9d4 --- /dev/null +++ b/skills/survey/bin/scan-project @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail + +if ! command -v sqlite3 >/dev/null; then + echo "sqlite3 missing; survey scan needs sqlite3 to query agentsview's sessions.db" >&2 + exit 127 +fi + +if [ -z "${DB:-}" ]; then + SURVEY_DB_DIR="${SPACEDOCK_SURVEY_DB_DIR:-${TMPDIR:-/tmp}/spacedock-survey}" + DB="$SURVEY_DB_DIR/sessions.db" +fi + +if [ ! 
-f "$DB" ]; then + echo "survey DB not found: $DB" >&2 + exit 1 +fi + +sql_quote() { + printf "%s" "$1" | sed "s/'/''/g" +} + +PROJECT="${SURVEY_PROJECT:-$(basename "$(pwd)" | sed 's/[^a-zA-Z0-9]/_/g')}" +PROJECT_SQL=$(sql_quote "$PROJECT") +NS="file_path NOT LIKE '%/subagents/%'" +CL="project='$PROJECT_SQL' AND agent='claude' AND $NS" +SIDS="(SELECT id FROM sessions WHERE $CL)" + +echo "## OVERVIEW" +sqlite3 "$DB" "SELECT COUNT(*)||' sessions '||substr(MIN(started_at),1,10)||' .. '||substr(MAX(ended_at),1,10) FROM sessions WHERE $CL;" + +echo "## INTERRUPTIONS (how often you had to step in)" +# Human-decision tools: AskUserQuestion + ExitPlanMode are the points where the agent stopped to ask you. +sqlite3 "$DB" "SELECT 'asks='||COALESCE(SUM(tool_name='AskUserQuestion'),0)||' plans='||COALESCE(SUM(tool_name='ExitPlanMode'),0) FROM tool_calls t JOIN sessions s ON t.session_id=s.id WHERE s.$CL;" +# Veto: a "[Request interrupted" / "Request interrupted by user" / "doesn't want to proceed" marker is a hard course-correction. +sqlite3 "$DB" "SELECT 'vetoes='||COUNT(*) FROM messages m JOIN sessions s ON m.session_id=s.id WHERE s.$CL AND (m.content LIKE '%[Request interrupted%' OR m.content LIKE '%Request interrupted by user%' OR m.content LIKE '%doesn''t want to proceed%');" +sqlite3 "$DB" "SELECT 'user_turns='||COUNT(*) FROM messages WHERE session_id IN $SIDS AND role='user' AND content NOT LIKE '%tool_result%' AND content NOT LIKE '[{%';" + +echo "## DECISIONS (header :: status :: question; OPEN = still needs the human)" +# A decision is done ONLY when Claude emitted an answered-confirmation. Every +# other result shape is OPEN: rejected questions, malformed-question tool errors, +# and prompt echoes are all non-answers on real agentsview v0.32.1 data. +# OPEN sorts before done so the load-bearing frontier cannot be truncated by LIMIT. 
+sqlite3 -separator ' :: ' "$DB" "SELECT COALESCE(json_extract(input_json,'\$.questions[0].header'),'PLAN'), CASE WHEN result_content LIKE 'User has answered%' OR result_content LIKE 'Your questions have been answered%' OR result_content LIKE 'Your question has been answered%' THEN 'done' ELSE 'OPEN' END AS status, substr(replace(COALESCE(json_extract(input_json,'\$.questions[0].question'),input_json),char(10),' '),1,110) FROM tool_calls t JOIN sessions s ON t.session_id=s.id WHERE s.$CL AND tool_name IN ('AskUserQuestion','ExitPlanMode') ORDER BY status ASC, t.id DESC LIMIT 20;" + +echo "## RECENT PROMPTS (workstream signal; noisy, secondary)" +sqlite3 "$DB" "SELECT DISTINCT substr(trim(COALESCE(NULLIF(first_message,''),'?')),1,70) FROM sessions WHERE $CL AND COALESCE(first_message,'') NOT LIKE '{%' AND COALESCE(first_message,'') NOT LIKE '<%' AND length(COALESCE(first_message,''))>14 ORDER BY started_at DESC LIMIT 25;"