From 80bc3846549476d09e7d5b134060f058e889e32d Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 9 May 2026 13:14:24 +0800 Subject: [PATCH 1/6] fix(knowledge): bootstrap workspace root before sentinel lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit withSentinelLock used to call acquireSentinelLock directly, which opens the sentinel with O_CREATE but doesn't MkdirAll the parent. When an operator wipes ~/.flashduty (or a fresh container starts before any prior run touched the path), every ReconcileKnowledgeManifest then loops on "open .../.safari-knowledge-sentinel.json: no such file or directory" and the workspace stays permanently empty until the user manually recreates the directory. MkdirAll(filepath.Dir(sentinelPath)) inside withSentinelLock so the self-heal path runs on every reconcile, not only when something else happens to have created the dir first. Test: TestReconcileKnowledgeManifest_RecreatesWipedRoot — wipes the workspace mid-test and asserts the next reconcile recreates the sentinel + reports the manifest entry as needing stage. --- environment/knowledge.go | 9 +++++++++ environment/knowledge_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/environment/knowledge.go b/environment/knowledge.go index 455d535..cf2cb6a 100644 --- a/environment/knowledge.go +++ b/environment/knowledge.go @@ -117,6 +117,15 @@ func atomicWriteFile(path string, data []byte, perm os.FileMode) error { // - knowledge_flock_windows.go: no-op (runner not shipped on Windows; // also avoids holding a handle that would block os.Rename of the sentinel) func withSentinelLock(sentinelPath string, fn func() error) error { + // Bootstrap the workspace root if a user (or test harness) has wiped it + // out from under us. acquireSentinelLock opens with O_CREATE but won't + // MkdirAll the parent — without this, a freshly-deleted ~/.flashduty + // surfaces as repeated "no such file or directory" errors that block + // every subsequent ReconcileKnowledgeManifest until the user manually + // re-creates the directory. + if err := os.MkdirAll(filepath.Dir(sentinelPath), 0o755); err != nil { + return fmt.Errorf("ensure sentinel parent dir: %w", err) + } release, err := acquireSentinelLock(sentinelPath) if err != nil { return err diff --git a/environment/knowledge_test.go b/environment/knowledge_test.go index e8384e7..2c05795 100644 --- a/environment/knowledge_test.go +++ b/environment/knowledge_test.go @@ -289,6 +289,35 @@ func stageOne(t *testing.T, ws *Environment, relPath, sum, body string) { // Reconcile case 1: every staged file matches the manifest → no prune, // kept_count == staged count, sentinel untouched, nothing needs staging. +// TestReconcileKnowledgeManifest_RecreatesWipedRoot locks in the bootstrap +// guarantee: if the user (or test cleanup, or `rm -rf ~/.flashduty/`) removes +// the workspace root after the runner has come up, the next reconcile must +// re-create the directory and proceed instead of returning ENOENT on every +// turn until manual intervention. Pre-fix this surfaced as repeated +// "open .../.safari-knowledge-sentinel.json: no such file or directory" warns +// and a permanently empty knowledge workspace. +func TestReconcileKnowledgeManifest_RecreatesWipedRoot(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Wipe the entire workspace root the way an operator would. + require.NoError(t, os.RemoveAll(ws.Root())) + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-a"}, + }, + }) + require.NoError(t, err) + // Empty workspace → manifest entry is "needs stage", no kept/pruned files. + assert.Equal(t, []string{"knowledge/account/runbook.md"}, res.NeedsStage) + assert.Equal(t, 0, res.KeptCount) + assert.Empty(t, res.Pruned) + // Sentinel file (and therefore its parent dir) should now exist again. + _, statErr := os.Stat(filepath.Join(ws.Root(), sentinelName)) + require.NoError(t, statErr, "sentinel must be re-created after workspace wipe") +} + func TestReconcileKnowledgeManifest_AllMatch(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() From 883fb49fe75213cc6d95fd70a65782d3b95aeb17 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 9 May 2026 16:38:47 +0800 Subject: [PATCH 2/6] feat(env): add pack_folder RPC for runner-side skill zipping --- environment/pack_folder.go | 110 ++++++++++++++++++++++++++++ environment/pack_folder_test.go | 125 ++++++++++++++++++++++++++++++++ protocol/messages.go | 17 +++++ ws/handler.go | 8 ++ 4 files changed, 260 insertions(+) create mode 100644 environment/pack_folder.go create mode 100644 environment/pack_folder_test.go diff --git a/environment/pack_folder.go b/environment/pack_folder.go new file mode 100644 index 0000000..0749f6c --- /dev/null +++ b/environment/pack_folder.go @@ -0,0 +1,110 @@ +package environment + +import ( + "archive/zip" + "bytes" + "context" + "encoding/base64" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +// PackFolder walks args.Path, applies the skill exclusion rules, and +// returns a base64-encoded zip whose entries are rooted at the basename +// of args.Path. +func (e *Environment) PackFolder(ctx context.Context, args *protocol.PackFolderArgs) (*protocol.PackFolderResult, error) { + if args == nil || args.Path == "" { + return nil, errors.New("path is required") + } + info, err := os.Stat(args.Path) + if err != nil { + return nil, fmt.Errorf("stat %q: %w", args.Path, err) + } + if !info.IsDir() { + return nil, fmt.Errorf("path is not a directory: %s", args.Path) + } + + base := filepath.Base(args.Path) + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + fileCount := 0 + + walkErr := filepath.WalkDir(args.Path, func(p string, d fs.DirEntry, werr error) error { + if werr != nil { + return werr + } + rel, rerr := filepath.Rel(args.Path, p) + if rerr != nil { + return rerr + } + if rel == "." { + return nil + } + if shouldExcludePackPath(rel, d.IsDir(), args.SkillRoot) { + if d.IsDir() { + return filepath.SkipDir + } + return nil + } + if d.IsDir() { + return nil + } + zipPath := base + "/" + filepath.ToSlash(rel) + f, oerr := os.Open(p) + if oerr != nil { + return oerr + } + defer f.Close() + w, cerr := zw.Create(zipPath) + if cerr != nil { + return cerr + } + if _, ierr := io.Copy(w, f); ierr != nil { + return ierr + } + fileCount++ + if args.MaxBytes > 0 && int64(buf.Len()) > args.MaxBytes { + return fmt.Errorf("zip exceeds max_bytes (%d)", args.MaxBytes) + } + return nil + }) + if walkErr != nil { + return nil, walkErr + } + if err := zw.Close(); err != nil { + return nil, err + } + if args.MaxBytes > 0 && int64(buf.Len()) > args.MaxBytes { + return nil, fmt.Errorf("zip exceeds max_bytes (%d)", args.MaxBytes) + } + + return &protocol.PackFolderResult{ + ZipB64: base64.StdEncoding.EncodeToString(buf.Bytes()), + SizeBytes: int64(buf.Len()), + FileCount: fileCount, + }, nil +} + +// shouldExcludePackPath mirrors scripts/package_skill.py's exclusion list. +func shouldExcludePackPath(rel string, isDir bool, skillRoot bool) bool { + parts := strings.Split(rel, string(os.PathSeparator)) + name := parts[len(parts)-1] + if name == "__pycache__" || name == "node_modules" || name == ".DS_Store" { + return true + } + if !isDir && strings.HasSuffix(name, ".pyc") { + return true + } + // evals/ is only excluded when it is a direct child of the skill root + if skillRoot && len(parts) == 1 && isDir && name == "evals" { + return true + } + return false +} diff --git a/environment/pack_folder_test.go b/environment/pack_folder_test.go new file mode 100644 index 0000000..bf4c3d1 --- /dev/null +++ b/environment/pack_folder_test.go @@ -0,0 +1,125 @@ +package environment + +import ( + "archive/zip" + "bytes" + "context" + "encoding/base64" + "os" + "path/filepath" + "testing" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +func TestPackFolder_HappyPath(t *testing.T) { + dir := t.TempDir() + skill := filepath.Join(dir, "weekly-report") + mustMkdir(t, skill) + mustWrite(t, filepath.Join(skill, "SKILL.md"), "---\nname: weekly-report\n---\nbody") + mustMkdir(t, filepath.Join(skill, "evals")) + mustWrite(t, filepath.Join(skill, "evals", "evals.json"), "{}") + mustMkdir(t, filepath.Join(skill, "__pycache__")) + mustWrite(t, filepath.Join(skill, "__pycache__", "x.pyc"), "junk") + + e := &Environment{} + res, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ + Path: skill, + SkillRoot: true, + MaxBytes: 1024 * 1024, + }) + if err != nil { + t.Fatalf("pack: %v", err) + } + + raw, _ := base64.StdEncoding.DecodeString(res.ZipB64) + zr, _ := zip.NewReader(bytes.NewReader(raw), int64(len(raw))) + names := make(map[string]bool) + for _, f := range zr.File { + names[f.Name] = true + } + + if !names["weekly-report/SKILL.md"] { + t.Errorf("missing SKILL.md, got %v", names) + } + if names["weekly-report/evals/evals.json"] { + t.Errorf("evals/ should be excluded, got %v", names) + } + if names["weekly-report/__pycache__/x.pyc"] { + t.Errorf("__pycache__ should be excluded, got %v", names) + } + if res.FileCount != 1 { + t.Errorf("file_count = %d, want 1", res.FileCount) + } +} + +func TestPackFolder_NotADir(t *testing.T) { + f := filepath.Join(t.TempDir(), "x") + mustWrite(t, f, "hi") + e := &Environment{} + if _, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{Path: f}); err == nil { + t.Fatal("expected error for non-dir") + } +} + +func TestPackFolder_SizeCap(t *testing.T) { + dir := t.TempDir() + skill := filepath.Join(dir, "big") + mustMkdir(t, skill) + big := make([]byte, 200*1024) + for i := range big { + big[i] = byte(i % 251) + } + mustWrite(t, filepath.Join(skill, "SKILL.md"), string(big)) + e := &Environment{} + if _, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ + Path: skill, + MaxBytes: 1024, + }); err == nil { + t.Fatal("expected size cap error") + } +} + +func TestPackFolder_SkillRootFalseKeepsEvals(t *testing.T) { + dir := t.TempDir() + skill := filepath.Join(dir, "x") + mustMkdir(t, skill) + mustWrite(t, filepath.Join(skill, "SKILL.md"), "x") + mustMkdir(t, filepath.Join(skill, "evals")) + mustWrite(t, filepath.Join(skill, "evals", "y.json"), "{}") + + e := &Environment{} + res, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ + Path: skill, + SkillRoot: false, + MaxBytes: 1024 * 1024, + }) + if err != nil { + t.Fatal(err) + } + raw, _ := base64.StdEncoding.DecodeString(res.ZipB64) + zr, _ := zip.NewReader(bytes.NewReader(raw), int64(len(raw))) + found := false + for _, f := range zr.File { + if f.Name == "x/evals/y.json" { + found = true + } + } + if !found { + t.Error("evals/ should NOT be excluded when skill_root=false") + } +} + +func mustMkdir(t *testing.T, p string) { + t.Helper() + if err := os.MkdirAll(p, 0o755); err != nil { + t.Fatal(err) + } +} + +func mustWrite(t *testing.T, p, body string) { + t.Helper() + if err := os.WriteFile(p, []byte(body), 0o644); err != nil { + t.Fatal(err) + } +} diff --git a/protocol/messages.go b/protocol/messages.go index 0ef403a..60b55da 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -115,6 +115,7 @@ const ( TaskOpMCPCall TaskOperation = "mcp_call" TaskOpMCPListTools TaskOperation = "mcp_list_tools" TaskOpSyncSkill TaskOperation = "sync_skill" + TaskOpPackFolder TaskOperation = "pack_folder" TaskOpStageKnowledgeFiles TaskOperation = "stage_knowledge_files" TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files" TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest" @@ -322,6 +323,22 @@ type SyncSkillArgs struct { Checksum string `json:"checksum"` } +// PackFolderArgs walks a directory inside the workspace and returns a zip +// archive of its contents. Used by Safari's sync_files(type="skill") path +// so the agent never has to package skills by hand. +type PackFolderArgs struct { + Path string `json:"path"` // absolute path inside workspace + SkillRoot bool `json:"skill_root"` // when true, also exclude evals/ at the root + MaxBytes int64 `json:"max_bytes"` // 0 = no cap; >0 = error if zip exceeds +} + +// PackFolderResult carries the produced archive back to Safari. +type PackFolderResult struct { + ZipB64 string `json:"zip_b64"` // base64-encoded zip bytes + SizeBytes int64 `json:"size_bytes"` + FileCount int `json:"file_count"` +} + // SyncSkillResult is the result of a sync_skill operation. // // Probe semantics: when SyncSkillArgs.ZipData is empty, the runner checks its diff --git a/ws/handler.go b/ws/handler.go index 1452d79..7d8a06f 100644 --- a/ws/handler.go +++ b/ws/handler.go @@ -254,6 +254,14 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl logger.Info("syncing skill", "skill", args.SkillName, "dir", args.SkillDir) return h.ws.SyncSkill(ctx, args) + case protocol.TaskOpPackFolder: + args, err := parseArgs[protocol.PackFolderArgs](req.Args) + if err != nil { + return nil, fmt.Errorf("invalid pack_folder args: %w", err) + } + logger.Info("pack folder", "path", args.Path) + return h.ws.PackFolder(ctx, args) + case protocol.TaskOpStageKnowledgeFiles: args, err := parseArgs[protocol.StageKnowledgeFilesArgs](req.Args) if err != nil { From 59612f9f49cc0a9a4a9adb6651b270e9c9642563 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 9 May 2026 17:19:08 +0800 Subject: [PATCH 3/6] fix(environment): pack_folder resolves wire path against workspace root PackFolder was calling os.Stat(args.Path) directly, but the safari side sends a workspace-relative path (mirroring how Read receives paths). Without safePath() the runner stat'd the path relative to its cwd and returned `no such file or directory` for `skills/`. Switch to e.safePath() like every other op, and update tests to construct an Environment with a real workspace root + relative paths. --- environment/pack_folder.go | 17 +++++++++----- environment/pack_folder_test.go | 39 +++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/environment/pack_folder.go b/environment/pack_folder.go index 0749f6c..e15e13a 100644 --- a/environment/pack_folder.go +++ b/environment/pack_folder.go @@ -23,24 +23,31 @@ func (e *Environment) PackFolder(ctx context.Context, args *protocol.PackFolderA if args == nil || args.Path == "" { return nil, errors.New("path is required") } - info, err := os.Stat(args.Path) + // Resolve the wire-relative path against the workspace root, same way + // Read does. Without this, walking args.Path directly stats a path + // relative to the runner's cwd instead of ~/.flashduty/skills/. + realPath, err := e.safePath(args.Path) if err != nil { - return nil, fmt.Errorf("stat %q: %w", args.Path, err) + return nil, err + } + info, err := os.Stat(realPath) + if err != nil { + return nil, fmt.Errorf("stat %q: %w", realPath, err) } if !info.IsDir() { return nil, fmt.Errorf("path is not a directory: %s", args.Path) } - base := filepath.Base(args.Path) + base := filepath.Base(realPath) var buf bytes.Buffer zw := zip.NewWriter(&buf) fileCount := 0 - walkErr := filepath.WalkDir(args.Path, func(p string, d fs.DirEntry, werr error) error { + walkErr := filepath.WalkDir(realPath, func(p string, d fs.DirEntry, werr error) error { if werr != nil { return werr } - rel, rerr := filepath.Rel(args.Path, p) + rel, rerr := filepath.Rel(realPath, p) if rerr != nil { return rerr } diff --git a/environment/pack_folder_test.go b/environment/pack_folder_test.go index bf4c3d1..91cbff8 100644 --- a/environment/pack_folder_test.go +++ b/environment/pack_folder_test.go @@ -9,12 +9,23 @@ import ( "path/filepath" "testing" + "github.com/flashcatcloud/flashduty-runner/permission" "github.com/flashcatcloud/flashduty-runner/protocol" ) +func newPackEnv(t *testing.T) *Environment { + t.Helper() + checker := permission.NewChecker(map[string]string{"*": "allow"}) + e, err := New(t.TempDir(), checker) + if err != nil { + t.Fatalf("New: %v", err) + } + return e +} + func TestPackFolder_HappyPath(t *testing.T) { - dir := t.TempDir() - skill := filepath.Join(dir, "weekly-report") + e := newPackEnv(t) + skill := filepath.Join(e.Root(), "weekly-report") mustMkdir(t, skill) mustWrite(t, filepath.Join(skill, "SKILL.md"), "---\nname: weekly-report\n---\nbody") mustMkdir(t, filepath.Join(skill, "evals")) @@ -22,9 +33,8 @@ func TestPackFolder_HappyPath(t *testing.T) { mustMkdir(t, filepath.Join(skill, "__pycache__")) mustWrite(t, filepath.Join(skill, "__pycache__", "x.pyc"), "junk") - e := &Environment{} res, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ - Path: skill, + Path: "weekly-report", SkillRoot: true, MaxBytes: 1024 * 1024, }) @@ -54,26 +64,24 @@ func TestPackFolder_HappyPath(t *testing.T) { } func TestPackFolder_NotADir(t *testing.T) { - f := filepath.Join(t.TempDir(), "x") - mustWrite(t, f, "hi") - e := &Environment{} - if _, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{Path: f}); err == nil { + e := newPackEnv(t) + mustWrite(t, filepath.Join(e.Root(), "x"), "hi") + if _, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{Path: "x"}); err == nil { t.Fatal("expected error for non-dir") } } func TestPackFolder_SizeCap(t *testing.T) { - dir := t.TempDir() - skill := filepath.Join(dir, "big") + e := newPackEnv(t) + skill := filepath.Join(e.Root(), "big") mustMkdir(t, skill) big := make([]byte, 200*1024) for i := range big { big[i] = byte(i % 251) } mustWrite(t, filepath.Join(skill, "SKILL.md"), string(big)) - e := &Environment{} if _, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ - Path: skill, + Path: "big", MaxBytes: 1024, }); err == nil { t.Fatal("expected size cap error") @@ -81,16 +89,15 @@ func TestPackFolder_SizeCap(t *testing.T) { } func TestPackFolder_SkillRootFalseKeepsEvals(t *testing.T) { - dir := t.TempDir() - skill := filepath.Join(dir, "x") + e := newPackEnv(t) + skill := filepath.Join(e.Root(), "x") mustMkdir(t, skill) mustWrite(t, filepath.Join(skill, "SKILL.md"), "x") mustMkdir(t, filepath.Join(skill, "evals")) mustWrite(t, filepath.Join(skill, "evals", "y.json"), "{}") - e := &Environment{} res, err := e.PackFolder(context.Background(), &protocol.PackFolderArgs{ - Path: skill, + Path: "x", SkillRoot: false, MaxBytes: 1024 * 1024, }) From 4b8642b6d0148dea0377f4fc0920d6706a1103a6 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 14 May 2026 10:41:56 +0800 Subject: [PATCH 4/6] feat(env): add Env map to BashArgs for per-call env injection Lets a caller (typically the cloud safari forwarder) hand the runner per-tenant secrets (e.g. FLASHDUTY_APP_KEY) via the bash protocol instead of baking them into the SKILL.md the LLM sees. The shell expands them at exec time so the secret only lives in process memory and never appears in transport-layer logs that mirror Command. - protocol.BashArgs gains Env map[string]string, omitempty for backward compat with older callers. - executeBashCommand merges Env on top of os.Environ() and sets cmd.Env only when extraEnv is non-empty, so existing zero-Env callers continue to inherit unchanged. - No log site touches args.Env (ws/handler.go logs command + workdir only); doc comment on BashArgs.Env spells out the no-log invariant for future log surfaces. --- environment/environment.go | 18 ++++++++++++++++-- environment/environment_test.go | 31 +++++++++++++++++++++++++++++++ protocol/messages.go | 14 +++++++++++--- 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/environment/environment.go b/environment/environment.go index 2291f26..1e50270 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -598,7 +598,7 @@ func (e *Environment) Bash(ctx context.Context, args *protocol.BashArgs) (*proto } timeout := e.resolveTimeout(args.Timeout) - result, err := e.executeBashCommand(ctx, args.Command, workdir, timeout) + result, err := e.executeBashCommand(ctx, args.Command, workdir, timeout, args.Env) if err != nil { return result, err } @@ -631,12 +631,26 @@ func (e *Environment) resolveTimeout(timeoutSec int) time.Duration { } // executeBashCommand executes a bash command with the given parameters. -func (e *Environment) executeBashCommand(ctx context.Context, command, workdir string, timeout time.Duration) (*protocol.BashResult, error) { +// +// extraEnv is merged on top of the runner process's own environment. Values +// may be secrets — they are NEVER logged here or in callers. The bash command +// should reference them as `$VAR_NAME` so expansion happens in-shell. +func (e *Environment) executeBashCommand(ctx context.Context, command, workdir string, timeout time.Duration, extraEnv map[string]string) (*protocol.BashResult, error) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool cmd.Dir = workdir + if len(extraEnv) > 0 { + // Inherit parent env, then layer caller-provided entries on top so + // duplicates take the caller value (matches `exec.Command` semantics + // when cmd.Env is set — last KEY=VALUE wins). + merged := os.Environ() + for k, v := range extraEnv { + merged = append(merged, k+"="+v) + } + cmd.Env = merged + } // 10MB per-stream cap prevents OOM from runaway commands while leaving // plenty of headroom for normal LLM-context output. diff --git a/environment/environment_test.go b/environment/environment_test.go index 5a3c81f..c40680b 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -269,6 +269,37 @@ func TestEnvironment_Bash(t *testing.T) { assert.Equal(t, 42, result.ExitCode) } +func TestEnvironment_Bash_Env(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Caller-provided env reaches the shell. + result, err := ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf %s "$MY_TOKEN"`, + Env: map[string]string{"MY_TOKEN": "s3cret-value"}, + }) + require.NoError(t, err) + assert.Equal(t, "s3cret-value", result.Stdout) + assert.Equal(t, 0, result.ExitCode) + + // Caller env overrides any inherited value (last KEY=VALUE wins). + t.Setenv("OVERRIDE_ME", "inherited") + result, err = ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf %s "$OVERRIDE_ME"`, + Env: map[string]string{"OVERRIDE_ME": "caller-wins"}, + }) + require.NoError(t, err) + assert.Equal(t, "caller-wins", result.Stdout) + + // Empty Env preserves inherited environment. + t.Setenv("KEEP_ME", "still-here") + result, err = ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf %s "$KEEP_ME"`, + }) + require.NoError(t, err) + assert.Equal(t, "still-here", result.Stdout) +} + func TestEnvironment_Bash_PermissionDenied(t *testing.T) { tmpDir := t.TempDir() // Note: rules are sorted alphabetically, so "echo *" comes after "*" diff --git a/protocol/messages.go b/protocol/messages.go index 60b55da..61c4d99 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -177,10 +177,18 @@ type GrepArgs struct { } // BashArgs are the arguments for bash operation. +// +// Env carries per-call environment variables the caller wants merged on top of +// the runner process's own environment. Values may be secrets (auth tokens, +// per-tenant API keys), so the runner MUST NOT log them — only the keys are +// safe to surface. The bash command itself should reference these as +// `$VAR_NAME` so the secret expands inside the shell and never appears in +// transport-layer logs that mirror Command. type BashArgs struct { - Command string `json:"command"` - Workdir string `json:"workdir,omitempty"` - Timeout int `json:"timeout,omitempty"` // seconds + Command string `json:"command"` + Workdir string `json:"workdir,omitempty"` + Timeout int `json:"timeout,omitempty"` // seconds + Env map[string]string `json:"env,omitempty"` } // TaskOutputPayload is the payload for streaming task output. From 5235ffe04dbb7e8363a7e8199d1bb67329e6828c Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 14 May 2026 10:50:55 +0800 Subject: [PATCH 5/6] test(env): wire-format coverage for new BashArgs.Env field Adds two assertions that didn't fit the original commit: - Safari-emitted JSON ({"command":"...","env":{...}}) round-trips through json.Unmarshal into protocol.BashArgs and the value reaches the executing shell. - An older safari that omits "env" entirely still parses and runs unchanged (legacy.Env stays nil, no spurious overrides). Together with TestEnvironment_Bash_Env this gives end-to-end coverage from the wire format through to bash -c without depending on the WebSocket transport. --- environment/environment_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/environment/environment_test.go b/environment/environment_test.go index c40680b..2307f83 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/base64" + "encoding/json" "errors" "os" "path/filepath" @@ -269,6 +270,33 @@ func TestEnvironment_Bash(t *testing.T) { assert.Equal(t, 42, result.ExitCode) } +// TestEnvironment_Bash_EnvWireFormat exercises the JSON shape the cloud +// safari emits — the runner must accept the new "env" field added in this +// commit, and an older safari that omits it must still work unchanged. +func TestEnvironment_Bash_EnvWireFormat(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Safari-emitted payload with env. + raw := []byte(`{"command":"printf %s \"$WIRE_TOKEN\"","env":{"WIRE_TOKEN":"on-the-wire"}}`) + var args protocol.BashArgs + require.NoError(t, json.Unmarshal(raw, &args)) + assert.Equal(t, "on-the-wire", args.Env["WIRE_TOKEN"]) + + result, err := ws.Bash(ctx, &args) + require.NoError(t, err) + assert.Equal(t, "on-the-wire", result.Stdout) + + // Older safari shape (no "env" key) must still parse + run. + raw = []byte(`{"command":"echo legacy"}`) + var legacy protocol.BashArgs + require.NoError(t, json.Unmarshal(raw, &legacy)) + assert.Nil(t, legacy.Env) + result, err = ws.Bash(ctx, &legacy) + require.NoError(t, err) + assert.Equal(t, "legacy\n", result.Stdout) +} + func TestEnvironment_Bash_Env(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() From b9bed4cea377b922c9de25c330052513e8b740e1 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 14 May 2026 11:18:50 +0800 Subject: [PATCH 6/6] fix(ci): clear lint + Windows test backlog uncovered by PR #46 PR #46 was the first CI run after pack_folder commits 883fb49 + 59612f9 landed on main via direct push, so three pre-existing issues surfaced here for the first time: - environment/pack_folder.go errcheck: defer f.Close() now wraps in func() { _ = f.Close() } so the unchecked return value goes away. - protocol/messages.go gofmt: collapse the extra spaces between the json tag and the trailing comment on PackFolderResult.ZipB64 (only the first field had a comment, so gofmt wants a single space). - environment/knowledge_flock_windows.go: acquireSentinelLock on Windows previously returned a no-op without materializing the sentinel file, breaking TestReconcileKnowledgeManifest_RecreatesWipedRoot whenever the workspace was wiped (the unix path implicitly creates the file via the lockfd's O_CREATE; Windows didn't). Open+immediately- close the file under O_CREATE so callers see the same post-condition on both platforms, then return the same no-op release so concurrent rename semantics aren't blocked. --- environment/knowledge_flock_windows.go | 28 +++++++++++++++++++------- environment/pack_folder.go | 2 +- protocol/messages.go | 2 +- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/environment/knowledge_flock_windows.go b/environment/knowledge_flock_windows.go index a3d4b7c..56e9f56 100644 --- a/environment/knowledge_flock_windows.go +++ b/environment/knowledge_flock_windows.go @@ -2,12 +2,26 @@ package environment -// acquireSentinelLock is a no-op on Windows. The runner is not shipped on -// Windows; this stub exists only so `go build` and `go test` succeed on -// windows/amd64 in CI. Returning an unlocked sentinel path avoids holding an -// open handle that would otherwise block os.Rename on the sentinel file. -// Concurrent sessions on Windows would race — acceptable for a non-production -// platform. -func acquireSentinelLock(_ string) (release func(), err error) { +import "os" + +// acquireSentinelLock is effectively a no-op on Windows: the runner is not +// shipped on Windows; this stub exists only so `go build` and `go test` +// succeed on windows/amd64 in CI. Concurrent sessions on Windows would race — +// acceptable for a non-production platform. +// +// One thing we DO need to mirror from the unix path: O_CREATE the sentinel +// file if it's missing, then close immediately. The unix lockfd holds an +// O_CREATE+O_RDWR handle so callers like ReconcileKnowledgeManifest implicitly +// get the file materialized before they probe it; Windows callers must see +// the same post-condition (sentinel exists after a successful lock-and-run +// cycle), otherwise tests like TestReconcileKnowledgeManifest_RecreatesWipedRoot +// fail with "GetFileAttributesEx ... cannot find the file". We then close the +// handle right away so subsequent os.Rename calls aren't blocked. +func acquireSentinelLock(sentinelPath string) (release func(), err error) { + f, err := os.OpenFile(sentinelPath, os.O_RDWR|os.O_CREATE, 0o644) + if err != nil { + return nil, err + } + _ = f.Close() return func() {}, nil } diff --git a/environment/pack_folder.go b/environment/pack_folder.go index e15e13a..0b272c8 100644 --- a/environment/pack_folder.go +++ b/environment/pack_folder.go @@ -68,7 +68,7 @@ func (e *Environment) PackFolder(ctx context.Context, args *protocol.PackFolderA if oerr != nil { return oerr } - defer f.Close() + defer func() { _ = f.Close() }() w, cerr := zw.Create(zipPath) if cerr != nil { return cerr diff --git a/protocol/messages.go b/protocol/messages.go index 61c4d99..696bb75 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -342,7 +342,7 @@ type PackFolderArgs struct { // PackFolderResult carries the produced archive back to Safari. type PackFolderResult struct { - ZipB64 string `json:"zip_b64"` // base64-encoded zip bytes + ZipB64 string `json:"zip_b64"` // base64-encoded zip bytes SizeBytes int64 `json:"size_bytes"` FileCount int `json:"file_count"` }