@@ -8,11 +8,18 @@ import (
88 "log/slog"
99 "os"
1010 "path/filepath"
11+ "regexp"
1112 "strings"
1213
1314 "github.com/flashcatcloud/flashduty-runner/protocol"
1415)
1516
// teamScopeRe recognizes a team-scope directory name: the literal prefix
// `team_` followed by at least one digit and nothing else. The pattern is
// anchored at both ends, so names such as `team_`, `team_42a`, or `Team_42`
// do not match. Callers use it to refuse any scope segment that is not
// `account` or a well-formed team scope before touching the filesystem.
var teamScopeRe = regexp.MustCompile(`^team_\d+$`)
22+
1623const (
1724 // sentinelName is the hidden JSON map that tracks staged-file checksums.
1825 // Safari reads this to decide which knowledge pack files are already current.
@@ -21,27 +28,43 @@ const (
2128
2229// validateKnowledgeRelPath enforces the path rules for knowledge file operations.
2330//
24- // Rules (from the Safari-side contract):
25- // - Must not contain path separators or double-dot components — the runner
26- // only writes flat files in the workspace root, never in sub-directories.
27- // - Leading-dot filenames are rejected because they are hidden by convention;
28- // the sentinel is written by the runner itself and is never staged by clients.
31+ // Layout: every staged file lives under `knowledge/<scope>/<leaf>` where
32+ // scope is `account` or `team_<digits>`. The leading `knowledge/` segment
33+ // matches the runner-relative form of the read path Safari uses
34+ // (`<root>/knowledge/<scope>/<leaf>`), so a freshly staged file lands at
35+ // exactly the location a follow-up read will probe — no contract drift.
36+ //
37+ // Bare-leaf paths (`DUTY.md`), deeper trees (`knowledge/account/sub/foo.md`),
38+ // missing prefix (`account/foo.md`), unknown scope segments, and the
39+ // sentinel filename are all rejected. Backslashes are blocked to cover the
40+ // Windows-style traversal vector defensively even though the runner only
41+ // ships on unix.
2942func validateKnowledgeRelPath (relPath string ) error {
3043 if relPath == "" {
3144 return fmt .Errorf ("rel_path must not be empty" )
3245 }
33- if strings .ContainsAny (relPath , `/\` ) {
34- return fmt .Errorf ("rel_path must not contain path separators: %q" , relPath )
46+ if strings .ContainsRune (relPath , '\\' ) {
47+ return fmt .Errorf ("rel_path must not contain backslash: %q" , relPath )
48+ }
49+ parts := strings .Split (relPath , "/" )
50+ if len (parts ) != 3 {
51+ return fmt .Errorf ("rel_path must be knowledge/<scope>/<leaf>, got %q" , relPath )
52+ }
53+ root , scope , leaf := parts [0 ], parts [1 ], parts [2 ]
54+ if root != "knowledge" {
55+ return fmt .Errorf ("rel_path must start with 'knowledge/', got %q" , relPath )
3556 }
36- // Reject the bare ".." token. Slash-separated traversal like "foo/../bar"
37- // is already blocked above, but a plain ".." with no slashes still escapes.
38- if relPath == ".." {
39- return fmt .Errorf ("rel_path must not be '..': %q" , relPath )
57+ if scope != "account" && ! teamScopeRe .MatchString (scope ) {
58+ return fmt .Errorf ("rel_path scope must be 'account' or 'team_<digits>', got %q" , scope )
4059 }
41- if strings .HasPrefix (relPath , "." ) {
42- // Hidden files (including the sentinel itself) cannot be staged by clients.
43- // The runner owns the sentinel exclusively.
44- return fmt .Errorf ("rel_path must not start with '.': %q" , relPath )
60+ if leaf == "" || leaf == "." || leaf == ".." {
61+ return fmt .Errorf ("rel_path leaf must be a real filename, got %q" , leaf )
62+ }
63+ if strings .HasPrefix (leaf , "." ) {
64+ return fmt .Errorf ("rel_path leaf must not start with '.': %q" , leaf )
65+ }
66+ if leaf == sentinelName {
67+ return fmt .Errorf ("rel_path leaf must not be the sentinel filename" )
4568 }
4669 return nil
4770}
@@ -166,6 +189,12 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
166189 }
167190
168191 targetPath := filepath .Join (e .root , f .RelPath )
192+ if err := os .MkdirAll (filepath .Dir (targetPath ), 0o755 ); err != nil {
193+ status .Success = false
194+ status .Error = fmt .Sprintf ("failed to create scope directory: %v" , err )
195+ result .Files = append (result .Files , status )
196+ continue
197+ }
169198 if err := atomicWriteFile (targetPath , content , 0o644 ); err != nil {
170199 status .Success = false
171200 status .Error = err .Error ()
@@ -197,6 +226,138 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
197226 return result , nil
198227}
199228
229+ // ReconcileKnowledgeManifest reconciles the on-disk knowledge tree against the
230+ // supplied manifest. Files present on disk but absent from the manifest are
231+ // orphans (pruned). Files present in the manifest with a checksum that
232+ // disagrees with the sentinel are stale (also pruned, so the next read
233+ // triggers a fresh lazy install from Safari). Files whose checksum already
234+ // matches are left in place.
235+ //
236+ // The runner does NOT pre-stage anything in this call: the manifest declares
237+ // what *should* exist if it were read, not what *must* be cached. Cold packs
238+ // stay cold; only drift is corrected. The whole pass runs under the sentinel
239+ // lock so it is safe with concurrent stage calls from the same Safari.
240+ func (e * Environment ) ReconcileKnowledgeManifest (ctx context.Context , args * protocol.ReconcileKnowledgeManifestArgs ) (* protocol.ReconcileKnowledgeManifestResult , error ) {
241+ expected := make (map [string ]string , len (args .Files ))
242+ for _ , f := range args .Files {
243+ if err := validateKnowledgeRelPath (f .RelPath ); err != nil {
244+ slog .Warn ("skipping invalid manifest entry" , "rel_path" , f .RelPath , "error" , err )
245+ continue
246+ }
247+ expected [f .RelPath ] = f .Checksum
248+ }
249+
250+ result := & protocol.ReconcileKnowledgeManifestResult {}
251+ knowledgeRoot := filepath .Join (e .root , "knowledge" )
252+ sentinelPath := filepath .Join (e .root , sentinelName )
253+
254+ err := withSentinelLock (sentinelPath , func () error {
255+ sentinel := readSentinel (sentinelPath )
256+ // onDisk enumerates `knowledge/<scope>/<leaf>` paths actually present.
257+ // We walk the tree (rather than trusting the sentinel) so a manual
258+ // drop into the workspace — e.g. an operator copying a file in for
259+ // debugging — also gets reconciled instead of lingering forever.
260+ onDisk , err := walkKnowledgeTree (knowledgeRoot )
261+ if err != nil {
262+ return err
263+ }
264+
265+ dirty := false
266+ // onDiskSet lets us answer "is this manifest entry already cached?"
267+ // in O(1) below without a second walk.
268+ onDiskSet := make (map [string ]struct {}, len (onDisk ))
269+ for _ , relPath := range onDisk {
270+ expectedSum , want := expected [relPath ]
271+ switch {
272+ case ! want :
273+ // Orphan: not declared in the current manifest.
274+ if rmErr := os .Remove (filepath .Join (e .root , relPath )); rmErr != nil && ! os .IsNotExist (rmErr ) {
275+ slog .Warn ("failed to prune orphan knowledge file" , "rel_path" , relPath , "error" , rmErr )
276+ continue
277+ }
278+ delete (sentinel , relPath )
279+ result .Pruned = append (result .Pruned , relPath )
280+ dirty = true
281+ case sentinel [relPath ] != expectedSum :
282+ // Stale: sentinel disagrees with the manifest. Drop the file;
283+ // it'll come back via NeedsStage so Safari refetches it from S3.
284+ if rmErr := os .Remove (filepath .Join (e .root , relPath )); rmErr != nil && ! os .IsNotExist (rmErr ) {
285+ slog .Warn ("failed to prune stale knowledge file" , "rel_path" , relPath , "error" , rmErr )
286+ continue
287+ }
288+ delete (sentinel , relPath )
289+ result .Pruned = append (result .Pruned , relPath )
290+ result .StaleCount ++
291+ dirty = true
292+ default :
293+ result .KeptCount ++
294+ onDiskSet [relPath ] = struct {}{}
295+ }
296+ }
297+
298+ // Anything in the manifest that isn't in onDiskSet is a cache miss
299+ // the caller needs to fix — either it was just pruned for being stale
300+ // or it was never staged in the first place. The list is what powers
301+ // Safari's eager-stage step so the full pack lands on disk in one
302+ // batch instead of waiting for the agent to read each file.
303+ for relPath := range expected {
304+ if _ , ok := onDiskSet [relPath ]; ! ok {
305+ result .NeedsStage = append (result .NeedsStage , relPath )
306+ }
307+ }
308+
309+ if dirty {
310+ return writeSentinel (sentinelPath , sentinel )
311+ }
312+ return nil
313+ })
314+ if err != nil {
315+ return nil , fmt .Errorf ("reconcile manifest: %w" , err )
316+ }
317+ return result , nil
318+ }
319+
320+ // walkKnowledgeTree returns every leaf file under <knowledgeRoot>/<scope>/
321+ // as `knowledge/<scope>/<leaf>` rel-paths. Hidden files (the sentinel) and
322+ // anything failing validation are skipped; nested directories beneath a scope
323+ // are ignored because the layout forbids them.
324+ func walkKnowledgeTree (knowledgeRoot string ) ([]string , error ) {
325+ entries , err := os .ReadDir (knowledgeRoot )
326+ if err != nil {
327+ if os .IsNotExist (err ) {
328+ return nil , nil
329+ }
330+ return nil , fmt .Errorf ("read knowledge root: %w" , err )
331+ }
332+
333+ var paths []string
334+ for _ , scope := range entries {
335+ if ! scope .IsDir () {
336+ continue
337+ }
338+ scopeName := scope .Name ()
339+ if scopeName != "account" && ! teamScopeRe .MatchString (scopeName ) {
340+ continue
341+ }
342+ leaves , err := os .ReadDir (filepath .Join (knowledgeRoot , scopeName ))
343+ if err != nil {
344+ slog .Warn ("failed to read scope directory" , "scope" , scopeName , "error" , err )
345+ continue
346+ }
347+ for _ , leaf := range leaves {
348+ if leaf .IsDir () {
349+ continue
350+ }
351+ rel := "knowledge/" + scopeName + "/" + leaf .Name ()
352+ if err := validateKnowledgeRelPath (rel ); err != nil {
353+ continue
354+ }
355+ paths = append (paths , rel )
356+ }
357+ }
358+ return paths , nil
359+ }
360+
200361// DeleteKnowledgeFiles removes the supplied files from the workspace root and
201362// scrubs their entries from the sentinel.
202363func (e * Environment ) DeleteKnowledgeFiles (ctx context.Context , args * protocol.DeleteKnowledgeFilesArgs ) (* protocol.DeleteKnowledgeFilesResult , error ) {
0 commit comments