Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions internal/scanner/rootkind_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package scanner

import (
"fmt"
"path/filepath"
"testing"

"github.com/perplexityai/bumblebee/internal/model"
)

func TestNewRootKindLookup(t *testing.T) {
sep := string(filepath.Separator)
homeUser := filepath.Join(sep+"home", "alice")
homeProj := filepath.Join(homeUser, "src", "proj")
system := filepath.Join(sep+"usr", "local")

roots := []Root{
{Path: homeUser, Kind: model.RootKindUserPackage},
{Path: homeProj, Kind: model.RootKindProject},
{Path: system, Kind: model.RootKindHomebrew},
{Path: "", Kind: "ignored"},
}
lookup := newRootKindLookup(roots)

tests := []struct {
name string
path string
want string
}{
{"empty path", "", model.RootKindUnknown},
{"exact match user home", homeUser, model.RootKindUserPackage},
{"deep under user home", filepath.Join(homeUser, "Library", "Caches"), model.RootKindUserPackage},
{"longest-match wins", filepath.Join(homeProj, "package.json"), model.RootKindProject},
{"system root", filepath.Join(system, "bin", "go"), model.RootKindHomebrew},
{"outside all roots", filepath.Join(sep+"var", "log", "x"), model.RootKindUnknown},
{"prefix overlap without separator is not a match", homeUser + "extra", model.RootKindUnknown},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := lookup(tc.path); got != tc.want {
t.Fatalf("lookup(%q) = %q, want %q", tc.path, got, tc.want)
}
})
}
}

func BenchmarkNewRootKindLookup(b *testing.B) {
sep := string(filepath.Separator)
roots := []Root{
{Path: filepath.Join(sep+"home", "alice"), Kind: model.RootKindUserPackage},
{Path: filepath.Join(sep+"home", "alice", "src", "proj"), Kind: model.RootKindProject},
{Path: filepath.Join(sep+"home", "alice", "src", "other"), Kind: model.RootKindProject},
{Path: filepath.Join(sep+"usr", "local"), Kind: model.RootKindHomebrew},
{Path: filepath.Join(sep+"opt", "homebrew"), Kind: model.RootKindHomebrew},
{Path: filepath.Join(sep+"Applications"), Kind: model.RootKindHomebrew},
}
lookup := newRootKindLookup(roots)

paths := make([]string, 0, 64)
for i := range 16 {
paths = append(paths,
filepath.Join(sep+"home", "alice", "src", "proj", "node_modules", "dep", fmt.Sprintf("file-%d.json", i)),
filepath.Join(sep+"home", "alice", "Library", "Caches", fmt.Sprintf("pkg-%d.json", i)),
filepath.Join(sep+"usr", "local", "lib", fmt.Sprintf("x-%d.json", i)),
filepath.Join(sep+"var", "log", fmt.Sprintf("y-%d.json", i)),
)
}

b.ReportAllocs()
var sink string
for b.Loop() {
for _, p := range paths {
sink = lookup(p)
}
}
_ = sink
}
42 changes: 29 additions & 13 deletions internal/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"io/fs"
"os"
"path/filepath"
"slices"
"strings"
"sync"
"syscall"
Expand Down Expand Up @@ -497,8 +498,19 @@ func rootPaths(roots []Root) []string {
// to the RootKind of the longest root that contains it. Paths that are
// outside any configured root (e.g. when a parser hands us a value
// the walker did not visit) get RootKindUnknown.
//
// The returned closure is called per emitted record, so the per-root
// "path + separator" strings used for prefix matching are pre-computed
// once, and the roots are sorted longest-first so that the first match
// is also the longest and the scan can stop there.
func newRootKindLookup(roots []Root) func(string) string {
cleaned := make([]Root, 0, len(roots))
type rootMatch struct {
path string
prefix string // path + filepath.Separator
kind string
}
sep := string(filepath.Separator)
cleaned := make([]rootMatch, 0, len(roots))
for _, r := range roots {
if r.Path == "" {
continue
Expand All @@ -507,27 +519,31 @@ func newRootKindLookup(roots []Root) func(string) string {
if err != nil {
p = r.Path
}
cleaned = append(cleaned, Root{Path: filepath.Clean(p), Kind: r.Kind})
p = filepath.Clean(p)
cleaned = append(cleaned, rootMatch{path: p, prefix: p + sep, kind: r.Kind})
}
// Longest path first so the first prefix hit is also the longest.
// Stable to preserve input order among equal-length roots, matching
// the previous strict-greater-than tie-break.
slices.SortStableFunc(cleaned, func(a, b rootMatch) int {
return len(b.path) - len(a.path)
})
return func(path string) string {
if path == "" {
return model.RootKindUnknown
}
abs, err := filepath.Abs(path)
if err != nil {
abs = path
abs := path
if !filepath.IsAbs(abs) {
if a, err := filepath.Abs(abs); err == nil {
abs = a
}
}
abs = filepath.Clean(abs)
bestLen := -1
best := model.RootKindUnknown
for _, r := range cleaned {
if abs == r.Path || strings.HasPrefix(abs, r.Path+string(filepath.Separator)) {
if len(r.Path) > bestLen {
bestLen = len(r.Path)
best = r.Kind
}
if abs == r.path || strings.HasPrefix(abs, r.prefix) {
return r.kind
}
}
return best
return model.RootKindUnknown
}
}