diff --git a/.run/godedup.run.xml b/.run/godedup.run.xml new file mode 100644 index 0000000..8511fc9 --- /dev/null +++ b/.run/godedup.run.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 24da9cc..608e84c 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ godedup ./... ``` ``` -$ godedup --output=table --no-tests +$ godedup --output=table --exclude '_test\.go$' GROUP TYPE SIM FUNCTION LOCATION STMTS LINES ------------------------------------------------------------------------------------------ @@ -57,10 +57,14 @@ calls hash identically. ## Install +#### Package + ```bash go install github.com/hashmap-kz/godedup@latest ``` +#### Brew + ```bash brew tap hashmap-kz/homebrew-tap brew install godedup @@ -73,22 +77,23 @@ brew install godedup ``` godedup [flags] [path ...] +Examples: + godedup ./... + godedup --exact ./... + godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... + godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... + godedup --output table ./... + godedup --output json ./... | jq . + Flags: --min-similarity float minimum similarity threshold (default: 0.85) --min-stmts int minimum statements to analyze (default: 3) --exact report only exact structural clones - --no-tests exclude test files + --exclude exclude files matching regexp (may be repeated) --output string output format: text, table, json (default: text) --version print version ``` -**CI integration** - exits with code 1 if any clones found: - -```yaml -- name: Check for duplicate code - run: godedup --exact ./... -``` - **JSON output** for custom tooling: ```bash diff --git a/internal/cmd/input.go b/internal/cmd/input.go new file mode 100644 index 0000000..1861c42 --- /dev/null +++ b/internal/cmd/input.go @@ -0,0 +1,17 @@ +package cmd + +import "regexp" + +type LoadInput struct { + ExcludePatterns []*regexp.Regexp +} + +// Matches reports whether path matches any of the exclude patterns. +func (inp *LoadInput) Matches(path string) bool { + for _, re := range inp.ExcludePatterns { + if re.MatchString(path) { + return true + } + } + return false +} diff --git a/internal/load/load.go b/internal/load/load.go index 772fb43..0d63f38 100644 --- a/internal/load/load.go +++ b/internal/load/load.go @@ -8,19 +8,25 @@ import ( "path/filepath" "strings" + "github.com/hashmap-kz/godedup/internal/cmd" + "github.com/hashmap-kz/godedup/internal/hash" ) // Result holds all analyzed functions from the given paths. type Result struct { Funcs []hash.FuncInfo - Fset *token.FileSet } +// TODO: later this may be done parallel in three steps: +// 1. Collect files +// 2. Parse them concurrently into []funcs +// 3. Join and sort results + // Load parses all Go files under the given paths and returns // a FuncInfo for every function declaration found. // Paths may be files or directories (walked recursively). -func Load(paths []string, excludeTests bool) (*Result, error) { +func Load(paths []string, inp *cmd.LoadInput) (*Result, error) { fset := token.NewFileSet() hasher := hash.New(fset) var funcs []hash.FuncInfo @@ -32,20 +38,20 @@ func Load(paths []string, excludeTests bool) (*Result, error) { } if info.IsDir() { - if err := walkDir(path, fset, hasher, excludeTests, &funcs); err != nil { + if err := walkDir(path, fset, hasher, inp, &funcs); err != nil { return nil, err } } else { - if err := parseFile(path, fset, hasher, excludeTests, &funcs); err != nil { + if err := parseFile(path, fset, hasher, inp, &funcs); err != nil { return nil, err } } } - return &Result{Funcs: funcs, Fset: fset}, nil + return &Result{Funcs: funcs}, nil } -func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, excludeTests bool, out *[]hash.FuncInfo) error { +func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.LoadInput, out *[]hash.FuncInfo) error { return filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { if err != nil { return err @@ -61,12 +67,15 @@ func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, excludeTests if !strings.HasSuffix(path, ".go") { return nil } - return parseFile(path, fset, hasher, excludeTests, out) + if inp.Matches(path) { + return nil + } + return parseFile(path, fset, hasher, inp, out) }) } -func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, excludeTests bool, out *[]hash.FuncInfo) error { - if excludeTests && strings.HasSuffix(path, "_test.go") { +func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.LoadInput, out *[]hash.FuncInfo) error { + if inp.Matches(path) { return nil } diff --git a/internal/load/load_test.go b/internal/load/load_test.go index 0ee37f6..76f9723 100644 --- a/internal/load/load_test.go +++ b/internal/load/load_test.go @@ -3,9 +3,22 @@ package load import ( "os" "path/filepath" + "regexp" "testing" + + "github.com/hashmap-kz/godedup/internal/cmd" ) +func emptyLoadInput() *cmd.LoadInput { + return &cmd.LoadInput{} +} + +func excludeTestsLoadInput() *cmd.LoadInput { + return &cmd.LoadInput{ + ExcludePatterns: []*regexp.Regexp{regexp.MustCompile(`_test\.go$`)}, + } +} + func writeFile(t *testing.T, path, content string) { t.Helper() if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { @@ -31,16 +44,13 @@ func Two() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } if len(result.Funcs) != 2 { t.Fatalf("len(Funcs) = %d, want 2", len(result.Funcs)) } - if result.Fset == nil { - t.Fatal("Fset is nil") - } } func TestLoadExcludesTests(t *testing.T) { @@ -58,7 +68,7 @@ func TestOne() int { return a + b }`) - result, err := Load([]string{dir}, true) + result, err := Load([]string{dir}, excludeTestsLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -85,7 +95,7 @@ func TestOne() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -109,7 +119,7 @@ func Good() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -142,7 +152,7 @@ func Vendored() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -164,7 +174,7 @@ func Single() int { return a + b }`) - result, err := Load([]string{file}, false) + result, err := Load([]string{file}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } diff --git a/main.go b/main.go index 98c6719..fb5530d 100644 --- a/main.go +++ b/main.go @@ -4,11 +4,12 @@ import ( "flag" "fmt" "os" + "regexp" - "github.com/hashmap-kz/godedup/internal/x/fmtx" - + "github.com/hashmap-kz/godedup/internal/cmd" "github.com/hashmap-kz/godedup/internal/load" "github.com/hashmap-kz/godedup/internal/report" + "github.com/hashmap-kz/godedup/internal/x/fmtx" ) var Version = "dev" @@ -24,9 +25,10 @@ Usage: Examples: godedup ./... - godedup --min-similarity 0.90 ./pkg/... godedup --exact ./... - godedup --output table --no-tests ./... + godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... + godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... + godedup --output table ./... godedup --output json ./... | jq . Flags: @@ -36,7 +38,15 @@ func main() { minSim := flag.Float64("min-similarity", 0.85, "minimum similarity threshold (0.0-1.0)") minStmts := flag.Int("min-stmts", 3, "minimum statements in a function to analyze") exactOnly := flag.Bool("exact", false, "report only exact structural clones") - noTests := flag.Bool("no-tests", false, "exclude test files") + var excludePatterns []*regexp.Regexp + flag.Func("exclude", "exclude files matching `regexp` (may be repeated)", func(s string) error { + re, err := regexp.Compile(s) + if err != nil { + return err + } + excludePatterns = append(excludePatterns, re) + return nil + }) output := flag.String("output", "text", "output format: text, table, json") showVer := flag.Bool("version", false, "print version and exit") @@ -67,7 +77,9 @@ func main() { paths = expandPaths(paths) - result, err := load.Load(paths, *noTests) + result, err := load.Load(paths, &cmd.LoadInput{ + ExcludePatterns: excludePatterns, + }) if err != nil { fmtx.Fprintf(os.Stderr, "godedup: load error: %v\n", err) os.Exit(1) @@ -88,7 +100,7 @@ func main() { cwd, err := os.Getwd() if err != nil { - fmt.Printf("cannot get cwd: %v", err) + fmt.Fprintf(os.Stderr, "godedup: cannot get cwd: %v\n", err) os.Exit(2) } @@ -100,10 +112,6 @@ func main() { default: report.Print(os.Stdout, clones, cwd) } - - if len(clones) > 0 { - os.Exit(1) - } } // expandPaths handles the ./... pattern by walking from the given root.