From 752b07e0b5af1d50df4313d18e2c5afc73325fef Mon Sep 17 00:00:00 2001 From: "alexey.zh" Date: Mon, 4 May 2026 19:05:13 +0500 Subject: [PATCH 1/4] feat: exclude regex --- .run/godedup.run.xml | 12 ++++++++++++ internal/cmd/input.go | 7 +++++++ internal/load/load.go | 27 ++++++++++++++++++--------- internal/load/load_test.go | 28 +++++++++++++++++++--------- main.go | 29 ++++++++++++++++++++++------- 5 files changed, 78 insertions(+), 25 deletions(-) create mode 100644 .run/godedup.run.xml create mode 100644 internal/cmd/input.go diff --git a/.run/godedup.run.xml b/.run/godedup.run.xml new file mode 100644 index 0000000..8511fc9 --- /dev/null +++ b/.run/godedup.run.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/internal/cmd/input.go b/internal/cmd/input.go new file mode 100644 index 0000000..89c29db --- /dev/null +++ b/internal/cmd/input.go @@ -0,0 +1,7 @@ +package cmd + +import "regexp" + +type LoadInput struct { + ExcludeRegex *regexp.Regexp +} diff --git a/internal/load/load.go b/internal/load/load.go index 772fb43..216c266 100644 --- a/internal/load/load.go +++ b/internal/load/load.go @@ -8,19 +8,25 @@ import ( "path/filepath" "strings" + "github.com/hashmap-kz/godedup/internal/cmd" + "github.com/hashmap-kz/godedup/internal/hash" ) // Result holds all analyzed functions from the given paths. type Result struct { Funcs []hash.FuncInfo - Fset *token.FileSet } +// TODO: later this may be done parallel in three steps: +// 1. Collect files +// 2. Parse them concurrently into []funcs +// 3. Join and sort results + // Load parses all Go files under the given paths and returns // a FuncInfo for every function declaration found. // Paths may be files or directories (walked recursively). -func Load(paths []string, excludeTests bool) (*Result, error) { +func Load(paths []string, inp *cmd.LoadInput) (*Result, error) { fset := token.NewFileSet() hasher := hash.New(fset) var funcs []hash.FuncInfo @@ -32,20 +38,20 @@ func Load(paths []string, excludeTests bool) (*Result, error) { } if info.IsDir() { - if err := walkDir(path, fset, hasher, excludeTests, &funcs); err != nil { + if err := walkDir(path, fset, hasher, inp, &funcs); err != nil { return nil, err } } else { - if err := parseFile(path, fset, hasher, excludeTests, &funcs); err != nil { + if err := parseFile(path, fset, hasher, inp, &funcs); err != nil { return nil, err } } } - return &Result{Funcs: funcs, Fset: fset}, nil + return &Result{Funcs: funcs}, nil } -func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, excludeTests bool, out *[]hash.FuncInfo) error { +func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.LoadInput, out *[]hash.FuncInfo) error { return filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { if err != nil { return err @@ -61,12 +67,15 @@ func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, excludeTests if !strings.HasSuffix(path, ".go") { return nil } - return parseFile(path, fset, hasher, excludeTests, out) + if inp.ExcludeRegex != nil && inp.ExcludeRegex.MatchString(path) { + return nil + } + return parseFile(path, fset, hasher, inp, out) }) } -func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, excludeTests bool, out *[]hash.FuncInfo) error { - if excludeTests && strings.HasSuffix(path, "_test.go") { +func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.LoadInput, out *[]hash.FuncInfo) error { + if inp.ExcludeRegex != nil && inp.ExcludeRegex.MatchString(path) { return nil } diff --git a/internal/load/load_test.go b/internal/load/load_test.go index 0ee37f6..bfeb8c2 100644 --- a/internal/load/load_test.go +++ b/internal/load/load_test.go @@ -3,9 +3,22 @@ package load import ( "os" "path/filepath" + "regexp" "testing" + + "github.com/hashmap-kz/godedup/internal/cmd" ) +func emptyLoadInput() *cmd.LoadInput { + return &cmd.LoadInput{} +} + +func excludeTestsLoadInput() *cmd.LoadInput { + return &cmd.LoadInput{ + ExcludeRegex: regexp.MustCompile(`_test\.go$`), + } +} + func writeFile(t *testing.T, path, content string) { t.Helper() if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { @@ -31,16 +44,13 @@ func Two() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } if len(result.Funcs) != 2 { t.Fatalf("len(Funcs) = %d, want 2", len(result.Funcs)) } - if result.Fset == nil { - t.Fatal("Fset is nil") - } } func TestLoadExcludesTests(t *testing.T) { @@ -58,7 +68,7 @@ func TestOne() int { return a + b }`) - result, err := Load([]string{dir}, true) + result, err := Load([]string{dir}, excludeTestsLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -85,7 +95,7 @@ func TestOne() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -109,7 +119,7 @@ func Good() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -142,7 +152,7 @@ func Vendored() int { return a + b }`) - result, err := Load([]string{dir}, false) + result, err := Load([]string{dir}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -164,7 +174,7 @@ func Single() int { return a + b }`) - result, err := Load([]string{file}, false) + result, err := Load([]string{file}, emptyLoadInput()) if err != nil { t.Fatalf("Load() error = %v", err) } diff --git a/main.go b/main.go index 98c6719..4c5d666 100644 --- a/main.go +++ b/main.go @@ -4,11 +4,12 @@ import ( "flag" "fmt" "os" + "regexp" - "github.com/hashmap-kz/godedup/internal/x/fmtx" - + "github.com/hashmap-kz/godedup/internal/cmd" "github.com/hashmap-kz/godedup/internal/load" "github.com/hashmap-kz/godedup/internal/report" + "github.com/hashmap-kz/godedup/internal/x/fmtx" ) var Version = "dev" @@ -24,9 +25,11 @@ Usage: Examples: godedup ./... - godedup --min-similarity 0.90 ./pkg/... godedup --exact ./... - godedup --output table --no-tests ./... + godedup --exclude '_test\.go$' ./... + godedup --exclude '_test\.go$|\.pb\.go$|\.deepcopy\.go$' ./... + godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... + godedup --output table ./... godedup --output json ./... | jq . Flags: @@ -36,7 +39,7 @@ func main() { minSim := flag.Float64("min-similarity", 0.85, "minimum similarity threshold (0.0-1.0)") minStmts := flag.Int("min-stmts", 3, "minimum statements in a function to analyze") exactOnly := flag.Bool("exact", false, "report only exact structural clones") - noTests := flag.Bool("no-tests", false, "exclude test files") + excludePat := flag.String("exclude", "", "exclude files matching this regular expression (matched against full path)") output := flag.String("output", "text", "output format: text, table, json") showVer := flag.Bool("version", false, "print version and exit") @@ -60,6 +63,16 @@ func main() { os.Exit(1) } + var exclude *regexp.Regexp + if *excludePat != "" { + var err error + exclude, err = regexp.Compile(*excludePat) + if err != nil { + fmtx.Fprintf(os.Stderr, "godedup: invalid --exclude pattern: %v\n", err) + os.Exit(1) + } + } + paths := flag.Args() if len(paths) == 0 { paths = []string{"."} @@ -67,7 +80,9 @@ func main() { paths = expandPaths(paths) - result, err := load.Load(paths, *noTests) + result, err := load.Load(paths, &cmd.LoadInput{ + ExcludeRegex: exclude, + }) if err != nil { fmtx.Fprintf(os.Stderr, "godedup: load error: %v\n", err) os.Exit(1) @@ -88,7 +103,7 @@ func main() { cwd, err := os.Getwd() if err != nil { - fmt.Printf("cannot get cwd: %v", err) + fmt.Fprintf(os.Stderr, "godedup: cannot get cwd: %v\n", err) os.Exit(2) } From 1e109d7ca75bca65fbdea144e365daedb74c2ad6 Mon Sep 17 00:00:00 2001 From: "alexey.zh" Date: Mon, 4 May 2026 19:33:05 +0500 Subject: [PATCH 2/4] feat: allow multiple --exclude patterns in CLI --- internal/cmd/input.go | 12 +++++++++++- internal/load/load.go | 4 ++-- internal/load/load_test.go | 2 +- main.go | 24 +++++++++++------------- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/internal/cmd/input.go b/internal/cmd/input.go index 89c29db..1861c42 100644 --- a/internal/cmd/input.go +++ b/internal/cmd/input.go @@ -3,5 +3,15 @@ package cmd import "regexp" type LoadInput struct { - ExcludeRegex *regexp.Regexp + ExcludePatterns []*regexp.Regexp +} + +// Matches reports whether path matches any of the exclude patterns. +func (inp *LoadInput) Matches(path string) bool { + for _, re := range inp.ExcludePatterns { + if re.MatchString(path) { + return true + } + } + return false } diff --git a/internal/load/load.go b/internal/load/load.go index 216c266..0d63f38 100644 --- a/internal/load/load.go +++ b/internal/load/load.go @@ -67,7 +67,7 @@ func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.Loa if !strings.HasSuffix(path, ".go") { return nil } - if inp.ExcludeRegex != nil && inp.ExcludeRegex.MatchString(path) { + if inp.Matches(path) { return nil } return parseFile(path, fset, hasher, inp, out) @@ -75,7 +75,7 @@ func walkDir(root string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.Loa } func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.LoadInput, out *[]hash.FuncInfo) error { - if inp.ExcludeRegex != nil && inp.ExcludeRegex.MatchString(path) { + if inp.Matches(path) { return nil } diff --git a/internal/load/load_test.go b/internal/load/load_test.go index bfeb8c2..76f9723 100644 --- a/internal/load/load_test.go +++ b/internal/load/load_test.go @@ -15,7 +15,7 @@ func emptyLoadInput() *cmd.LoadInput { func excludeTestsLoadInput() *cmd.LoadInput { return &cmd.LoadInput{ - ExcludeRegex: regexp.MustCompile(`_test\.go$`), + ExcludePatterns: []*regexp.Regexp{regexp.MustCompile(`_test\.go$`)}, } } diff --git a/main.go b/main.go index 4c5d666..4beee29 100644 --- a/main.go +++ b/main.go @@ -27,7 +27,7 @@ Examples: godedup ./... godedup --exact ./... godedup --exclude '_test\.go$' ./... - godedup --exclude '_test\.go$|\.pb\.go$|\.deepcopy\.go$' ./... + godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... godedup --output table ./... godedup --output json ./... | jq . @@ -39,7 +39,15 @@ func main() { minSim := flag.Float64("min-similarity", 0.85, "minimum similarity threshold (0.0-1.0)") minStmts := flag.Int("min-stmts", 3, "minimum statements in a function to analyze") exactOnly := flag.Bool("exact", false, "report only exact structural clones") - excludePat := flag.String("exclude", "", "exclude files matching this regular expression (matched against full path)") + var excludePatterns []*regexp.Regexp + flag.Func("exclude", "exclude files matching `regexp` (may be repeated)", func(s string) error { + re, err := regexp.Compile(s) + if err != nil { + return err + } + excludePatterns = append(excludePatterns, re) + return nil + }) output := flag.String("output", "text", "output format: text, table, json") showVer := flag.Bool("version", false, "print version and exit") @@ -63,16 +71,6 @@ func main() { os.Exit(1) } - var exclude *regexp.Regexp - if *excludePat != "" { - var err error - exclude, err = regexp.Compile(*excludePat) - if err != nil { - fmtx.Fprintf(os.Stderr, "godedup: invalid --exclude pattern: %v\n", err) - os.Exit(1) - } - } - paths := flag.Args() if len(paths) == 0 { paths = []string{"."} @@ -81,7 +79,7 @@ func main() { paths = expandPaths(paths) result, err := load.Load(paths, &cmd.LoadInput{ - ExcludeRegex: exclude, + ExcludePatterns: excludePatterns, }) if err != nil { fmtx.Fprintf(os.Stderr, "godedup: load error: %v\n", err) From 3dae6fa28c4b8abc57689bc03c27c6a3fc959669 Mon Sep 17 00:00:00 2001 From: "alexey.zh" Date: Mon, 4 May 2026 19:40:17 +0500 Subject: [PATCH 3/4] docs: update readme with actual flags, exit with 0 even when clones are found --- README.md | 12 ++++++++++-- main.go | 5 ----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 24da9cc..f98c292 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ godedup ./... ``` ``` -$ godedup --output=table --no-tests +$ godedup --output=table --exclude '_test\.go$' GROUP TYPE SIM FUNCTION LOCATION STMTS LINES ------------------------------------------------------------------------------------------ @@ -73,11 +73,19 @@ brew install godedup ``` godedup [flags] [path ...] +Examples: + godedup ./... + godedup --exact ./... + godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... + godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... + godedup --output table ./... + godedup --output json ./... | jq . + Flags: --min-similarity float minimum similarity threshold (default: 0.85) --min-stmts int minimum statements to analyze (default: 3) --exact report only exact structural clones - --no-tests exclude test files + --exclude exclude files matching regexp (may be repeated) --output string output format: text, table, json (default: text) --version print version ``` diff --git a/main.go b/main.go index 4beee29..fb5530d 100644 --- a/main.go +++ b/main.go @@ -26,7 +26,6 @@ Usage: Examples: godedup ./... godedup --exact ./... - godedup --exclude '_test\.go$' ./... godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... godedup --output table ./... @@ -113,10 +112,6 @@ func main() { default: report.Print(os.Stdout, clones, cwd) } - - if len(clones) > 0 { - os.Exit(1) - } } // expandPaths handles the ./... pattern by walking from the given root. From 0496932969ca586f9d3c3eb07a9e7e5c9c2facb6 Mon Sep 17 00:00:00 2001 From: "alexey.zh" Date: Mon, 4 May 2026 19:42:46 +0500 Subject: [PATCH 4/4] docs: update readme, installation paragraph --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f98c292..608e84c 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,14 @@ calls hash identically. ## Install +#### Package + ```bash go install github.com/hashmap-kz/godedup@latest ``` +#### Brew + ```bash brew tap hashmap-kz/homebrew-tap brew install godedup @@ -90,13 +94,6 @@ Flags: --version print version ``` -**CI integration** - exits with code 1 if any clones found: - -```yaml -- name: Check for duplicate code - run: godedup --exact ./... -``` - **JSON output** for custom tooling: ```bash