diff --git a/README.md b/README.md index 608e84c..5be7686 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,12 @@ Find structurally duplicate functions in Go code. godedup ./... ``` +**HTML output** + +![HTML](https://raw.githubusercontent.com/hashmap-kz/assets/main/godedup/godedup-html-v1.png) + +**Table output with clickable `file.go:line` locations in supported terminals and editors** + ``` $ godedup --output=table --exclude '_test\.go$' @@ -83,6 +89,7 @@ Examples: godedup --exclude '_test\.go$' --exclude '\.pb\.go$' ./... godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... godedup --output table ./... + godedup --output html ./... > godedup.html godedup --output json ./... | jq . Flags: @@ -90,7 +97,7 @@ Flags: --min-stmts int minimum statements to analyze (default: 3) --exact report only exact structural clones --exclude exclude files matching regexp (may be repeated) - --output string output format: text, table, json (default: text) + --output string output format: text, table, html, json (default: text) --version print version ``` diff --git a/internal/hash/hash.go b/internal/hash/hash.go index f8f21b4..dc7df8d 100644 --- a/internal/hash/hash.go +++ b/internal/hash/hash.go @@ -23,6 +23,7 @@ type FuncInfo struct { StmtSeq []uint64 // per-statement hashes for similarity comparison NumStmts int // total statement count (excluding blank lines) NumLines int // line span of the function body + Source string // original function source, used by rich reports } // Hasher computes structural hashes of AST nodes. diff --git a/internal/load/load.go b/internal/load/load.go index 0d63f38..b50ab60 100644 --- a/internal/load/load.go +++ b/internal/load/load.go @@ -1,6 +1,7 @@ package load import ( + "bytes" "go/ast" "go/parser" "go/token" @@ -79,7 +80,12 @@ func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.L return nil } - f, err := parser.ParseFile(fset, path, nil, 0) + src, err := os.ReadFile(path) + if err != nil { + return err + } + + f, err := parser.ParseFile(fset, path, src, 0) if err != nil { // skip unparseable files return nil @@ -99,6 +105,7 @@ func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.L } info := hasher.HashFunc(pkg, path, fn) + info.Source = sourceSpan(src, fset, fn) if info.Name == "" { continue } @@ -107,3 +114,12 @@ func parseFile(path string, fset *token.FileSet, hasher *hash.Hasher, inp *cmd.L return nil } + +func sourceSpan(src []byte, fset *token.FileSet, fn *ast.FuncDecl) string { + start := fset.Position(fn.Pos()).Offset + end := fset.Position(fn.End()).Offset + if start < 0 || end < start || end > len(src) { + return "" + } + return string(bytes.TrimRight(src[start:end], "\n")) +} diff --git a/internal/load/load_test.go b/internal/load/load_test.go index 76f9723..83c7b9e 100644 --- a/internal/load/load_test.go +++ b/internal/load/load_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "regexp" + "strings" "testing" "github.com/hashmap-kz/godedup/internal/cmd" @@ -164,6 +165,29 @@ func Vendored() int { } } +func TestLoadStoresFunctionSource(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "source.go") + writeFile(t, file, `package sample +func Source() int { + a := 1 + b := 2 + return a + b +} +`) + + result, err := Load([]string{file}, emptyLoadInput()) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + if len(result.Funcs) != 1 { + t.Fatalf("len(Funcs) = %d, want 1", len(result.Funcs)) + } + if got := result.Funcs[0].Source; !strings.Contains(got, "func Source() int") || !strings.Contains(got, "return a + b") { + t.Fatalf("Source was not captured correctly:\n%s", got) + } +} + func TestLoadSingleFile(t *testing.T) { dir := t.TempDir() file := filepath.Join(dir, "single.go") diff --git a/internal/report/consts.go b/internal/report/consts.go new file mode 100644 index 0000000..b36a32c --- /dev/null +++ b/internal/report/consts.go @@ -0,0 +1,7 @@ +package report + +const ( + kindExact = "EXACT" + kindNear = "NEAR" + sim100Percent = "100%" +) diff --git a/internal/report/html.go b/internal/report/html.go new file mode 100644 index 0000000..24a9e7b --- /dev/null +++ b/internal/report/html.go @@ -0,0 +1,340 @@ +package report + +import ( + "fmt" + "html/template" + "io" + "sort" + "strings" + + "github.com/hashmap-kz/godedup/internal/hash" + "github.com/hashmap-kz/godedup/internal/x/fmtx" +) + +// htmlLine is one source line within a function card. +type htmlLine struct { + No int + FileURL template.URL + Text string +} + +// htmlFuncView is the template data for a single function card. +type htmlFuncView struct { + Name string + Location string + FileURL template.URL + NumStmts int + NumLines int + Lines []htmlLine +} + +// htmlGroupView is the template data for one clone group. +type htmlGroupView struct { + No int + KindClass string // "exact" or "near" + Kind string // "EXACT" or "NEAR" + Sim string + IsTwoFunc bool + Funcs []htmlFuncView +} + +// htmlReportView is the top-level template data. +type htmlReportView struct { + Groups []htmlGroupView + Total int + Exact int + Near int + FnCount int +} + +const htmlCSS = `*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } +:root { + --bg: #f6f8fa; + --card: #ffffff; + --border: #d0d7de; + --text: #24292f; + --muted: #57606a; + --code-bg:#f6f8fa; + --blue: #0969da; + --purple: #8250df; + --mono: ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace; +} +body { + background: var(--bg); + color: var(--text); + font-family: system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif; + font-size: 14px; + line-height: 1.5; + padding: 20px; +} +a { color: var(--blue); text-decoration: none; } +a:hover { text-decoration: underline; } +.hdr { + background: var(--card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 14px 20px; + margin-bottom: 14px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 16px; + flex-wrap: wrap; +} +.hdr-title { font-size: 17px; font-weight: 700; } +.hdr-sub { color: var(--muted); font-size: 12px; margin-top: 2px; } +.tags { display: flex; gap: 6px; flex-wrap: wrap; align-items: center; } +.tag { + border: 1px solid var(--border); + border-radius: 999px; + padding: 2px 10px; + font-size: 12px; + white-space: nowrap; +} +.tag-e { color: var(--blue); border-color: #b6d4fe; background: #dbeafe; } +.tag-n { color: var(--purple); border-color: #d8b4fe; background: #ede9fe; } +.group { + background: var(--card); + border: 1px solid var(--border); + border-radius: 8px; + margin-bottom: 10px; + overflow: hidden; +} +.group-hdr { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 14px; + border-bottom: 1px solid var(--border); + background: var(--code-bg); + border-left: 3px solid transparent; +} +.group.exact .group-hdr { border-left-color: var(--blue); } +.group.near .group-hdr { border-left-color: var(--purple); } +.badge { + border-radius: 4px; + padding: 1px 6px; + font-size: 11px; + font-weight: 700; + font-family: var(--mono); +} +.badge.exact { color: var(--blue); border: 1px solid #b6d4fe; background: #dbeafe; } +.badge.near { color: var(--purple); border: 1px solid #d8b4fe; background: #ede9fe; } +.group-num { font-family: var(--mono); font-size: 11px; color: var(--muted); } +.group-sim { font-family: var(--mono); font-size: 12px; } +.group-meta { margin-left: auto; font-size: 12px; color: var(--muted); } +.fn-row-wrap { overflow-x: auto; } +.fn-row { + display: flex; + align-items: stretch; + width: max-content; + min-width: 100%; +} +.group.funcs-2 .fn-row { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + width: 100%; +} +.fn-card { + min-width: 360px; + border-right: 1px solid var(--border); + display: flex; + flex-direction: column; +} +.group.funcs-2 .fn-card { min-width: 0; } +.fn-card:last-child { border-right: none; } +.fn-card-hdr { + padding: 7px 12px; + border-bottom: 1px solid var(--border); + background: var(--code-bg); + flex-shrink: 0; +} +.fn-name { font-weight: 600; font-size: 13px; word-break: break-all; } +.fn-loc { font-size: 11px; color: var(--muted); margin-top: 2px; } +.fn-stat { font-size: 11px; color: var(--muted); margin-top: 1px; } +.code { flex: 1; } +pre { + font-family: var(--mono); + font-size: 12px; + line-height: 1.5; + padding: 8px 0; + white-space: pre; +} +.code-line { display: flex; } +.code-line:hover { background: rgba(0,0,0,.04); } +.line-no { + color: #6e7781; + text-align: right; + padding: 0 12px; + user-select: none; + min-width: 48px; + flex-shrink: 0; +} +.code-text { padding-right: 16px; } +.empty-msg { padding: 32px; text-align: center; color: var(--muted); }` + +const htmlTmpl = ` + + + + +godedup report + + + +
+
+
godedup report
+
Structural duplicate detection for Go
+
+
+ {{.Data.Total}} groups + {{.Data.FnCount}} functions + {{.Data.Exact}} exact + {{.Data.Near}} near +
+
+{{- if not .Data.Groups}} +
No structural duplicates found.
+{{- else}} +{{- range .Data.Groups}} +
+
+ #{{.No}} + {{.Kind}} + {{.Sim}} + {{len .Funcs}} functions +
+
+ {{- range .Funcs}} +
+
+
{{.Name}}
+ +
{{.NumStmts}} stmts · {{.NumLines}} lines
+
+
{{- range .Lines}}
{{.No}}{{.Text}}
{{end}}
+
+ {{- end}} +
+
+{{- end}} +{{- end}} + +` + +var htmlReport = template.Must( + template.New("report"). + Funcs(template.FuncMap{ + "not": func(groups []htmlGroupView) bool { return len(groups) == 0 }, + }). + Parse(htmlTmpl), +) + +// PrintHTML writes a self-contained HTML report. +func PrintHTML(w io.Writer, clones []Clone, cwd string) { + exact, near, fnCount := cloneStats(clones) + + data := htmlReportView{ + Total: len(clones), + Exact: exact, + Near: near, + FnCount: fnCount, + Groups: make([]htmlGroupView, 0, len(clones)), + } + for i, clone := range clones { + data.Groups = append(data.Groups, buildHTMLGroup(i+1, clone, cwd)) + } + + err := htmlReport.Execute(w, struct { + CSS template.CSS + Data htmlReportView + }{ + CSS: htmlCSS, + Data: data, + }) + if err != nil { + fmtx.Fprintf(w, "\n", err) + } +} + +func cloneStats(clones []Clone) (exact, near, funcs int) { + for _, c := range clones { + funcs += len(c.Funcs) + if c.Exact { + exact++ + } else { + near++ + } + } + return exact, near, funcs +} + +func buildHTMLGroup(no int, clone Clone, cwd string) htmlGroupView { + kind := kindExact + sim := sim100Percent + kindClass := "exact" + if !clone.Exact { + kind = kindNear + sim = fmt.Sprintf("%.0f%%", clone.Similarity*100) + kindClass = "near" + } + sorted := sortedFuncs(clone.Funcs) + funcs := make([]htmlFuncView, 0, len(sorted)) + for _, f := range sorted { + funcs = append(funcs, buildHTMLFunc(f, cwd)) + } + return htmlGroupView{ + No: no, + KindClass: kindClass, + Kind: kind, + Sim: sim, + IsTwoFunc: len(sorted) == 2, + Funcs: funcs, + } +} + +//nolint:gocritic +func buildHTMLFunc(f hash.FuncInfo, cwd string) htmlFuncView { + loc := fmt.Sprintf("%s:%d", relativePath(f.File, cwd), f.Line) + src := f.Source + if src == "" { + src = "(source unavailable)" + } + rawLines := strings.Split(src, "\n") + lines := make([]htmlLine, 0, len(rawLines)) + for i, text := range rawLines { + lineNo := f.Line + i + lines = append(lines, htmlLine{ + No: lineNo, + //nolint:gosec + FileURL: template.URL(fileURL(f.File, lineNo)), + Text: text, + }) + } + return htmlFuncView{ + Name: f.Name, + Location: loc, + //nolint:gosec + FileURL: template.URL(fileURL(f.File, f.Line)), + NumStmts: f.NumStmts, + NumLines: f.NumLines, + Lines: lines, + } +} + +func fileURL(path string, line int) string { + return fmt.Sprintf("file://%s:%d", path, line) +} + +func sortedFuncs(funcs []hash.FuncInfo) []hash.FuncInfo { + sorted := make([]hash.FuncInfo, len(funcs)) + copy(sorted, funcs) + sort.Slice(sorted, func(a, b int) bool { + if sorted[a].File != sorted[b].File { + return sorted[a].File < sorted[b].File + } + return sorted[a].Line < sorted[b].Line + }) + return sorted +} diff --git a/internal/report/html_test.go b/internal/report/html_test.go new file mode 100644 index 0000000..ff638c8 --- /dev/null +++ b/internal/report/html_test.go @@ -0,0 +1,107 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/hashmap-kz/godedup/internal/hash" +) + +func TestPrintHTML(t *testing.T) { + clones := []Clone{{ + Exact: true, + Similarity: 1.0, + Funcs: []hash.FuncInfo{ + funcInfo("pkg.B", "/repo/b.go", 20, 3, 7, 100, 1, 2, 3), + funcInfo("pkg.A", "/repo/a.go", 10, 3, 7, 100, 1, 2, 3), + }, + }} + clones[0].Funcs[0].Source = "func B() int {\n\tx := 1\n\ty := 2\n\treturn x + y\n}" + clones[0].Funcs[1].Source = "func A() int {\n\tx := 1\n\ty := 2\n\treturn x + y\n}" + + var buf bytes.Buffer + PrintHTML(&buf, clones, "/repo") + got := buf.String() + + for _, want := range []string{ + "", + "godedup report", + "1 groups", + "1 exact", + `class="group exact funcs-2"`, + "pkg.A", + "a.go:10", + "func A() int", + "pkg.B", + "b.go:20", + "file:///repo/a.go:10", + } { + if !strings.Contains(got, want) { + t.Fatalf("PrintHTML() missing %q in:\n%s", want, got) + } + } + for _, unwanted := range []string{"Suggestion:", "review this clone group", "#ZgotmplZ"} { + if strings.Contains(got, unwanted) { + t.Fatalf("PrintHTML() contains unwanted %q in:\n%s", unwanted, got) + } + } +} + +func TestPrintHTMLEmpty(t *testing.T) { + var buf bytes.Buffer + PrintHTML(&buf, nil, "/repo") + got := buf.String() + if !strings.Contains(got, "No structural duplicates found") { + t.Fatalf("PrintHTML() empty case missing expected message in:\n%s", got) + } + if strings.Contains(got, " in:\n%s", got) + } +} + +func TestPrintHTMLNearClone(t *testing.T) { + clones := []Clone{{ + Exact: false, + Similarity: 0.88, + Funcs: []hash.FuncInfo{ + funcInfo("pkg.A", "/repo/a.go", 10, 4, 8, 100, 1, 2, 3, 4), + funcInfo("pkg.B", "/repo/b.go", 20, 4, 8, 200, 1, 2, 9, 4), + }, + }} + + var buf bytes.Buffer + PrintHTML(&buf, clones, "/repo") + got := buf.String() + + for _, want := range []string{ + `class="group near funcs-2"`, + `class="badge near"`, + "88%", + } { + if !strings.Contains(got, want) { + t.Fatalf("PrintHTML() near clone missing %q in:\n%s", want, got) + } + } +} + +func TestPrintHTMLSortsByFileLine(t *testing.T) { + clones := []Clone{{ + Exact: true, + Similarity: 1.0, + Funcs: []hash.FuncInfo{ + funcInfo("pkg.B", "/repo/b.go", 20, 3, 5, 100, 1, 2, 3), + funcInfo("pkg.A", "/repo/a.go", 10, 3, 5, 100, 1, 2, 3), + }, + }} + + var buf bytes.Buffer + PrintHTML(&buf, clones, "/repo") + got := buf.String() + + posA := strings.Index(got, "pkg.A") + posB := strings.Index(got, "pkg.B") + if posA > posB { + t.Fatalf("PrintHTML() functions not sorted by file/line: pkg.A at %d, pkg.B at %d", posA, posB) + } +} diff --git a/internal/report/json.go b/internal/report/json.go new file mode 100644 index 0000000..9986765 --- /dev/null +++ b/internal/report/json.go @@ -0,0 +1,29 @@ +package report + +import ( + "io" + + "github.com/hashmap-kz/godedup/internal/x/fmtx" +) + +func PrintJSON(w io.Writer, clones []Clone) { + fmtx.Fprintln(w, "[") + for i, clone := range clones { + fmtx.Fprintf(w, ` {"exact":%v,"similarity":%.2f,"functions":[`, + clone.Exact, clone.Similarity) + for j, f := range clone.Funcs { + if j > 0 { + fmtx.Fprint(w, ",") + } + fmtx.Fprintf(w, `{"name":%q,"file":%q,"line":%d,"stmts":%d}`, + f.Name, f.File, f.Line, f.NumStmts) + } + fmtx.Fprint(w, "]}") + if i < len(clones)-1 { + fmtx.Fprintln(w, ",") + } else { + fmtx.Fprintln(w) + } + } + fmtx.Fprintln(w, "]") +} diff --git a/internal/report/json_test.go b/internal/report/json_test.go new file mode 100644 index 0000000..ebe9a86 --- /dev/null +++ b/internal/report/json_test.go @@ -0,0 +1,45 @@ +package report + +import ( + "bytes" + "encoding/json" + "testing" + + "github.com/hashmap-kz/godedup/internal/hash" +) + +func TestPrintJSON(t *testing.T) { + clones := []Clone{{ + Exact: true, + Similarity: 1.0, + Funcs: []hash.FuncInfo{ + funcInfo("pkg.A", "a.go", 10, 3, 7, 100, 1, 2, 3), + }, + }} + + var buf bytes.Buffer + PrintJSON(&buf, clones) + + var decoded []struct { + Exact bool `json:"exact"` + Similarity float64 `json:"similarity"` + Functions []struct { + Name string `json:"name"` + File string `json:"file"` + Line int `json:"line"` + Stmts int `json:"stmts"` + } `json:"functions"` + } + if err := json.Unmarshal(buf.Bytes(), &decoded); err != nil { + t.Fatalf("PrintJSON produced invalid JSON: %v\n%s", err, buf.String()) + } + if len(decoded) != 1 { + t.Fatalf("len(decoded) = %d, want 1", len(decoded)) + } + if !decoded[0].Exact || decoded[0].Similarity != 1.0 { + t.Fatalf("decoded clone = %+v, want exact similarity 1.0", decoded[0]) + } + if got := decoded[0].Functions[0].Name; got != "pkg.A" { + t.Fatalf("function name = %q, want pkg.A", got) + } +} diff --git a/internal/report/report.go b/internal/report/report.go index 8955f00..0c6ec4e 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -1,13 +1,9 @@ package report import ( - "fmt" - "io" "sort" "strings" - "github.com/hashmap-kz/godedup/internal/x/fmtx" - "github.com/hashmap-kz/godedup/internal/hash" ) @@ -172,83 +168,6 @@ func sortClones(clones []Clone) { }) } -// Print writes a human-readable report to w. -func Print(w io.Writer, clones []Clone, cwd string) { - if len(clones) == 0 { - fmtx.Fprintln(w, "godedup: no structural duplicates found") - return - } - - exact := 0 - near := 0 - for _, c := range clones { - if c.Exact { - exact++ - } else { - near++ - } - } - - fmtx.Fprintf(w, "godedup: found %d clone group(s) (%d exact, %d near)\n\n", - len(clones), exact, near) - - for i, clone := range clones { - kind := "EXACT" - simStr := "100%" - if !clone.Exact { - kind = "NEAR" - simStr = fmt.Sprintf("%.0f%%", clone.Similarity*100) - } - - fmtx.Fprintf(w, "=== clone group %d [%s %s similarity] ===\n", - i+1, kind, simStr) - - // sort functions by file+line for stable output - sorted := make([]hash.FuncInfo, len(clone.Funcs)) - copy(sorted, clone.Funcs) - sort.Slice(sorted, func(a, b int) bool { - if sorted[a].File != sorted[b].File { - return sorted[a].File < sorted[b].File - } - return sorted[a].Line < sorted[b].Line - }) - - for _, f := range sorted { - relPath := relativePath(f.File, cwd) - fmtx.Fprintf(w, " %s\n", f.Name) - fmtx.Fprintf(w, " %s:%d (%d stmts, %d lines)\n", - relPath, f.Line, f.NumStmts, f.NumLines) - } - fmtx.Fprintln(w) - } - - fmtx.Fprintf(w, "suggestion: extract shared logic into a common function\n") - fmtx.Fprintf(w, " or use generics if types differ\n") -} - -// PrintJSON writes machine-readable JSON output. -func PrintJSON(w io.Writer, clones []Clone) { - fmtx.Fprintln(w, "[") - for i, clone := range clones { - fmtx.Fprintf(w, ` {"exact":%v,"similarity":%.2f,"functions":[`, - clone.Exact, clone.Similarity) - for j, f := range clone.Funcs { - if j > 0 { - fmtx.Fprint(w, ",") - } - fmtx.Fprintf(w, `{"name":%q,"file":%q,"line":%d,"stmts":%d}`, - f.Name, f.File, f.Line, f.NumStmts) - } - fmtx.Fprint(w, "]}") - if i < len(clones)-1 { - fmtx.Fprintln(w, ",") - } else { - fmtx.Fprintln(w) - } - } - fmtx.Fprintln(w, "]") -} - func relativePath(path, cwd string) string { if cwd == "" { return path @@ -259,108 +178,3 @@ func relativePath(path, cwd string) string { } return rel } - -// PrintTable writes aligned tabular output suitable for terminal viewing. -// Columns: GROUP TYPE SIM FUNCTION LOCATION STMTS LINES -func PrintTable(w io.Writer, clones []Clone, cwd string) { - if len(clones) == 0 { - fmtx.Fprintln(w, "godedup: no structural duplicates found") - return - } - - // collect all rows first so we can compute column widths - type row struct { - group string - typ string - sim string - function string - location string - stmts string - lines string - } - - var rows []row - for i, clone := range clones { - typ := "EXACT" - sim := "100%" - if !clone.Exact { - typ = "NEAR" - sim = fmt.Sprintf("%.0f%%", clone.Similarity*100) - } - - sorted := make([]hash.FuncInfo, len(clone.Funcs)) - copy(sorted, clone.Funcs) - sort.Slice(sorted, func(a, b int) bool { - if sorted[a].File != sorted[b].File { - return sorted[a].File < sorted[b].File - } - return sorted[a].Line < sorted[b].Line - }) - - for _, f := range sorted { - loc := fmt.Sprintf("%s:%d", relativePath(f.File, cwd), f.Line) - rows = append(rows, row{ - group: fmt.Sprintf("%d", i+1), - typ: typ, - sim: sim, - function: f.Name, - location: loc, - stmts: fmt.Sprintf("%d", f.NumStmts), - lines: fmt.Sprintf("%d", f.NumLines), - }) - } - } - - // compute column widths - headers := row{"GROUP", "TYPE", "SIM", "FUNCTION", "LOCATION", "STMTS", "LINES"} - widths := [7]int{ - len(headers.group), - len(headers.typ), - len(headers.sim), - len(headers.function), - len(headers.location), - len(headers.stmts), - len(headers.lines), - } - for _, r := range rows { - vals := [7]string{r.group, r.typ, r.sim, r.function, r.location, r.stmts, r.lines} - for i, v := range vals { - if len(v) > widths[i] { - widths[i] = len(v) - } - } - } - - fmtRow := func(r row) string { - return fmt.Sprintf("%-*s %-*s %-*s %-*s %-*s %-*s %s", - widths[0], r.group, - widths[1], r.typ, - widths[2], r.sim, - widths[3], r.function, - widths[4], r.location, - widths[5], r.stmts, - r.lines, - ) - } - - // header - fmtx.Fprintln(w, fmtRow(headers)) - - // separator using only dashes - sep := "" - total := widths[0] + widths[1] + widths[2] + widths[3] + widths[4] + widths[5] + widths[6] + 12 - for i := 0; i < total; i++ { - sep += "-" - } - fmtx.Fprintln(w, sep) - - // rows: emit the separator between groups - prevGroup := "" - for _, r := range rows { - if prevGroup != "" && r.group != prevGroup { - fmtx.Fprintln(w, sep) - } - fmtx.Fprintln(w, fmtRow(r)) - prevGroup = r.group - } -} diff --git a/internal/report/report_test.go b/internal/report/report_test.go index b3f0f21..3ac4035 100644 --- a/internal/report/report_test.go +++ b/internal/report/report_test.go @@ -2,7 +2,6 @@ package report import ( "bytes" - "encoding/json" "strings" "testing" @@ -122,104 +121,6 @@ func TestPrintNoClones(t *testing.T) { } } -func TestPrintHumanReadable(t *testing.T) { - clones := []Clone{{ - Exact: true, - Similarity: 1.0, - Funcs: []hash.FuncInfo{ - funcInfo("pkg.B", "/repo/b.go", 20, 3, 7, 100, 1, 2, 3), - funcInfo("pkg.A", "/repo/a.go", 10, 3, 7, 100, 1, 2, 3), - }, - }} - - var buf bytes.Buffer - Print(&buf, clones, "/repo") - got := buf.String() - for _, want := range []string{ - "godedup: found 1 clone group(s) (1 exact, 0 near)", - "=== clone group 1 [EXACT 100% similarity] ===", - "pkg.A", - "a.go:10 (3 stmts, 7 lines)", - "pkg.B", - "b.go:20 (3 stmts, 7 lines)", - "suggestion: extract shared logic into a common function", - } { - if !strings.Contains(got, want) { - t.Fatalf("Print() missing %q in:\n%s", want, got) - } - } - if strings.Index(got, "pkg.A") > strings.Index(got, "pkg.B") { - t.Fatalf("functions are not sorted by file/line:\n%s", got) - } -} - -func TestPrintTable(t *testing.T) { - clones := []Clone{{ - Exact: false, - Similarity: 0.91, - Funcs: []hash.FuncInfo{ - funcInfo("api.handleOrderCreate", "/repo/pkg/api/order.go", 51, 19, 47, 200, 1, 2, 9), - funcInfo("api.handleUserCreate", "/repo/pkg/api/user.go", 44, 18, 45, 100, 1, 2, 3), - }, - }} - - var buf bytes.Buffer - PrintTable(&buf, clones, "/repo") - got := buf.String() - for _, want := range []string{ - "GROUP", - "TYPE", - "SIM", - "FUNCTION", - "LOCATION", - "1 NEAR 91%", - "api.handleOrderCreate", - "pkg/api/order.go:51", - "api.handleUserCreate", - "pkg/api/user.go:44", - } { - if !strings.Contains(got, want) { - t.Fatalf("PrintTable() missing %q in:\n%s", want, got) - } - } -} - -func TestPrintJSON(t *testing.T) { - clones := []Clone{{ - Exact: true, - Similarity: 1.0, - Funcs: []hash.FuncInfo{ - funcInfo("pkg.A", "a.go", 10, 3, 7, 100, 1, 2, 3), - }, - }} - - var buf bytes.Buffer - PrintJSON(&buf, clones) - - var decoded []struct { - Exact bool `json:"exact"` - Similarity float64 `json:"similarity"` - Functions []struct { - Name string `json:"name"` - File string `json:"file"` - Line int `json:"line"` - Stmts int `json:"stmts"` - } `json:"functions"` - } - if err := json.Unmarshal(buf.Bytes(), &decoded); err != nil { - t.Fatalf("PrintJSON produced invalid JSON: %v\n%s", err, buf.String()) - } - if len(decoded) != 1 { - t.Fatalf("len(decoded) = %d, want 1", len(decoded)) - } - if !decoded[0].Exact || decoded[0].Similarity != 1.0 { - t.Fatalf("decoded clone = %+v, want exact similarity 1.0", decoded[0]) - } - if got := decoded[0].Functions[0].Name; got != "pkg.A" { - t.Fatalf("function name = %q, want pkg.A", got) - } -} - func TestRelativePath(t *testing.T) { tests := []struct { name string diff --git a/internal/report/table.go b/internal/report/table.go new file mode 100644 index 0000000..0f2322a --- /dev/null +++ b/internal/report/table.go @@ -0,0 +1,115 @@ +package report + +import ( + "fmt" + "io" + "sort" + + "github.com/hashmap-kz/godedup/internal/hash" + "github.com/hashmap-kz/godedup/internal/x/fmtx" +) + +// PrintTable writes aligned tabular output suitable for terminal viewing. +// Columns: GROUP TYPE SIM FUNCTION LOCATION STMTS LINES +func PrintTable(w io.Writer, clones []Clone, cwd string) { + if len(clones) == 0 { + fmtx.Fprintln(w, "godedup: no structural duplicates found") + return + } + + // collect all rows first so we can compute column widths + type row struct { + group string + typ string + sim string + function string + location string + stmts string + lines string + } + + var rows []row + for i, clone := range clones { + typ := kindExact + sim := sim100Percent + if !clone.Exact { + typ = kindNear + sim = fmt.Sprintf("%.0f%%", clone.Similarity*100) + } + + sorted := make([]hash.FuncInfo, len(clone.Funcs)) + copy(sorted, clone.Funcs) + sort.Slice(sorted, func(a, b int) bool { + if sorted[a].File != sorted[b].File { + return sorted[a].File < sorted[b].File + } + return sorted[a].Line < sorted[b].Line + }) + + for _, f := range sorted { + loc := fmt.Sprintf("%s:%d", relativePath(f.File, cwd), f.Line) + rows = append(rows, row{ + group: fmt.Sprintf("%d", i+1), + typ: typ, + sim: sim, + function: f.Name, + location: loc, + stmts: fmt.Sprintf("%d", f.NumStmts), + lines: fmt.Sprintf("%d", f.NumLines), + }) + } + } + + // compute column widths + headers := row{"GROUP", "TYPE", "SIM", "FUNCTION", "LOCATION", "STMTS", "LINES"} + widths := [7]int{ + len(headers.group), + len(headers.typ), + len(headers.sim), + len(headers.function), + len(headers.location), + len(headers.stmts), + len(headers.lines), + } + for _, r := range rows { + vals := [7]string{r.group, r.typ, r.sim, r.function, r.location, r.stmts, r.lines} + for i, v := range vals { + if len(v) > widths[i] { + widths[i] = len(v) + } + } + } + + fmtRow := func(r row) string { + return fmt.Sprintf("%-*s %-*s %-*s %-*s %-*s %-*s %s", + widths[0], r.group, + widths[1], r.typ, + widths[2], r.sim, + widths[3], r.function, + widths[4], r.location, + widths[5], r.stmts, + r.lines, + ) + } + + // header + fmtx.Fprintln(w, fmtRow(headers)) + + // separator using only dashes + sep := "" + total := widths[0] + widths[1] + widths[2] + widths[3] + widths[4] + widths[5] + widths[6] + 12 + for i := 0; i < total; i++ { + sep += "-" + } + fmtx.Fprintln(w, sep) + + // rows: emit the separator between groups + prevGroup := "" + for _, r := range rows { + if prevGroup != "" && r.group != prevGroup { + fmtx.Fprintln(w, sep) + } + fmtx.Fprintln(w, fmtRow(r)) + prevGroup = r.group + } +} diff --git a/internal/report/table_test.go b/internal/report/table_test.go new file mode 100644 index 0000000..961cced --- /dev/null +++ b/internal/report/table_test.go @@ -0,0 +1,40 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/hashmap-kz/godedup/internal/hash" +) + +func TestPrintTable(t *testing.T) { + clones := []Clone{{ + Exact: false, + Similarity: 0.91, + Funcs: []hash.FuncInfo{ + funcInfo("api.handleOrderCreate", "/repo/pkg/api/order.go", 51, 19, 47, 200, 1, 2, 9), + funcInfo("api.handleUserCreate", "/repo/pkg/api/user.go", 44, 18, 45, 100, 1, 2, 3), + }, + }} + + var buf bytes.Buffer + PrintTable(&buf, clones, "/repo") + got := buf.String() + for _, want := range []string{ + "GROUP", + "TYPE", + "SIM", + "FUNCTION", + "LOCATION", + "1 NEAR 91%", + "api.handleOrderCreate", + "pkg/api/order.go:51", + "api.handleUserCreate", + "pkg/api/user.go:44", + } { + if !strings.Contains(got, want) { + t.Fatalf("PrintTable() missing %q in:\n%s", want, got) + } + } +} diff --git a/internal/report/text.go b/internal/report/text.go new file mode 100644 index 0000000..d1be306 --- /dev/null +++ b/internal/report/text.go @@ -0,0 +1,61 @@ +package report + +import ( + "fmt" + "io" + "sort" + + "github.com/hashmap-kz/godedup/internal/hash" + "github.com/hashmap-kz/godedup/internal/x/fmtx" +) + +// Print writes a human-readable report to w. +func Print(w io.Writer, clones []Clone, cwd string) { + if len(clones) == 0 { + fmtx.Fprintln(w, "godedup: no structural duplicates found") + return + } + + exact := 0 + near := 0 + for _, c := range clones { + if c.Exact { + exact++ + } else { + near++ + } + } + + fmtx.Fprintf(w, "godedup: found %d clone group(s) (%d exact, %d near)\n\n", + len(clones), exact, near) + + for i, clone := range clones { + kind := kindExact + simStr := sim100Percent + if !clone.Exact { + kind = kindNear + simStr = fmt.Sprintf("%.0f%%", clone.Similarity*100) + } + + fmtx.Fprintf(w, "=== clone group %d [%s %s similarity] ===\n", + i+1, kind, simStr) + + // sort functions by file+line for stable output + sorted := make([]hash.FuncInfo, len(clone.Funcs)) + copy(sorted, clone.Funcs) + sort.Slice(sorted, func(a, b int) bool { + if sorted[a].File != sorted[b].File { + return sorted[a].File < sorted[b].File + } + return sorted[a].Line < sorted[b].Line + }) + + for _, f := range sorted { + relPath := relativePath(f.File, cwd) + fmtx.Fprintf(w, " %s\n", f.Name) + fmtx.Fprintf(w, " %s:%d (%d stmts, %d lines)\n", + relPath, f.Line, f.NumStmts, f.NumLines) + } + fmtx.Fprintln(w) + } +} diff --git a/internal/report/text_test.go b/internal/report/text_test.go new file mode 100644 index 0000000..b323859 --- /dev/null +++ b/internal/report/text_test.go @@ -0,0 +1,42 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/hashmap-kz/godedup/internal/hash" +) + +func TestPrintHumanReadable(t *testing.T) { + clones := []Clone{{ + Exact: true, + Similarity: 1.0, + Funcs: []hash.FuncInfo{ + funcInfo("pkg.B", "/repo/b.go", 20, 3, 7, 100, 1, 2, 3), + funcInfo("pkg.A", "/repo/a.go", 10, 3, 7, 100, 1, 2, 3), + }, + }} + + var buf bytes.Buffer + Print(&buf, clones, "/repo") + got := buf.String() + for _, want := range []string{ + "godedup: found 1 clone group(s) (1 exact, 0 near)", + "=== clone group 1 [EXACT 100% similarity] ===", + "pkg.A", + "a.go:10 (3 stmts, 7 lines)", + "pkg.B", + "b.go:20 (3 stmts, 7 lines)", + } { + if !strings.Contains(got, want) { + t.Fatalf("Print() missing %q in:\n%s", want, got) + } + } + if strings.Contains(got, "suggestion:") { + t.Fatalf("Print() contains superfluous suggestion:\n%s", got) + } + if strings.Index(got, "pkg.A") > strings.Index(got, "pkg.B") { + t.Fatalf("functions are not sorted by file/line:\n%s", got) + } +} diff --git a/main.go b/main.go index fb5530d..6065503 100644 --- a/main.go +++ b/main.go @@ -30,6 +30,7 @@ Examples: godedup --exclude '(_test|[.]pb|[.]deepcopy)[.]go$' ./... godedup --output table ./... godedup --output json ./... | jq . + godedup --output html ./... > godedup.html Flags: ` @@ -47,7 +48,7 @@ func main() { excludePatterns = append(excludePatterns, re) return nil }) - output := flag.String("output", "text", "output format: text, table, json") + output := flag.String("output", "text", "output format: text, table, json, html") showVer := flag.Bool("version", false, "print version and exit") flag.Usage = func() { @@ -63,10 +64,10 @@ func main() { } switch *output { - case "text", "table", "json": + case "text", "table", "json", "html": // valid default: - fmtx.Fprintf(os.Stderr, "godedup: unknown output format %q (want: text, table, json)\n", *output) + fmtx.Fprintf(os.Stderr, "godedup: unknown output format %q (want: text, table, json, html)\n", *output) os.Exit(1) } @@ -109,6 +110,8 @@ func main() { report.PrintJSON(os.Stdout, clones) case "table": report.PrintTable(os.Stdout, clones, cwd) + case "html": + report.PrintHTML(os.Stdout, clones, cwd) default: report.Print(os.Stdout, clones, cwd) }