diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f16a5f..5f5831c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- `releasegen validate --require-changelog-entry` now folds *staged* + index changes into its diff against the base ref. Without this, the + check was a no-op under pre-commit hooks (e.g. prenup), because HEAD + is still the parent commit at that moment and a tree-only diff sees + nothing. Local pre-commit checks now catch missing `[Unreleased]` + entries just as reliably as CI does on a pushed commit. Unstaged + worktree edits and untracked files are deliberately excluded so + unrelated local dirt (build artifacts, half-finished edits, scratch + files) cannot cause false failures, and so the changed-file set + matches what `FileAtIndex` reads for the next-commit comparison. +- `releasegen validate --require-changelog-entry` now compares the + `[Unreleased]` section as it appears in the **git index** (the staged + view of the next commit) rather than the working tree. Previously, a + developer who edited `CHANGELOG.md` but forgot to `git add` it would + see the pre-commit check pass — the worktree had the new entry — even + though the commit being created did not, leaving the gap to be caught + only in CI. The check now predicts the post-commit state correctly: + unstaged changelog edits no longer satisfy the requirement, and + `git commit -a` keeps working because git stages worktree changes + before pre-commit hooks run. + ## [[v1.2.0](https://github.com/C2FO/releasegen/releases/tag/v1.2.0)] - 2026-06-18 ### Added - New `releasegen validate` subcommand. Parses every `## [Unreleased]` section diff --git a/cmd/releasegen/validate.go b/cmd/releasegen/validate.go index 24f3db9..c8da510 100644 --- a/cmd/releasegen/validate.go +++ b/cmd/releasegen/validate.go @@ -195,7 +195,7 @@ func validateAll( } log.Debug("require_changelog_entry enabled", "base_ref", base) var err error - entryProblems, err = collectEntryProblems(ctx, cfg, paths, repo, base, log) + entryProblems, err = collectEntryProblems(ctx, paths, repo, base, log) if err != nil { return cliError{code: exitVCSErr, err: err} } @@ -263,7 +263,6 @@ func validatePaths(cfg *config.Config, paths []string, log *slog.Logger) error { // them with content-validation problems in one report. func collectEntryProblems( ctx context.Context, - cfg *config.Config, paths []string, repo *vcs.GitRepo, base string, @@ -322,7 +321,7 @@ func collectEntryProblems( if !st.nonChangelogHit { continue } - gained, err := unreleasedGained(ctx, repo, base, st.changelog, cfg.RepoRoot) + gained, err := unreleasedGained(ctx, repo, base, st.changelog) if err != nil { problems = append(problems, changelogProblem{ path: st.changelog, @@ -370,25 +369,32 @@ func assignModule(f string, prefixes map[string]string, hasRoot bool) string { } // unreleasedGained reports whether the [Unreleased] section of changelogPath -// has more non-whitespace lines at HEAD than it did at base. A net-zero or -// shrinking change counts as "no new content," which catches both the -// "didn't touch the changelog" case and the (rarer) "edited a versioned -// section but not [Unreleased]" case. +// has more non-whitespace lines in the *next commit* than it did at base. +// A net-zero or shrinking change counts as "no new content," which catches +// both the "didn't touch the changelog" case and the (rarer) "edited a +// versioned section but not [Unreleased]" case. +// +// We deliberately read the index (staged) version rather than the working +// tree: when this runs as a pre-commit hook, only what is staged will be +// committed, and we want validation to predict the post-commit state. An +// unstaged worktree edit to CHANGELOG.md will not survive the commit, so +// it must not satisfy this check — otherwise developers can silently +// commit code without their changelog updates and only discover the gap +// in CI. func unreleasedGained( ctx context.Context, repo *vcs.GitRepo, - base, changelogPath, repoRoot string, + base, changelogPath string, ) (bool, error) { baseBody, err := repo.FileAtRef(ctx, base, changelogPath) if err != nil { return false, err } - headPath := filepath.Join(repoRoot, changelogPath) - headBytes, err := os.ReadFile(headPath) //nolint:gosec // path comes from repo discovery + headBody, err := repo.FileAtIndex(ctx, changelogPath) if err != nil { - return false, fmt.Errorf("read %s: %w", changelogPath, err) + return false, fmt.Errorf("read %s from index: %w", changelogPath, err) } - headLines := countNonWhitespaceLines(changelog.ExtractUnreleased(string(headBytes))) + headLines := countNonWhitespaceLines(changelog.ExtractUnreleased(headBody)) baseLines := countNonWhitespaceLines(changelog.ExtractUnreleased(baseBody)) return headLines > baseLines, nil } diff --git a/cmd/releasegen/validate_test.go b/cmd/releasegen/validate_test.go index ca14a03..b6fd2f2 100644 --- a/cmd/releasegen/validate_test.go +++ b/cmd/releasegen/validate_test.go @@ -453,6 +453,143 @@ func (s *ValidateTestSuite) TestRequireChangelogEntry_EmptyBaseRefUsesDefault() s.Contains(err.Error(), "origin/main") } +func (s *ValidateTestSuite) TestRequireChangelogEntry_CatchesStagedChangesPreCommit() { + // Regression: a prenup-style pre-commit hook runs while HEAD is still + // the parent commit. Without folding the index/staged set into + // ChangedFiles, validate would see "nothing changed since base" and + // pass on a developer who staged code without updating the changelog. + // This test asserts the fix: a staged code change with no [Unreleased] + // entry must fail, even though HEAD itself has not moved. + f := s.newRequireEntryFixture() + // DO NOT commit. Stage a source file edit (no changelog update). + full := filepath.Join(f.dir, "svc", "foo.go") + s.Require().NoError(os.WriteFile(full, []byte("package svc\n// staged but uncommitted\n"), 0o600)) + _, err := f.wt.Add("svc/foo.go") + s.Require().NoError(err) + + repo := f.repoOpen() + cfg := &config.Config{ + RepoRoot: f.dir, + RequireChangelogEntry: true, + BaseRef: "base-tag", + } + paths := []string{"CHANGELOG.md", "svc/CHANGELOG.md"} + err = validateAll(context.Background(), cfg, paths, repo, s.log) + s.Require().Error(err, "validate must fail on staged-but-uncommitted code changes without a changelog entry") + s.Contains(err.Error(), "svc/CHANGELOG.md") + s.Contains(err.Error(), "[Unreleased] section gained no new lines") +} + +func (s *ValidateTestSuite) TestRequireChangelogEntry_UnstagedChangelogEditDoesNotSatisfyCheck() { + // Regression: a developer stages svc/foo.go, runs the commit, prenup + // fires validate, the check fails because no [Unreleased] entry exists. + // They then edit svc/CHANGELOG.md but forget `git add` and run commit + // again. The worktree now has the new entry, but only the staged tree + // will be committed — and the staged tree still lacks the entry. + // Validate must fail, otherwise the developer silently commits code + // without the matching changelog line and only CI catches it. + f := s.newRequireEntryFixture() + // Stage a source file. + full := filepath.Join(f.dir, "svc", "foo.go") + s.Require().NoError(os.WriteFile(full, []byte("package svc\n// staged code change\n"), 0o600)) + _, err := f.wt.Add("svc/foo.go") + s.Require().NoError(err) + // Edit the changelog in the worktree but DO NOT stage it. + clPath := filepath.Join(f.dir, "svc", "CHANGELOG.md") + s.Require().NoError(os.WriteFile( + clPath, + []byte("# Changelog\n\n## [Unreleased]\n### Added\n- did the thing\n"), + 0o600, + )) + + repo := f.repoOpen() + cfg := &config.Config{ + RepoRoot: f.dir, + RequireChangelogEntry: true, + BaseRef: "base-tag", + } + paths := []string{"CHANGELOG.md", "svc/CHANGELOG.md"} + err = validateAll(context.Background(), cfg, paths, repo, s.log) + s.Require().Error(err, "validate must reject a changelog edit that was never staged") + s.Contains(err.Error(), "svc/CHANGELOG.md") + s.Contains(err.Error(), "[Unreleased] section gained no new lines") +} + +func (s *ValidateTestSuite) TestRequireChangelogEntry_StagedChangelogEditSatisfiesCheck() { + // Companion to UnstagedChangelogEdit*: same setup, but the developer + // remembers to `git add svc/CHANGELOG.md`. With both files staged, the + // next commit will contain both, so validate must pass. + f := s.newRequireEntryFixture() + full := filepath.Join(f.dir, "svc", "foo.go") + s.Require().NoError(os.WriteFile(full, []byte("package svc\n// staged code change\n"), 0o600)) + _, err := f.wt.Add("svc/foo.go") + s.Require().NoError(err) + clRel := "svc/CHANGELOG.md" + clPath := filepath.Join(f.dir, clRel) + s.Require().NoError(os.WriteFile( + clPath, + []byte("# Changelog\n\n## [Unreleased]\n### Added\n- did the thing\n"), + 0o600, + )) + _, err = f.wt.Add(clRel) + s.Require().NoError(err) + + repo := f.repoOpen() + cfg := &config.Config{ + RepoRoot: f.dir, + RequireChangelogEntry: true, + BaseRef: "base-tag", + } + paths := []string{"CHANGELOG.md", "svc/CHANGELOG.md"} + s.NoError(validateAll(context.Background(), cfg, paths, repo, s.log)) +} + +func (s *ValidateTestSuite) TestRequireChangelogEntry_IgnoresUnstagedAndUntrackedDirt() { + // Companion to the staged-only tightening of ChangedFiles. A + // developer with unstaged build-artifact edits and untracked scratch + // files lying around must NOT see false failures from validate: only + // what is actually staged (and therefore in the next commit) should + // be evaluated. Here the developer has properly staged both + // svc/foo.go and svc/CHANGELOG.md, so validate must pass even though + // the worktree is dirty. + f := s.newRequireEntryFixture() + // Stage code + matching changelog entry (the disciplined case). + full := filepath.Join(f.dir, "svc", "foo.go") + s.Require().NoError(os.WriteFile(full, []byte("package svc\n// staged code change\n"), 0o600)) + _, err := f.wt.Add("svc/foo.go") + s.Require().NoError(err) + s.Require().NoError(os.WriteFile( + filepath.Join(f.dir, "svc", "CHANGELOG.md"), + []byte("# Changelog\n\n## [Unreleased]\n### Added\n- did the thing\n"), + 0o600, + )) + _, err = f.wt.Add("svc/CHANGELOG.md") + s.Require().NoError(err) + // Now sprinkle local dirt: an unstaged edit to a previously + // committed file in the root module and a brand-new untracked file + // in the svc module. Both must be ignored. + s.Require().NoError(os.WriteFile( + filepath.Join(f.dir, "main.go"), + []byte("package x\n// unstaged scratch edit\n"), + 0o600, + )) + s.Require().NoError(os.WriteFile( + filepath.Join(f.dir, "svc", "scratch.log"), + []byte("temp build log\n"), + 0o600, + )) + + repo := f.repoOpen() + cfg := &config.Config{ + RepoRoot: f.dir, + RequireChangelogEntry: true, + BaseRef: "base-tag", + } + paths := []string{"CHANGELOG.md", "svc/CHANGELOG.md"} + s.NoError(validateAll(context.Background(), cfg, paths, repo, s.log), + "local dirt (unstaged edits + untracked files) must not cause validate to fail") +} + func (s *ValidateTestSuite) TestRequireChangelogEntry_DisabledByDefault() { f := s.newRequireEntryFixture() f.write("svc/foo.go", "package svc\n// updated\n") diff --git a/internal/vcs/git.go b/internal/vcs/git.go index 82da0b6..b3228dd 100644 --- a/internal/vcs/git.go +++ b/internal/vcs/git.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "log/slog" "path" "sort" @@ -214,11 +215,24 @@ func changelogBlobHash(c *object.Commit, changelogPath string) (plumbing.Hash, e return entry.Hash, nil } -// ChangedFiles returns the set of file paths that differ between the tree at -// baseRef and the tree at HEAD. The comparison is a two-dot diff (HEAD vs -// baseRef directly), matching the behavior of `git diff ` rather than -// the merge-base "three-dot" form. For PR-time validation this is precise -// enough and avoids a merge-base computation that go-git makes awkward. +// ChangedFiles returns the set of file paths that differ between the tree +// at baseRef and the state that *will be* at HEAD after the next commit — +// i.e. the union of the HEAD-vs-base tree diff and any path currently +// staged in the index. The HEAD-vs-base portion is a two-dot diff (HEAD vs +// baseRef directly), matching `git diff ` rather than the merge-base +// "three-dot" form. +// +// Including the staged portion is essential for pre-commit use cases +// (e.g. a prenup hook). Without it, ChangedFiles would only see what is +// already in HEAD, which during a pre-commit hook is still the unchanged +// parent commit — and the caller would conclude "nothing changed" even +// when the developer has staged a hundred lines of new code. +// +// Unstaged worktree edits and untracked files are deliberately ignored: +// they will not be in the commit being created, and counting them would +// (a) flag callers like `validate --require-changelog-entry` for local +// dirt unrelated to the commit and (b) disagree with FileAtIndex, which +// also reads the staged/index view. // // baseRef may be any revision spec go-git can resolve (branch name, tag, // remote-tracking ref like "origin/main", or a raw hash). When baseRef does @@ -248,13 +262,30 @@ func (g *GitRepo) ChangedFiles(ctx context.Context, baseRef string) ([]string, e if err != nil { return nil, fmt.Errorf("%w: load HEAD tree: %w", ErrVCS, err) } + seen := make(map[string]struct{}) + if err := collectTreeDiff(baseTree, headTree, baseRef, seen); err != nil { + return nil, err + } + if err := g.collectWorktreeChanges(seen); err != nil { + return nil, err + } + + out := make([]string, 0, len(seen)) + for p := range seen { + out = append(out, p) + } + sort.Strings(out) + return out, nil +} + +// collectTreeDiff records every path differing between baseTree and headTree +// in seen. Renames contribute both their From and To paths so callers can +// reason about either name. +func collectTreeDiff(baseTree, headTree *object.Tree, baseRef string, seen map[string]struct{}) error { changes, err := baseTree.Diff(headTree) if err != nil { - return nil, fmt.Errorf("%w: diff %q..HEAD: %w", ErrVCS, baseRef, err) + return fmt.Errorf("%w: diff %q..HEAD: %w", ErrVCS, baseRef, err) } - // Use a map to deduplicate when a path appears as both From and To - // (e.g. a rename). - seen := make(map[string]struct{}, len(changes)) for _, c := range changes { if c.From.Name != "" { seen[c.From.Name] = struct{}{} @@ -263,12 +294,90 @@ func (g *GitRepo) ChangedFiles(ctx context.Context, baseRef string) ([]string, e seen[c.To.Name] = struct{}{} } } - out := make([]string, 0, len(seen)) - for p := range seen { - out = append(out, p) + return nil +} + +// collectWorktreeChanges records every path that has a *staged* change in +// the index, so ChangedFiles reflects what will land in the next commit +// when HEAD has not yet moved (the prenup / pre-commit case). +// +// We deliberately exclude paths whose only change is in the worktree +// (st.Staging == git.Unmodified): such edits will not be in the commit +// being created, so reporting them would cause `validate +// --require-changelog-entry` to flag modules for unrelated local dirt +// (build artifacts, half-finished edits, untracked scratch files) that +// the developer never intended to commit. Limiting this to the staged +// set also matches the index-based view used by FileAtIndex, keeping +// the two halves of the validate check in agreement about what "next +// commit" means. `git commit -a` is unaffected, because git stages +// tracked-modified files before pre-commit hooks run. +func (g *GitRepo) collectWorktreeChanges(seen map[string]struct{}) error { + wt, err := g.repo.Worktree() + if err != nil { + return fmt.Errorf("%w: open worktree: %w", ErrVCS, err) } - sort.Strings(out) - return out, nil + status, err := wt.Status() + if err != nil { + return fmt.Errorf("%w: read worktree status: %w", ErrVCS, err) + } + for path, st := range status { + // Skip both Unmodified (no change at all) and Untracked + // (present only in the worktree, never added to the index). + // Everything else — Modified / Added / Deleted / Renamed / + // Copied — means the index entry for this path differs from + // HEAD, which is exactly the "will be in the next commit" set. + if st.Staging == git.Unmodified || st.Staging == git.Untracked { + continue + } + seen[path] = struct{}{} + } + return nil +} + +// FileAtIndex returns the contents of filePath as it appears in the git +// index — i.e. the version that will be written to the next commit. This is +// the right "current state" for any pre-commit validation: comparing it +// against the base ref tells us what the resulting commit will contain, +// regardless of whether the developer has staged their changes or left +// them in the worktree. +// +// When filePath is not in the index (untracked, or staged for deletion), +// the empty string is returned with a nil error so callers can treat it +// as "the file will not be in the next commit." +func (g *GitRepo) FileAtIndex(ctx context.Context, filePath string) (string, error) { + if err := ctx.Err(); err != nil { + return "", err + } + idx, err := g.repo.Storer.Index() + if err != nil { + return "", fmt.Errorf("%w: read index: %w", ErrVCS, err) + } + for _, entry := range idx.Entries { + if entry.Name != filePath { + continue + } + blob, err := g.repo.BlobObject(entry.Hash) + if err != nil { + return "", fmt.Errorf("%w: load blob for %s in index: %w", ErrVCS, filePath, err) + } + reader, err := blob.Reader() + if err != nil { + return "", fmt.Errorf("%w: open blob for %s in index: %w", ErrVCS, filePath, err) + } + data, readErr := io.ReadAll(reader) + closeErr := reader.Close() + // Prefer a ReadAll error, since it tells us the data is incomplete; + // surface Close on its own as a fallback so deferred I/O failures + // (e.g. underlying object storage hiccups) aren't silently swallowed. + switch { + case readErr != nil: + return "", fmt.Errorf("%w: read blob for %s in index: %w", ErrVCS, filePath, readErr) + case closeErr != nil: + return "", fmt.Errorf("%w: close blob reader for %s in index: %w", ErrVCS, filePath, closeErr) + } + return string(data), nil + } + return "", nil } // FileAtRef returns the contents of the file at filePath as it appears in diff --git a/internal/vcs/git_test.go b/internal/vcs/git_test.go index 2abeb98..8b88e46 100644 --- a/internal/vcs/git_test.go +++ b/internal/vcs/git_test.go @@ -170,7 +170,91 @@ func (s *VCSTestSuite) TestChangedFilesAndFileAtRef() { // An unresolvable ref produces an ErrVCS-wrapped error. _, err = g.ChangedFiles(ctx, "no-such-ref") s.Require().Error(err) - s.ErrorIs(err, vcs.ErrVCS) + s.Require().ErrorIs(err, vcs.ErrVCS) + + // Pre-commit-style scenario: stage a new edit to an existing file + // AND drop an untracked + an unstaged-modified file alongside. + // Only the *staged* change must appear in ChangedFiles: unstaged + // worktree edits and untracked scratch files won't be in the next + // commit, so reporting them would cause callers like + // `validate --require-changelog-entry` to flag modules for local + // dirt the developer never intended to commit. + s.Require().NoError(os.WriteFile(filepath.Join(dir, "main.go"), []byte("package x\n// staged but not committed\n"), 0o600)) + _, err = wt.Add("main.go") + s.Require().NoError(err) + s.Require().NoError(os.WriteFile(filepath.Join(dir, "untracked.go"), []byte("package x\n"), 0o600)) + // Don't `git add` untracked.go — it stays untracked. + s.Require().NoError(os.WriteFile(filepath.Join(dir, "submodule", "foo.go"), []byte("package submodule\n// unstaged edit\n"), 0o600)) + // Don't `git add` submodule/foo.go's new edit — the staged version + // is still the committed one, the worktree just has extra dirt. + + changed, err = g.ChangedFiles(ctx, "base-tag") + s.Require().NoError(err) + s.Contains(changed, "main.go", "staged modification must be reported") + s.Contains(changed, "submodule/foo.go", "previously committed change must still be reported (tree diff)") + s.NotContains(changed, "untracked.go", "untracked files are not staged and must not be reported") + // submodule/foo.go appears via the tree diff above, but its unstaged + // worktree edit on top of HEAD must not be what brings it in — that's + // already covered by the tree-diff assertion. Nothing else to check. +} + +// TestFileAtIndex covers the three states FileAtIndex must disambiguate +// to keep pre-commit validation honest: +// +// 1. A staged modification — the index hash points at the new blob, so +// FileAtIndex must return the *staged* (next-commit) content, not the +// working-tree content and not the HEAD content. +// 2. An unstaged worktree edit on an otherwise-tracked file — the index +// still points at HEAD's blob, so FileAtIndex must return the HEAD +// content. This is the bug that prompted the index-aware lookup: +// `unreleasedGained` previously read the worktree, which let a +// developer satisfy --require-changelog-entry by editing +// CHANGELOG.md and forgetting to `git add`. +// 3. An untracked or staged-for-deletion path — not in the index, so +// FileAtIndex must return the empty string with no error. +func (s *VCSTestSuite) TestFileAtIndex() { + dir := s.T().TempDir() + repo, err := git.PlainInit(dir, false) + s.Require().NoError(err) + wt, err := repo.Worktree() + s.Require().NoError(err) + sig := &object.Signature{Name: "tester", Email: "t@example.com", When: time.Now()} + + writeAndAdd := func(rel, body string) { + s.Require().NoError(os.MkdirAll(filepath.Join(dir, filepath.Dir(rel)), 0o750)) + s.Require().NoError(os.WriteFile(filepath.Join(dir, rel), []byte(body), 0o600)) + _, err := wt.Add(rel) + s.Require().NoError(err) + } + + writeAndAdd("staged.go", "package x\n// committed body\n") + writeAndAdd("worktree.go", "package x\n// committed body\n") + _, err = wt.Commit("base", &git.CommitOptions{Author: sig}) + s.Require().NoError(err) + + // (1) Modify staged.go AND re-stage it. (2) Modify worktree.go but + // leave it unstaged. (3) Drop a fresh untracked file in. + s.Require().NoError(os.WriteFile(filepath.Join(dir, "staged.go"), []byte("package x\n// staged update\n"), 0o600)) + _, err = wt.Add("staged.go") + s.Require().NoError(err) + s.Require().NoError(os.WriteFile(filepath.Join(dir, "worktree.go"), []byte("package x\n// worktree-only edit\n"), 0o600)) + s.Require().NoError(os.WriteFile(filepath.Join(dir, "untracked.go"), []byte("package x\n"), 0o600)) + + g, err := vcs.Open(dir, "main", slog.New(slog.DiscardHandler)) + s.Require().NoError(err) + ctx := context.Background() + + staged, err := g.FileAtIndex(ctx, "staged.go") + s.Require().NoError(err) + s.Equal("package x\n// staged update\n", staged, "FileAtIndex must return the staged blob, not HEAD or the worktree") + + unstaged, err := g.FileAtIndex(ctx, "worktree.go") + s.Require().NoError(err) + s.Equal("package x\n// committed body\n", unstaged, "unstaged worktree edits must NOT leak through FileAtIndex") + + missing, err := g.FileAtIndex(ctx, "untracked.go") + s.Require().NoError(err) + s.Empty(missing, "untracked files are absent from the index and must return empty, not error") } // TestCommitTagAndPush_PushedToBareRemote exercises the full