From 5913fd26aad32bd028a8fe4e5b80fccc28e118af Mon Sep 17 00:00:00 2001 From: Ashlesh Gawande Date: Wed, 7 Jan 2026 13:17:24 +0530 Subject: [PATCH 01/38] t5550: add netrc tests for http 401/403 git allows using .netrc file to supply credentials for HTTP auth. Three test cases are added in this patch to provide missing coverage when cloning over HTTP using .netrc file: - First test case checks that the git clone is successful when credentials are provided via .netrc file - Second test case checks that the git clone fails when the .netrc file provides invalid credentials. The HTTP server is expected to return 401 Unauthorized in such a case. The test checks that the user is provided with a prompt for username/password on 401 to provide the valid ones. - Third test case checks that the git clone fails when the .netrc file provides credentials that are valid but do not have permission for this user. For example one may have multiple tokens in GitHub and uses the one which was not authorized for cloning this repo. In such a case the HTTP server returns 403 Forbidden. For this test, the apache.conf is modified to return a 403 on finding a forbidden-user. No prompt for username/password is expected after the 403 (unlike 401). This is because prompting may wipe out existing credentials or conflict with custom credential helpers. Signed-off-by: Ashlesh Gawande Signed-off-by: Junio C Hamano --- t/lib-httpd.sh | 13 +++++++++++-- t/lib-httpd/apache.conf | 4 ++++ t/lib-httpd/passwd | 1 + t/t5550-http-fetch-dumb.sh | 25 +++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 5091db949b7f99..5f42c311c2f2f5 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -319,13 +319,22 @@ setup_askpass_helper() { ' } -set_askpass() { +set_askpass () { >"$TRASH_DIRECTORY/askpass-query" && echo "$1" >"$TRASH_DIRECTORY/askpass-user" && echo "$2" >"$TRASH_DIRECTORY/askpass-pass" } -expect_askpass() { +set_netrc () { + # $HOME=$TRASH_DIRECTORY + echo "machine $1 login $2 password $3" >"$TRASH_DIRECTORY/.netrc" +} + +clear_netrc () { + rm -f "$TRASH_DIRECTORY/.netrc" +} + +expect_askpass () { dest=$HTTPD_DEST${3+/$3} { diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf index e631ab0eb5ef05..6b8c50a51a3b72 100644 --- a/t/lib-httpd/apache.conf +++ b/t/lib-httpd/apache.conf @@ -238,6 +238,10 @@ SSLEngine On AuthName "git-auth" AuthUserFile passwd Require valid-user + + # return 403 for authenticated user: forbidden-user@host + RewriteCond "%{REMOTE_USER}" "^forbidden-user@host" + RewriteRule ^ - [F] diff --git a/t/lib-httpd/passwd b/t/lib-httpd/passwd index d9c122f3482891..3bab7b64236b11 100644 --- a/t/lib-httpd/passwd +++ b/t/lib-httpd/passwd @@ -1 +1,2 @@ user@host:$apr1$LGPmCZWj$9vxEwj5Z5GzQLBMxp3mCx1 +forbidden-user@host:$apr1$LGPmCZWj$9vxEwj5Z5GzQLBMxp3mCx1 diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index ed0ad66fade32b..9530f01b9e3d13 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -102,6 +102,31 @@ test_expect_success 'cloning password-protected repository can fail' ' expect_askpass both wrong ' +test_expect_success 'using credentials from netrc to clone successfully' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 user@host pass@host && + git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc && + expect_askpass none +' + +test_expect_success 'netrc unauthorized credentials (prompt after 401)' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 user@host pass@wrong && + test_must_fail git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc-401 && + expect_askpass both wrong +' + +test_expect_success 'netrc authorized but forbidden credentials (fail on 403)' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 forbidden-user@host pass@host && + test_must_fail git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc-403 2>err && + expect_askpass none && + grep "The requested URL returned error: 403" err +' + test_expect_success 'http auth can use user/pass in URL' ' set_askpass wrong && git clone "$HTTPD_URL_USER_PASS/auth/dumb/repo.git" clone-auth-none && From bd1855b89760cc0f9a185010a0d92d2e11a73132 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:17 +0100 Subject: [PATCH 02/38] odb: rename `FOR_EACH_OBJECT_*` flags Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This prepares us for a new upcoming `odb_for_each_object()` function and ensures that both the function and its flags have the same prefix. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/pack-objects.c | 10 +++++----- commit-graph.c | 4 ++-- object-file.c | 4 ++-- object-file.h | 2 +- odb.h | 13 +++++++------ packfile.c | 20 ++++++++++---------- packfile.h | 4 ++-- reachable.c | 8 ++++---- repack-promisor.c | 2 +- revision.c | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 2ad712e9f8f55c..6964a5a52c1646 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; batch_each_object(opt, batch_unordered_object, - FOR_EACH_OBJECT_PACK_ORDER, &cb); + ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6ee31d48c94748..74317051fdf7f6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) for_each_object_in_pack(p, add_object_entry_from_pack, revs, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); } strbuf_release(&buf); @@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void) if (for_each_packed_object(to_pack.repo, add_object_in_unpacked_pack, NULL, - FOR_EACH_OBJECT_PACK_ORDER | - FOR_EACH_OBJECT_LOCAL_ONLY | - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) die(_("cannot open pack index")); } diff --git a/commit-graph.c b/commit-graph.c index 6b1f02e1792b64..7f1145a0821cbb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, goto cleanup; } for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } @@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); for_each_packed_object(ctx->r, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); diff --git a/object-file.c b/object-file.c index e7e4c3348f9c1b..64e9e239dc341a 100644 --- a/object-file.c +++ b/object-file.c @@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source, int for_each_loose_object(struct object_database *odb, each_loose_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { struct odb_source *source; @@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb, if (r) return r; - if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) break; } diff --git a/object-file.h b/object-file.h index 1229d5f675b44a..42bb50e10cf296 100644 --- a/object-file.h +++ b/object-file.h @@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source, */ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); /** diff --git a/odb.h b/odb.h index bab07755f4ec95..74503addf1462c 100644 --- a/odb.h +++ b/odb.h @@ -442,24 +442,25 @@ static inline void obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } + /* Flags for for_each_*_object(). */ -enum for_each_object_flags { +enum odb_for_each_object_flags { /* Iterate only over local objects, not alternates. */ - FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), /* Only iterate over packs obtained from the promisor remote. */ - FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), /* * Visit objects within a pack in packfile order rather than .idx order */ - FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2), /* Only iterate over packs that are not marked as kept in-core. */ - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), /* Only iterate over packs that do not have .keep files. */ - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; enum { diff --git a/packfile.c b/packfile.c index 402c3b5dc73131..b65f0b43f16eb3 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) { if (load_pack_revindex(p->repo, p)) return -1; } @@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p, * - in pack-order, it is pack position, which we must * convert to an index position in order to get the oid. */ - if (flags & FOR_EACH_OBJECT_PACK_ORDER) + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) index_pos = pack_pos_to_index(p, i); else index_pos = i; @@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags) + void *data, enum odb_for_each_object_flags flags) { struct odb_source *source; int r = 0; @@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (repo_has_promisor_remote(r)) { for_each_packed_object(r, add_promisor_object, &promisor_objects, - FOR_EACH_OBJECT_PROMISOR_ONLY | - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/packfile.h b/packfile.h index acc5c55ad57754..15551258bde519 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); + void *data, enum odb_for_each_object_flags flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 diff --git a/reachable.c b/reachable.c index 4b532039d5f84f..82676b2668090d 100644 --- a/reachable.c +++ b/reachable.c @@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum for_each_object_flags flags; + enum odb_for_each_object_flags flags; int r; data.revs = revs; @@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, data.extra_recent_oids_loaded = 0; r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); + ODB_FOR_EACH_OBJECT_LOCAL_ONLY); if (r) goto done; - flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) - flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f65602..45c330b9a53ae6 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo, ctx.cmd = &cmd; ctx.algop = repo->hash_algo; for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index b65a76377062cd..5aadf46dac2b9d 100644 --- a/revision.c +++ b/revision.c @@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs) if (revs->exclude_promisor_objects) { for_each_packed_object(revs->repo, mark_uninteresting, revs, - FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); } if (!revs->reflog_info) From 6358da200fffc7f010f079c3f64ed77f10cd751d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:18 +0100 Subject: [PATCH 03/38] odb: fix flags parameter to be unsigned The `flags` parameter accepted by various `for_each_object()` functions is a bitfield of multiple flags. Such parameters are typically unsigned in the Git codebase, but we use `enum odb_for_each_object_flags` in some places. Adapt these function signatures to use the correct type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 ++- object-file.h | 3 ++- packfile.c | 4 ++-- packfile.h | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 64e9e239dc341a..8fa461dd596615 100644 --- a/object-file.c +++ b/object-file.c @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + unsigned flags) { int ret; int fd; diff --git a/object-file.h b/object-file.h index 42bb50e10cf296..2acf19fb91ab41 100644 --- a/object-file.h +++ b/object-file.h @@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + unsigned flags); int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, diff --git a/packfile.c b/packfile.c index b65f0b43f16eb3..79fe64a25b2f5e 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum odb_for_each_object_flags flags) + unsigned flags) { uint32_t i; int r = 0; @@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags) + void *data, unsigned flags) { struct odb_source *source; int r = 0; diff --git a/packfile.h b/packfile.h index 15551258bde519..447c44c4a7517d 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum odb_for_each_object_flags flags); + unsigned flags); int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags); + void *data, unsigned flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 6ecab3cdf67012592734ed9493db634d39326d43 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:19 +0100 Subject: [PATCH 04/38] object-file: extract function to read object info from path Extract a new function that allows us to read object info for a specific loose object via a user-supplied path. This function will be used in a subsequent commit. Note that this also allows us to drop `stat_loose_object()`, which is a simple wrapper around `odb_loose_path()` plus lstat(3p). Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 8fa461dd596615..a651129426992c 100644 --- a/object-file.c +++ b/object-file.c @@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in given source. - * Returns 0 on success, negative on failure. + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, - struct stat *st, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - - *path = odb_loose_path(loose->source, &buf, oid); - if (!lstat(*path, st)) - return 0; - - return -1; -} - -/* - * Like stat_loose_object(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { @@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int odb_source_loose_read_object_info(struct odb_source *source, +static int read_object_info_from_path(struct odb_source *source, + const char *path, const struct object_id *oid, struct object_info *oi, unsigned flags) @@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source, int ret; int fd; unsigned long mapsize; - const char *path; void *map = NULL; git_zstream stream, *stream_to_end = NULL; char hdr[MAX_HEADER_LEN]; @@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - if (stat_loose_object(source->loose, oid, &st, &path) < 0) { + if (lstat(path, &st) < 0) { ret = -1; goto out; } @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - fd = open_loose_object(source->loose, oid, &path); + fd = git_open(path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); @@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, return ret; } +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + static struct strbuf buf = STRBUF_INIT; + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(source, buf.buf, oid, oi, flags); +} + static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, From cde615b6f05228cd7cf125de6bf5757381f65381 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:20 +0100 Subject: [PATCH 05/38] object-file: introduce function to iterate through objects We have multiple divergent interfaces to iterate through objects of a specific backend: - `for_each_loose_object()` yields all loose objects. - `for_each_packed_object()` (somewhat obviously) yields all packed objects. These functions have different function signatures, which makes it hard to create a common abstraction layer that covers both of these. Introduce a new function `odb_source_loose_for_each_object()` to plug this gap. This function doesn't take any data specific to loose objects, but instead it accepts a `struct object_info` that will be populated the exact same as if `odb_source_loose_read_object()` was called. The benefit of this new interface is that we can continue to pass backend-specific data, as `struct object_info` contains a union for these exact use cases. This will allow us to unify how we iterate through objects across both loose and packed objects in a subsequent commit. The `for_each_loose_object()` function continues to exist for now, but it will be removed at the end of this patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ object-file.h | 11 +++++++++++ odb.h | 12 ++++++++++++ 3 files changed, 71 insertions(+) diff --git a/object-file.c b/object-file.c index a651129426992c..ef2c7618c17a64 100644 --- a/object-file.c +++ b/object-file.c @@ -1801,6 +1801,54 @@ int for_each_loose_object(struct object_database *odb, return 0; } +struct for_each_object_wrapper_data { + struct odb_source *source; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + struct object_info oi = *data->request; + + if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct for_each_object_wrapper_data data = { + .source = source, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose promisor objects, so we can return immediately. */ + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + return 0; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + return 0; + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index 2acf19fb91ab41..5b9641cd890175 100644 --- a/object-file.h +++ b/object-file.h @@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, enum odb_for_each_object_flags flags); +/* + * Iterate through all loose objects in the given object database source and + * invoke the callback function for each of them. If given, the object info + * will be populated with the object's data as if you had called + * `odb_source_loose_read_object_info()` on the object. + */ +int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /** * format_object_header() is a thin wrapper around s xsnprintf() that diff --git a/odb.h b/odb.h index 74503addf1462c..f97f249580e08a 100644 --- a/odb.h +++ b/odb.h @@ -463,6 +463,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + enum { /* * By default, `odb_write_object()` does not actually write anything From 37353119046414b2dccb26b32cb5224e0c9258e1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:21 +0100 Subject: [PATCH 06/38] packfile: extract function to iterate through objects of a store In the next commit we're about to introduce a new function that knows to iterate through objects of a given packfile store. Same as with the equivalent function for loose objects, this new function will also be agnostic of backends by using a `struct object_info`. Prepare for this by extracting a new shared function to iterate through a single packfile store. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 78 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/packfile.c b/packfile.c index 79fe64a25b2f5e..d15a2ce12b1ce5 100644 --- a/packfile.c +++ b/packfile.c @@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) +static int packfile_store_for_each_object_internal(struct packfile_store *store, + each_packed_object_fn cb, + void *data, + unsigned flags, + int *pack_errors) { - struct odb_source *source; - int r = 0; - int pack_errors = 0; + struct packfile_list_entry *e; + int ret = 0; - odb_prepare_alternates(repo->objects); + store->skip_mru_updates = true; - for (source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *e; + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; - source->packfiles->skip_mru_updates = true; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + *pack_errors = 1; + continue; + } - for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { - struct packed_git *p = e->pack; + ret = for_each_object_in_pack(p, cb, data, flags); + if (ret) + break; + } - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; - } + store->skip_mru_updates = false; - r = for_each_object_in_pack(p, cb, data, flags); - if (r) - break; - } + return ret; +} - source->packfiles->skip_mru_updates = false; +int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, + void *data, unsigned flags) +{ + struct odb_source *source; + int pack_errors = 0; + int ret = 0; - if (r) + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, + flags, &pack_errors); + if (ret) break; } - return r ? r : pack_errors; + return ret ? ret : pack_errors; } static int add_promisor_object(const struct object_id *oid, From 736464b84f4439361ec10e9ef49bff674fea952d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:22 +0100 Subject: [PATCH 07/38] packfile: introduce function to iterate through objects Introduce a new function `packfile_store_for_each_object()`. This function is equivalent to `odb_source_loose_for_each_object()`, except that it: - Works on a single packfile store instead of working on the object database level. Consequently, it will only yield packed objects of a single object database source. - Passes a `struct object_info` to the callback function. As such, it provides the same callback interface as we already provide for loose objects now. These functions will be used in a subsequent step to implement `odb_for_each_object()`. The `for_each_packed_object()` function continues to exist for now, but it will be removed at the end of this patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 15 +++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/packfile.c b/packfile.c index d15a2ce12b1ce5..c35d5ea6552ec3 100644 --- a/packfile.c +++ b/packfile.c @@ -2360,6 +2360,57 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, return ret ? ret : pack_errors; } +struct packfile_store_for_each_object_wrapper_data { + struct packfile_store *store; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int packfile_store_for_each_object_wrapper(const struct object_id *oid, + struct packed_git *pack, + uint32_t index_pos, + void *cb_data) +{ + struct packfile_store_for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + off_t offset = nth_packed_object_offset(pack, index_pos); + struct object_info oi = *data->request; + + if (packed_object_info(pack, offset, &oi) < 0) { + mark_bad_packed_object(pack, oid); + return -1; + } + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +int packfile_store_for_each_object(struct packfile_store *store, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct packfile_store_for_each_object_wrapper_data data = { + .store = store, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + int pack_errors = 0, ret; + + ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, + &data, flags, &pack_errors); + if (ret) + return ret; + + return pack_errors ? -1 : 0; +} + static int add_promisor_object(const struct object_id *oid, struct packed_git *pack, uint32_t pos UNUSED, diff --git a/packfile.h b/packfile.h index 447c44c4a7517d..b7964f0289705c 100644 --- a/packfile.h +++ b/packfile.h @@ -343,6 +343,21 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, unsigned flags); +/* + * Iterate through all packed objects in the given packfile store and invoke + * the callback function for each of them. If an object info request is given, + * then the object info will be read for every individual object and passed to + * the callback as if `packfile_store_read_object_info()` was called for the + * object. + * + * The flags parameter is a combination of `odb_for_each_object_flags`. + */ +int packfile_store_for_each_object(struct packfile_store *store, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From df2fbdfa553526062e5234286f60bd643941298a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:23 +0100 Subject: [PATCH 08/38] odb: introduce `odb_for_each_object()` Introduce a new function `odb_for_each_object()` that knows to iterate through all objects part of a given object database. This function is essentially a simple wrapper around the object database sources. Subsequent commits will adapt callers to use this new function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.h | 7 ++++--- odb.c | 29 +++++++++++++++++++++++++++++ odb.h | 20 ++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/object-file.h b/object-file.h index 5b9641cd890175..b5eac0349ebc98 100644 --- a/object-file.h +++ b/object-file.h @@ -139,9 +139,10 @@ int for_each_loose_object(struct object_database *odb, /* * Iterate through all loose objects in the given object database source and - * invoke the callback function for each of them. If given, the object info - * will be populated with the object's data as if you had called - * `odb_source_loose_read_object_info()` on the object. + * invoke the callback function for each of them. If an object info request is + * given, then the object info will be read for every individual object and + * passed to the callback as if `odb_source_loose_read_object_info()` was + * called for the object. */ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, diff --git a/odb.c b/odb.c index ac70b6a099f588..13a415c2c3e415 100644 --- a/odb.c +++ b/odb.c @@ -995,6 +995,35 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, + cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(source->packfiles, request, + cb, cb_data, flags); + if (ret) + return ret; + } + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f97f249580e08a..b5d28bc188f957 100644 --- a/odb.h +++ b/odb.h @@ -475,6 +475,26 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Iterate through all objects contained in the object database. Note that + * objects may be iterated over multiple times in case they are either stored + * in different backends or in case they are stored in multiple sources. + * If an object info request is given, then the object info will be read and + * passed to the callback as if `odb_read_object_info()` was called for the + * object. + * + * Returning a non-zero error code from the callback function will cause + * iteration to abort. The error code will be propagated. + * + * Returns 0 on success, a negative error code in case a failure occurred, or + * an arbitrary non-zero error code returned by the callback itself. + */ +int odb_for_each_object(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + enum { /* * By default, `odb_write_object()` does not actually write anything From cc47e3d38c5be2969df3dba6814ee0e685a07de2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:24 +0100 Subject: [PATCH 09/38] builtin/fsck: refactor to use `odb_for_each_object()` In git-fsck(1) we have two callsites where we iterate over all objects via `for_each_loose_object()` and `for_each_packed_object()`. Both of these are trivially convertible with `odb_for_each_object()`. Refactor these callsites accordingly. Note that `odb_for_each_object()` may iterate over the same object multiple times, for example when it exists both in packed and loose format. But this has already been the case beforehand, so this does not result in a change in behaviour. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 57 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 4979bc795e5d61..2ebe77d58e5c0e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, return 0; } -static void mark_unreachable_referents(const struct object_id *oid) +static int mark_unreachable_referents(const struct object_id *oid, + struct object_info *oi UNUSED, + void *data UNUSED) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) - return; /* not part of our original set */ + return 0; /* not part of our original set */ if (obj->flags & REACHABLE) - return; /* reachable objects already traversed */ + return 0; /* reachable objects already traversed */ /* * Avoid passing OBJ_NONE to fsck_walk, which will parse the object @@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid) fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) free_tree_buffer((struct tree *)obj); -} -static int mark_loose_unreachable_referents(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); - return 0; -} - -static int mark_packed_unreachable_referents(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); return 0; } @@ -394,12 +381,8 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - for_each_loose_object(the_repository->objects, - mark_loose_unreachable_referents, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_unreachable_referents, - NULL, - 0); + odb_for_each_object(the_repository->objects, NULL, + mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. */ @@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path, fsck_resolve_undo(istate, index_path); } -static void mark_object_for_connectivity(const struct object_id *oid) +static int mark_object_for_connectivity(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data UNUSED) { struct object *obj = lookup_unknown_object(the_repository, oid); obj->flags |= HAS_OBJ; -} - -static int mark_loose_for_connectivity(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); - return 0; -} - -static int mark_packed_for_connectivity(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); return 0; } @@ -1001,10 +970,8 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(the_repository->objects, - mark_loose_for_connectivity, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_for_connectivity, NULL, 0); + odb_for_each_object(the_repository->objects, NULL, + mark_object_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) From 2813c97310a998510ad4bcbbf38a774fd6bb5386 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:25 +0100 Subject: [PATCH 10/38] treewide: enumerate promisor objects via `odb_for_each_object()` We have multiple callsites where we enumerate all promisor objects in the object database via `for_each_packed_object()`. This is done by passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to skip over all non-promisor objects. These callsites can be trivially converted to `odb_for_each_object()` as we know to skip enumeration of loose objects in case the `PROMISOR_ONLY` flag was passed by the caller. Refactor the sites accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 37 ++++++++++++++++++++++--------------- repack-promisor.c | 8 ++++---- revision.c | 10 ++++------ 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/packfile.c b/packfile.c index c35d5ea6552ec3..c54deabd645075 100644 --- a/packfile.c +++ b/packfile.c @@ -2411,28 +2411,32 @@ int packfile_store_for_each_object(struct packfile_store *store, return pack_errors ? -1 : 0; } +struct add_promisor_object_data { + struct repository *repo; + struct oidset *set; +}; + static int add_promisor_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos UNUSED, - void *set_) + struct object_info *oi UNUSED, + void *cb_data) { - struct oidset *set = set_; + struct add_promisor_object_data *data = cb_data; struct object *obj; int we_parsed_object; - obj = lookup_object(pack->repo, oid); + obj = lookup_object(data->repo, oid); if (obj && obj->parsed) { we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object_with_flags(pack->repo, oid, + obj = parse_object_with_flags(data->repo, oid, PARSE_OBJECT_SKIP_HASH_CHECK); } if (!obj) return 1; - oidset_insert(set, oid); + oidset_insert(data->set, oid); /* * If this is a tree, commit, or tag, the objects it refers @@ -2450,19 +2454,19 @@ static int add_promisor_object(const struct object_id *oid, */ return 0; while (tree_entry_gently(&desc, &entry)) - oidset_insert(set, &entry.oid); + oidset_insert(data->set, &entry.oid); if (we_parsed_object) free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; - oidset_insert(set, get_commit_tree_oid(commit)); + oidset_insert(data->set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) - oidset_insert(set, &parents->item->object.oid); + oidset_insert(data->set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; - oidset_insert(set, get_tagged_oid(tag)); + oidset_insert(data->set, get_tagged_oid(tag)); } return 0; } @@ -2474,10 +2478,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (!promisor_objects_prepared) { if (repo_has_promisor_remote(r)) { - for_each_packed_object(r, add_promisor_object, - &promisor_objects, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | - ODB_FOR_EACH_OBJECT_PACK_ORDER); + struct add_promisor_object_data data = { + .repo = r, + .set = &promisor_objects, + }; + + odb_for_each_object(r->objects, NULL, add_promisor_object, &data, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/repack-promisor.c b/repack-promisor.c index 45c330b9a53ae6..35c4073632b1b4 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -17,8 +17,8 @@ struct write_oid_context { * necessary. */ static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) + struct object_info *oi UNUSED, + void *data) { struct write_oid_context *ctx = data; struct child_process *cmd = ctx->cmd; @@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo, */ ctx.cmd = &cmd; ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); + odb_for_each_object(repo->objects, NULL, write_oid, &ctx, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index 5aadf46dac2b9d..e34bcd8e888127 100644 --- a/revision.c +++ b/revision.c @@ -3626,8 +3626,7 @@ void reset_revision_walk(void) } static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, + struct object_info *oi UNUSED, void *cb) { struct rev_info *revs = cb; @@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(revs->repo, mark_uninteresting, revs, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); - } + if (revs->exclude_promisor_objects) + odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting, + revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); From 317ea9a6c3c134a1bcdee49bbbbf1731c17b967a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:26 +0100 Subject: [PATCH 11/38] treewide: drop uses of `for_each_{loose,packed}_object()` We're using `for_each_loose_object()` and `for_each_packed_object()` at a couple of callsites to enumerate all loose and packed objects, respectively. These functions will be removed in a subsequent commit in favor of the newly introduced `odb_source_loose_for_each_object()` and `packfile_store_for_each_object()` replacements. Prepare for this by refactoring the sites accordingly. Note that ideally, we'd convert all callsites to use the generic `odb_for_each_object()` function already. But for some callers this is not possible (yet), and it would require some significant refactorings to make this work. Converting these site will thus be deferred to a later patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 34 ++++++++++++++++++++++++++++------ commit-graph.c | 44 +++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 6964a5a52c1646..e2c63dbedf48fc 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -806,11 +806,14 @@ struct for_each_object_payload { void *payload; }; -static int batch_one_object_loose(const struct object_id *oid, - const char *path UNUSED, - void *_payload) +static int batch_one_object_oi(const struct object_id *oid, + struct object_info *oi, + void *_payload) { struct for_each_object_payload *payload = _payload; + if (oi && oi->whence == OI_PACKED) + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset, + payload->payload); return payload->callback(oid, NULL, 0, payload->payload); } @@ -846,8 +849,21 @@ static void batch_each_object(struct batch_options *opt, .payload = _payload, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + struct odb_source *source; - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); + /* + * TODO: we still need to tap into implementation details of the object + * database sources. Ideally, we should extend `odb_for_each_object()` + * to handle object filters itself so that we can move the filtering + * logic into the individual sources. + */ + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, + &payload, flags); + if (ret) + break; + } if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { @@ -861,8 +877,14 @@ static void batch_each_object(struct batch_options *opt, &payload, flags); } } else { - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + struct object_info oi = { 0 }; + + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = packfile_store_for_each_object(source->packfiles, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } free_bitmap_index(bitmap); diff --git a/commit-graph.c b/commit-graph.c index 7f1145a0821cbb..a3087d78835677 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, return 0; } +static int add_packed_commits_oi(const struct object_id *oid, + struct object_info *oi, + void *data) +{ + struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; + + if (ctx->progress) + display_progress(ctx->progress, ++ctx->progress_done); + + if (*oi->typep != OBJ_COMMIT) + return 0; + + oid_array_append(&ctx->oids, oid); + set_commit_pos(ctx->r, oid); + + return 0; +} + static int add_packed_commits(const struct object_id *oid, struct packed_git *pack, uint32_t pos, void *data) { - struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; enum object_type type; off_t offset = nth_packed_object_offset(pack, pos); struct object_info oi = OBJECT_INFO_INIT; - if (ctx->progress) - display_progress(ctx->progress, ++ctx->progress_done); - oi.typep = &type; if (packed_object_info(pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); - if (type != OBJ_COMMIT) - return 0; - - oid_array_append(&ctx->oids, oid); - set_commit_pos(ctx->r, oid); - - return 0; + return add_packed_commits_oi(oid, &oi, data); } static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) @@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx, static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { + struct odb_source *source; + enum object_type type; + struct object_info oi = { + .typep = &type, + }; + if (ctx->report_progress) ctx->progress = start_delayed_progress( ctx->r, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); - for_each_packed_object(ctx->r, add_packed_commits, ctx, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + + odb_prepare_alternates(ctx->r->objects); + for (source = ctx->r->objects->sources; source; source = source->next) + packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi, + ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); From 7b7cbaef2781cf755bc900e871964ae62ad532c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:27 +0100 Subject: [PATCH 12/38] odb: introduce mtime fields for object info requests There are some use cases where we need to figure out the mtime for objects. Most importantly, this is the case when we want to prune unreachable objects. But getting at that data requires users to manually derive the info either via the loose object's mtime, the packfiles' mtime or via the ".mtimes" file. Introduce a new `struct object_info::mtimep` pointer that allows callers to request an object's mtime. This new field will be used in a subsequent commit. Note that the concept of "mtime" is ambiguous: given an object, it may be stored multiple times in the object database, and each of these instances may have a different mtime. Disambiguating these mtimes is nothing that can happen on the generic ODB layer: the caller may search for the oldest object, the newest object, or even the relation of object mtimes depending on the specific source they are located in. As such, it is the responsibility of the caller to disambiguate mtimes. A consequence of this is that it's most likely incorrect to look up the mtime via `odb_read_object_info()`, as this interface does not give us enough information to disambiguate the mtime. Document this accordingly and tell users to use `odb_for_each_object()` instead. Even with this gotcha though it's sensible to have this request as part of the object info, as the mtime is a property of the object storage format. If we for example had a "black-box" storage backend, we'd still need to be able to query it for the mtime info in a generic way. We could introduce a safety mechanism that for example calls `BUG()` in case we look up the mtime outside of `odb_for_each_object()`. But that feels somewhat heavy-handed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++---- odb.c | 2 ++ odb.h | 13 +++++++++++++ packfile.c | 41 ++++++++++++++++++++++++++++++++++------- 4 files changed, 74 insertions(+), 11 deletions(-) diff --git a/object-file.c b/object-file.c index ef2c7618c17a64..5537ab2c370992 100644 --- a/object-file.c +++ b/object-file.c @@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source, char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; + struct stat st; /* * If we don't care about type or size, then we don't @@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source, if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { struct stat st; - if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) { + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { ret = quick_has_loose(source->loose, oid) ? 0 : -1; goto out; } @@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - if (oi && oi->disk_sizep) - *oi->disk_sizep = st.st_size; + if (oi) { + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + } ret = 0; goto out; @@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - map = map_fd(fd, path, &mapsize); + if (fstat(fd, &st)) { + close(fd); + ret = -1; + goto out; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + ret = error(_("object file %s is empty"), path); + goto out; + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); if (!map) { ret = -1; goto out; @@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source, if (oi->disk_sizep) *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; stream_to_end = &stream; diff --git a/odb.c b/odb.c index 13a415c2c3e415..9d9a3fad627369 100644 --- a/odb.c +++ b/odb.c @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb, oidclr(oi->delta_base_oid, odb->repo->hash_algo); if (oi->contentp) *oi->contentp = xmemdupz(co->buf, co->size); + if (oi->mtimep) + *oi->mtimep = 0; oi->whence = OI_CACHED; } return 0; diff --git a/odb.h b/odb.h index b5d28bc188f957..8ad0fcc02f148d 100644 --- a/odb.h +++ b/odb.h @@ -318,6 +318,19 @@ struct object_info { struct object_id *delta_base_oid; void **contentp; + /* + * The time the given looked-up object has been last modified. + * + * Note: the mtime may be ambiguous in case the object exists multiple + * times in the object database. It is thus _not_ recommended to use + * this field outside of contexts where you would read every instance + * of the object, like for example with `odb_for_each_object()`. As it + * is impossible to say at the ODB level what the intent of the caller + * is (e.g. whether to find the oldest or newest object), it is the + * responsibility of the caller to disambiguate the mtimes. + */ + time_t *mtimep; + /* Response */ enum { OI_CACHED, diff --git a/packfile.c b/packfile.c index c54deabd645075..845633139f9942 100644 --- a/packfile.c +++ b/packfile.c @@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct packed_git *p, - off_t obj_offset, struct object_info *oi) +static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset, + uint32_t *maybe_index_pos, struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; enum object_type type = OBJ_NONE; + uint32_t pack_pos; int ret; /* @@ -1619,16 +1620,35 @@ int packed_object_info(struct packed_git *p, } } - if (oi->disk_sizep) { - uint32_t pos; - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); ret = -1; goto out; } + } + + if (oi->disk_sizep) + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + + if (oi->mtimep) { + if (p->is_cruft) { + uint32_t index_pos; + + if (load_pack_mtimes(p) < 0) + die(_("could not load .mtimes for cruft pack '%s'"), + pack_basename(p)); + + if (maybe_index_pos) + index_pos = *maybe_index_pos; + else + index_pos = pack_pos_to_index(p, pack_pos); - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; + *oi->mtimep = nth_packed_mtime(p, index_pos); + } else { + *oi->mtimep = p->mtime; + } } if (oi->typep) { @@ -1681,6 +1701,12 @@ int packed_object_info(struct packed_git *p, return ret; } +int packed_object_info(struct packed_git *p, off_t obj_offset, + struct object_info *oi) +{ + return packed_object_info_with_index_pos(p, obj_offset, NULL, oi); +} + static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, @@ -2378,7 +2404,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, off_t offset = nth_packed_object_offset(pack, index_pos); struct object_info oi = *data->request; - if (packed_object_info(pack, offset, &oi) < 0) { + if (packed_object_info_with_index_pos(pack, offset, + &index_pos, &oi) < 0) { mark_bad_packed_object(pack, oid); return -1; } From dd097bbe295d58fa698708d3754426f664fdfe02 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:28 +0100 Subject: [PATCH 13/38] builtin/pack-objects: use `packfile_store_for_each_object()` When enumerating objects that are supposed to be stored in a new cruft pack we use `for_each_packed_object()` and then derive each object's mtime individually. Refactor this logic to instead use the new `packfile_store_for_each_object()` function with an object info request that asks for the respective mtimes. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 46 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 74317051fdf7f6..a6d37366ffa36f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit) } static int add_object_in_unpacked_pack(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, + struct object_info *oi, void *data UNUSED) { if (cruft) { - off_t offset; - time_t mtime; - - if (pack->is_cruft) { - if (load_pack_mtimes(pack) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(pack, pos); - } else { - mtime = pack->mtime; - } - offset = nth_packed_object_offset(pack, pos); - - add_cruft_object_entry(oid, OBJ_NONE, pack, offset, - NULL, mtime); + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack, + oi->u.packed.offset, NULL, *oi->mtimep); } else { add_object_entry(oid, OBJ_NONE, "", 0); } @@ -4341,14 +4328,25 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, static void add_objects_in_unpacked_packs(void) { - if (for_each_packed_object(to_pack.repo, - add_object_in_unpacked_pack, - NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) - die(_("cannot open pack index")); + struct odb_source *source; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + }; + + odb_prepare_alternates(to_pack.repo->objects); + for (source = to_pack.repo->objects->sources; source; source = source->next) { + if (!source->local) + continue; + + if (packfile_store_for_each_object(source->packfiles, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + die(_("cannot open pack index")); + } } static int add_loose_object(const struct object_id *oid, const char *path, From 7a8582c82ce896d89bbcc1d91d8b5bdc31902416 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:29 +0100 Subject: [PATCH 14/38] reachable: convert to use `odb_for_each_object()` To figure out which objects expired objects we enumerate all loose and packed objects individually so that we can figure out their respective mtimes. Refactor the code to instead use `odb_for_each_object()` with a request that ask for the object mtime instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reachable.c | 125 +++++++++++++++------------------------------------- 1 file changed, 35 insertions(+), 90 deletions(-) diff --git a/reachable.c b/reachable.c index 82676b2668090d..101cfc272715fb 100644 --- a/reachable.c +++ b/reachable.c @@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime, return oidset_contains(&data->extra_recent_oids, oid); } -static void add_recent_object(const struct object_id *oid, - struct packed_git *pack, - off_t offset, - timestamp_t mtime, - struct recent_data *data) +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) { - struct object *obj; - enum object_type type; + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + return 0; + return 1; +} - if (!obj_is_recent(oid, mtime, data)) - return; +static int add_recent_object(const struct object_id *oid, + struct object_info *oi, + void *cb_data) +{ + struct recent_data *data = cb_data; + struct object *obj; - /* - * We do not want to call parse_object here, because - * inflating blobs and trees could be very expensive. - * However, we do need to know the correct type for - * later processing, and the revision machinery expects - * commits and tags to have been parsed. - */ - type = odb_read_object_info(the_repository->objects, oid, NULL); - if (type < 0) - die("unable to get object info for %s", oid_to_hex(oid)); + if (!want_recent_object(data, oid) || + !obj_is_recent(oid, *oi->mtimep, data)) + return 0; - switch (type) { + switch (*oi->typep) { case OBJ_TAG: case OBJ_COMMIT: obj = parse_object_or_die(the_repository, oid, NULL); @@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid, break; default: die("unknown object type for %s: %s", - oid_to_hex(oid), type_name(type)); + oid_to_hex(oid), type_name(*oi->typep)); } if (!obj) die("unable to lookup %s", oid_to_hex(oid)); - - add_pending_object(data->revs, obj, ""); - if (data->cb) - data->cb(obj, pack, offset, mtime); -} - -static int want_recent_object(struct recent_data *data, - const struct object_id *oid) -{ - if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + if (obj->flags & SEEN) return 0; - return 1; -} -static int add_recent_loose(const struct object_id *oid, - const char *path, void *data) -{ - struct stat st; - struct object *obj; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - - if (stat(path, &st) < 0) { - /* - * It's OK if an object went away during our iteration; this - * could be due to a simultaneous repack. But anything else - * we should abort, since we might then fail to mark objects - * which should not be pruned. - */ - if (errno == ENOENT) - return 0; - return error_errno("unable to stat %s", oid_to_hex(oid)); + add_pending_object(data->revs, obj, ""); + if (data->cb) { + if (oi->whence == OI_PACKED) + data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep); + else + data->cb(obj, NULL, 0, *oi->mtimep); } - add_recent_object(oid, NULL, 0, st.st_mtime, data); - return 0; -} - -static int add_recent_packed(const struct object_id *oid, - struct packed_git *p, - uint32_t pos, - void *data) -{ - struct object *obj; - timestamp_t mtime = p->mtime; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - if (p->is_cruft) { - if (load_pack_mtimes(p) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(p, pos); - } - add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } @@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum odb_for_each_object_flags flags; + unsigned flags; + enum object_type type; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + .typep = &type, + }; int r; data.revs = revs; @@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - ODB_FOR_EACH_OBJECT_LOCAL_ONLY); - if (r) - goto done; - flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; - r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); + r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags); + if (r) + goto done; done: oidset_clear(&data.extra_recent_oids); From 3565faf28c2059c6260d53ac71a303b1c04b0a7b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:30 +0100 Subject: [PATCH 15/38] odb: drop unused `for_each_{loose,packed}_object()` functions We have converted all callers of `for_each_loose_object()` and `for_each_packed_object()` to use their new replacement functions instead. We can thus remove them now. Do so and inline `packfile_store_for_each_object_internal()` now that it only has a single callsite again. This makes it a bit easier to follow the callback indirection that is happening there. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 20 ----------- object-file.h | 11 ------ packfile.c | 99 ++++++++++++++++++--------------------------------- packfile.h | 2 -- 4 files changed, 35 insertions(+), 97 deletions(-) diff --git a/object-file.c b/object-file.c index 5537ab2c370992..6785821c8c9fc0 100644 --- a/object-file.c +++ b/object-file.c @@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn cb, void *data, - enum odb_for_each_object_flags flags) -{ - struct odb_source *source; - - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - int r = for_each_loose_file_in_source(source, cb, NULL, - NULL, data); - if (r) - return r; - - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) - break; - } - - return 0; -} - struct for_each_object_wrapper_data { struct odb_source *source; const struct object_info *request; diff --git a/object-file.h b/object-file.h index b5eac0349ebc98..d9979baea8e39c 100644 --- a/object-file.h +++ b/object-file.h @@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_subdir_fn subdir_cb, void *data); -/* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. - * The order in which objects are visited is unspecified. - * - * Any flags specific to packs are ignored. - */ -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn, void *, - enum odb_for_each_object_flags flags); - /* * Iterate through all loose objects in the given object database source and * invoke the callback function for each of them. If an object info request is diff --git a/packfile.c b/packfile.c index 845633139f9942..57fbf518762f2f 100644 --- a/packfile.c +++ b/packfile.c @@ -2327,65 +2327,6 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -static int packfile_store_for_each_object_internal(struct packfile_store *store, - each_packed_object_fn cb, - void *data, - unsigned flags, - int *pack_errors) -{ - struct packfile_list_entry *e; - int ret = 0; - - store->skip_mru_updates = true; - - for (e = packfile_store_get_packs(store); e; e = e->next) { - struct packed_git *p = e->pack; - - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - *pack_errors = 1; - continue; - } - - ret = for_each_object_in_pack(p, cb, data, flags); - if (ret) - break; - } - - store->skip_mru_updates = false; - - return ret; -} - -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) -{ - struct odb_source *source; - int pack_errors = 0; - int ret = 0; - - odb_prepare_alternates(repo->objects); - - for (source = repo->objects->sources; source; source = source->next) { - ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, - flags, &pack_errors); - if (ret) - break; - } - - return ret ? ret : pack_errors; -} - struct packfile_store_for_each_object_wrapper_data { struct packfile_store *store; const struct object_info *request; @@ -2428,14 +2369,44 @@ int packfile_store_for_each_object(struct packfile_store *store, .cb = cb, .cb_data = cb_data, }; + struct packfile_list_entry *e; int pack_errors = 0, ret; - ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, - &data, flags, &pack_errors); - if (ret) - return ret; + store->skip_mru_updates = true; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, + &data, flags); + if (ret) + goto out; + } + + ret = 0; - return pack_errors ? -1 : 0; +out: + store->skip_mru_updates = false; + + if (!ret && pack_errors) + ret = -1; + return ret; } struct add_promisor_object_data { diff --git a/packfile.h b/packfile.h index b7964f0289705c..1a1b72076457f1 100644 --- a/packfile.h +++ b/packfile.h @@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, unsigned flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags); /* * Iterate through all packed objects in the given packfile store and invoke From a606fcdceb807b93013542e5e4d5f4c233aa6c83 Mon Sep 17 00:00:00 2001 From: Pushkar Singh Date: Tue, 3 Feb 2026 16:48:16 +0000 Subject: [PATCH 16/38] subtree: validate --prefix against commit in split git subtree split currently validates --prefix against the working tree. This breaks when splitting an older commit or when the working tree does not contain the subtree, even though the commit does. For example: git subtree split --prefix=pkg fails if pkg was removed later, even though it exists in . Fix this by validating the prefix against the specified commit using git cat-file instead of the working tree. Add a test to ensure this behavior does not regress. Signed-off-by: Pushkar Singh Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 9 +++++++++ contrib/subtree/t/t7900-subtree.sh | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 17106d1a721519..d7f9121f2f8ee9 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -257,6 +257,9 @@ main () { test -e "$arg_prefix" && die "fatal: prefix '$arg_prefix' already exists." ;; + split) + # checked later against the commit, not the working tree + ;; *) test -e "$arg_prefix" || die "fatal: '$arg_prefix' does not exist; use 'git subtree add'" @@ -966,6 +969,12 @@ cmd_split () { else die "fatal: you must provide exactly one revision, and optionally a repository. Got: '$*'" fi + + # Now validate prefix against the commit, not the working tree + if ! git cat-file -e "$rev:$dir" 2>/dev/null + then + die "fatal: '$dir' does not exist; use 'git subtree add'" + fi repository="" if test "$#" = 2 then diff --git a/contrib/subtree/t/t7900-subtree.sh b/contrib/subtree/t/t7900-subtree.sh index 316dc5269e2b6f..e4f632f3afdb1e 100755 --- a/contrib/subtree/t/t7900-subtree.sh +++ b/contrib/subtree/t/t7900-subtree.sh @@ -368,6 +368,28 @@ test_expect_success 'split requires path given by option --prefix must exist' ' ) ' +test_expect_success 'split works when prefix exists in commit but not in working tree' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + + # create subtree + mkdir pkg && + echo ok >pkg/file && + git add pkg && + git commit -m "add pkg" && + good=$(git rev-parse HEAD) && + + # remove it from working tree in later commit + git rm -r pkg && + git commit -m "remove pkg" && + + # must still be able to split using the old commit + git subtree split --prefix=pkg "$good" >out && + test -s out + ) +' + test_expect_success 'split rejects flags for add' ' subtree_test_create_repo "$test_count" && subtree_test_create_repo "$test_count/sub proj" && From 90695bbdaea86064398c26eb259043cadcf99a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 4 Feb 2026 16:23:06 +0100 Subject: [PATCH 17/38] gpg-interface: signatures by expired keys are fine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a signature is made with a valid key and that key later expires, the signature should still be considered good. GnuPG emits in this case something like: [GNUPG:] NEWSIG gpg: Signature made Wed 26 Nov 2014 05:56:50 AM CET gpg: using RSA key FE3958F9067BC667 [GNUPG:] KEYEXPIRED 1478449622 [GNUPG:] KEY_CONSIDERED D783920D6D4F0C06AA4C25F3FE3958F9067BC667 0 [GNUPG:] KEYEXPIRED 1478449622 [GNUPG:] SIG_ID 8tAN3Fx6XB2NAoH5U8neoguQ9MI 2014-11-26 1416977810 [GNUPG:] EXPKEYSIG FE3958F9067BC667 Jason Cooper gpg: Good signature from "Jason Cooper " [expired] [GNUPG:] VALIDSIG D783920D6D4F0C06AA4C25F3FE3958F9067BC667 2014-11-26 1416977810 0 4 0 1 2 00 D783920D6D4F0C06AA4C25F3FE3958F9067BC667 gpg: Note: This key has expired! D783920D6D4F0C06AA4C25F3FE3958F9067BC667 (signature and signed data in this example is taken from Linux commit 756f80cee766574ae282baa97fdcf9cc). So GnuPG is relaxed and the fact that the key is expired is only worth a "Note" which is weaker than e.g. gpg: WARNING: The key's User ID is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. which git still considers ok. So stop coloring the signature by an expired key red and handle it like any other good signature. Signed-off-by: Uwe Kleine-König Signed-off-by: Junio C Hamano --- gpg-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gpg-interface.c b/gpg-interface.c index 47222bf31b6ef2..5a58f333dfd8c2 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -382,7 +382,8 @@ static int verify_gpg_signed_buffer(struct signature_check *sigc, delete_tempfile(&temp); - ret |= !strstr(gpg_stdout.buf, "\n[GNUPG:] GOODSIG "); + ret |= !strstr(gpg_stdout.buf, "\n[GNUPG:] GOODSIG ") && + !strstr(gpg_stdout.buf, "\n[GNUPG:] EXPKEYSIG "); sigc->output = strbuf_detach(&gpg_stderr, NULL); sigc->gpg_status = strbuf_detach(&gpg_stdout, NULL); @@ -680,7 +681,7 @@ int check_signature(struct signature_check *sigc, if (status && !sigc->output) return !!status; - status |= sigc->result != 'G'; + status |= sigc->result != 'G' && sigc->result != 'Y'; status |= sigc->trust_level < configured_min_trust_level; return !!status; From 1e3962c6b75f7131a236e35a0f7a3962102452ed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 4 Feb 2026 09:38:11 +0100 Subject: [PATCH 18/38] meson: wire up gitk and git-gui Wire up both gitk and git-gui in Meson as subprojects. These two programs should be the last missing pieces for feature compatibility with our Makefile for distributors. Note that Meson expects subprojects to live in the "subprojects/" directory. Create symlinks to fulfill this requirement. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 14 ++++++++++++++ meson_options.txt | 4 ++++ subprojects/git-gui | 1 + subprojects/gitk | 1 + 4 files changed, 20 insertions(+) create mode 120000 subprojects/git-gui create mode 120000 subprojects/gitk diff --git a/meson.build b/meson.build index dd52efd1c87574..e96953afecbe18 100644 --- a/meson.build +++ b/meson.build @@ -239,7 +239,9 @@ git = find_program('git', dirs: program_path, native: true, required: false) sed = find_program('sed', dirs: program_path, native: true) shell = find_program('sh', dirs: program_path, native: true) tar = find_program('tar', dirs: program_path, native: true) +tclsh = find_program('tclsh', required: get_option('git_gui'), native: false) time = find_program('time', dirs: program_path, required: get_option('benchmarks')) +wish = find_program('wish', required: get_option('git_gui').enabled() or get_option('gitk').enabled(), native: false) # Detect the target shell that is used by Git at runtime. Note that we prefer # "/bin/sh" over a PATH-based lookup, which provides a working shell on most @@ -2250,6 +2252,16 @@ configure_file( configuration: build_options_config, ) +gitk_option = get_option('gitk').disable_auto_if(not wish.found()) +if gitk_option.allowed() + subproject('gitk') +endif + +git_gui_option = get_option('git_gui').disable_auto_if(not tclsh.found() or not wish.found()) +if git_gui_option.allowed() + subproject('git-gui') +endif + # Development environments can be used via `meson devenv -C `. This # allows you to execute test scripts directly with the built Git version and # puts the built version of Git in your PATH. @@ -2276,6 +2288,8 @@ summary({ 'curl': curl, 'expat': expat, 'gettext': intl, + 'gitk': gitk_option.allowed(), + 'git-gui': git_gui_option.allowed(), 'gitweb': gitweb_option.allowed(), 'iconv': iconv, 'pcre2': pcre2, diff --git a/meson_options.txt b/meson_options.txt index e0be260ae1bce8..659cbb218f46e0 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -43,6 +43,10 @@ option('expat', type: 'feature', value: 'enabled', description: 'Build helpers used to push to remotes with the HTTP transport.') option('gettext', type: 'feature', value: 'auto', description: 'Build translation files.') +option('gitk', type: 'feature', value: 'auto', + description: 'Build the Gitk graphical repository browser. Requires Tcl/Tk.') +option('git_gui', type: 'feature', value: 'auto', + description: 'Build the git-gui graphical user interface for Git. Requires Tcl/Tk.') option('gitweb', type: 'feature', value: 'auto', description: 'Build Git web interface. Requires Perl.') option('iconv', type: 'feature', value: 'auto', diff --git a/subprojects/git-gui b/subprojects/git-gui new file mode 120000 index 00000000000000..c6d917088b39bc --- /dev/null +++ b/subprojects/git-gui @@ -0,0 +1 @@ +../git-gui \ No newline at end of file diff --git a/subprojects/gitk b/subprojects/gitk new file mode 120000 index 00000000000000..b66ad18ae5f423 --- /dev/null +++ b/subprojects/gitk @@ -0,0 +1 @@ +../gitk-git \ No newline at end of file From 58e4eeeeb5439dc1f629579ba03844500827ff20 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 9 Feb 2026 10:08:43 +0000 Subject: [PATCH 19/38] meson: fix building mergetool docs Building the documentation with meson when the build directory is not an immediate subdirectory of the source directory prints the following error [2/1349] Generating Documentation/mer... command (wrapped by meson to set env) ../../Documentation/generate-mergetool-list.sh: line 15: ../git-mergetool--lib.sh: No such file or directory The build does not fail because the failure is upstream of a pipe. Fix the error by passing the correct source directory when meson runs "generate-mergetool-list.sh". As that script sets $MERGE_TOOLS_DIR we do not need to set it in the environment when running the script. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- Documentation/meson.build | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/meson.build b/Documentation/meson.build index c00c9fe7f429a5..31049f99e19f8e 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -352,13 +352,10 @@ foreach mode : [ 'diff', 'merge' ] command: [ shell, '@INPUT@', - '..', + meson.project_source_root(), mode, '@OUTPUT@' ], - env: [ - 'MERGE_TOOLS_DIR=' + meson.project_source_root() / 'mergetools', - ], input: 'generate-mergetool-list.sh', output: 'mergetools-' + mode + '.adoc', ) From b679ee04226f4ba1633078797f3aa3a5677e2c5c Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 9 Feb 2026 18:34:34 +0100 Subject: [PATCH 20/38] doc: am: normalize git(1) command links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many mentions of commands using inline-verbatim or emphasis ('). We just mention the command themselves, not specific invocations like `git am `. Let’s link to them instead. There are also many such mentions which then link to the command right afterwards. Simplify to just using a link. Also remove “see ” phrases where they have now already been mentioned. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/config/am.adoc | 18 +++++++++--------- Documentation/git-am.adoc | 31 +++++++++++++++---------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/Documentation/config/am.adoc b/Documentation/config/am.adoc index 5bcad2efb11639..19ef5aa00c4682 100644 --- a/Documentation/config/am.adoc +++ b/Documentation/config/am.adoc @@ -1,14 +1,14 @@ am.keepcr:: - If true, git-am will call git-mailsplit for patches in mbox format - with parameter `--keep-cr`. In this case git-mailsplit will + If true, linkgit:git-am[1] will call linkgit:git-mailsplit[1] + for patches in mbox format with parameter `--keep-cr`. In this + case linkgit:git-mailsplit[1] will not remove `\r` from lines ending with `\r\n`. Can be overridden by giving `--no-keep-cr` from the command line. - See linkgit:git-am[1], linkgit:git-mailsplit[1]. am.threeWay:: - By default, `git am` will fail if the patch does not apply cleanly. When - set to true, this setting tells `git am` to fall back on 3-way merge if - the patch records the identity of blobs it is supposed to apply to and - we have those blobs available locally (equivalent to giving the `--3way` - option from the command line). Defaults to `false`. - See linkgit:git-am[1]. + By default, linkgit:git-am[1] will fail if the patch does not + apply cleanly. When set to true, this setting tells + linkgit:git-am[1] to fall back on 3-way merge if the patch + records the identity of blobs it is supposed to apply to and we + have those blobs available locally (equivalent to giving the + `--3way` option from the command line). Defaults to `false`. diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index b23b4fba2013c2..0f9980603d24c2 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -43,14 +43,14 @@ OPTIONS -k:: --keep:: - Pass `-k` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]). + Pass `-k` flag to linkgit:git-mailinfo[1]. --keep-non-patch:: - Pass `-b` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]). + Pass `-b` flag to linkgit:git-mailinfo[1]. --keep-cr:: --no-keep-cr:: - With `--keep-cr`, call 'git mailsplit' (see linkgit:git-mailsplit[1]) + With `--keep-cr`, call linkgit:git-mailsplit[1] with the same option, to prevent it from stripping CR at the end of lines. `am.keepcr` configuration variable can be used to specify the default behaviour. `--no-keep-cr` is useful to override `am.keepcr`. @@ -65,7 +65,7 @@ OPTIONS Ignore scissors lines (see linkgit:git-mailinfo[1]). --quoted-cr=:: - This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]). + This flag will be passed down to linkgit:git-mailinfo[1]. --empty=(drop|keep|stop):: How to handle an e-mail message lacking a patch: @@ -83,7 +83,7 @@ OPTIONS -m:: --message-id:: - Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]), + Pass the `-m` flag to linkgit:git-mailinfo[1], so that the Message-ID header is added to the commit message. The `am.messageid` configuration variable can be used to specify the default behaviour. @@ -98,7 +98,7 @@ OPTIONS -u:: --utf8:: - Pass `-u` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]). + Pass `-u` flag to linkgit:git-mailinfo[1]. The proposed commit log message taken from the e-mail is re-coded into UTF-8 encoding (configuration variable `i18n.commitEncoding` can be used to specify the project's @@ -108,8 +108,7 @@ This was optional in prior versions of git, but now it is the default. You can use `--no-utf8` to override this. --no-utf8:: - Pass `-n` flag to 'git mailinfo' (see - linkgit:git-mailinfo[1]). + Pass `-n` flag to linkgit:git-mailinfo[1]. -3:: --3way:: @@ -132,9 +131,8 @@ include::rerere-options.adoc[] --exclude=:: --include=:: --reject:: - These flags are passed to the 'git apply' (see linkgit:git-apply[1]) - program that applies - the patch. + These flags are passed to the linkgit:git-apply[1] program that + applies the patch. + Valid for the `--whitespace` option are: `nowarn`, `warn`, `fix`, `error`, and `error-all`. @@ -198,7 +196,8 @@ Valid for the `--whitespace` option are: to the screen before exiting. This overrides the standard message informing you to use `--continue` or `--skip` to handle the failure. This is solely - for internal use between 'git rebase' and 'git am'. + for internal use between linkgit:git-rebase[1] and + linkgit:git-am[1]. --abort:: Restore the original branch and abort the patching operation. @@ -216,7 +215,7 @@ Valid for the `--whitespace` option are: failure again. --show-current-patch[=(diff|raw)]:: - Show the message at which `git am` has stopped due to + Show the message at which linkgit:git-am[1] has stopped due to conflicts. If `raw` is specified, show the raw contents of the e-mail message; if `diff`, show the diff portion only. Defaults to `raw`. @@ -254,7 +253,7 @@ message. Any line that is of the form: is taken as the beginning of a patch, and the commit log message is terminated before the first occurrence of such a line. -When initially invoking `git am`, you give it the names of the mailboxes +When initially invoking linkgit:git-am[1], you give it the names of the mailboxes to process. Upon seeing the first patch that does not apply, it aborts in the middle. You can recover from this in one of two ways: @@ -272,8 +271,8 @@ names. Before any patches are applied, ORIG_HEAD is set to the tip of the current branch. This is useful if you have problems with multiple -commits, like running 'git am' on the wrong branch or an error in the -commits that is more easily fixed by changing the mailbox (e.g. +commits, like running linkgit:git-am[1] on the wrong branch or an error +in the commits that is more easily fixed by changing the mailbox (e.g. errors in the "From:" lines). HOOKS From 3c18135bfd04d8ab1007137bcf565badc4ae2953 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 9 Feb 2026 18:34:35 +0100 Subject: [PATCH 21/38] doc: am: say that --message-id adds a trailer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The option `--message-id` was added in a078f732 (git-am: add --message-id/--no-message-id, 2014-11-25) back when git-interpret- trailers(1) was relatively new. Let’s spell out that it is a trailer and link to the dedicated trailer command. Also use inline-verbatim for `Message-ID`. Also link to git-interpret-trailers(1) on `--signoff`. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-am.adoc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index 0f9980603d24c2..ba05c703fcab33 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -37,9 +37,10 @@ OPTIONS -s:: --signoff:: - Add a `Signed-off-by` trailer to the commit message, using - the committer identity of yourself. - See the signoff option in linkgit:git-commit[1] for more information. + Add a `Signed-off-by` trailer to the commit message (see + linkgit:git-interpret-trailers[1]), using the committer identity + of yourself. See the signoff option in linkgit:git-commit[1] + for more information. -k:: --keep:: @@ -83,10 +84,11 @@ OPTIONS -m:: --message-id:: - Pass the `-m` flag to linkgit:git-mailinfo[1], - so that the Message-ID header is added to the commit message. - The `am.messageid` configuration variable can be used to specify - the default behaviour. + Pass the `-m` flag to linkgit:git-mailinfo[1], so that the + `Message-ID` header is added as a trailer (see + linkgit:git-interpret-trailers[1]). The `am.messageid` + configuration variable can be used to specify the default + behaviour. --no-message-id:: Do not add the Message-ID header to the commit message. From c2f700ac27f8d382acb8d4e49b0f714234604fe4 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 9 Feb 2026 18:34:36 +0100 Subject: [PATCH 22/38] doc: am: add missing config am.messageId Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/config/am.adoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/config/am.adoc b/Documentation/config/am.adoc index 19ef5aa00c4682..e9561e12d7467e 100644 --- a/Documentation/config/am.adoc +++ b/Documentation/config/am.adoc @@ -12,3 +12,9 @@ am.threeWay:: records the identity of blobs it is supposed to apply to and we have those blobs available locally (equivalent to giving the `--3way` option from the command line). Defaults to `false`. + +am.messageId:: + Add a `Message-ID` trailer based on the email header to the + commit when using linkgit:git-am[1] (see + linkgit:git-interpret-trailers[1]). See also the `--message-id` + and `--no-message-id` options. From b10e0cb1f391a4466f8d7c4b2550a8b89fda3573 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 9 Feb 2026 18:34:37 +0100 Subject: [PATCH 23/38] doc: am: fill out hook discussion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document `--verify` and rephrase the `--[no-]verify` section to lead with the default, in imperative mood.[1] Historically it makes sense that only the negated forms are documented; they are all run by default and thus you only need to use hook options if you want to turn some of them off. But, beyond just desiring uniform documentation,[2] it’s very much possible to have, say, a Git alias with `--no-verify` that you might sometimes want to turn back on with the *positive* form. Also mention the options in the “Hooks” section and mention that `post-applypatch` cannot be skipped. † 1: See e.g. acffc5e9 (doc: convert git-remote to synopsis style, 2025-12-20) † 2: https://lore.kernel.org/git/xmqqcyct1mtq.fsf@gitster.g/ Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-am.adoc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index ba05c703fcab33..403181baa9fb4a 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -9,7 +9,7 @@ git-am - Apply a series of patches from a mailbox SYNOPSIS -------- [verse] -'git am' [--signoff] [--keep] [--[no-]keep-cr] [--[no-]utf8] [--no-verify] +'git am' [--signoff] [--keep] [--[no-]keep-cr] [--[no-]utf8] [--[no-]verify] [--[no-]3way] [--interactive] [--committer-date-is-author-date] [--ignore-date] [--ignore-space-change | --ignore-whitespace] [--whitespace=] [-C] [-p] [--directory=] @@ -150,11 +150,14 @@ Valid for the `--whitespace` option are: --interactive:: Run interactively. +--verify:: -n:: --no-verify:: - By default, the pre-applypatch and applypatch-msg hooks are run. - When any of `--no-verify` or `-n` is given, these are bypassed. - See also linkgit:githooks[5]. + Run the `pre-applypatch` and `applypatch-msg` hooks. This is the + default. Skip these hooks with `-n` or `--no-verify`. See also + linkgit:githooks[5]. ++ +Note that `post-applypatch` cannot be skipped. --committer-date-is-author-date:: By default the command records the date from the e-mail @@ -283,6 +286,8 @@ This command can run `applypatch-msg`, `pre-applypatch`, and `post-applypatch` hooks. See linkgit:githooks[5] for more information. +See the `--verify`/`-n`/`--no-verify` options. + CONFIGURATION ------------- From 048357d49d59cbb8c81ff891803d5eace813baef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Feb 2026 07:59:37 +0100 Subject: [PATCH 24/38] builtin/backfill: fix flags passed to `odb_has_object()` The function `fill_missing_blobs()` receives an array of object IDs and verifies for each of them whether the corresponding object exists. If it doesn't exist, we add it to a set of objects and then batch-fetch all of the objects at once. The check for whether or not we already have the object is broken though: we pass `OBJECT_INFO_FOR_PREFETCH`, but `odb_has_object()` expects us to pass `HAS_OBJECT_*` flags. The flag expands to: - `OBJECT_INFO_QUICK`, which asks the object database to not reprepare in case the object wasn't found. This makes sense, as we'd otherwise reprepare the object database as many times as we have missing objects. - `OBJECT_INFO_SKIP_FETCH_OBJECT`, which asks the object database to not fetch the object in case it's missing. Again, this makes sense, as we want to batch-fetch the objects. This shows that we indeed want the equivalent of this flag, but of course represented as `HAS_OBJECT_*` flags. Luckily, the code is already working correctly. The `OBJECT_INFO` flag expands to `(1 << 3) | (1 << 4)`, none of which are valid `HAS_OBJECT` flags. And if no flags are passed, `odb_has_object()` ends up calling `odb_read_object_info_extended()` with exactly the above two flags that we wanted to set in the first place. Of course, this is pure luck, and this can break any moment. So let's fix this and correct the code to not pass any flags at all. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/backfill.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index e80fc1b694df61..d8cb3b0eba799d 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -67,8 +67,7 @@ static int fill_missing_blobs(const char *path UNUSED, return 0; for (size_t i = 0; i < list->nr; i++) { - if (!odb_has_object(ctx->repo->objects, &list->oid[i], - OBJECT_INFO_FOR_PREFETCH)) + if (!odb_has_object(ctx->repo->objects, &list->oid[i], 0)) oid_array_append(&ctx->current_batch, &list->oid[i]); } From 6cf965ccaf04cfaa0d1bc72336c380970549c6ee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Feb 2026 07:59:38 +0100 Subject: [PATCH 25/38] builtin/fsck: fix flags passed to `odb_has_object()` In `mark_object()` we invoke `has_object()` with a value of 1. This is somewhat fishy given that the function expects a bitset of flags, so any behaviour that this results in is purely coincidental and may break at any point in time. The call to `has_object()` was originally introduced in 9eb86f41de (fsck: do not lazy fetch known non-promisor object, 2020-08-05). The intent here was to skip lazy fetches of promisor objects: we have already verified that the object is not a promisor object, so if the object is missing it indicates a corrupt repository. The hardcoded value that we pass maps to `HAS_OBJECT_RECHECK_PACKED`, which is probably the intended behaviour: `odb_has_object()` will not fetch promisor objects unless `HAS_OBJECT_FETCH_PROMISOR` is passed, but we may want to verify that no concurrent process has written the object that we're trying to read. Convert the code to use the named flag instead of the the hardcoded value. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 0512f78a87fe1d..1d059dd6c22e31 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -162,7 +162,8 @@ static int mark_object(struct object *obj, enum object_type type, return 0; if (!(obj->flags & HAS_OBJ)) { - if (parent && !odb_has_object(the_repository->objects, &obj->oid, 1)) { + if (parent && !odb_has_object(the_repository->objects, &obj->oid, + HAS_OBJECT_RECHECK_PACKED)) { printf_ln(_("broken link from %7s %s\n" " to %7s %s"), printable_type(&parent->oid, parent->type), From ae77afc3478c87766c4db9082fa82195d6ce9560 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Feb 2026 07:59:39 +0100 Subject: [PATCH 26/38] odb: drop gaps in object info flag values The object info flag values have a two gaps in their definitions, where some bits are skipped over. These gaps don't really hurt, but it makes one wonder whether anything is going on and whether a subset of flags might be defined somewhere else. That's not the case though. Instead, this is a case of flags that have been dropped in the past: - The value 4 was used by `OBJECT_INFO_SKIP_CACHED`, removed in 9c8a294a1a (sha1-file: remove OBJECT_INFO_SKIP_CACHED, 2020-01-02). - The value 8 was used by `OBJECT_INFO_ALLOW_UNKNOWN_TYPE`, removed in ae24b032a0 (object-file: drop OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag, 2025-05-16). Close those gaps to avoid any more confusion. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/odb.h b/odb.h index bab07755f4ec95..8e1fca77551243 100644 --- a/odb.h +++ b/odb.h @@ -353,14 +353,14 @@ struct object_info { #define OBJECT_INFO_INIT { 0 } /* Invoke lookup_replace_object() on the given hash */ -#define OBJECT_INFO_LOOKUP_REPLACE 1 +#define OBJECT_INFO_LOOKUP_REPLACE (1 << 0) /* Do not retry packed storage after checking packed and loose storage */ -#define OBJECT_INFO_QUICK 8 +#define OBJECT_INFO_QUICK (1 << 1) /* * Do not attempt to fetch the object if missing (even if fetch_is_missing is * nonzero). */ -#define OBJECT_INFO_SKIP_FETCH_OBJECT 16 +#define OBJECT_INFO_SKIP_FETCH_OBJECT (1 << 2) /* * This is meant for bulk prefetching of missing blobs in a partial * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK @@ -368,7 +368,7 @@ struct object_info { #define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) /* Die if object corruption (not just an object being missing) was detected. */ -#define OBJECT_INFO_DIE_IF_CORRUPT 32 +#define OBJECT_INFO_DIE_IF_CORRUPT (1 << 3) /* * Read object info from the object database and populate the `object_info` From f6516a5241684355f3fb9f7b70e287e98b48d0ef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Feb 2026 07:59:40 +0100 Subject: [PATCH 27/38] odb: convert object info flags into an enum Convert the object info flags into an enum and adapt all functions that receive these flags as parameters to use the enum instead of an integer. This serves two purposes: - The function signatures become more self-documenting, as callers don't have to wonder which flags they expect. - The compiler can warn when a wrong flag type is passed. Note that the second benefit is somewhat limited. For example, when or-ing multiple enum flags together the result will be an integer, and the compiler will not warn about such use cases. But where it does help is when a single flag of the wrong type is passed, as the compiler would generate a warning in that case. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 ++- object-file.h | 3 ++- odb.c | 2 +- odb.h | 40 +++++++++++++++++++++++----------------- packfile.c | 2 +- packfile.h | 2 +- 6 files changed, 30 insertions(+), 22 deletions(-) diff --git a/object-file.c b/object-file.c index e7e4c3348f9c1b..0ab6c4d4f36d4b 100644 --- a/object-file.c +++ b/object-file.c @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + enum object_info_flags flags) { int ret; int fd; diff --git a/object-file.h b/object-file.h index 1229d5f675b44a..cdb54b52183fb6 100644 --- a/object-file.h +++ b/object-file.h @@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + enum object_info_flags flags); int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, diff --git a/odb.c b/odb.c index ac70b6a099f588..d437aa8b061448 100644 --- a/odb.c +++ b/odb.c @@ -842,7 +842,7 @@ static int oid_object_info_convert(struct repository *r, int odb_read_object_info_extended(struct object_database *odb, const struct object_id *oid, struct object_info *oi, - unsigned flags) + enum object_info_flags flags) { int ret; diff --git a/odb.h b/odb.h index 8e1fca77551243..e94cdc3665da47 100644 --- a/odb.h +++ b/odb.h @@ -352,23 +352,29 @@ struct object_info { */ #define OBJECT_INFO_INIT { 0 } -/* Invoke lookup_replace_object() on the given hash */ -#define OBJECT_INFO_LOOKUP_REPLACE (1 << 0) -/* Do not retry packed storage after checking packed and loose storage */ -#define OBJECT_INFO_QUICK (1 << 1) -/* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ -#define OBJECT_INFO_SKIP_FETCH_OBJECT (1 << 2) -/* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK - */ -#define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 1), + + /* + * Do not attempt to fetch the object if missing (even if fetch_is_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), + + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), -/* Die if object corruption (not just an object being missing) was detected. */ -#define OBJECT_INFO_DIE_IF_CORRUPT (1 << 3) + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; /* * Read object info from the object database and populate the `object_info` @@ -377,7 +383,7 @@ struct object_info { int odb_read_object_info_extended(struct object_database *odb, const struct object_id *oid, struct object_info *oi, - unsigned flags); + enum object_info_flags flags); /* * Read a subset of object info for the given object ID. Returns an `enum diff --git a/packfile.c b/packfile.c index 402c3b5dc73131..cb418846aef49a 100644 --- a/packfile.c +++ b/packfile.c @@ -2149,7 +2149,7 @@ int packfile_store_freshen_object(struct packfile_store *store, int packfile_store_read_object_info(struct packfile_store *store, const struct object_id *oid, struct object_info *oi, - unsigned flags UNUSED) + enum object_info_flags flags UNUSED) { struct pack_entry e; int ret; diff --git a/packfile.h b/packfile.h index acc5c55ad57754..989fd10cb64eb8 100644 --- a/packfile.h +++ b/packfile.h @@ -247,7 +247,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, int packfile_store_read_object_info(struct packfile_store *store, const struct object_id *oid, struct object_info *oi, - unsigned flags); + enum object_info_flags flags); /* * Open the packfile and add it to the store if it isn't yet known. Returns From 732ec9b17b78a49496bfb796fcfe606f3a9f02f1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Feb 2026 07:59:41 +0100 Subject: [PATCH 28/38] odb: convert `odb_has_object()` flags into an enum Following the reason in the preceding commit, convert the `odb_has_object()` flags into an enum. With this change, we would have catched the misuse of `odb_has_object()` that was fixed in a preceding commit as the compiler would have generated a warning: ../builtin/backfill.c:71:9: error: implicit conversion from enumeration type 'enum odb_object_info_flag' to different enumeration type 'enum odb_has_object_flag' [-Werror,-Wenum-conversion] 70 | if (!odb_has_object(ctx->repo->objects, &list->oid[i], | ~~~~~~~~~~~~~~ 71 | OBJECT_INFO_FOR_PREFETCH)) | ^~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 2 +- odb.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/odb.c b/odb.c index d437aa8b061448..2bbbfb344ae322 100644 --- a/odb.c +++ b/odb.c @@ -964,7 +964,7 @@ void *odb_read_object_peeled(struct object_database *odb, } int odb_has_object(struct object_database *odb, const struct object_id *oid, - unsigned flags) + enum has_object_flags flags) { unsigned object_info_flags = 0; diff --git a/odb.h b/odb.h index e94cdc3665da47..f7368827aca8f6 100644 --- a/odb.h +++ b/odb.h @@ -395,7 +395,7 @@ int odb_read_object_info(struct object_database *odb, const struct object_id *oid, unsigned long *sizep); -enum { +enum has_object_flags { /* Retry packed storage after checking packed and loose storage */ HAS_OBJECT_RECHECK_PACKED = (1 << 0), /* Allow fetching the object in case the repository has a promisor remote. */ @@ -408,7 +408,7 @@ enum { */ int odb_has_object(struct object_database *odb, const struct object_id *oid, - unsigned flags); + enum has_object_flags flags); int odb_freshen_object(struct object_database *odb, const struct object_id *oid); From bfd125f64f86e78894d67c9eafdbae38779484bc Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Sat, 14 Feb 2026 12:55:41 +0100 Subject: [PATCH 29/38] doc: patch-id: emphasize multi-patch processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emphasize that you can pass multiple patches or diffs to this command. git-patch-id(1) is an efficient pID–commit mapper, able to map thousands of commits in seconds. But discussions on the command seem to typically[1] use the standard loop-over-rev-list-and- shell-out pattern: for commit in rev-list: prepare a diff from commit | git patch-id This is unnecessary; we can bulk-process the patches: git rev-list --no-merges | git diff-tree --patch --stdin | git patch-id --stable The first version (translated to shell) takes a little over nine minutes for a commit history of about 78K commits.[2] The other one, by contrast, takes slightly less than a minute. Also drop “the” from “standard input”. † 1: https://stackoverflow.com/a/19758159 † 2: This is `master` of this repository on 2025-10-02 Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-patch-id.adoc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/git-patch-id.adoc b/Documentation/git-patch-id.adoc index 013e1a6190681b..e95391cd255ee4 100644 --- a/Documentation/git-patch-id.adoc +++ b/Documentation/git-patch-id.adoc @@ -3,7 +3,7 @@ git-patch-id(1) NAME ---- -git-patch-id - Compute unique ID for a patch +git-patch-id - Compute unique IDs for patches SYNOPSIS -------- @@ -12,7 +12,7 @@ git patch-id [--stable | --unstable | --verbatim] DESCRIPTION ----------- -Read a patch from the standard input and compute the patch ID for it. +Read patches from standard input and compute the patch IDs. A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it's "reasonably stable", but at @@ -25,7 +25,8 @@ When dealing with `git diff-tree --patch` output, it takes advantage of the fact that the patch is prefixed with the object name of the commit, and outputs two 40-byte hexadecimal strings. The first string is the patch ID, and the second string is the commit ID. -This can be used to make a mapping from patch ID to commit ID. +This can be used to make a mapping from patch ID to commit ID for a +set or range of commits. OPTIONS ------- From 795d41db1304cdba06361916fa64f93c44678228 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Sat, 14 Feb 2026 12:55:42 +0100 Subject: [PATCH 30/38] doc: patch-id: add script example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utility and usability of git-patch-id(1) was discussed relatively recently:[1] Using "git patch-id" is definitely in the "write a script for it" category. I don't think I've ever used it as-is from the command line as part of a one-liner. It's very much a command that is designed purely for scripting, the interface is just odd and baroque and doesn't really make sense for one-liners. The typical use of patch-id is to generate two *lists* of patch-ids, then sort them and use the patch-id as a key to find commits that look the same. The command doc *could* use an example, and since it is a mapper command it makes sense for that example to be a little script. Mapping the commits of some branch to an upstream ref allows us to demonstrate generating two lists, sorting them, joining them, and finally discarding the patch ID lookup column with cut(1). † 1: https://lore.kernel.org/workflows/CAHk-=wiN+8EUoik4UeAJ-HPSU7hczQP+8+_uP3vtAy_=YfJ9PQ@mail.gmail.com/ Inspired-by: Linus Torvalds Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-patch-id.adoc | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Documentation/git-patch-id.adoc b/Documentation/git-patch-id.adoc index e95391cd255ee4..1618994e76c753 100644 --- a/Documentation/git-patch-id.adoc +++ b/Documentation/git-patch-id.adoc @@ -68,6 +68,46 @@ This is the default if `patchid.stable` is set to `true`. + This is the default. +EXAMPLES +-------- + +linkgit:git-cherry[1] shows what commits from a branch have patch ID +equivalent commits in some upstream branch. But it only tells you +whether such a commit exists or not. What if you wanted to know the +relevant commits in the upstream? We can use this command to make a +mapping between your branch and the upstream branch: + +---- +#!/bin/sh + +upstream="$1" +branch="$2" +test -z "$branch" && branch=HEAD +limit="$3" +if test -n "$limit" +then + tail_opts="$limit".."$upstream" +else + since=$(git log --format=%aI "$upstream".."$branch" | tail -1) + tail_opts=--since="$since"' '"$upstream" +fi +for_branch=$(mktemp) +for_upstream=$(mktemp) + +git rev-list --no-merges "$upstream".."$branch" | + git diff-tree --patch --stdin | + git patch-id --stable | sort >"$for_branch" +git rev-list --no-merges $tail_opts | + git diff-tree --patch --stdin | + git patch-id --stable | sort >"$for_upstream" +join -a1 "$for_branch" "$for_upstream" | cut -d' ' -f2,3 +rm "$for_branch" +rm "$for_upstream" +---- + +Now the first column shows the commit from your branch and the second +column shows the patch ID equivalent commit, if it exists. + GIT --- Part of the linkgit:git[1] suite From ed84bc1c0da0c5a972459f639801bc7ec235084c Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Sat, 14 Feb 2026 12:55:43 +0100 Subject: [PATCH 31/38] doc: patch-id: see also git-cherry(1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-cherry(1) links to this command. These two commands are similar and we also mention it in the “Examples” section now. Let’s link to it. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-patch-id.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/git-patch-id.adoc b/Documentation/git-patch-id.adoc index 1618994e76c753..05859990c8e1fd 100644 --- a/Documentation/git-patch-id.adoc +++ b/Documentation/git-patch-id.adoc @@ -108,6 +108,10 @@ rm "$for_upstream" Now the first column shows the commit from your branch and the second column shows the patch ID equivalent commit, if it exists. +SEE ALSO +-------- +linkgit:git-cherry[1] + GIT --- Part of the linkgit:git[1] suite From f23ac77a4325fc776ebb38044a34f5e9629e4f67 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Feb 2026 16:38:01 +0100 Subject: [PATCH 32/38] commit: avoid parsing non-commits in `lookup_commit_reference_gently()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function `lookup_commit_reference_gently()` can be used to look up a committish by object ID. As such, the function knows to peel for example tag objects so that we eventually end up with the commit. The function is used quite a lot throughout our tree. One such user is "shallow.c" via `assign_shallow_commits_to_refs()`. The intent of this function is to figure out whether a shallow push is missing any objects that are required to satisfy the ref updates, and if so, which of the ref updates is missing objects. This is done by painting the tree with `UNINTERESTING`. We start painting by calling `refs_for_each_ref()` so that we can mark all existing referenced objects as the boundary of objects that we already have, and which are supposed to be fully connected. The reference tips are then parsed via `lookup_commit_reference_gently()`, and the commit is then marked as uninteresting. But references may not necessarily point to a committish, and if a lot of them aren't then this step takes a lot of time. This is mostly due to the way that `lookup_commit_reference_gently()` is implemented: before we learn about the type of the object we already call `parse_object()` on the object ID. This has two consequences: - We parse all objects, including trees and blobs, even though we don't even need the contents of them. - More importantly though, `parse_object()` will cause us to check whether the object ID matches its contents. Combined this means that we deflate and hash every non-committish object, and that of course ends up being both CPU- and memory-intensive. Improve the logic so that we first use `peel_object()`. This function won't parse the object for us, and thus it allows us to learn about the object's type before we parse and return it. The following benchmark pushes a single object from a shallow clone into a repository that has 100,000 refs. These refs were created by listing all objects via `git rev-list(1) --objects --all` and creating refs for a subset of them, so lots of those refs will cover non-commit objects. Benchmark 1: git-receive-pack (rev = HEAD~) Time (mean ± σ): 62.571 s ± 0.413 s [User: 58.331 s, System: 4.053 s] Range (min … max): 62.191 s … 63.010 s 3 runs Benchmark 2: git-receive-pack (rev = HEAD) Time (mean ± σ): 38.339 s ± 0.192 s [User: 36.220 s, System: 1.992 s] Range (min … max): 38.176 s … 38.551 s 3 runs Summary git-receive-pack . Signed-off-by: Junio C Hamano --- commit.c | 32 +++++++++++++++++++++++++++----- object.c | 23 ++++++++++++++++++----- object.h | 5 +++++ 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/commit.c b/commit.c index 28bb5ce029f3c5..fb1b01b9c3acf4 100644 --- a/commit.c +++ b/commit.c @@ -43,13 +43,35 @@ const char *commit_type = "commit"; struct commit *lookup_commit_reference_gently(struct repository *r, const struct object_id *oid, int quiet) { - struct object *obj = deref_tag(r, - parse_object(r, oid), - NULL, 0); + const struct object_id *maybe_peeled; + struct object_id peeled_oid; + struct object *object; + enum object_type type; - if (!obj) + switch (peel_object_ext(r, oid, &peeled_oid, 0, &type)) { + case PEEL_NON_TAG: + maybe_peeled = oid; + break; + case PEEL_PEELED: + maybe_peeled = &peeled_oid; + break; + default: return NULL; - return object_as_type(obj, OBJ_COMMIT, quiet); + } + + if (type != OBJ_COMMIT) { + if (!quiet) + error(_("object %s is a %s, not a %s"), + oid_to_hex(oid), type_name(type), + type_name(OBJ_COMMIT)); + return NULL; + } + + object = parse_object(r, maybe_peeled); + if (!object) + return NULL; + + return object_as_type(object, OBJ_COMMIT, quiet); } struct commit *lookup_commit_reference(struct repository *r, const struct object_id *oid) diff --git a/object.c b/object.c index 4669b8d65e74ab..99b6df3780475e 100644 --- a/object.c +++ b/object.c @@ -207,10 +207,11 @@ struct object *lookup_object_by_type(struct repository *r, } } -enum peel_status peel_object(struct repository *r, - const struct object_id *name, - struct object_id *oid, - unsigned flags) +enum peel_status peel_object_ext(struct repository *r, + const struct object_id *name, + struct object_id *oid, + unsigned flags, + enum object_type *typep) { struct object *o = lookup_unknown_object(r, name); @@ -220,8 +221,10 @@ enum peel_status peel_object(struct repository *r, return PEEL_INVALID; } - if (o->type != OBJ_TAG) + if (o->type != OBJ_TAG) { + *typep = o->type; return PEEL_NON_TAG; + } while (o && o->type == OBJ_TAG) { o = parse_object(r, &o->oid); @@ -241,9 +244,19 @@ enum peel_status peel_object(struct repository *r, return PEEL_INVALID; oidcpy(oid, &o->oid); + *typep = o->type; return PEEL_PEELED; } +enum peel_status peel_object(struct repository *r, + const struct object_id *name, + struct object_id *oid, + unsigned flags) +{ + enum object_type dummy; + return peel_object_ext(r, name, oid, flags, &dummy); +} + struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p) { struct object *obj; diff --git a/object.h b/object.h index 4bca957b8dcbd6..8f983822431c3b 100644 --- a/object.h +++ b/object.h @@ -309,6 +309,11 @@ enum peel_status peel_object(struct repository *r, const struct object_id *name, struct object_id *oid, unsigned flags); +enum peel_status peel_object_ext(struct repository *r, + const struct object_id *name, + struct object_id *oid, + unsigned flags, + enum object_type *typep); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); From 024b4c96976fabdc8b73f4183d6bb8626ffe2c7d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Feb 2026 16:38:02 +0100 Subject: [PATCH 33/38] commit: make `repo_parse_commit_no_graph()` more robust In the next commit we will start to parse more commits via the commit-graph. This change will lead to a segfault though because we try to access the tree of a commit via `repo_get_commit_tree()`, but: - The commit has been parsed via the commit-graph, and thus its `maybe_tree` field is not yet populated. - We cannot use the commit-graph to populate the commit's tree because we're in the process of writing the commit-graph. The consequence is that we'll get a `NULL` pointer for the tree in `write_graph_chunk_data()`. In theory we are already mindful of this situation, as we explicitly use `repo_parse_commit_no_graph()` to parse the commit without the help of the commit-graph. But that doesn't do the trick as the commit is already marked as parsed, so the function will not re-populate it. And as the commit-graph has been closed, neither will `get_commit_tree_oid()` be able to load the tree for us. It seems like this issue can only be hit under artificial circumstances: the error was hit via `git_test_write_commit_graph_or_die()`, which is run by git-commit(1) and git-merge(1) in case `GIT_TEST_COMMIT_GRAPH=1`: $ GIT_TEST_COMMIT_GRAPH=1 meson test t7507-commit-verbose \ --test-args=-ix -i ... ++ git -c commit.verbose=true commit --amend hint: Waiting for your editor to close the file... ./test-lib.sh: line 1012: 55895 Segmentation fault (core dumped) git -c commit.verbose=true commit --amend To the best of my knowledge, this is the only case where we end up writing a commit-graph in the same process that might have already consulted the commit-graph to look up arbitrary objects. But regardless of that, this feels like a bigger accident that is just waiting to happen. Make the code more robust by extending `repo_parse_commit_no_graph()` to unparse a commit first in case we detect it's coming from a graph. This ensures that we will re-read the object without it, and thus we will populate `maybe_tree` properly. This fix shouldn't have any performance consequences: the function is only ever called in the "commit-graph.c" code, and we'll only re-parse the commit at most once. Add an exclusion to our Coccinelle rules so that it doesn't complain about us accessing `maybe_tree` directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit.h | 14 ++++++++++++-- contrib/coccinelle/commit.cocci | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/commit.h b/commit.h index 79a761c37df023..05165a48a52bcf 100644 --- a/commit.h +++ b/commit.h @@ -103,16 +103,26 @@ static inline int repo_parse_commit(struct repository *r, struct commit *item) return repo_parse_commit_gently(r, item, 0); } +void unparse_commit(struct repository *r, const struct object_id *oid); + static inline int repo_parse_commit_no_graph(struct repository *r, struct commit *commit) { + /* + * When the commit has been parsed but its tree wasn't populated then + * this is an indicator that it has been parsed via the commit-graph. + * We cannot read the tree via the commit-graph, as we're explicitly + * told not to use it. We thus have to first un-parse the object so + * that we can re-parse it without the graph. + */ + if (commit->object.parsed && !commit->maybe_tree) + unparse_commit(r, &commit->object.oid); + return repo_parse_commit_internal(r, commit, 0, 0); } void parse_commit_or_die(struct commit *item); -void unparse_commit(struct repository *r, const struct object_id *oid); - struct buffer_slab; struct buffer_slab *allocate_commit_buffer_slab(void); void free_commit_buffer_slab(struct buffer_slab *bs); diff --git a/contrib/coccinelle/commit.cocci b/contrib/coccinelle/commit.cocci index c5284604c566bc..42725161e9f660 100644 --- a/contrib/coccinelle/commit.cocci +++ b/contrib/coccinelle/commit.cocci @@ -26,7 +26,7 @@ expression s; // repo_get_commit_tree() on the LHS. @@ identifier f != { repo_get_commit_tree, get_commit_tree_in_graph_one, - load_tree_for_commit, set_commit_tree }; + load_tree_for_commit, set_commit_tree, repo_parse_commit_no_graph }; expression c; @@ f(...) {<... From bb5da75d6116c35924a04a418ef4c3182663d0a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Feb 2026 16:38:03 +0100 Subject: [PATCH 34/38] commit: use commit graph in `lookup_commit_reference_gently()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the preceding commit we refactored `lookup_commit_reference_gently()` so that it doesn't parse non-commit objects anymore. This has led to a speedup when git-receive-pack(1) accepts a shallow push into a repo with lots of refs that point to blobs or trees. But while this case is now faster, we still have the issue that accepting pushes with lots of "normal" refs that point to commits are still slow. This is mostly because we look up the commits via the object database, and that is rather costly. Adapt the code to use `repo_parse_commit_gently()` instead of `parse_object()` to parse the resulting commit object. This function knows to use the commit-graph to fill in the object, which is way more cost efficient. This leads to another significant speedup when accepting shallow pushes. The following benchmark pushes a single objects from a shallow clone into a repository with 600,000 references that all point to commits: Benchmark 1: git-receive-pack (rev = HEAD~) Time (mean ± σ): 9.179 s ± 0.031 s [User: 8.858 s, System: 0.528 s] Range (min … max): 9.154 s … 9.213 s 3 runs Benchmark 2: git-receive-pack (rev = HEAD) Time (mean ± σ): 2.337 s ± 0.032 s [User: 2.331 s, System: 0.234 s] Range (min … max): 2.308 s … 2.371 s 3 runs Summary git-receive-pack . Signed-off-by: Junio C Hamano --- commit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/commit.c b/commit.c index fb1b01b9c3acf4..25cdbd3be41922 100644 --- a/commit.c +++ b/commit.c @@ -45,7 +45,7 @@ struct commit *lookup_commit_reference_gently(struct repository *r, { const struct object_id *maybe_peeled; struct object_id peeled_oid; - struct object *object; + struct commit *commit; enum object_type type; switch (peel_object_ext(r, oid, &peeled_oid, 0, &type)) { @@ -67,11 +67,11 @@ struct commit *lookup_commit_reference_gently(struct repository *r, return NULL; } - object = parse_object(r, maybe_peeled); - if (!object) + commit = lookup_commit(r, maybe_peeled); + if (!commit || repo_parse_commit_gently(r, commit, quiet) < 0) return NULL; - return object_as_type(object, OBJ_COMMIT, quiet); + return commit; } struct commit *lookup_commit_reference(struct repository *r, const struct object_id *oid) From 20daad2db4d5ff6c108d473ae068785198868d68 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Wed, 18 Feb 2026 15:01:20 -0600 Subject: [PATCH 35/38] object-file: use `container_of()` to convert from base types To improve code hygiene, replace direct casts from `struct odb_transaction` and `struct odb_read_stream` to their concrete implementations with `container_of()`. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/object-file.c b/object-file.c index 1b62996ef0a96f..1a24f08978f508 100644 --- a/object-file.c +++ b/object-file.c @@ -719,7 +719,8 @@ struct odb_transaction_files { static void prepare_loose_object_transaction(struct odb_transaction *base) { - struct odb_transaction_files *transaction = (struct odb_transaction_files *)base; + struct odb_transaction_files *transaction = + container_of(base, struct odb_transaction_files, base); /* * We lazily create the temporary object directory @@ -738,7 +739,8 @@ static void prepare_loose_object_transaction(struct odb_transaction *base) static void fsync_loose_object_transaction(struct odb_transaction *base, int fd, const char *filename) { - struct odb_transaction_files *transaction = (struct odb_transaction_files *)base; + struct odb_transaction_files *transaction = + container_of(base, struct odb_transaction_files, base); /* * If we have an active ODB transaction, we issue a call that @@ -1634,11 +1636,14 @@ int index_fd(struct index_state *istate, struct object_id *oid, type, path, flags); } else { struct object_database *odb = the_repository->objects; + struct odb_transaction_files *files_transaction; struct odb_transaction *transaction; transaction = odb_transaction_begin(odb); - ret = index_blob_packfile_transaction((struct odb_transaction_files *)odb->transaction, - oid, fd, + files_transaction = container_of(odb->transaction, + struct odb_transaction_files, + base); + ret = index_blob_packfile_transaction(files_transaction, oid, fd, xsize_t(st->st_size), path, flags); odb_transaction_commit(transaction); @@ -1992,7 +1997,8 @@ int read_loose_object(struct repository *repo, static void odb_transaction_files_commit(struct odb_transaction *base) { - struct odb_transaction_files *transaction = (struct odb_transaction_files *)base; + struct odb_transaction_files *transaction = + container_of(base, struct odb_transaction_files, base); flush_loose_object_transaction(transaction); flush_packfile_transaction(transaction); @@ -2047,7 +2053,8 @@ struct odb_loose_read_stream { static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) { - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); size_t total_read = 0; switch (st->z_state) { @@ -2093,7 +2100,9 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t static int close_istream_loose(struct odb_read_stream *_st) { - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); From 96286f14b0dc1a1c9ba4f6842d99ce738cc766da Mon Sep 17 00:00:00 2001 From: Tian Yuchen Date: Tue, 17 Feb 2026 01:20:28 +0800 Subject: [PATCH 36/38] symlinks: use unsigned int for flags The 'flags' and 'track_flags' fields in symlinks.c are used strictly as a collection of bits (using bitwise operators including &, |, ~). Using a signed integer for bitmasks may lead to undefined behavior with shift operations and logic errors if the MSB is touched. Change these fields from 'int' to 'unsigned int' to match our usage patterns. Signed-off-by: Tian Yuchen Signed-off-by: Junio C Hamano --- symlinks.c | 13 +++++++------ symlinks.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/symlinks.c b/symlinks.c index 9cc090d42c089f..9e01ab3bc86c5b 100644 --- a/symlinks.c +++ b/symlinks.c @@ -74,11 +74,12 @@ static inline void reset_lstat_cache(struct cache_def *cache) */ static int lstat_cache_matchlen(struct cache_def *cache, const char *name, int len, - int *ret_flags, int track_flags, + unsigned int *ret_flags, unsigned int track_flags, int prefix_len_stat_func) { int match_len, last_slash, last_slash_dir, previous_slash; - int save_flags, ret, saved_errno = 0; + unsigned int save_flags; + int ret, saved_errno = 0; struct stat st; if (cache->track_flags != track_flags || @@ -192,10 +193,10 @@ static int lstat_cache_matchlen(struct cache_def *cache, return match_len; } -static int lstat_cache(struct cache_def *cache, const char *name, int len, - int track_flags, int prefix_len_stat_func) +static unsigned int lstat_cache(struct cache_def *cache, const char *name, int len, + unsigned int track_flags, int prefix_len_stat_func) { - int flags; + unsigned int flags; (void)lstat_cache_matchlen(cache, name, len, &flags, track_flags, prefix_len_stat_func); return flags; @@ -234,7 +235,7 @@ int check_leading_path(const char *name, int len, int warn_on_lstat_err) static int threaded_check_leading_path(struct cache_def *cache, const char *name, int len, int warn_on_lstat_err) { - int flags; + unsigned int flags; int match_len = lstat_cache_matchlen(cache, name, len, &flags, FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT); int saved_errno = errno; diff --git a/symlinks.h b/symlinks.h index 7ae3d5b85695fb..25bf04f54fbee0 100644 --- a/symlinks.h +++ b/symlinks.h @@ -5,8 +5,8 @@ struct cache_def { struct strbuf path; - int flags; - int track_flags; + unsigned int flags; + unsigned int track_flags; int prefix_len_stat_func; }; #define CACHE_DEF_INIT { \ From 8e06f961d7290e2fd31ea008b00b7a1ed1c904b5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 22 Feb 2026 12:16:19 -0800 Subject: [PATCH 37/38] object-file.c: avoid container_of() of a NULL container Even though the "struct odb_transaction" member is at the beginning of the containing "struct odb_transaction_files", i.e., at offset 0, using container_of() to add offset 0 to a NULL pointer gets flagged as a bad behaviour under SANITIZE=undefined. Use container_of_or_null() to work around this issue. Helped-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object-file.c b/object-file.c index 1a24f08978f508..bd580ef032f741 100644 --- a/object-file.c +++ b/object-file.c @@ -720,7 +720,7 @@ struct odb_transaction_files { static void prepare_loose_object_transaction(struct odb_transaction *base) { struct odb_transaction_files *transaction = - container_of(base, struct odb_transaction_files, base); + container_of_or_null(base, struct odb_transaction_files, base); /* * We lazily create the temporary object directory @@ -740,7 +740,7 @@ static void fsync_loose_object_transaction(struct odb_transaction *base, int fd, const char *filename) { struct odb_transaction_files *transaction = - container_of(base, struct odb_transaction_files, base); + container_of_or_null(base, struct odb_transaction_files, base); /* * If we have an active ODB transaction, we issue a call that From 4805bb993087cb1fb9a81470aaa238123c0ed6e2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 2 Mar 2026 16:05:51 -0800 Subject: [PATCH 38/38] The 9th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 7c8c2fa466e0ec..207db16b80d7a7 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -36,6 +36,14 @@ UI, Workflows & Features * Extend the alias configuration syntax to allow aliases using characters outside ASCII alphanumeric (plus '-'). + * A signature on a commit that was GPG signed long time ago ought to + be still valid after the key that was used to sign it has expired, + but we showed them in alarming red. + + * "git subtree split --prefix=P " now checks the prefix P + against the tree of the (potentially quite different from the + current working tree) given commit. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -86,6 +94,19 @@ Performance, Internal Implementation, Development Support etc. * Some tests assumed "iconv" is available without honoring ICONV prerequisite, which has been corrected. + * Revamp object enumeration API around odb. + + * Additional tests were introduced to see the interaction with netrc + auth with auth failure on the http transport. + + * A couple of bugs in use of flag bits around odb API has been + corrected, and the flag bits reordered. + + * Plumb gitk/git-gui build and install procedure in meson based + builds. + + * The code to accept shallow "git push" has been optimized. + Fixes since v2.53 ----------------- @@ -155,6 +176,9 @@ Fixes since v2.53 * "git format-patch --from=" did not honor the command line option when writing out the cover letter, which has been corrected. + * Update build precedure for mergetool documentation in meson-based builds. + (merge 58e4eeeeb5 pw/meson-doc-mergetool later to maint). + * Other code cleanup, docfix, build fix, etc. (merge d79fff4a11 jk/remote-tracking-ref-leakfix later to maint). (merge 7a747f972d dd/t5403-modernise later to maint). @@ -180,3 +204,6 @@ Fixes since v2.53 (merge 5133837392 ps/ci-gitlab-msvc-updates later to maint). (merge 143e84958c db/doc-fetch-jobs-auto later to maint). (merge 0678e01f02 ap/use-test-seq-f-more later to maint). + (merge 96286f14b0 ty/symlinks-use-unsigned-for-bitset later to maint). + (merge b10e0cb1f3 kh/doc-am-xref later to maint). + (merge ed84bc1c0d kh/doc-patch-id-4 later to maint).