From e28c86f002d66e88f12042659b667f5cc9d67b93 Mon Sep 17 00:00:00 2001
From: Andrew Oakley <andrew@adoakley.name>
Date: Mon, 12 Apr 2021 09:52:50 +0100
Subject: [PATCH 001/198] git-p4: avoid decoding more data from perforce

Perforce does not validate or store the encoding of user submitted data
by default (although this can be enabled).  In large repositories it is
therefore very likely that some data will not be valid UTF-8.

Historically (with python2) git-p4 did not attempt to decode the data
from the perforce server - it just passed bytes from perforce to git,
preserving whatever was stored in perforce.  This seems like a sensible
approach - it avoids any loss of data, and there is no way to determine
the intended encoding for any invalid data from perforce.

This change updates git-p4 to avoid decoding changelist descriptions,
user and time information.  The time data is almost certainly valid
unicode, but as they are processed with the user information it is more
convenient for them to be handled as bytes.

Signed-off-by: Andrew Oakley <andrew@adoakley.name>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-p4.py                          | 57 +++++++++++++++---------------
 t/t9835-git-p4-message-encoding.sh | 48 +++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 28 deletions(-)
 create mode 100755 t/t9835-git-p4-message-encoding.sh

diff --git a/git-p4.py b/git-p4.py
index 09c9e93ac401cc..8407ec5c7ae216 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -764,13 +764,15 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
         while True:
             entry = marshal.load(p4.stdout)
             if bytes is not str:
-                # Decode unmarshalled dict to use str keys and values, except for:
-                #   - `data` which may contain arbitrary binary data
-                #   - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
+                # Decode unmarshalled dict to use str keys and values, except
+                # for cases where the values may not be valid UTF-8.
+                binary_keys = ('data', 'path', 'clientFile', 'Description',
+                               'desc', 'Email', 'FullName', 'Owner', 'time',
+                               'user', 'User')
                 decoded_entry = {}
                 for key, value in entry.items():
                     key = key.decode()
-                    if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
+                    if isinstance(value, bytes) and not (key in binary_keys or key.startswith('depotFile')):
                         value = value.decode()
                     decoded_entry[key] = value
                 # Parse out data if it's an error response
@@ -949,11 +951,11 @@ def gitConfigInt(key):
             _gitConfig[key] = None
     return _gitConfig[key]
 
-def gitConfigList(key):
+def gitConfigList(key, raw=False):
     if key not in _gitConfig:
-        s = read_pipe(["git", "config", "--get-all", key], ignore_error=True)
+        s = read_pipe(["git", "config", "--get-all", key], ignore_error=True, raw=raw)
         _gitConfig[key] = s.strip().splitlines()
-        if _gitConfig[key] == ['']:
+        if _gitConfig[key] == [''] or _gitConfig[key] == [b'']:
             _gitConfig[key] = []
     return _gitConfig[key]
 
@@ -1499,35 +1501,35 @@ def getUserMapFromPerforceServer(self):
         for output in p4CmdList("users"):
             if "User" not in output:
                 continue
-            self.users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
+            self.users[output["User"]] = output["FullName"] + b" <" + output["Email"] + b">"
             self.emails[output["Email"]] = output["User"]
 
-        mapUserConfigRegex = re.compile(r"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$", re.VERBOSE)
-        for mapUserConfig in gitConfigList("git-p4.mapUser"):
+        mapUserConfigRegex = re.compile(br"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$", re.VERBOSE)
+        for mapUserConfig in gitConfigList("git-p4.mapUser", raw=True):
             mapUser = mapUserConfigRegex.findall(mapUserConfig)
             if mapUser and len(mapUser[0]) == 3:
                 user = mapUser[0][0]
                 fullname = mapUser[0][1]
                 email = mapUser[0][2]
-                self.users[user] = fullname + " <" + email + ">"
+                self.users[user] = fullname + b" <" + email + b">"
                 self.emails[email] = user
 
-        s = ''
+        s = b''
         for (key, val) in self.users.items():
-            s += "%s\t%s\n" % (key.expandtabs(1), val.expandtabs(1))
+            s += b"%s\t%s\n" % (key.expandtabs(1), val.expandtabs(1))
 
-        open(self.getUserCacheFilename(), 'w').write(s)
+        open(self.getUserCacheFilename(), 'wb').write(s)
         self.userMapFromPerforceServer = True
 
     def loadUserMapFromCache(self):
         self.users = {}
         self.userMapFromPerforceServer = False
         try:
-            cache = open(self.getUserCacheFilename(), 'r')
+            cache = open(self.getUserCacheFilename(), 'rb')
             lines = cache.readlines()
             cache.close()
             for line in lines:
-                entry = line.strip().split("\t")
+                entry = line.strip().split(b"\t")
                 self.users[entry[0]] = entry[1]
         except IOError:
             self.getUserMapFromPerforceServer()
@@ -1780,7 +1782,7 @@ def p4UserForCommit(self,id):
         # Return the tuple (perforce user,git email) for a given git commit id
         self.getUserMapFromPerforceServer()
         gitEmail = read_pipe(["git", "log", "--max-count=1",
-                              "--format=%ae", id])
+                              "--format=%ae", id], raw=True)
         gitEmail = gitEmail.strip()
         if gitEmail not in self.emails:
             return (None,gitEmail)
@@ -1911,7 +1913,7 @@ def prepareSubmitTemplate(self, changelist=None):
             template += key + ':'
             if key == 'Description':
                 template += '\n'
-            for field_line in change_entry[key].splitlines():
+            for field_line in decode_text_stream(change_entry[key]).splitlines():
                 template += '\t'+field_line+'\n'
         if len(files_list) > 0:
             template += '\n'
@@ -2163,7 +2165,7 @@ def applyCommit(self, id):
            submitTemplate += "\n######## Actual user %s, modified after commit\n" % p4User
 
         if self.checkAuthorship and not self.p4UserIsMe(p4User):
-            submitTemplate += "######## git author %s does not match your p4 account.\n" % gitEmail
+            submitTemplate += "######## git author %s does not match your p4 account.\n" % decode_text_stream(gitEmail)
             submitTemplate += "######## Use option --preserve-user to modify authorship.\n"
             submitTemplate += "######## Variable git-p4.skipUserNameCheck hides this message.\n"
 
@@ -2802,7 +2804,7 @@ def __init__(self):
         self.knownBranches = {}
         self.initialParents = {}
 
-        self.tz = "%+03d%02d" % (- time.timezone / 3600, ((- time.timezone % 3600) / 60))
+        self.tz = b"%+03d%02d" % (- time.timezone / 3600, ((- time.timezone % 3600) / 60))
         self.labels = {}
 
     # Force a checkpoint in fast-import and wait for it to finish
@@ -3161,7 +3163,7 @@ def make_email(self, userid):
         if userid in self.users:
             return self.users[userid]
         else:
-            return "%s <a@b>" % userid
+            return b"%s <a@b>" % userid
 
     def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
         """ Stream a p4 tag.
@@ -3184,9 +3186,9 @@ def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
             email = self.make_email(owner)
         else:
             email = self.make_email(self.p4UserId())
-        tagger = "%s %s %s" % (email, epoch, self.tz)
+        tagger = b"%s %s %s" % (email, epoch, self.tz)
 
-        gitStream.write("tagger %s\n" % tagger)
+        gitStream.write(b"tagger %s\n" % tagger)
 
         print("labelDetails=",labelDetails)
         if 'Description' in labelDetails:
@@ -3279,12 +3281,11 @@ def commit(self, details, files, branch, parent = "", allow_empty=False):
         self.gitStream.write("commit %s\n" % branch)
         self.gitStream.write("mark :%s\n" % details["change"])
         self.committedChanges.add(int(details["change"]))
-        committer = ""
         if author not in self.users:
             self.getUserMapFromPerforceServer()
-        committer = "%s %s %s" % (self.make_email(author), epoch, self.tz)
+        committer = b"%s %s %s" % (self.make_email(author), epoch, self.tz)
 
-        self.gitStream.write("committer %s\n" % committer)
+        self.gitStream.write(b"committer %s\n" % committer)
 
         self.gitStream.write("data <<EOT\n")
         self.gitStream.write(details["desc"])
@@ -3422,7 +3423,7 @@ def importP4Labels(self, stream, p4Labels):
                         print("Could not convert label time %s" % labelDetails['Update'])
                         tmwhen = 1
 
-                    when = int(time.mktime(tmwhen))
+                    when = b"%i" % int(time.mktime(tmwhen))
                     self.streamTag(stream, name, labelDetails, gitCommit, when)
                     if verbose:
                         print("p4 label %s mapped to git commit %s" % (name, gitCommit))
@@ -3708,7 +3709,7 @@ def importHeadRevision(self, revision):
         print("Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), revision, self.branch))
 
         details = {}
-        details["user"] = "git perforce import user"
+        details["user"] = b"git perforce import user"
         details["desc"] = ("Initial import of %s from the state at revision %s\n"
                            % (' '.join(self.depotPaths), revision))
         details["change"] = revision
diff --git a/t/t9835-git-p4-message-encoding.sh b/t/t9835-git-p4-message-encoding.sh
new file mode 100755
index 00000000000000..93f24fe295e34e
--- /dev/null
+++ b/t/t9835-git-p4-message-encoding.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+test_description='Clone repositories with non ASCII commit messages'
+
+. ./lib-git-p4.sh
+
+UTF8="$(printf "a-\303\244_o-\303\266_u-\303\274")"
+ISO8859="$(printf "a-\344_o-\366_u-\374")"
+
+test_expect_success 'start p4d' '
+	start_p4d
+'
+
+test_expect_success 'create commits in perforce' '
+	(
+		cd "$cli" &&
+
+		p4_add_user "${UTF8}" &&
+		p4_add_user "${ISO8859}" &&
+
+		>dummy-file1 &&
+		P4USER="${UTF8}" p4 add dummy-file1 &&
+		P4USER="${UTF8}" p4 submit -d "message ${UTF8}" &&
+
+		>dummy-file2 &&
+		P4USER="${ISO8859}" p4 add dummy-file2 &&
+		P4USER="${ISO8859}" p4 submit -d "message ${ISO8859}"
+	)
+'
+
+test_expect_success 'check UTF-8 commit' '
+	(
+		git p4 clone --destination="$git/1" //depot@1,1 &&
+		git -C "$git/1" cat-file commit HEAD | grep -q "^message ${UTF8}$" &&
+		git -C "$git/1" cat-file commit HEAD | grep -q "^author Dr. ${UTF8} <${UTF8}@example.com>"
+	)
+'
+
+test_expect_success 'check ISO-8859 commit' '
+	(
+		git p4 clone --destination="$git/2" //depot@2,2 &&
+		git -C "$git/2" cat-file commit HEAD > /tmp/dump.txt &&
+		git -C "$git/2" cat-file commit HEAD | grep -q "^message ${ISO8859}$" &&
+		git -C "$git/2" cat-file commit HEAD | grep -q "^author Dr. ${ISO8859} <${ISO8859}@example.com>"
+	)
+'
+
+test_done

From ac9cdb5fd175869aef35ca126a41976818599f4f Mon Sep 17 00:00:00 2001
From: Andrew Oakley <andrew@adoakley.name>
Date: Mon, 12 Apr 2021 09:52:51 +0100
Subject: [PATCH 002/198] git-p4: do not decode data from perforce by default

This commit is not intended to change behaviour, any we still attempt to
decode values that might not be valid unicode.  It's not clear that all
of these values are safe to decode, but it's now more obvious which data
is decoded.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-p4.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index 8407ec5c7ae216..8a97ff3dd2d633 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -764,15 +764,19 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
         while True:
             entry = marshal.load(p4.stdout)
             if bytes is not str:
-                # Decode unmarshalled dict to use str keys and values, except
-                # for cases where the values may not be valid UTF-8.
-                binary_keys = ('data', 'path', 'clientFile', 'Description',
-                               'desc', 'Email', 'FullName', 'Owner', 'time',
-                               'user', 'User')
+                # Decode unmarshalled dict to use str keys and values where it
+                # is expected that the data is always valid UTF-8.
+                text_keys = ('action', 'change', 'Change', 'Client', 'code',
+                             'fileSize', 'headAction', 'headRev', 'headType',
+                             'Jobs', 'label', 'options', 'perm', 'rev', 'Root',
+                             'Status', 'type', 'Update')
+                text_key_prefixes = ('action', 'File', 'job', 'rev', 'type',
+                                     'View')
                 decoded_entry = {}
                 for key, value in entry.items():
                     key = key.decode()
-                    if isinstance(value, bytes) and not (key in binary_keys or key.startswith('depotFile')):
+                    if isinstance(value, bytes) and (key in text_keys or
+                            any(filter(key.startswith, text_key_prefixes))):
                         value = value.decode()
                     decoded_entry[key] = value
                 # Parse out data if it's an error response

From 51a0478d2c65df251d69dda959f15260a5fbe50b Mon Sep 17 00:00:00 2001
From: Tzadik Vanderhoof <tzadik.vanderhoof@gmail.com>
Date: Thu, 29 Apr 2021 00:39:05 -0700
Subject: [PATCH 003/198] git-p4: git-p4.fallbackEncoding to specify non UTF-8
 charset

Add git-p4.fallbackEncoding config variable, to prevent git-p4 from
crashing on non UTF-8 changeset descriptions.

When git-p4 reads the output from a p4 command, it assumes it will
be 100% UTF-8. If even one character in the output of one p4 command is
not UTF-8, git-p4 crashes with:

    File "C:/Program Files/Git/bin/git-p4.py", line 774, in p4CmdList
        value = value.decode() UnicodeDecodeError: 'utf-8' codec can't
        decode byte Ox93 in position 42: invalid start byte

This is especially a problem for the "git p4 clone ... @all" command,
where git-p4 needs to read thousands of changeset descriptions, one of
which may have a stray smart quote, causing the whole clone operation
to fail.

Add a new config setting, allowing git-p4 to try a fallback encoding
(for example, "cp1252") and/or use the Unicode replacement character,
to prevent the whole program from crashing on such a minor problem.

Signed-off-by: Tzadik Vanderhoof <tzadik.vanderhoof@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-p4.txt                   |  9 ++
 git-p4.py                                  | 11 ++-
 t/t9836-git-p4-config-fallback-encoding.sh | 98 ++++++++++++++++++++++
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100755 t/t9836-git-p4-config-fallback-encoding.sh

diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index f89e68b424c3b2..86d3ffa644a49f 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -638,6 +638,15 @@ git-p4.pathEncoding::
 	to transcode the paths to UTF-8. As an example, Perforce on Windows
 	often uses "cp1252" to encode path names.
 
+git-p4.fallbackEncoding::
+	Perforce changeset descriptions can be stored in any encoding.
+	Git-p4 first tries to interpret each description as UTF-8. If that
+	fails, this config allows another encoding to be tried. You can specify,
+	for example, "cp1252". If git-p4.fallbackEncoding is "replace", UTF-8 will
+	be used, with invalid UTF-8 characters replaced by the Unicode replacement
+	character. The default is "none": there is no fallback, and any non UTF-8
+	character will cause git-p4 to immediately fail.
+
 git-p4.largeFileSystem::
 	Specify the system that is used for large (binary) files. Please note
 	that large file systems do not support the 'git p4 submit' command.
diff --git a/git-p4.py b/git-p4.py
index 09c9e93ac401cc..202fb01bdf0491 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -771,7 +771,16 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
                 for key, value in entry.items():
                     key = key.decode()
                     if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
-                        value = value.decode()
+                        try:
+                            value = value.decode()
+                        except UnicodeDecodeError:
+                            fallbackEncoding = gitConfig("git-p4.fallbackEncoding").lower() or 'none'
+                            if fallbackEncoding == 'none':
+                                raise Exception("UTF-8 decoding failed. Consider using git config git-p4.fallbackEncoding")
+                            elif fallbackEncoding == 'replace':
+                                value = value.decode(errors='replace')
+                            else:
+                                value = value.decode(encoding=fallbackEncoding)
                     decoded_entry[key] = value
                 # Parse out data if it's an error response
                 if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
diff --git a/t/t9836-git-p4-config-fallback-encoding.sh b/t/t9836-git-p4-config-fallback-encoding.sh
new file mode 100755
index 00000000000000..901bb3759df508
--- /dev/null
+++ b/t/t9836-git-p4-config-fallback-encoding.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+test_description='test git-p4.fallbackEncoding config'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./lib-git-p4.sh
+
+test_expect_success 'start p4d' '
+	start_p4d
+'
+
+test_expect_success 'add Unicode description' '
+	cd "$cli" &&
+	echo file1 >file1 &&
+	p4 add file1 &&
+	p4 submit -d documentación
+'
+
+# Unicode descriptions cause "git p4 clone" to crash with a UnicodeDecodeError in some
+# environments. This test determines if that is the case in our environment. If so,
+# we create a file called "clone_fails". In subsequent tests, we check whether that
+# file exists to determine what behavior to expect.
+
+clone_fails="$TRASH_DIRECTORY/clone_fails"
+
+# If clone fails with git-p4.fallbackEncoding set to "none", create the "clone_fails" file,
+# and make sure the error message is correct
+
+test_expect_success 'clone with git-p4.fallbackEncoding set to "none"' '
+	git config --global git-p4.fallbackEncoding none &&
+	test_when_finished cleanup_git && {
+		git p4 clone --dest="$git" //depot@all 2>error || (
+			>"$clone_fails" &&
+			grep "UTF-8 decoding failed. Consider using git config git-p4.fallbackEncoding" error
+		)
+	}
+'
+
+# If clone fails with git-p4.fallbackEncoding set to "none", it should also fail when it's unset,
+# also with the correct error message.  Otherwise the clone should succeed.
+
+test_expect_success 'clone with git-p4.fallbackEncoding unset' '
+	git config --global --unset git-p4.fallbackEncoding &&
+	test_when_finished cleanup_git && {
+		(
+			test -f "$clone_fails" &&
+			test_must_fail git p4 clone --dest="$git" //depot@all 2>error &&
+			grep "UTF-8 decoding failed. Consider using git config git-p4.fallbackEncoding" error
+		) ||
+		(
+			! test -f "$clone_fails" &&
+			git p4 clone --dest="$git" //depot@all 2>error
+		)
+	}
+'
+
+# Whether or not "clone_fails" exists, setting git-p4.fallbackEncoding
+# to "cp1252" should cause clone to succeed and get the right description
+
+test_expect_success 'clone with git-p4.fallbackEncoding set to "cp1252"' '
+	git config --global git-p4.fallbackEncoding cp1252 &&
+	test_when_finished cleanup_git &&
+	(
+		git p4 clone --dest="$git" //depot@all &&
+		cd "$git" &&
+		git log --oneline >log &&
+		desc=$(head -1 log | cut -d" " -f2) &&
+		test "$desc" = "documentación"
+	)
+'
+
+# Setting git-p4.fallbackEncoding to "replace" should always cause clone to succeed.
+# If "clone_fails" exists, the description should contain the Unicode replacement
+# character, otherwise the description should be correct (since we're on a system that
+# doesn't have the Unicode issue)
+
+test_expect_success 'clone with git-p4.fallbackEncoding set to "replace"' '
+	git config --global git-p4.fallbackEncoding replace &&
+	test_when_finished cleanup_git &&
+	(
+		git p4 clone --dest="$git" //depot@all &&
+		cd "$git" &&
+		git log --oneline >log &&
+		desc=$(head -1 log | cut -d" " -f2) &&
+		{
+			(test -f "$clone_fails" &&
+				test "$desc" = "documentaci�n"
+			) ||
+			(! test -f "$clone_fails" &&
+				test "$desc" = "documentación"
+			)
+		}
+	)
+'
+
+test_done

From 7b1a3fb529fa7a0395b9e1fa6d89ec1b5aa888c2 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Sun, 6 Jun 2021 13:47:23 -0500
Subject: [PATCH 004/198] completion: bash: fix prefix detection in branch.*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise we are completely ignoring the --cur argument.

The issue can be tested with:

  git clone --config=branch.<tab>

Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Tested-by: David Aguilar <davvid@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/completion/git-completion.bash | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index b50c5d0ea3867b..47b48fbab6614e 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -2649,8 +2649,8 @@ __git_complete_config_variable_name ()
 		return
 		;;
 	branch.*)
-		local pfx="${cur%.*}."
-		cur_="${cur#*.}"
+		local pfx="${cur_%.*}."
+		cur_="${cur_#*.}"
 		__gitcomp_direct "$(__git_heads "$pfx" "$cur_" ".")"
 		__gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "$sfx"
 		return

From 772a6e72a8b04e6faa2b86414d643ca29bd062d2 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Sun, 6 Jun 2021 13:47:24 -0500
Subject: [PATCH 005/198] completion: bash: fix for suboptions with value

We need to ignore options that don't start with -- as well.

Depending on the value of COMP_WORDBREAKS the last word could be
duplicated otherwise.

Can be tested with:

  git merge -X diff-algorithm=<tab>

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Tested-by: David Aguilar <davvid@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/completion/git-completion.bash |  2 +-
 t/t9902-completion.sh                  | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 47b48fbab6614e..05606609f97bd6 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -356,7 +356,7 @@ __gitcomp ()
 	local cur_="${3-$cur}"
 
 	case "$cur_" in
-	--*=)
+	*=)
 		;;
 	--no-*)
 		local c i=0 IFS=$' \t\n'
diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh
index cb057ef16134f7..6b56e54fc37c67 100755
--- a/t/t9902-completion.sh
+++ b/t/t9902-completion.sh
@@ -540,6 +540,15 @@ test_expect_success '__gitcomp - expand/narrow all negative options' '
 	EOF
 '
 
+test_expect_success '__gitcomp - equal skip' '
+	test_gitcomp "--option=" "--option=" <<-\EOF &&
+
+	EOF
+	test_gitcomp "option=" "option=" <<-\EOF
+
+	EOF
+'
+
 test_expect_success '__gitcomp - doesnt fail because of invalid variable name' '
 	__gitcomp "$invalid_variable_name"
 '
@@ -2380,6 +2389,12 @@ test_expect_success 'git clone --config= - value' '
 	EOF
 '
 
+test_expect_success 'options with value' '
+	test_completion "git merge -X diff-algorithm=" <<-\EOF
+
+	EOF
+'
+
 test_expect_success 'sourcing the completion script clears cached commands' '
 	__git_compute_all_commands &&
 	verbose test -n "$__git_all_commands" &&

From 345d5425a1102a08f6e6ba375152793c5a2cdd7f Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Sun, 6 Jun 2021 13:47:25 -0500
Subject: [PATCH 006/198] completion: bash: fix for multiple dash commands

Otherwise commands like 'for-each-ref' are not completed.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Tested-by: David Aguilar <davvid@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/completion/git-completion.bash | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 05606609f97bd6..1feb2ee1081a4e 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -421,7 +421,7 @@ __gitcomp_builtin ()
 	local incl="${2-}"
 	local excl="${3-}"
 
-	local var=__gitcomp_builtin_"${cmd/-/_}"
+	local var=__gitcomp_builtin_"${cmd//-/_}"
 	local options
 	eval "options=\${$var-}"
 

From e3ee1a316302caaeeb176ad4c05a977b08c379b2 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Sun, 6 Jun 2021 13:47:26 -0500
Subject: [PATCH 007/198] completion: bash: add correct suffix in variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__gitcomp automatically adds a suffix, but __gitcomp_nl and others
don't, we need to specify a space by default.

Can be tested with:

  git config branch.autoSetupMe<tab>

This fix only works for versions of bash greater than 4.0, before that
"local sfx" creates an empty string, therefore the unset expansion
doesn't work. The same happens in zsh.

Therefore we don't add the test for that for now.

The correct fix for all shells requires semantic changes in __gitcomp,
but that can be done later.

Cc: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Tested-by: David Aguilar <davvid@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/completion/git-completion.bash | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 1feb2ee1081a4e..c72b5465f9ba14 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -2652,7 +2652,7 @@ __git_complete_config_variable_name ()
 		local pfx="${cur_%.*}."
 		cur_="${cur_#*.}"
 		__gitcomp_direct "$(__git_heads "$pfx" "$cur_" ".")"
-		__gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "$sfx"
+		__gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "${sfx- }"
 		return
 		;;
 	guitool.*.*)
@@ -2686,7 +2686,7 @@ __git_complete_config_variable_name ()
 		local pfx="${cur_%.*}."
 		cur_="${cur_#*.}"
 		__git_compute_all_commands
-		__gitcomp_nl "$__git_all_commands" "$pfx" "$cur_" "$sfx"
+		__gitcomp_nl "$__git_all_commands" "$pfx" "$cur_" "${sfx- }"
 		return
 		;;
 	remote.*.*)
@@ -2702,7 +2702,7 @@ __git_complete_config_variable_name ()
 		local pfx="${cur_%.*}."
 		cur_="${cur_#*.}"
 		__gitcomp_nl "$(__git_remotes)" "$pfx" "$cur_" "."
-		__gitcomp_nl_append "pushDefault" "$pfx" "$cur_" "$sfx"
+		__gitcomp_nl_append "pushDefault" "$pfx" "$cur_" "${sfx- }"
 		return
 		;;
 	url.*.*)

From e681b728e7db88445f2c69be7b2f07053c577d5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 7 Jun 2021 13:58:24 +0200
Subject: [PATCH 008/198] test-tool: split up test-tool read-cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the "test-tool read-cache" was originally added back in
1ecb5ff141 (read-cache: add simple performance test, 2013-06-09) it's
been growing all sorts of bells and whistles that aren't very
conducive to performance testing the index, e.g. it learned how to
read config.

Then in recent changes in e2df6c3972 (test-read-cache: print cache
entries with --table, 2021-03-30) and 2782db3eed (test-tool: don't
force full index, 2021-03-30) we gained even more logic to deal with
sparse index testing.

I think that having one test tool do so many different things makes it
harder to read its code. Let's instead split up the "again" and "perf"
uses for it into their own tools.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile                         |  2 ++
 t/helper/test-read-cache-again.c | 31 ++++++++++++++++++
 t/helper/test-read-cache-perf.c  | 21 ++++++++++++
 t/helper/test-read-cache.c       | 56 +++++++++++++-------------------
 t/helper/test-tool.c             |  2 ++
 t/helper/test-tool.h             |  2 ++
 t/perf/p0002-read-cache.sh       |  2 +-
 t/t7519-status-fsmonitor.sh      |  2 +-
 8 files changed, 83 insertions(+), 35 deletions(-)
 create mode 100644 t/helper/test-read-cache-again.c
 create mode 100644 t/helper/test-read-cache-perf.c

diff --git a/Makefile b/Makefile
index c3565fc0f8f7df..142303bd17a3a2 100644
--- a/Makefile
+++ b/Makefile
@@ -732,6 +732,8 @@ TEST_BUILTINS_OBJS += test-prio-queue.o
 TEST_BUILTINS_OBJS += test-proc-receive.o
 TEST_BUILTINS_OBJS += test-progress.o
 TEST_BUILTINS_OBJS += test-reach.o
+TEST_BUILTINS_OBJS += test-read-cache-again.o
+TEST_BUILTINS_OBJS += test-read-cache-perf.o
 TEST_BUILTINS_OBJS += test-read-cache.o
 TEST_BUILTINS_OBJS += test-read-graph.o
 TEST_BUILTINS_OBJS += test-read-midx.o
diff --git a/t/helper/test-read-cache-again.c b/t/helper/test-read-cache-again.c
new file mode 100644
index 00000000000000..707db036cb1ade
--- /dev/null
+++ b/t/helper/test-read-cache-again.c
@@ -0,0 +1,31 @@
+#include "test-tool.h"
+#include "cache.h"
+
+int cmd__read_cache_again(int argc, const char **argv)
+{
+	struct repository *r = the_repository;
+	int i, cnt;
+	const char *name;
+
+	if (argc != 2)
+		die("usage: test-tool read-cache-again <count> <file>");
+
+	cnt = strtol(argv[0], NULL, 0);
+	name = argv[2];
+
+	setup_git_directory();
+	for (i = 0; i < cnt; i++) {
+		int pos;
+		repo_read_index(r);
+		refresh_index(r->index, REFRESH_QUIET,
+			      NULL, NULL, NULL);
+		pos = index_name_pos(r->index, name, strlen(name));
+		if (pos < 0)
+			die("%s not in index", name);
+		printf("%s is%s up to date\n", name,
+		       ce_uptodate(r->index->cache[pos]) ? "" : " not");
+		write_file(name, "%d\n", cnt);
+		discard_index(r->index);
+	}
+	return 0;
+}
diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c
new file mode 100644
index 00000000000000..90176c010a10ff
--- /dev/null
+++ b/t/helper/test-read-cache-perf.c
@@ -0,0 +1,21 @@
+#include "test-tool.h"
+#include "cache.h"
+
+int cmd__read_cache_perf(int argc, const char **argv)
+{
+	struct repository *r = the_repository;
+	int i, cnt = 1;
+
+	if (argc == 2)
+		cnt = strtol(argv[1], NULL, 0);
+	else
+		die("usage: test-tool read-cache-perf [<count>]");
+
+	setup_git_directory();
+	for (i = 0; i < cnt; i++) {
+		repo_read_index(r);
+		discard_index(r->index);
+	}
+
+	return 0;
+}
diff --git a/t/helper/test-read-cache.c b/t/helper/test-read-cache.c
index b52c174acc7a1f..ae4b9b70ad187d 100644
--- a/t/helper/test-read-cache.c
+++ b/t/helper/test-read-cache.c
@@ -5,6 +5,12 @@
 #include "commit.h"
 #include "tree.h"
 #include "sparse-index.h"
+#include "parse-options.h"
+
+static const char *read_cache_usage[] = {
+	"test-tool read-cache [<options>...]",
+	NULL
+};
 
 static void print_cache_entry(struct cache_entry *ce)
 {
@@ -34,49 +40,33 @@ static void print_cache(struct index_state *istate)
 int cmd__read_cache(int argc, const char **argv)
 {
 	struct repository *r = the_repository;
-	int i, cnt = 1;
-	const char *name = NULL;
 	int table = 0, expand = 0;
+	struct option options[] = {
+		OPT_BOOL(0, "table", &table,
+			 "print a dump of the cache"),
+		OPT_BOOL(0, "expand", &expand,
+			 "call ensure_full_index()"),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, "test-tools", options, read_cache_usage, 0);
+	if (argc > 0)
+		usage_msg_opt("Too many arguments.", read_cache_usage, options);
 
 	initialize_the_repository();
 	prepare_repo_settings(r);
 	r->settings.command_requires_full_index = 0;
 
-	for (++argv, --argc; *argv && starts_with(*argv, "--"); ++argv, --argc) {
-		if (skip_prefix(*argv, "--print-and-refresh=", &name))
-			continue;
-		if (!strcmp(*argv, "--table"))
-			table = 1;
-		else if (!strcmp(*argv, "--expand"))
-			expand = 1;
-	}
-
-	if (argc == 1)
-		cnt = strtol(argv[0], NULL, 0);
 	setup_git_directory();
 	git_config(git_default_config, NULL);
+	repo_read_index(r);
 
-	for (i = 0; i < cnt; i++) {
-		repo_read_index(r);
-
-		if (expand)
-			ensure_full_index(r->index);
+	if (expand)
+		ensure_full_index(r->index);
 
-		if (name) {
-			int pos;
+	if (table)
+		print_cache(r->index);
+	discard_index(r->index);
 
-			refresh_index(r->index, REFRESH_QUIET,
-				      NULL, NULL, NULL);
-			pos = index_name_pos(r->index, name, strlen(name));
-			if (pos < 0)
-				die("%s not in index", name);
-			printf("%s is%s up to date\n", name,
-			       ce_uptodate(r->index->cache[pos]) ? "" : " not");
-			write_file(name, "%d\n", i);
-		}
-		if (table)
-			print_cache(r->index);
-		discard_index(r->index);
-	}
 	return 0;
 }
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index c5bd0c6d4c7abc..b0300f70c7964a 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -54,6 +54,8 @@ static struct test_cmd cmds[] = {
 	{ "progress", cmd__progress },
 	{ "reach", cmd__reach },
 	{ "read-cache", cmd__read_cache },
+	{ "read-cache-again", cmd__read_cache_again },
+	{ "read-cache-perf", cmd__read_cache_perf },
 	{ "read-graph", cmd__read_graph },
 	{ "read-midx", cmd__read_midx },
 	{ "ref-store", cmd__ref_store },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index e8069a3b2222b9..7f451a1eb5d42d 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -43,6 +43,8 @@ int cmd__proc_receive(int argc, const char **argv);
 int cmd__progress(int argc, const char **argv);
 int cmd__reach(int argc, const char **argv);
 int cmd__read_cache(int argc, const char **argv);
+int cmd__read_cache_again(int argc, const char **argv);
+int cmd__read_cache_perf(int argc, const char **argv);
 int cmd__read_graph(int argc, const char **argv);
 int cmd__read_midx(int argc, const char **argv);
 int cmd__ref_store(int argc, const char **argv);
diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh
index cdd105a5945239..d0ba5173fb161a 100755
--- a/t/perf/p0002-read-cache.sh
+++ b/t/perf/p0002-read-cache.sh
@@ -8,7 +8,7 @@ test_perf_default_repo
 
 count=1000
 test_perf "read_cache/discard_cache $count times" "
-	test-tool read-cache $count
+	test-tool read-cache-perf $count
 "
 
 test_done
diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 637391c6ce4608..4c199c16d4de11 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -359,7 +359,7 @@ test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR'
 test_expect_success 'discard_index() also discards fsmonitor info' '
 	test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
 	test_might_fail git update-index --refresh &&
-	test-tool read-cache --print-and-refresh=tracked 2 >actual &&
+	test-tool read-cache-again 2 tracked >actual &&
 	printf "tracked is%s up to date\n" "" " not" >expect &&
 	test_cmp expect actual
 '

From 1eb71fb0e962493a4e46c74949975a6831494992 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 7 Jun 2021 13:58:25 +0200
Subject: [PATCH 009/198] test-tool: migrate read-cache-perf to parse_options()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the newly added (but then mostly copy/pasted) read-cache-perf
to use the parse_options() API. This will make things easier as we add
new options.

Since we check the "cnt = < 1" case now via more idiomatic
post-parse_options() assertions we can move from the for-loop to a
while-loop and ditch the "i" variable.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-read-cache-perf.c | 26 ++++++++++++++++++++------
 t/perf/p0002-read-cache.sh      |  2 +-
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c
index 90176c010a10ff..54ad0c3135e2a0 100644
--- a/t/helper/test-read-cache-perf.c
+++ b/t/helper/test-read-cache-perf.c
@@ -1,18 +1,32 @@
 #include "test-tool.h"
 #include "cache.h"
+#include "parse-options.h"
+
+static const char *read_cache_perf_usage[] = {
+	"test-tool read-cache-perf [<options>...]",
+	NULL
+};
 
 int cmd__read_cache_perf(int argc, const char **argv)
 {
 	struct repository *r = the_repository;
-	int i, cnt = 1;
+	int cnt = -1;
+	struct option options[] = {
+		OPT_INTEGER(0, "count", &cnt, "number of passes"),
+		OPT_END()
+	};
 
-	if (argc == 2)
-		cnt = strtol(argv[1], NULL, 0);
-	else
-		die("usage: test-tool read-cache-perf [<count>]");
+	argc = parse_options(argc, argv, "test-tools", options,
+			     read_cache_perf_usage, 0);
+	if (argc > 0)
+		usage_msg_opt("Too many arguments.", read_cache_perf_usage,
+			      options);
+	if (cnt < 1)
+		usage_msg_opt("Need at least one pass.", read_cache_perf_usage,
+			      options);
 
 	setup_git_directory();
-	for (i = 0; i < cnt; i++) {
+	while (cnt--) {
 		repo_read_index(r);
 		discard_index(r->index);
 	}
diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh
index d0ba5173fb161a..1762b648654ef2 100755
--- a/t/perf/p0002-read-cache.sh
+++ b/t/perf/p0002-read-cache.sh
@@ -8,7 +8,7 @@ test_perf_default_repo
 
 count=1000
 test_perf "read_cache/discard_cache $count times" "
-	test-tool read-cache-perf $count
+	test-tool read-cache-perf --count=$count
 "
 
 test_done

From 32ace34988f3221c0ca4e8754e0b4372e7e24ebd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 7 Jun 2021 13:58:26 +0200
Subject: [PATCH 010/198] test-tool: migrate read-cache-again to
 parse_options()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the newly added (but then mostly copy/pasted) read-cache-perf
to use the parse_options() API. I have no plans to further modify
read-cache-again, but making these commands consistent has a value in
and of itself.

Since we check the "cnt = < 1" case now via more idiomatic
post-parse_options() assertions we can move from the for-loop to a
while-loop and ditch the "i" variable.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-read-cache-again.c | 28 ++++++++++++++++++++++------
 t/t7519-status-fsmonitor.sh      |  2 +-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/t/helper/test-read-cache-again.c b/t/helper/test-read-cache-again.c
index 707db036cb1ade..8487f79d048e8b 100644
--- a/t/helper/test-read-cache-again.c
+++ b/t/helper/test-read-cache-again.c
@@ -1,20 +1,36 @@
 #include "test-tool.h"
 #include "cache.h"
+#include "parse-options.h"
+
+static const char *read_cache_again_usage[] = {
+	"test-tool read-cache-again [<options>...] <file>",
+	NULL
+};
 
 int cmd__read_cache_again(int argc, const char **argv)
 {
 	struct repository *r = the_repository;
-	int i, cnt;
+	int cnt = -1;
 	const char *name;
+	struct option options[] = {
+		OPT_INTEGER(0, "count", &cnt, "number of passes"),
+		OPT_END()
+	};
 
-	if (argc != 2)
-		die("usage: test-tool read-cache-again <count> <file>");
-
-	cnt = strtol(argv[0], NULL, 0);
+	argc = parse_options(argc, argv, "test-tools", options,
+			     read_cache_again_usage, 0);
+	if (argc != 1)
+		usage_msg_opt("Too many arguments.", read_cache_again_usage,
+			      options);
+	if (cnt == -1)
+		cnt = 2;
+	else if (cnt < 1)
+		usage_msg_opt("Need at least one pass.", read_cache_again_usage,
+			      options);
 	name = argv[2];
 
 	setup_git_directory();
-	for (i = 0; i < cnt; i++) {
+	while (cnt--) {
 		int pos;
 		repo_read_index(r);
 		refresh_index(r->index, REFRESH_QUIET,
diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 4c199c16d4de11..fd0815f6b7a8de 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -359,7 +359,7 @@ test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR'
 test_expect_success 'discard_index() also discards fsmonitor info' '
 	test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
 	test_might_fail git update-index --refresh &&
-	test-tool read-cache-again 2 tracked >actual &&
+	test-tool read-cache-again --count=2 tracked >actual &&
 	printf "tracked is%s up to date\n" "" " not" >expect &&
 	test_cmp expect actual
 '

From bbf9e8f1381a33f2c2955412ed91749c0c0fe75e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 7 Jun 2021 13:58:27 +0200
Subject: [PATCH 011/198] read-cache perf: add a perf test for refresh_index()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a perf test for the refresh_index() function to compliment the
existing read()/discard() in a loop perf test added in
1ecb5ff141f (read-cache: add simple performance test, 2013-06-09).

Since this test is much slower (around 10x) than the previous
read()/discard() test let's run it 100 times instead of the 1000 time
the first one runs.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-read-cache-perf.c | 12 ++++++++++++
 t/perf/p0002-read-cache.sh      |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c
index 54ad0c3135e2a0..a2802559a5cd25 100644
--- a/t/helper/test-read-cache-perf.c
+++ b/t/helper/test-read-cache-perf.c
@@ -11,8 +11,11 @@ int cmd__read_cache_perf(int argc, const char **argv)
 {
 	struct repository *r = the_repository;
 	int cnt = -1;
+	int refresh = 0;
 	struct option options[] = {
 		OPT_INTEGER(0, "count", &cnt, "number of passes"),
+		OPT_BOOL(0, "refresh", &refresh,
+			 "call refresh_index() in a loop, not read()/discard()"),
 		OPT_END()
 	};
 
@@ -26,10 +29,19 @@ int cmd__read_cache_perf(int argc, const char **argv)
 			      options);
 
 	setup_git_directory();
+	if (refresh)
+		repo_read_index(r);
 	while (cnt--) {
+		if (refresh) {
+			unsigned int flags = REFRESH_QUIET|REFRESH_PROGRESS;
+			refresh_index(r->index, flags, NULL, NULL, NULL);
+			continue;
+		}
 		repo_read_index(r);
 		discard_index(r->index);
 	}
+	if (refresh)
+		discard_index(r->index);
 
 	return 0;
 }
diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh
index 1762b648654ef2..cbccc5ace95d07 100755
--- a/t/perf/p0002-read-cache.sh
+++ b/t/perf/p0002-read-cache.sh
@@ -11,4 +11,9 @@ test_perf "read_cache/discard_cache $count times" "
 	test-tool read-cache-perf --count=$count
 "
 
+count=100
+test_perf "refresh_index() $count times" "
+	test-tool read-cache-perf --count=$count --refresh
+"
+
 test_done

From 7fd34207440af057c372477fc1fc51fdc169ec89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 8 Jun 2021 14:16:27 +0200
Subject: [PATCH 012/198] upload-pack: run is_repository_shallow() before
 setup_revisions()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the is_repository_shallow() added in b790e0f67cd (upload-pack:
send shallow info over stdin to pack-objects, 2014-03-11) to above
setup_revisions().

Running is_repository_shallow() before setup_revisions() doesn't
matter now, but in subsequent commits we'll make the code that
followed setup_revisions() happen inside a callback in that
function. This isolated change documents that re-arranging this part
of the code is OK in isolation.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index de00adbb9e0d51..1fbaa34f91b264 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3757,11 +3757,12 @@ static void get_object_list(int ac, const char **av)
 
 	repo_init_revisions(the_repository, &revs, NULL);
 	save_commit_buffer = 0;
-	setup_revisions(ac, av, &revs, &s_r_opt);
 
 	/* make sure shallows are read */
 	is_repository_shallow(the_repository);
 
+	setup_revisions(ac, av, &revs, &s_r_opt);
+
 	save_warning = warn_on_object_refname_ambiguity;
 	warn_on_object_refname_ambiguity = 0;
 

From 2521047754c785190aed85be08b3f2867e3b6195 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= <lenaic@lhuard.fr>
Date: Sat, 12 Jun 2021 18:50:41 +0200
Subject: [PATCH 013/198] =?UTF-8?q?cache.h:=20Introduce=20a=20generic=20"x?=
 =?UTF-8?q?dg=5Fconfig=5Fhome=5Ffor(=E2=80=A6)"=20function?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Current implementation of `xdg_config_home(filename)` returns
`$XDG_CONFIG_HOME/git/$filename`, with the `git` subdirectory inserted
between the `XDG_CONFIG_HOME` environment variable and the parameter.

This patch introduces a `xdg_config_home_for(subdir, filename)` function
which is more generic. It only concatenates "$XDG_CONFIG_HOME", or
"$HOME/.config" if the former isn’t defined, with the parameters,
without adding `git` in between.

`xdg_config_home(filename)` is now implemented by calling
`xdg_config_home_for("git", filename)` but this new generic function can
be used to compute the configuration directory of other programs.

Signed-off-by: Lénaïc Huard <lenaic@lhuard.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h |  7 +++++++
 path.c  | 13 ++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/cache.h b/cache.h
index ba04ff8bd36b36..2a0fb3e4ba16f0 100644
--- a/cache.h
+++ b/cache.h
@@ -1286,6 +1286,13 @@ int is_ntfs_dotmailmap(const char *name);
  */
 int looks_like_command_line_option(const char *str);
 
+/**
+ * Return a newly allocated string with the evaluation of
+ * "$XDG_CONFIG_HOME/$subdir/$filename" if $XDG_CONFIG_HOME is non-empty, otherwise
+ * "$HOME/.config/$subdir/$filename". Return NULL upon error.
+ */
+char *xdg_config_home_for(const char *subdir, const char *filename);
+
 /**
  * Return a newly allocated string with the evaluation of
  * "$XDG_CONFIG_HOME/git/$filename" if $XDG_CONFIG_HOME is non-empty, otherwise
diff --git a/path.c b/path.c
index 7bccd830e95890..1b1de3be0932d8 100644
--- a/path.c
+++ b/path.c
@@ -1503,21 +1503,28 @@ int looks_like_command_line_option(const char *str)
 	return str && str[0] == '-';
 }
 
-char *xdg_config_home(const char *filename)
+char *xdg_config_home_for(const char *subdir, const char *filename)
 {
 	const char *home, *config_home;
 
+	assert(subdir);
 	assert(filename);
 	config_home = getenv("XDG_CONFIG_HOME");
 	if (config_home && *config_home)
-		return mkpathdup("%s/git/%s", config_home, filename);
+		return mkpathdup("%s/%s/%s", config_home, subdir, filename);
 
 	home = getenv("HOME");
 	if (home)
-		return mkpathdup("%s/.config/git/%s", home, filename);
+		return mkpathdup("%s/.config/%s/%s", home, subdir, filename);
+
 	return NULL;
 }
 
+char *xdg_config_home(const char *filename)
+{
+	return xdg_config_home_for("git", filename);
+}
+
 char *xdg_cache_home(const char *filename)
 {
 	const char *home, *cache_home;

From 9c0d17a61d2cd952b182eb2e2d095cbed712d445 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Fri, 11 Jun 2021 15:54:25 -0700
Subject: [PATCH 014/198] t7400-submodule-basic: modernize inspect() helper

Since the inspect() helper in the submodule-basic test suite was
written, 'git -C <dir>' was added. By using -C, we no longer need a
reference to the base directory for the test. This simplifies callsites,
and will make the addition of other arguments in later patches more
readable.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7400-submodule-basic.sh | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index a924fdb7a6c9aa..f5dc051a6e4f18 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -107,25 +107,18 @@ test_expect_success 'setup - repository to add submodules to' '
 # generates, which will expand symbolic links.
 submodurl=$(pwd -P)
 
-listbranches() {
-	git for-each-ref --format='%(refname)' 'refs/heads/*'
-}
-
 inspect() {
 	dir=$1 &&
-	dotdot="${2:-..}" &&
 
-	(
-		cd "$dir" &&
-		listbranches >"$dotdot/heads" &&
-		{ git symbolic-ref HEAD || :; } >"$dotdot/head" &&
-		git rev-parse HEAD >"$dotdot/head-sha1" &&
-		git update-index --refresh &&
-		git diff-files --exit-code &&
-		git clean -n -d -x >"$dotdot/untracked"
-	)
+	git -C "$dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads &&
+	{ git -C "$dir" symbolic-ref HEAD || :; } >head &&
+	git -C "$dir" rev-parse HEAD >head-sha1 &&
+	git -C "$dir" update-index --refresh &&
+	git -C "$dir" diff-files --exit-code &&
+	git -C "$dir" clean -n -d -x >untracked
 }
 
+
 test_expect_success 'submodule add' '
 	echo "refs/heads/main" >expect &&
 
@@ -146,7 +139,7 @@ test_expect_success 'submodule add' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/submod ../.. &&
+	inspect addtest/submod &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -237,7 +230,7 @@ test_expect_success 'submodule add --branch' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/submod-branch ../.. &&
+	inspect addtest/submod-branch &&
 	test_cmp expect-heads heads &&
 	test_cmp expect-head head &&
 	test_must_be_empty untracked
@@ -253,7 +246,7 @@ test_expect_success 'submodule add with ./ in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/dotsubmod/frotz ../../.. &&
+	inspect addtest/dotsubmod/frotz &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -269,7 +262,7 @@ test_expect_success 'submodule add with /././ in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/dotslashdotsubmod/frotz ../../.. &&
+	inspect addtest/dotslashdotsubmod/frotz &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -285,7 +278,7 @@ test_expect_success 'submodule add with // in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/slashslashsubmod/frotz ../../.. &&
+	inspect addtest/slashslashsubmod/frotz &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -301,7 +294,7 @@ test_expect_success 'submodule add with /.. in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod ../.. &&
+	inspect addtest/realsubmod &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -317,7 +310,7 @@ test_expect_success 'submodule add with ./, /.. and // in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod2 ../.. &&
+	inspect addtest/realsubmod2 &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -348,7 +341,7 @@ test_expect_success 'submodule add in subdirectory' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod3 ../.. &&
+	inspect addtest/realsubmod3 &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked

From cb3a2ac603528856e64137ef45b171f77ffaaa81 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Tue, 15 Jun 2021 05:16:09 +0000
Subject: [PATCH 015/198] xdiff: implement a zealous diff3, or "zdiff3"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"zdiff3" is identical to ordinary diff3 except that it allows compaction
of common lines on the two sides of history at the beginning or end of
the conflict hunk.  For example, the following diff3 conflict:

    1
    2
    3
    4
    <<<<<<
    A
    B
    C
    D
    E
    ||||||
    5
    6
    ======
    A
    X
    C
    Y
    E
    >>>>>>
    7
    8
    9

has common lines 'A', 'C', and 'E' on the two sides.  With zdiff3, one
would instead get the following conflict:

    1
    2
    3
    4
    A
    <<<<<<
    B
    C
    D
    ||||||
    5
    6
    ======
    X
    C
    Y
    >>>>>>
    E
    7
    8
    9

Note that the common lines, 'A', and 'E' were moved outside the
conflict.  Unlike with the two-way conflicts from the 'merge'
conflictStyle, the zdiff3 conflict is NOT split into multiple conflict
regions to allow the common 'C' lines to be shown outside a conflict,
because zdiff3 shows the base version too and the base version cannot be
reasonably split.

Initial-patch-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/merge-file.c                   |  2 +
 contrib/completion/git-completion.bash |  2 +-
 xdiff-interface.c                      |  2 +
 xdiff/xdiff.h                          |  1 +
 xdiff/xmerge.c                         | 52 ++++++++++++++++++++++++--
 5 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/builtin/merge-file.c b/builtin/merge-file.c
index 06a2f90c4875f2..e695867ee54894 100644
--- a/builtin/merge-file.c
+++ b/builtin/merge-file.c
@@ -34,6 +34,8 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix)
 	struct option options[] = {
 		OPT_BOOL('p', "stdout", &to_stdout, N_("send results to standard output")),
 		OPT_SET_INT(0, "diff3", &xmp.style, N_("use a diff3 based merge"), XDL_MERGE_DIFF3),
+		OPT_SET_INT(0, "zdiff3", &xmp.style, N_("use a zealous diff3 based merge"),
+				XDL_MERGE_ZEALOUS_DIFF3),
 		OPT_SET_INT(0, "ours", &xmp.favor, N_("for conflicts, use our version"),
 			    XDL_MERGE_FAVOR_OURS),
 		OPT_SET_INT(0, "theirs", &xmp.favor, N_("for conflicts, use their version"),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index b50c5d0ea3867b..8594559298140f 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1566,7 +1566,7 @@ _git_checkout ()
 
 	case "$cur" in
 	--conflict=*)
-		__gitcomp "diff3 merge" "" "${cur##--conflict=}"
+		__gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}"
 		;;
 	--*)
 		__gitcomp_builtin checkout
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 609615db2cd68f..9977813a9d37d4 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -308,6 +308,8 @@ int git_xmerge_config(const char *var, const char *value, void *cb)
 			die("'%s' is not a boolean", var);
 		if (!strcmp(value, "diff3"))
 			git_xmerge_style = XDL_MERGE_DIFF3;
+		else if (!strcmp(value, "zdiff3"))
+			git_xmerge_style = XDL_MERGE_ZEALOUS_DIFF3;
 		else if (!strcmp(value, "merge"))
 			git_xmerge_style = 0;
 		/*
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 7a046051468f9e..8629ae287c79c3 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -65,6 +65,7 @@ extern "C" {
 
 /* merge output styles */
 #define XDL_MERGE_DIFF3 1
+#define XDL_MERGE_ZEALOUS_DIFF3 2
 
 typedef struct s_mmfile {
 	char *ptr;
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index 1659edb45393a6..b1dc9df7ea0ac9 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -230,7 +230,7 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1,
 	size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, 1,
 			      dest ? dest + size : NULL);
 
-	if (style == XDL_MERGE_DIFF3) {
+	if (style == XDL_MERGE_DIFF3 || style == XDL_MERGE_ZEALOUS_DIFF3) {
 		/* Shared preimage */
 		if (!dest) {
 			size += marker_size + 1 + needs_cr + marker3_size;
@@ -327,7 +327,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
  * lines. Try hard to show only these few lines as conflicting.
  */
 static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
-		xpparam_t const *xpp)
+				xpparam_t const *xpp, int style)
 {
 	for (; m; m = m->next) {
 		mmfile_t t1, t2;
@@ -368,6 +368,42 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
 			continue;
 		}
 		x = xscr;
+		if (style == XDL_MERGE_ZEALOUS_DIFF3) {
+			int advance1 = xscr->i1, advance2 = xscr->i2;
+
+			/*
+			 * Advance m->i1 and m->i2 so that conflict for sides
+			 * 1 and 2 start after common region.  Decrement
+			 * m->chg[12] since there are now fewer conflict lines
+			 * for those sides.
+			 */
+			m->i1 += advance1;
+			m->i2 += advance2;
+			m->chg1 -= advance1;
+			m->chg2 -= advance2;
+
+			/*
+			 * Splitting conflicts due to internal common regions
+			 * on the two sides would be inappropriate since we
+			 * are also showing the merge base and have no
+			 * reasonable way to split the merge base text.
+			 */
+			while (xscr->next)
+				xscr = xscr->next;
+
+			/*
+			 * Lower the number of conflict lines to not include
+			 * the final common lines, if any.  Do this by setting
+			 * number of conflict lines to
+			 *   (line offset for start of conflict in xscr) +
+			 *   (number of lines in the conflict in xscr)
+			 */
+			m->chg1 = (xscr->i1 - advance1) + (xscr->chg1);
+			m->chg2 = (xscr->i2 - advance2) + (xscr->chg2);
+			xdl_free_env(&xe);
+			xdl_free_script(x);
+			continue;
+		}
 		m->i1 = xscr->i1 + i1;
 		m->chg1 = xscr->chg1;
 		m->i2 = xscr->i2 + i2;
@@ -482,6 +518,16 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
 	int style = xmp->style;
 	int favor = xmp->favor;
 
+	/*
+	 * XDL_MERGE_DIFF3 does not attempt to refine conflicts by looking
+	 * at common areas of sides 1 & 2, because the base (side 0) does
+	 * not match and is being shown.
+	 *
+	 * XDL_MERGE_ZEALOUS_DIFF3 will attempt to refine conflicts
+	 * looking for common areas of sides 1 & 2, despite the base
+	 * not matching and being shown, but will only look for common
+	 * areas at the beginning or ending of the conflict block.
+	 */
 	if (style == XDL_MERGE_DIFF3) {
 		/*
 		 * "diff3 -m" output does not make sense for anything
@@ -604,7 +650,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
 		changes = c;
 	/* refine conflicts */
 	if (XDL_MERGE_ZEALOUS <= level &&
-	    (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 ||
+	    (xdl_refine_conflicts(xe1, xe2, changes, xpp, style) < 0 ||
 	     xdl_simplify_non_conflicts(xe1, changes,
 					XDL_MERGE_ZEALOUS < level) < 0)) {
 		xdl_cleanup_merge(changes);

From cf9d93e547cedd172f2180dab49fef27b0d2208d Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Tue, 15 Jun 2021 05:16:10 +0000
Subject: [PATCH 016/198] update documentation for new zdiff3 conflictStyle

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/merge.txt         |  9 +++++++-
 Documentation/git-checkout.txt         |  3 +--
 Documentation/git-merge-file.txt       |  3 +++
 Documentation/git-merge.txt            | 32 ++++++++++++++++++++++----
 Documentation/git-rebase.txt           |  6 ++---
 Documentation/git-restore.txt          |  3 +--
 Documentation/git-switch.txt           |  3 +--
 Documentation/technical/rerere.txt     | 10 ++++----
 builtin/checkout.c                     |  2 +-
 contrib/completion/git-completion.bash |  4 ++--
 10 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/Documentation/config/merge.txt b/Documentation/config/merge.txt
index cb2ed589075baa..7ab289f35c7f14 100644
--- a/Documentation/config/merge.txt
+++ b/Documentation/config/merge.txt
@@ -4,7 +4,14 @@ merge.conflictStyle::
 	shows a `<<<<<<<` conflict marker, changes made by one side,
 	a `=======` marker, changes made by the other side, and then
 	a `>>>>>>>` marker.  An alternate style, "diff3", adds a `|||||||`
-	marker and the original text before the `=======` marker.
+	marker and the original text before the `=======` marker.  The
+	"merge" style tends to produce smaller conflict regions than diff3,
+	both because of the exclusion of the original text, and because
+	when a subset of lines match on the two sides they are just pulled
+	out of the conflict region.  Another alternate style, "zdiff3", is
+	similar to diff3 but removes matching lines on the two sides from
+	the conflict region when those matching lines appear near the
+	beginning or ending of a conflict region.
 
 merge.defaultToUpstream::
 	If merge is called without any commit argument, merge the upstream
diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt
index b1a6fe44997306..85c3d3513f74c2 100644
--- a/Documentation/git-checkout.txt
+++ b/Documentation/git-checkout.txt
@@ -265,8 +265,7 @@ When switching branches with `--merge`, staged changes may be lost.
 	The same as `--merge` option above, but changes the way the
 	conflicting hunks are presented, overriding the
 	`merge.conflictStyle` configuration variable.  Possible values are
-	"merge" (default) and "diff3" (in addition to what is shown by
-	"merge" style, shows the original contents).
+	"merge" (default), "diff3", and "zdiff3".
 
 -p::
 --patch::
diff --git a/Documentation/git-merge-file.txt b/Documentation/git-merge-file.txt
index f85603261325f6..7e9093fab60d26 100644
--- a/Documentation/git-merge-file.txt
+++ b/Documentation/git-merge-file.txt
@@ -70,6 +70,9 @@ OPTIONS
 --diff3::
 	Show conflicts in "diff3" style.
 
+--zdiff3::
+	Show conflicts in "zdiff3" style.
+
 --ours::
 --theirs::
 --union::
diff --git a/Documentation/git-merge.txt b/Documentation/git-merge.txt
index 3819fadac1f1e4..259e1ac2cf0c97 100644
--- a/Documentation/git-merge.txt
+++ b/Documentation/git-merge.txt
@@ -238,7 +238,8 @@ from the RCS suite to present such a conflicted hunk, like this:
 
 ------------
 Here are lines that are either unchanged from the common
-ancestor, or cleanly resolved because only one side changed.
+ancestor, or cleanly resolved because only one side changed,
+or cleanly resolved because both sides changed the same way.
 <<<<<<< yours:sample.txt
 Conflict resolution is hard;
 let's go shopping.
@@ -259,16 +260,37 @@ side wants to say it is hard and you'd prefer to go shopping, while the
 other side wants to claim it is easy.
 
 An alternative style can be used by setting the "merge.conflictStyle"
-configuration variable to "diff3".  In "diff3" style, the above conflict
-may look like this:
+configuration variable to either "diff3" or "zdiff3".  In "diff3"
+style, the above conflict may look like this:
 
 ------------
 Here are lines that are either unchanged from the common
-ancestor, or cleanly resolved because only one side changed.
+ancestor, or cleanly resolved because only one side changed,
 <<<<<<< yours:sample.txt
+or cleanly resolved because both sides changed the same way.
 Conflict resolution is hard;
 let's go shopping.
-|||||||
+||||||| base:sample.txt
+or cleanly resolved because both sides changed identically.
+Conflict resolution is hard.
+=======
+or cleanly resolved because both sides changed the same way.
+Git makes conflict resolution easy.
+>>>>>>> theirs:sample.txt
+And here is another line that is cleanly resolved or unmodified.
+------------
+
+while in "zdiff3" style, it may look like this:
+
+------------
+Here are lines that are either unchanged from the common
+ancestor, or cleanly resolved because only one side changed,
+or cleanly resolved because both sides changed the same way.
+<<<<<<< yours:sample.txt
+Conflict resolution is hard;
+let's go shopping.
+||||||| base:sample.txt
+or cleanly resolved because both sides changed identically.
 Conflict resolution is hard.
 =======
 Git makes conflict resolution easy.
diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt
index 55af6fd24e27cd..a61742c8f98f43 100644
--- a/Documentation/git-rebase.txt
+++ b/Documentation/git-rebase.txt
@@ -740,9 +740,9 @@ information about the rebased commits and their parents (and instead
 generates new fake commits based off limited information in the
 generated patches), those commits cannot be identified; instead it has
 to fall back to a commit summary.  Also, when merge.conflictStyle is
-set to diff3, the apply backend will use "constructed merge base" to
-label the content from the merge base, and thus provide no information
-about the merge base commit whatsoever.
+set to diff3 or zdiff3, the apply backend will use "constructed merge
+base" to label the content from the merge base, and thus provide no
+information about the merge base commit whatsoever.
 
 The merge backend works with the full commits on both sides of history
 and thus has no such limitations.
diff --git a/Documentation/git-restore.txt b/Documentation/git-restore.txt
index 55bde91ef9e54b..5964810caa4153 100644
--- a/Documentation/git-restore.txt
+++ b/Documentation/git-restore.txt
@@ -92,8 +92,7 @@ in linkgit:git-checkout[1] for details.
 	The same as `--merge` option above, but changes the way the
 	conflicting hunks are presented, overriding the
 	`merge.conflictStyle` configuration variable.  Possible values
-	are "merge" (default) and "diff3" (in addition to what is
-	shown by "merge" style, shows the original contents).
+	are "merge" (default), "diff3", and "zdiff3".
 
 --ignore-unmerged::
 	When restoring files on the working tree from the index, do
diff --git a/Documentation/git-switch.txt b/Documentation/git-switch.txt
index 5c438cd5058758..5c90f76fbe3511 100644
--- a/Documentation/git-switch.txt
+++ b/Documentation/git-switch.txt
@@ -137,8 +137,7 @@ should result in deletion of the path).
 	The same as `--merge` option above, but changes the way the
 	conflicting hunks are presented, overriding the
 	`merge.conflictStyle` configuration variable.  Possible values are
-	"merge" (default) and "diff3" (in addition to what is shown by
-	"merge" style, shows the original contents).
+	"merge" (default), "diff3", and "zdiff3".
 
 -q::
 --quiet::
diff --git a/Documentation/technical/rerere.txt b/Documentation/technical/rerere.txt
index af5f9fc24f9343..35d454143399e0 100644
--- a/Documentation/technical/rerere.txt
+++ b/Documentation/technical/rerere.txt
@@ -14,9 +14,9 @@ conflicts before writing them to the rerere database.
 
 Different conflict styles and branch names are normalized by stripping
 the labels from the conflict markers, and removing the common ancestor
-version from the `diff3` conflict style. Branches that are merged
-in different order are normalized by sorting the conflict hunks.  More
-on each of those steps in the following sections.
+version from the `diff3` or `zdiff3` conflict styles.  Branches that
+are merged in different order are normalized by sorting the conflict
+hunks.  More on each of those steps in the following sections.
 
 Once these two normalization operations are applied, a conflict ID is
 calculated based on the normalized conflict, which is later used by
@@ -42,8 +42,8 @@ get a conflict like the following:
     >>>>>>> AC
 
 Doing the analogous with AC2 (forking a branch ABAC2 off of branch AB
-and then merging branch AC2 into it), using the diff3 conflict style,
-we get a conflict like the following:
+and then merging branch AC2 into it), using the diff3 or zdiff3
+conflict style, we get a conflict like the following:
 
     <<<<<<< HEAD
     B
diff --git a/builtin/checkout.c b/builtin/checkout.c
index f4cd7747d35dd1..45606936c328bf 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -1524,7 +1524,7 @@ static struct option *add_common_options(struct checkout_opts *opts,
 		OPT_BOOL(0, "progress", &opts->show_progress, N_("force progress reporting")),
 		OPT_BOOL('m', "merge", &opts->merge, N_("perform a 3-way merge with the new branch")),
 		OPT_STRING(0, "conflict", &opts->conflict_style, N_("style"),
-			   N_("conflict style (merge or diff3)")),
+			   N_("conflict style (merge, diff3, or zdiff3)")),
 		OPT_END()
 	};
 	struct option *newopts = parse_options_concat(prevopts, options);
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 8594559298140f..8489ca39497095 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -2445,7 +2445,7 @@ _git_switch ()
 
 	case "$cur" in
 	--conflict=*)
-		__gitcomp "diff3 merge" "" "${cur##--conflict=}"
+		__gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}"
 		;;
 	--*)
 		__gitcomp_builtin switch
@@ -2886,7 +2886,7 @@ _git_restore ()
 
 	case "$cur" in
 	--conflict=*)
-		__gitcomp "diff3 merge" "" "${cur##--conflict=}"
+		__gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}"
 		;;
 	--source=*)
 		__git_complete_refs --cur="${cur##--source=}"

From 9dcf24461440f9acff960d6b5d3592334248bf8e Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 15 Jun 2021 17:45:06 -0700
Subject: [PATCH 017/198] introduce submodule.superprojectGitDir cache

Teach submodules a reference to their superproject's gitdir. This allows
us to A) know that we're running from a submodule, and B) have a
shortcut to the superproject's vitals, for example, configs.

By using a relative path instead of an absolute path, we can move the
superproject directory around on the filesystem without breaking the
submodule's cache.

Since this cached value is only introduced during new submodule creation
via `git submodule add`, though, there is more work to do to allow the
cache to be created at other times.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/submodule.txt | 12 +++++++++
 builtin/submodule--helper.c        |  4 +++
 t/t7400-submodule-basic.sh         | 40 ++++++++++++++++--------------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt
index d7a63c8c12bbc2..7c459cc19e4a3b 100644
--- a/Documentation/config/submodule.txt
+++ b/Documentation/config/submodule.txt
@@ -90,3 +90,15 @@ submodule.alternateErrorStrategy::
 	`ignore`, `info`, `die`. Default is `die`. Note that if set to `ignore`
 	or `info`, and if there is an error with the computed alternate, the
 	clone proceeds as if no alternate was specified.
+
+submodule.superprojectGitDir::
+	The relative path from the submodule's worktree  to the superproject's
+	gitdir. This config should only be present in projects which are
+	submodules, but is not guaranteed to be present in every submodule. It
+	is set automatically during submodule creation.
++
+	In situations where more than one superproject references the same
+	submodule worktree, the value of this config and the behavior of
+	operations which use it are undefined. To reference a single project
+	from multiple superprojects, it is better to create a worktree of the
+	submodule for each superproject.
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index ae6174ab05bd83..7024a696625141 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1910,6 +1910,10 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 		git_config_set_in_file(p, "submodule.alternateErrorStrategy",
 					   error_strategy);
 
+	git_config_set_in_file(p, "submodule.superprojectGitdir",
+			       relative_path(absolute_path(get_git_dir()),
+					     path, &sb));
+
 	free(sm_alternate);
 	free(error_strategy);
 
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index f5dc051a6e4f18..e45f42588feb36 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -108,14 +108,18 @@ test_expect_success 'setup - repository to add submodules to' '
 submodurl=$(pwd -P)
 
 inspect() {
-	dir=$1 &&
-
-	git -C "$dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads &&
-	{ git -C "$dir" symbolic-ref HEAD || :; } >head &&
-	git -C "$dir" rev-parse HEAD >head-sha1 &&
-	git -C "$dir" update-index --refresh &&
-	git -C "$dir" diff-files --exit-code &&
-	git -C "$dir" clean -n -d -x >untracked
+	sub_dir=$1 &&
+	super_dir=$2 &&
+
+	git -C "$sub_dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads &&
+	{ git -C "$sub_dir" symbolic-ref HEAD || :; } >head &&
+	git -C "$sub_dir" rev-parse HEAD >head-sha1 &&
+	git -C "$sub_dir" update-index --refresh &&
+	git -C "$sub_dir" diff-files --exit-code &&
+	cached_super_dir="$(git -C "$sub_dir" config --get submodule.superprojectGitDir)" &&
+	[ "$(git -C "$super_dir" rev-parse --absolute-git-dir)" \
+		-ef "$sub_dir/$cached_super_dir" ] &&
+	git -C "$sub_dir" clean -n -d -x >untracked
 }
 
 
@@ -139,7 +143,7 @@ test_expect_success 'submodule add' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/submod &&
+	inspect addtest/submod addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -230,7 +234,7 @@ test_expect_success 'submodule add --branch' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/submod-branch &&
+	inspect addtest/submod-branch addtest &&
 	test_cmp expect-heads heads &&
 	test_cmp expect-head head &&
 	test_must_be_empty untracked
@@ -246,7 +250,7 @@ test_expect_success 'submodule add with ./ in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/dotsubmod/frotz &&
+	inspect addtest/dotsubmod/frotz addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -262,7 +266,7 @@ test_expect_success 'submodule add with /././ in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/dotslashdotsubmod/frotz &&
+	inspect addtest/dotslashdotsubmod/frotz addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -278,7 +282,7 @@ test_expect_success 'submodule add with // in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/slashslashsubmod/frotz &&
+	inspect addtest/slashslashsubmod/frotz addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -294,7 +298,7 @@ test_expect_success 'submodule add with /.. in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod &&
+	inspect addtest/realsubmod addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -310,7 +314,7 @@ test_expect_success 'submodule add with ./, /.. and // in path' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod2 &&
+	inspect addtest/realsubmod2 addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -341,7 +345,7 @@ test_expect_success 'submodule add in subdirectory' '
 	) &&
 
 	rm -f heads head untracked &&
-	inspect addtest/realsubmod3 &&
+	inspect addtest/realsubmod3 addtest &&
 	test_cmp expect heads &&
 	test_cmp expect head &&
 	test_must_be_empty untracked
@@ -482,7 +486,7 @@ test_expect_success 'update should work when path is an empty dir' '
 	git submodule update -q >update.out &&
 	test_must_be_empty update.out &&
 
-	inspect init &&
+	inspect init . &&
 	test_cmp expect head-sha1
 '
 
@@ -541,7 +545,7 @@ test_expect_success 'update should checkout rev1' '
 	echo "$rev1" >expect &&
 
 	git submodule update init &&
-	inspect init &&
+	inspect init . &&
 
 	test_cmp expect head-sha1
 '

From 74e46ce157d1a0ddf630f06ac7df782c30b9e457 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 15 Jun 2021 17:45:07 -0700
Subject: [PATCH 018/198] submodule: cache superproject gitdir during
 absorbgitdirs

Already during 'git submodule add' we cache a pointer to the
superproject's gitdir. However, this doesn't help brand-new
submodules created with 'git init' and later absorbed with 'git
submodule absorbgitdir'. Let's start adding that pointer during 'git
submodule absorbgitdir' too.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 submodule.c                        | 10 ++++++++++
 t/t7412-submodule-absorbgitdirs.sh |  9 ++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/submodule.c b/submodule.c
index 0b1d9c1dde5a8f..4b314bf09c72c3 100644
--- a/submodule.c
+++ b/submodule.c
@@ -2065,6 +2065,7 @@ static void relocate_single_git_dir_into_superproject(const char *path)
 	char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL;
 	char *new_git_dir;
 	const struct submodule *sub;
+	struct strbuf config_path = STRBUF_INIT, sb = STRBUF_INIT;
 
 	if (submodule_uses_worktrees(path))
 		die(_("relocate_gitdir for submodule '%s' with "
@@ -2096,6 +2097,15 @@ static void relocate_single_git_dir_into_superproject(const char *path)
 
 	relocate_gitdir(path, real_old_git_dir, real_new_git_dir);
 
+	/* cache pointer to superproject's gitdir */
+	/* NEEDSWORK: this may differ if experimental.worktreeConfig is enabled */
+	strbuf_addf(&config_path, "%s/config", real_new_git_dir);
+	git_config_set_in_file(config_path.buf, "submodule.superprojectGitdir",
+			       relative_path(get_super_prefix_or_empty(),
+					     path, &sb));
+
+	strbuf_release(&config_path);
+	strbuf_release(&sb);
 	free(old_git_dir);
 	free(real_old_git_dir);
 	free(real_new_git_dir);
diff --git a/t/t7412-submodule-absorbgitdirs.sh b/t/t7412-submodule-absorbgitdirs.sh
index 1cfa150768d7f8..e2d78e01df3e5f 100755
--- a/t/t7412-submodule-absorbgitdirs.sh
+++ b/t/t7412-submodule-absorbgitdirs.sh
@@ -30,7 +30,14 @@ test_expect_success 'absorb the git dir' '
 	git status >actual.1 &&
 	git -C sub1 rev-parse HEAD >actual.2 &&
 	test_cmp expect.1 actual.1 &&
-	test_cmp expect.2 actual.2
+	test_cmp expect.2 actual.2 &&
+
+	# make sure the submodule cached the superproject gitdir correctly
+	test-tool path-utils real_path . >expect &&
+	test-tool path-utils real_path \
+		"$(git -C sub1 config submodule.superprojectGitDir)" >actual &&
+
+	test_cmp expect actual
 '
 
 test_expect_success 'absorbing does not fail for deinitialized submodules' '

From d041338fb2f84881e3fd45ba35c60b803c5709a3 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 15 Jun 2021 17:45:08 -0700
Subject: [PATCH 019/198] submodule: cache superproject gitdir during 'update'

A cached path to the superproject's gitdir might be added during 'git
submodule add', but in some cases - like submodules which were created
before 'git submodule add' learned to cache that info - it might be
useful to update the cache. Let's do it during 'git submodule update',
when we already have a handle to the superproject while calling
operations on the submodules.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-submodule.sh            | 10 ++++++++++
 t/t7406-submodule-update.sh | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/git-submodule.sh b/git-submodule.sh
index 46783784240668..f98dcc16aecd31 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -648,6 +648,16 @@ cmd_update()
 			fi
 		fi
 
+		# Cache a pointer to the superproject's gitdir. This may have
+		# changed, so rewrite it unconditionally. Writes it to worktree
+		# if applicable, otherwise to local.
+		relative_gitdir="$(git rev-parse --path-format=relative \
+						 --prefix "${sm_path}" \
+						 --git-dir)"
+
+		git -C "$sm_path" config --worktree \
+			submodule.superprojectgitdir "$relative_gitdir"
+
 		if test -n "$recursive"
 		then
 			(
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index f4f61fe5544cdb..c39821ba8e9df8 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -1061,4 +1061,14 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s
 	)
 '
 
+test_expect_success 'submodule update adds superproject gitdir to older repos' '
+	(cd super &&
+	 git -C submodule config --unset submodule.superprojectGitdir &&
+	 git submodule update &&
+	 echo "../.git" >expect &&
+	 git -C submodule config submodule.superprojectGitdir >actual &&
+	 test_cmp expect actual
+	)
+'
+
 test_done

From 3fa6c577dcf8a578a7241d2210139419750b3740 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Wed, 16 Jun 2021 13:43:34 +0900
Subject: [PATCH 020/198] SQUASH???

---
 Documentation/config/submodule.txt | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt
index 7c459cc19e4a3b..58ba63a75e91ad 100644
--- a/Documentation/config/submodule.txt
+++ b/Documentation/config/submodule.txt
@@ -97,8 +97,5 @@ submodule.superprojectGitDir::
 	submodules, but is not guaranteed to be present in every submodule. It
 	is set automatically during submodule creation.
 +
-	In situations where more than one superproject references the same
-	submodule worktree, the value of this config and the behavior of
-	operations which use it are undefined. To reference a single project
-	from multiple superprojects, it is better to create a worktree of the
-	submodule for each superproject.
+Because of this configuration variable, it is forbidden to use the
+same submodule worktree shared by multiple superprojects.

From 74e34878155fc8658110279f036ceaaa80f92931 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= <lenaic@lhuard.fr>
Date: Fri, 2 Jul 2021 16:25:55 +0200
Subject: [PATCH 021/198] maintenance: `git maintenance run` learned
 `--scheduler=<scheduler>`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Depending on the system, different schedulers can be used to schedule
the hourly, daily and weekly executions of `git maintenance run`:
* `launchctl` for MacOS,
* `schtasks` for Windows and
* `crontab` for everything else.

`git maintenance run` now has an option to let the end-user explicitly
choose which scheduler he wants to use:
`--scheduler=auto|crontab|launchctl|schtasks`.

When `git maintenance start --scheduler=XXX` is run, it not only
registers `git maintenance run` tasks in the scheduler XXX, it also
removes the `git maintenance run` tasks from all the other schedulers to
ensure we cannot have two schedulers launching concurrent identical
tasks.

The default value is `auto` which chooses a suitable scheduler for the
system.

`git maintenance stop` doesn't have any `--scheduler` parameter because
this command will try to remove the `git maintenance run` tasks from all
the available schedulers.

Signed-off-by: Lénaïc Huard <lenaic@lhuard.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-maintenance.txt |   9 +
 builtin/gc.c                      | 370 ++++++++++++++++++++++++------
 t/t7900-maintenance.sh            |  55 ++++-
 3 files changed, 359 insertions(+), 75 deletions(-)

diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt
index 1e738ad398320a..576290b5c61175 100644
--- a/Documentation/git-maintenance.txt
+++ b/Documentation/git-maintenance.txt
@@ -179,6 +179,15 @@ OPTIONS
 	`maintenance.<task>.enabled` configured as `true` are considered.
 	See the 'TASKS' section for the list of accepted `<task>` values.
 
+--scheduler=auto|crontab|launchctl|schtasks::
+	When combined with the `start` subcommand, specify the scheduler
+	for running the hourly, daily and weekly executions of
+	`git maintenance run`.
+	Possible values for `<scheduler>` are `auto`, `crontab` (POSIX),
+	`launchctl` (macOS), and `schtasks` (Windows).
+	When `auto` is specified, the appropriate platform-specific
+	scheduler is used. Default is `auto`.
+
 
 TROUBLESHOOTING
 ---------------
diff --git a/builtin/gc.c b/builtin/gc.c
index f05d2f0a1ac9cd..96a43f99b47eed 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -1529,6 +1529,98 @@ static const char *get_frequency(enum schedule_priority schedule)
 	}
 }
 
+/*
+ * get_schedule_cmd` reads the GIT_TEST_MAINT_SCHEDULER environment variable
+ * to mock the schedulers that `git maintenance start` rely on.
+ *
+ * For test purpose, GIT_TEST_MAINT_SCHEDULER can be set to a comma-separated
+ * list of colon-separated key/value pairs where each pair contains a scheduler
+ * and its corresponding mock.
+ *
+ * * If $GET_TEST_MAINT_SCHEDULER is not set, return false and leave the
+ *   arguments unmodified.
+ *
+ * * If $GET_TEST_MAINT_SCHEDULER is set, return true.
+ *   In this case, the *cmd value is read as input.
+ *
+ *   * if the input value *cmd is the key of one of the comma-separated list
+ *     item, then *is_available is set to true and *cmd is modified and becomes
+ *     the mock command.
+ *
+ *   * if the input value *cmd isn’t the key of any of the comma-separated list
+ *     item, then *is_available is set to false.
+ *
+ * Ex.:
+ *   GIT_TEST_MAINT_SCHEDULER not set
+ *     ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+ *     ┃ Input ┃                     Output                      ┃
+ *     ┃ *cmd  ┃ return code │       *cmd        │ *is_available ┃
+ *     ┣━━━━━━━╋━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┫
+ *     ┃ "foo" ┃    false    │ "foo" (unchanged) │  (unchanged)  ┃
+ *     ┗━━━━━━━┻━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
+ *
+ *   GIT_TEST_MAINT_SCHEDULER set to “foo:./mock_foo.sh,bar:./mock_bar.sh”
+ *     ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+ *     ┃ Input ┃                     Output                      ┃
+ *     ┃ *cmd  ┃ return code │       *cmd        │ *is_available ┃
+ *     ┣━━━━━━━╋━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┫
+ *     ┃ "foo" ┃    true     │  "./mock.foo.sh"  │     true      ┃
+ *     ┃ "qux" ┃    true     │ "qux" (unchanged) │     false     ┃
+ *     ┗━━━━━━━┻━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
+ */
+static int get_schedule_cmd(const char **cmd, int *is_available)
+{
+	char *item;
+	char *testing = xstrdup_or_null(getenv("GIT_TEST_MAINT_SCHEDULER"));
+
+	if (!testing)
+		return 0;
+
+	if (is_available)
+		*is_available = 0;
+
+	for (item = testing;;) {
+		char *sep;
+		char *end_item = strchr(item, ',');
+		if (end_item)
+			*end_item = '\0';
+
+		sep = strchr(item, ':');
+		if (!sep)
+			die("GIT_TEST_MAINT_SCHEDULER unparseable: %s", testing);
+		*sep = '\0';
+
+		if (!strcmp(*cmd, item)) {
+			*cmd = sep + 1;
+			if (is_available)
+				*is_available = 1;
+			UNLEAK(testing);
+			return 1;
+		}
+
+		if (!end_item)
+			break;
+		item = end_item + 1;
+	}
+
+	free(testing);
+	return 1;
+}
+
+static int is_launchctl_available(void)
+{
+	const char *cmd = "launchctl";
+	int is_available;
+	if (get_schedule_cmd(&cmd, &is_available))
+		return is_available;
+
+#ifdef __APPLE__
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 static char *launchctl_service_name(const char *frequency)
 {
 	struct strbuf label = STRBUF_INIT;
@@ -1555,19 +1647,17 @@ static char *launchctl_get_uid(void)
 	return xstrfmt("gui/%d", getuid());
 }
 
-static int launchctl_boot_plist(int enable, const char *filename, const char *cmd)
+static int launchctl_boot_plist(int enable, const char *filename)
 {
+	const char *cmd = "launchctl";
 	int result;
 	struct child_process child = CHILD_PROCESS_INIT;
 	char *uid = launchctl_get_uid();
 
+	get_schedule_cmd(&cmd, NULL);
 	strvec_split(&child.args, cmd);
-	if (enable)
-		strvec_push(&child.args, "bootstrap");
-	else
-		strvec_push(&child.args, "bootout");
-	strvec_push(&child.args, uid);
-	strvec_push(&child.args, filename);
+	strvec_pushl(&child.args, enable ? "bootstrap" : "bootout", uid,
+		     filename, NULL);
 
 	child.no_stderr = 1;
 	child.no_stdout = 1;
@@ -1581,26 +1671,26 @@ static int launchctl_boot_plist(int enable, const char *filename, const char *cm
 	return result;
 }
 
-static int launchctl_remove_plist(enum schedule_priority schedule, const char *cmd)
+static int launchctl_remove_plist(enum schedule_priority schedule)
 {
 	const char *frequency = get_frequency(schedule);
 	char *name = launchctl_service_name(frequency);
 	char *filename = launchctl_service_filename(name);
-	int result = launchctl_boot_plist(0, filename, cmd);
+	int result = launchctl_boot_plist(0, filename);
 	unlink(filename);
 	free(filename);
 	free(name);
 	return result;
 }
 
-static int launchctl_remove_plists(const char *cmd)
+static int launchctl_remove_plists(void)
 {
-	return launchctl_remove_plist(SCHEDULE_HOURLY, cmd) ||
-		launchctl_remove_plist(SCHEDULE_DAILY, cmd) ||
-		launchctl_remove_plist(SCHEDULE_WEEKLY, cmd);
+	return launchctl_remove_plist(SCHEDULE_HOURLY) ||
+	       launchctl_remove_plist(SCHEDULE_DAILY) ||
+	       launchctl_remove_plist(SCHEDULE_WEEKLY);
 }
 
-static int launchctl_schedule_plist(const char *exec_path, enum schedule_priority schedule, const char *cmd)
+static int launchctl_schedule_plist(const char *exec_path, enum schedule_priority schedule)
 {
 	FILE *plist;
 	int i;
@@ -1669,8 +1759,8 @@ static int launchctl_schedule_plist(const char *exec_path, enum schedule_priorit
 	fclose(plist);
 
 	/* bootout might fail if not already running, so ignore */
-	launchctl_boot_plist(0, filename, cmd);
-	if (launchctl_boot_plist(1, filename, cmd))
+	launchctl_boot_plist(0, filename);
+	if (launchctl_boot_plist(1, filename))
 		die(_("failed to bootstrap service %s"), filename);
 
 	free(filename);
@@ -1678,21 +1768,35 @@ static int launchctl_schedule_plist(const char *exec_path, enum schedule_priorit
 	return 0;
 }
 
-static int launchctl_add_plists(const char *cmd)
+static int launchctl_add_plists(void)
 {
 	const char *exec_path = git_exec_path();
 
-	return launchctl_schedule_plist(exec_path, SCHEDULE_HOURLY, cmd) ||
-		launchctl_schedule_plist(exec_path, SCHEDULE_DAILY, cmd) ||
-		launchctl_schedule_plist(exec_path, SCHEDULE_WEEKLY, cmd);
+	return launchctl_schedule_plist(exec_path, SCHEDULE_HOURLY) ||
+	       launchctl_schedule_plist(exec_path, SCHEDULE_DAILY) ||
+	       launchctl_schedule_plist(exec_path, SCHEDULE_WEEKLY);
 }
 
-static int launchctl_update_schedule(int run_maintenance, int fd, const char *cmd)
+static int launchctl_update_schedule(int run_maintenance, int fd)
 {
 	if (run_maintenance)
-		return launchctl_add_plists(cmd);
+		return launchctl_add_plists();
 	else
-		return launchctl_remove_plists(cmd);
+		return launchctl_remove_plists();
+}
+
+static int is_schtasks_available(void)
+{
+	const char *cmd = "schtasks";
+	int is_available;
+	if (get_schedule_cmd(&cmd, &is_available))
+		return is_available;
+
+#ifdef GIT_WINDOWS_NATIVE
+	return 1;
+#else
+	return 0;
+#endif
 }
 
 static char *schtasks_task_name(const char *frequency)
@@ -1702,13 +1806,15 @@ static char *schtasks_task_name(const char *frequency)
 	return strbuf_detach(&label, NULL);
 }
 
-static int schtasks_remove_task(enum schedule_priority schedule, const char *cmd)
+static int schtasks_remove_task(enum schedule_priority schedule)
 {
+	const char *cmd = "schtasks";
 	int result;
 	struct strvec args = STRVEC_INIT;
 	const char *frequency = get_frequency(schedule);
 	char *name = schtasks_task_name(frequency);
 
+	get_schedule_cmd(&cmd, NULL);
 	strvec_split(&args, cmd);
 	strvec_pushl(&args, "/delete", "/tn", name, "/f", NULL);
 
@@ -1719,15 +1825,16 @@ static int schtasks_remove_task(enum schedule_priority schedule, const char *cmd
 	return result;
 }
 
-static int schtasks_remove_tasks(const char *cmd)
+static int schtasks_remove_tasks(void)
 {
-	return schtasks_remove_task(SCHEDULE_HOURLY, cmd) ||
-		schtasks_remove_task(SCHEDULE_DAILY, cmd) ||
-		schtasks_remove_task(SCHEDULE_WEEKLY, cmd);
+	return schtasks_remove_task(SCHEDULE_HOURLY) ||
+	       schtasks_remove_task(SCHEDULE_DAILY) ||
+	       schtasks_remove_task(SCHEDULE_WEEKLY);
 }
 
-static int schtasks_schedule_task(const char *exec_path, enum schedule_priority schedule, const char *cmd)
+static int schtasks_schedule_task(const char *exec_path, enum schedule_priority schedule)
 {
+	const char *cmd = "schtasks";
 	int result;
 	struct child_process child = CHILD_PROCESS_INIT;
 	const char *xml;
@@ -1736,6 +1843,8 @@ static int schtasks_schedule_task(const char *exec_path, enum schedule_priority
 	char *name = schtasks_task_name(frequency);
 	struct strbuf tfilename = STRBUF_INIT;
 
+	get_schedule_cmd(&cmd, NULL);
+
 	strbuf_addf(&tfilename, "%s/schedule_%s_XXXXXX",
 		    get_git_common_dir(), frequency);
 	tfile = xmks_tempfile(tfilename.buf);
@@ -1840,28 +1949,52 @@ static int schtasks_schedule_task(const char *exec_path, enum schedule_priority
 	return result;
 }
 
-static int schtasks_schedule_tasks(const char *cmd)
+static int schtasks_schedule_tasks(void)
 {
 	const char *exec_path = git_exec_path();
 
-	return schtasks_schedule_task(exec_path, SCHEDULE_HOURLY, cmd) ||
-		schtasks_schedule_task(exec_path, SCHEDULE_DAILY, cmd) ||
-		schtasks_schedule_task(exec_path, SCHEDULE_WEEKLY, cmd);
+	return schtasks_schedule_task(exec_path, SCHEDULE_HOURLY) ||
+	       schtasks_schedule_task(exec_path, SCHEDULE_DAILY) ||
+	       schtasks_schedule_task(exec_path, SCHEDULE_WEEKLY);
 }
 
-static int schtasks_update_schedule(int run_maintenance, int fd, const char *cmd)
+static int schtasks_update_schedule(int run_maintenance, int fd)
 {
 	if (run_maintenance)
-		return schtasks_schedule_tasks(cmd);
+		return schtasks_schedule_tasks();
 	else
-		return schtasks_remove_tasks(cmd);
+		return schtasks_remove_tasks();
+}
+
+static int is_crontab_available(void)
+{
+	const char *cmd = "crontab";
+	int is_available;
+	struct child_process child = CHILD_PROCESS_INIT;
+
+	if (get_schedule_cmd(&cmd, &is_available))
+		return is_available;
+
+	strvec_split(&child.args, cmd);
+	strvec_push(&child.args, "-l");
+	child.no_stdin = 1;
+	child.no_stdout = 1;
+	child.no_stderr = 1;
+	child.silent_exec_failure = 1;
+
+	if (start_command(&child))
+		return 0;
+	/* Ignore exit code, as an empty crontab will return error. */
+	finish_command(&child);
+	return 1;
 }
 
 #define BEGIN_LINE "# BEGIN GIT MAINTENANCE SCHEDULE"
 #define END_LINE "# END GIT MAINTENANCE SCHEDULE"
 
-static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd)
+static int crontab_update_schedule(int run_maintenance, int fd)
 {
+	const char *cmd = "crontab";
 	int result = 0;
 	int in_old_region = 0;
 	struct child_process crontab_list = CHILD_PROCESS_INIT;
@@ -1869,6 +2002,7 @@ static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd)
 	FILE *cron_list, *cron_in;
 	struct strbuf line = STRBUF_INIT;
 
+	get_schedule_cmd(&cmd, NULL);
 	strvec_split(&crontab_list.args, cmd);
 	strvec_push(&crontab_list.args, "-l");
 	crontab_list.in = -1;
@@ -1945,66 +2079,160 @@ static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd)
 	return result;
 }
 
+enum scheduler {
+	SCHEDULER_INVALID = -1,
+	SCHEDULER_AUTO,
+	SCHEDULER_CRON,
+	SCHEDULER_LAUNCHCTL,
+	SCHEDULER_SCHTASKS,
+};
+
+static const struct {
+	const char *name;
+	int (*is_available)(void);
+	int (*update_schedule)(int run_maintenance, int fd);
+} scheduler_fn[] = {
+	[SCHEDULER_CRON] = {
+		.name = "crontab",
+		.is_available = is_crontab_available,
+		.update_schedule = crontab_update_schedule,
+	},
+	[SCHEDULER_LAUNCHCTL] = {
+		.name = "launchctl",
+		.is_available = is_launchctl_available,
+		.update_schedule = launchctl_update_schedule,
+	},
+	[SCHEDULER_SCHTASKS] = {
+		.name = "schtasks",
+		.is_available = is_schtasks_available,
+		.update_schedule = schtasks_update_schedule,
+	},
+};
+
+static enum scheduler parse_scheduler(const char *value)
+{
+	if (!value)
+		return SCHEDULER_INVALID;
+	else if (!strcasecmp(value, "auto"))
+		return SCHEDULER_AUTO;
+	else if (!strcasecmp(value, "cron") || !strcasecmp(value, "crontab"))
+		return SCHEDULER_CRON;
+	else if (!strcasecmp(value, "launchctl"))
+		return SCHEDULER_LAUNCHCTL;
+	else if (!strcasecmp(value, "schtasks"))
+		return SCHEDULER_SCHTASKS;
+	else
+		return SCHEDULER_INVALID;
+}
+
+static int maintenance_opt_scheduler(const struct option *opt, const char *arg,
+				     int unset)
+{
+	enum scheduler *scheduler = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+
+	*scheduler = parse_scheduler(arg);
+	if (*scheduler == SCHEDULER_INVALID)
+		return error(_("unrecognized --scheduler argument '%s'"), arg);
+	return 0;
+}
+
+struct maintenance_start_opts {
+	enum scheduler scheduler;
+};
+
+static enum scheduler resolve_scheduler(enum scheduler scheduler)
+{
+	if (scheduler != SCHEDULER_AUTO)
+		return scheduler;
+
 #if defined(__APPLE__)
-static const char platform_scheduler[] = "launchctl";
+	return SCHEDULER_LAUNCHCTL;
+
 #elif defined(GIT_WINDOWS_NATIVE)
-static const char platform_scheduler[] = "schtasks";
+	return SCHEDULER_SCHTASKS;
+
 #else
-static const char platform_scheduler[] = "crontab";
+	return SCHEDULER_CRON;
 #endif
+}
 
-static int update_background_schedule(int enable)
+static void validate_scheduler(enum scheduler scheduler)
 {
-	int result;
-	const char *scheduler = platform_scheduler;
-	const char *cmd = scheduler;
-	char *testing;
+	if (scheduler == SCHEDULER_INVALID)
+		BUG("invalid scheduler");
+	if (scheduler == SCHEDULER_AUTO)
+		BUG("resolve_scheduler should have been called before");
+
+	if (!scheduler_fn[scheduler].is_available())
+		die(_("%s scheduler is not available"),
+		    scheduler_fn[scheduler].name);
+}
+
+static int update_background_schedule(const struct maintenance_start_opts *opts,
+				      int enable)
+{
+	unsigned int i;
+	int result = 0;
 	struct lock_file lk;
 	char *lock_path = xstrfmt("%s/schedule", the_repository->objects->odb->path);
 
-	testing = xstrdup_or_null(getenv("GIT_TEST_MAINT_SCHEDULER"));
-	if (testing) {
-		char *sep = strchr(testing, ':');
-		if (!sep)
-			die("GIT_TEST_MAINT_SCHEDULER unparseable: %s", testing);
-		*sep = '\0';
-		scheduler = testing;
-		cmd = sep + 1;
+	if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) {
+		free(lock_path);
+		return error(_("another process is scheduling background maintenance"));
 	}
 
-	if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) {
-		result = error(_("another process is scheduling background maintenance"));
-		goto cleanup;
+	for (i = 1; i < ARRAY_SIZE(scheduler_fn); i++) {
+		if (enable && opts->scheduler == i)
+			continue;
+		if (!scheduler_fn[i].is_available())
+			continue;
+		scheduler_fn[i].update_schedule(0, get_lock_file_fd(&lk));
 	}
 
-	if (!strcmp(scheduler, "launchctl"))
-		result = launchctl_update_schedule(enable, get_lock_file_fd(&lk), cmd);
-	else if (!strcmp(scheduler, "schtasks"))
-		result = schtasks_update_schedule(enable, get_lock_file_fd(&lk), cmd);
-	else if (!strcmp(scheduler, "crontab"))
-		result = crontab_update_schedule(enable, get_lock_file_fd(&lk), cmd);
-	else
-		die("unknown background scheduler: %s", scheduler);
+	if (enable)
+		result = scheduler_fn[opts->scheduler].update_schedule(
+			1, get_lock_file_fd(&lk));
 
 	rollback_lock_file(&lk);
 
-cleanup:
 	free(lock_path);
-	free(testing);
 	return result;
 }
 
-static int maintenance_start(void)
+static const char *const builtin_maintenance_start_usage[] = {
+	N_("git maintenance start [--scheduler=<scheduler>]"),
+	NULL
+};
+
+static int maintenance_start(int argc, const char **argv, const char *prefix)
 {
+	struct maintenance_start_opts opts = { 0 };
+	struct option options[] = {
+		OPT_CALLBACK_F(
+			0, "scheduler", &opts.scheduler, N_("scheduler"),
+			N_("scheduler to trigger git maintenance run"),
+			PARSE_OPT_NONEG, maintenance_opt_scheduler),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, prefix, options,
+			     builtin_maintenance_start_usage, 0);
+	if (argc)
+		usage_with_options(builtin_maintenance_start_usage, options);
+
+	opts.scheduler = resolve_scheduler(opts.scheduler);
+	validate_scheduler(opts.scheduler);
+
 	if (maintenance_register())
 		warning(_("failed to add repo to global config"));
-
-	return update_background_schedule(1);
+	return update_background_schedule(&opts, 1);
 }
 
 static int maintenance_stop(void)
 {
-	return update_background_schedule(0);
+	return update_background_schedule(NULL, 0);
 }
 
 static const char builtin_maintenance_usage[] =	N_("git maintenance <subcommand> [<options>]");
@@ -2018,7 +2246,7 @@ int cmd_maintenance(int argc, const char **argv, const char *prefix)
 	if (!strcmp(argv[1], "run"))
 		return maintenance_run(argc - 1, argv + 1, prefix);
 	if (!strcmp(argv[1], "start"))
-		return maintenance_start();
+		return maintenance_start(argc - 1, argv + 1, prefix);
 	if (!strcmp(argv[1], "stop"))
 		return maintenance_stop();
 	if (!strcmp(argv[1], "register"))
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
index b93ae014ee58f9..b36b7f5fb0f21e 100755
--- a/t/t7900-maintenance.sh
+++ b/t/t7900-maintenance.sh
@@ -494,8 +494,21 @@ test_expect_success !MINGW 'register and unregister with regex metacharacters' '
 		maintenance.repo "$(pwd)/$META"
 '
 
+test_expect_success 'start --scheduler=<scheduler>' '
+	test_expect_code 129 git maintenance start --scheduler=foo 2>err &&
+	test_i18ngrep "unrecognized --scheduler argument" err &&
+
+	test_expect_code 129 git maintenance start --no-scheduler 2>err &&
+	test_i18ngrep "unknown option" err &&
+
+	test_expect_code 128 \
+		env GIT_TEST_MAINT_SCHEDULER="launchctl:true,schtasks:true" \
+		git maintenance start --scheduler=crontab 2>err &&
+	test_i18ngrep "fatal: crontab scheduler is not available" err
+'
+
 test_expect_success 'start from empty cron table' '
-	GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start &&
+	GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start --scheduler=crontab &&
 
 	# start registers the repo
 	git config --get --global --fixed-value maintenance.repo "$(pwd)" &&
@@ -518,7 +531,7 @@ test_expect_success 'stop from existing schedule' '
 
 test_expect_success 'start preserves existing schedule' '
 	echo "Important information!" >cron.txt &&
-	GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start &&
+	GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start --scheduler=crontab &&
 	grep "Important information!" cron.txt
 '
 
@@ -547,7 +560,7 @@ test_expect_success 'start and stop macOS maintenance' '
 	EOF
 
 	rm -f args &&
-	GIT_TEST_MAINT_SCHEDULER=launchctl:./print-args git maintenance start &&
+	GIT_TEST_MAINT_SCHEDULER=launchctl:./print-args git maintenance start --scheduler=launchctl &&
 
 	# start registers the repo
 	git config --get --global --fixed-value maintenance.repo "$(pwd)" &&
@@ -598,7 +611,7 @@ test_expect_success 'start and stop Windows maintenance' '
 	EOF
 
 	rm -f args &&
-	GIT_TEST_MAINT_SCHEDULER="schtasks:./print-args" git maintenance start &&
+	GIT_TEST_MAINT_SCHEDULER="schtasks:./print-args" git maintenance start --scheduler=schtasks &&
 
 	# start registers the repo
 	git config --get --global --fixed-value maintenance.repo "$(pwd)" &&
@@ -621,6 +634,40 @@ test_expect_success 'start and stop Windows maintenance' '
 	test_cmp expect args
 '
 
+test_expect_success 'start and stop when several schedulers are available' '
+	write_script print-args <<-\EOF &&
+	printf "%s\n" "$*" | sed "s:gui/[0-9][0-9]*:gui/[UID]:; s:\(schtasks /create .* /xml\).*:\1:;" >>args
+	EOF
+
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl &&
+	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
+		hourly daily weekly >expect &&
+	for frequency in hourly daily weekly
+	do
+		PLIST="$pfx/Library/LaunchAgents/org.git-scm.git.$frequency.plist" &&
+		echo "launchctl bootout gui/[UID] $PLIST" >>expect &&
+		echo "launchctl bootstrap gui/[UID] $PLIST" >>expect || return 1
+	done &&
+	test_cmp expect args &&
+
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks &&
+	printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \
+		hourly daily weekly >expect &&
+	printf "schtasks /create /tn Git Maintenance (%s) /f /xml\n" \
+		hourly daily weekly >>expect &&
+	test_cmp expect args &&
+
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop &&
+	printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \
+		hourly daily weekly >expect &&
+	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
+		hourly daily weekly >>expect &&
+	test_cmp expect args
+'
+
 test_expect_success 'register preserves existing strategy' '
 	git config maintenance.strategy none &&
 	git maintenance register &&

From 54aef5b267747683f5fce7bd4cfa6ea13a7be0d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= <lenaic@lhuard.fr>
Date: Fri, 2 Jul 2021 16:25:56 +0200
Subject: [PATCH 022/198] maintenance: add support for systemd timers on Linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing mechanism for scheduling background maintenance is done
through cron. On Linux systems managed by systemd, systemd provides an
alternative to schedule recurring tasks: systemd timers.

The main motivations to implement systemd timers in addition to cron
are:
* cron is optional and Linux systems running systemd might not have it
  installed.
* The execution of `crontab -l` can tell us if cron is installed but not
  if the daemon is actually running.
* With systemd, each service is run in its own cgroup and its logs are
  tagged by the service inside journald. With cron, all scheduled tasks
  are running in the cron daemon cgroup and all the logs of the
  user-scheduled tasks are pretended to belong to the system cron
  service.
  Concretely, a user that doesn’t have access to the system logs won’t
  have access to the log of their own tasks scheduled by cron whereas
  they will have access to the log of their own tasks scheduled by
  systemd timer.
  Although `cron` attempts to send email, that email may go unseen by
  the user because these days, local mailboxes are not heavily used
  anymore.

In order to schedule git maintenance, we need two unit template files:
* ~/.config/systemd/user/git-maintenance@.service
  to define the command to be started by systemd and
* ~/.config/systemd/user/git-maintenance@.timer
  to define the schedule at which the command should be run.

Those units are templates that are parameterized by the frequency.

Based on those templates, 3 timers are started:
* git-maintenance@hourly.timer
* git-maintenance@daily.timer
* git-maintenance@weekly.timer

The command launched by those three timers are the same as with the
other scheduling methods:

/path/to/git for-each-repo --exec-path=/path/to
--config=maintenance.repo maintenance run --schedule=%i

with the full path for git to ensure that the version of git launched
for the scheduled maintenance is the same as the one used to run
`maintenance start`.

The timer unit contains `Persistent=true` so that, if the computer is
powered down when a maintenance task should run, the task will be run
when the computer is back powered on.

Signed-off-by: Lénaïc Huard <lenaic@lhuard.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-maintenance.txt |  58 +++++++-
 builtin/gc.c                      | 227 ++++++++++++++++++++++++++++++
 t/t7900-maintenance.sh            |  67 ++++++++-
 3 files changed, 341 insertions(+), 11 deletions(-)

diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt
index 576290b5c61175..e2cfb68ab57907 100644
--- a/Documentation/git-maintenance.txt
+++ b/Documentation/git-maintenance.txt
@@ -179,14 +179,16 @@ OPTIONS
 	`maintenance.<task>.enabled` configured as `true` are considered.
 	See the 'TASKS' section for the list of accepted `<task>` values.
 
---scheduler=auto|crontab|launchctl|schtasks::
+--scheduler=auto|crontab|systemd-timer|launchctl|schtasks::
 	When combined with the `start` subcommand, specify the scheduler
 	for running the hourly, daily and weekly executions of
 	`git maintenance run`.
-	Possible values for `<scheduler>` are `auto`, `crontab` (POSIX),
-	`launchctl` (macOS), and `schtasks` (Windows).
-	When `auto` is specified, the appropriate platform-specific
-	scheduler is used. Default is `auto`.
+	Possible values for `<scheduler>` are `auto`, `crontab`
+	(POSIX), `systemd-timer` (Linux), `launchctl` (macOS), and
+	`schtasks` (Windows). When `auto` is specified, the
+	appropriate platform-specific scheduler is used; on Linux,
+	`systemd-timer` is used if available, otherwise
+	`crontab`. Default is `auto`.
 
 
 TROUBLESHOOTING
@@ -286,6 +288,52 @@ schedule to ensure you are executing the correct binaries in your
 schedule.
 
 
+BACKGROUND MAINTENANCE ON LINUX SYSTEMD SYSTEMS
+-----------------------------------------------
+
+While Linux supports `cron`, depending on the distribution, `cron` may
+be an optional package not necessarily installed. On modern Linux
+distributions, systemd timers are superseding it.
+
+If user systemd timers are available, they will be used as a replacement
+of `cron`.
+
+In this case, `git maintenance start` will create user systemd timer units
+and start the timers. The current list of user-scheduled tasks can be found
+by running `systemctl --user list-timers`. The timers written by `git
+maintenance start` are similar to this:
+
+-----------------------------------------------------------------------
+$ systemctl --user list-timers
+NEXT                         LEFT          LAST                         PASSED     UNIT                         ACTIVATES
+Thu 2021-04-29 19:00:00 CEST 42min left    Thu 2021-04-29 18:00:11 CEST 17min ago  git-maintenance@hourly.timer git-maintenance@hourly.service
+Fri 2021-04-30 00:00:00 CEST 5h 42min left Thu 2021-04-29 00:00:11 CEST 18h ago    git-maintenance@daily.timer  git-maintenance@daily.service
+Mon 2021-05-03 00:00:00 CEST 3 days left   Mon 2021-04-26 00:00:11 CEST 3 days ago git-maintenance@weekly.timer git-maintenance@weekly.service
+-----------------------------------------------------------------------
+
+One timer is registered for each `--schedule=<frequency>` option.
+
+The definition of the systemd units can be inspected in the following files:
+
+-----------------------------------------------------------------------
+~/.config/systemd/user/git-maintenance@.timer
+~/.config/systemd/user/git-maintenance@.service
+~/.config/systemd/user/timers.target.wants/git-maintenance@hourly.timer
+~/.config/systemd/user/timers.target.wants/git-maintenance@daily.timer
+~/.config/systemd/user/timers.target.wants/git-maintenance@weekly.timer
+-----------------------------------------------------------------------
+
+`git maintenance start` will overwrite these files and start the timer
+again with `systemctl --user`, so any customization should be done by
+creating a drop-in file, i.e. a `.conf` suffixed file in the
+`~/.config/systemd/user/git-maintenance@.service.d` directory.
+
+`git maintenance stop` will stop the user systemd timers and delete
+the above mentioned files.
+
+For more details, see `systemd.timer(5)`.
+
+
 BACKGROUND MAINTENANCE ON MACOS SYSTEMS
 ---------------------------------------
 
diff --git a/builtin/gc.c b/builtin/gc.c
index 96a43f99b47eed..6698f885df7234 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -2079,10 +2079,221 @@ static int crontab_update_schedule(int run_maintenance, int fd)
 	return result;
 }
 
+#ifdef __linux__
+
+static int real_is_systemd_timer_available(void)
+{
+	struct child_process child = CHILD_PROCESS_INIT;
+
+	strvec_pushl(&child.args, "systemctl", "--user", "list-timers", NULL);
+	child.no_stdin = 1;
+	child.no_stdout = 1;
+	child.no_stderr = 1;
+	child.silent_exec_failure = 1;
+
+	if (start_command(&child))
+		return 0;
+	if (finish_command(&child))
+		return 0;
+	return 1;
+}
+
+#else
+
+static int real_is_systemd_timer_available(void)
+{
+	return 0;
+}
+
+#endif
+
+static int is_systemd_timer_available(void)
+{
+	const char *cmd = "systemctl";
+	int is_available;
+
+	if (get_schedule_cmd(&cmd, &is_available))
+		return is_available;
+
+	return real_is_systemd_timer_available();
+}
+
+static char *xdg_config_home_systemd(const char *filename)
+{
+	return xdg_config_home_for("systemd/user", filename);
+}
+
+static int systemd_timer_enable_unit(int enable,
+				     enum schedule_priority schedule)
+{
+	const char *cmd = "systemctl";
+	struct child_process child = CHILD_PROCESS_INIT;
+	const char *frequency = get_frequency(schedule);
+
+	/*
+	 * Disabling the systemd unit while it is already disabled makes
+	 * systemctl print an error.
+	 * Let's ignore it since it means we already are in the expected state:
+	 * the unit is disabled.
+	 *
+	 * On the other hand, enabling a systemd unit which is already enabled
+	 * produces no error.
+	 */
+	if (!enable)
+		child.no_stderr = 1;
+
+	get_schedule_cmd(&cmd, NULL);
+	strvec_split(&child.args, cmd);
+	strvec_pushl(&child.args, "--user", enable ? "enable" : "disable",
+		     "--now", NULL);
+	strvec_pushf(&child.args, "git-maintenance@%s.timer", frequency);
+
+	if (start_command(&child))
+		return error(_("failed to start systemctl"));
+	if (finish_command(&child))
+		/*
+		 * Disabling an already disabled systemd unit makes
+		 * systemctl fail.
+		 * Let's ignore this failure.
+		 *
+		 * Enabling an enabled systemd unit doesn't fail.
+		 */
+		if (enable)
+			return error(_("failed to run systemctl"));
+	return 0;
+}
+
+static int systemd_timer_delete_unit_templates(void)
+{
+	int ret = 0;
+	char *filename = xdg_config_home_systemd("git-maintenance@.timer");
+	if (unlink(filename) && !is_missing_file_error(errno))
+		ret = error_errno(_("failed to delete '%s'"), filename);
+	FREE_AND_NULL(filename);
+
+	filename = xdg_config_home_systemd("git-maintenance@.service");
+	if (unlink(filename) && !is_missing_file_error(errno))
+		ret = error_errno(_("failed to delete '%s'"), filename);
+
+	free(filename);
+	return ret;
+}
+
+static int systemd_timer_delete_units(void)
+{
+	return systemd_timer_enable_unit(0, SCHEDULE_HOURLY) ||
+	       systemd_timer_enable_unit(0, SCHEDULE_DAILY) ||
+	       systemd_timer_enable_unit(0, SCHEDULE_WEEKLY) ||
+	       systemd_timer_delete_unit_templates();
+}
+
+static int systemd_timer_write_unit_templates(const char *exec_path)
+{
+	char *filename;
+	FILE *file;
+	const char *unit;
+
+	filename = xdg_config_home_systemd("git-maintenance@.timer");
+	if (safe_create_leading_directories(filename)) {
+		error(_("failed to create directories for '%s'"), filename);
+		goto error;
+	}
+	file = fopen_or_warn(filename, "w");
+	if (file == NULL)
+		goto error;
+
+	unit = "# This file was created and is maintained by Git.\n"
+	       "# Any edits made in this file might be replaced in the future\n"
+	       "# by a Git command.\n"
+	       "\n"
+	       "[Unit]\n"
+	       "Description=Optimize Git repositories data\n"
+	       "\n"
+	       "[Timer]\n"
+	       "OnCalendar=%i\n"
+	       "Persistent=true\n"
+	       "\n"
+	       "[Install]\n"
+	       "WantedBy=timers.target\n";
+	if (fputs(unit, file) == EOF) {
+		error(_("failed to write to '%s'"), filename);
+		fclose(file);
+		goto error;
+	}
+	if (fclose(file) == EOF) {
+		error_errno(_("failed to flush '%s'"), filename);
+		goto error;
+	}
+	free(filename);
+
+	filename = xdg_config_home_systemd("git-maintenance@.service");
+	file = fopen_or_warn(filename, "w");
+	if (file == NULL)
+		goto error;
+
+	unit = "# This file was created and is maintained by Git.\n"
+	       "# Any edits made in this file might be replaced in the future\n"
+	       "# by a Git command.\n"
+	       "\n"
+	       "[Unit]\n"
+	       "Description=Optimize Git repositories data\n"
+	       "\n"
+	       "[Service]\n"
+	       "Type=oneshot\n"
+	       "ExecStart=\"%s/git\" --exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%%i\n"
+	       "LockPersonality=yes\n"
+	       "MemoryDenyWriteExecute=yes\n"
+	       "NoNewPrivileges=yes\n"
+	       "RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6\n"
+	       "RestrictNamespaces=yes\n"
+	       "RestrictRealtime=yes\n"
+	       "RestrictSUIDSGID=yes\n"
+	       "SystemCallArchitectures=native\n"
+	       "SystemCallFilter=@system-service\n";
+	if (fprintf(file, unit, exec_path, exec_path) < 0) {
+		error(_("failed to write to '%s'"), filename);
+		fclose(file);
+		goto error;
+	}
+	if (fclose(file) == EOF) {
+		error_errno(_("failed to flush '%s'"), filename);
+		goto error;
+	}
+	free(filename);
+	return 0;
+
+error:
+	free(filename);
+	systemd_timer_delete_unit_templates();
+	return -1;
+}
+
+static int systemd_timer_setup_units(void)
+{
+	const char *exec_path = git_exec_path();
+
+	int ret = systemd_timer_write_unit_templates(exec_path) ||
+		  systemd_timer_enable_unit(1, SCHEDULE_HOURLY) ||
+		  systemd_timer_enable_unit(1, SCHEDULE_DAILY) ||
+		  systemd_timer_enable_unit(1, SCHEDULE_WEEKLY);
+	if (ret)
+		systemd_timer_delete_units();
+	return ret;
+}
+
+static int systemd_timer_update_schedule(int run_maintenance, int fd)
+{
+	if (run_maintenance)
+		return systemd_timer_setup_units();
+	else
+		return systemd_timer_delete_units();
+}
+
 enum scheduler {
 	SCHEDULER_INVALID = -1,
 	SCHEDULER_AUTO,
 	SCHEDULER_CRON,
+	SCHEDULER_SYSTEMD,
 	SCHEDULER_LAUNCHCTL,
 	SCHEDULER_SCHTASKS,
 };
@@ -2097,6 +2308,11 @@ static const struct {
 		.is_available = is_crontab_available,
 		.update_schedule = crontab_update_schedule,
 	},
+	[SCHEDULER_SYSTEMD] = {
+		.name = "systemctl",
+		.is_available = is_systemd_timer_available,
+		.update_schedule = systemd_timer_update_schedule,
+	},
 	[SCHEDULER_LAUNCHCTL] = {
 		.name = "launchctl",
 		.is_available = is_launchctl_available,
@@ -2117,6 +2333,9 @@ static enum scheduler parse_scheduler(const char *value)
 		return SCHEDULER_AUTO;
 	else if (!strcasecmp(value, "cron") || !strcasecmp(value, "crontab"))
 		return SCHEDULER_CRON;
+	else if (!strcasecmp(value, "systemd") ||
+		 !strcasecmp(value, "systemd-timer"))
+		return SCHEDULER_SYSTEMD;
 	else if (!strcasecmp(value, "launchctl"))
 		return SCHEDULER_LAUNCHCTL;
 	else if (!strcasecmp(value, "schtasks"))
@@ -2153,6 +2372,14 @@ static enum scheduler resolve_scheduler(enum scheduler scheduler)
 #elif defined(GIT_WINDOWS_NATIVE)
 	return SCHEDULER_SCHTASKS;
 
+#elif defined(__linux__)
+	if (is_systemd_timer_available())
+		return SCHEDULER_SYSTEMD;
+	else if (is_crontab_available())
+		return SCHEDULER_CRON;
+	else
+		die(_("neither systemd timers nor crontab are available"));
+
 #else
 	return SCHEDULER_CRON;
 #endif
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
index b36b7f5fb0f21e..b289cae6b9e20b 100755
--- a/t/t7900-maintenance.sh
+++ b/t/t7900-maintenance.sh
@@ -20,6 +20,18 @@ test_xmllint () {
 	fi
 }
 
+test_lazy_prereq SYSTEMD_ANALYZE '
+	systemd-analyze --help >out &&
+	grep verify out
+'
+
+test_systemd_analyze_verify () {
+	if test_have_prereq SYSTEMD_ANALYZE
+	then
+		systemd-analyze verify "$@"
+	fi
+}
+
 test_expect_success 'help text' '
 	test_expect_code 129 git maintenance -h 2>err &&
 	test_i18ngrep "usage: git maintenance <subcommand>" err &&
@@ -634,15 +646,56 @@ test_expect_success 'start and stop Windows maintenance' '
 	test_cmp expect args
 '
 
+test_expect_success 'start and stop Linux/systemd maintenance' '
+	write_script print-args <<-\EOF &&
+	printf "%s\n" "$*" >>args
+	EOF
+
+	XDG_CONFIG_HOME="$PWD" &&
+	export XDG_CONFIG_HOME &&
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args" git maintenance start --scheduler=systemd-timer &&
+
+	# start registers the repo
+	git config --get --global --fixed-value maintenance.repo "$(pwd)" &&
+
+	test_systemd_analyze_verify "systemd/user/git-maintenance@.service" &&
+
+	printf -- "--user enable --now git-maintenance@%s.timer\n" hourly daily weekly >expect &&
+	test_cmp expect args &&
+
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args" git maintenance stop &&
+
+	# stop does not unregister the repo
+	git config --get --global --fixed-value maintenance.repo "$(pwd)" &&
+
+	test_path_is_missing "systemd/user/git-maintenance@.timer" &&
+	test_path_is_missing "systemd/user/git-maintenance@.service" &&
+
+	printf -- "--user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect &&
+	test_cmp expect args
+'
+
 test_expect_success 'start and stop when several schedulers are available' '
 	write_script print-args <<-\EOF &&
 	printf "%s\n" "$*" | sed "s:gui/[0-9][0-9]*:gui/[UID]:; s:\(schtasks /create .* /xml\).*:\1:;" >>args
 	EOF
 
 	rm -f args &&
-	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl &&
-	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=systemd-timer &&
+	printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \
 		hourly daily weekly >expect &&
+	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
+		hourly daily weekly >>expect &&
+	printf -- "systemctl --user enable --now git-maintenance@%s.timer\n" hourly daily weekly >>expect &&
+	test_cmp expect args &&
+
+	rm -f args &&
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl &&
+	printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect &&
+	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
+		hourly daily weekly >>expect &&
 	for frequency in hourly daily weekly
 	do
 		PLIST="$pfx/Library/LaunchAgents/org.git-scm.git.$frequency.plist" &&
@@ -652,17 +705,19 @@ test_expect_success 'start and stop when several schedulers are available' '
 	test_cmp expect args &&
 
 	rm -f args &&
-	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks &&
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks &&
+	printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect &&
 	printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \
-		hourly daily weekly >expect &&
+		hourly daily weekly >>expect &&
 	printf "schtasks /create /tn Git Maintenance (%s) /f /xml\n" \
 		hourly daily weekly >>expect &&
 	test_cmp expect args &&
 
 	rm -f args &&
-	GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop &&
+	GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop &&
+	printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect &&
 	printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \
-		hourly daily weekly >expect &&
+		hourly daily weekly >>expect &&
 	printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \
 		hourly daily weekly >>expect &&
 	test_cmp expect args

From 8afdce2d1ef6ed56f10c08dfd1e2df2c38682796 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Thu, 1 Jul 2021 22:51:17 +0000
Subject: [PATCH 023/198] submodule: mark submodules with update=none as
 inactive

When the user recursively clones a repository with submodules and one or
more of those submodules is marked with the submodule.<name>.update=none
configuration, the submodule will end up being active.  This is a
problem because we will have skipped cloning or checking out the
submodule, and as a result, other commands, such as git reset or git
checkout, will fail if they are invoked with --recurse-submodules (or
when submodule.recurse is true).

This is obviously not the behavior the user wanted, so let's fix this by
specifically setting the submodule as inactive in this case when we're
initializing the repository.  That will make us properly ignore the
submodule when performing recursive operations.

We only do this when initializing a submodule, since git submodule
update can update the submodule with various options despite the setting
of "none" and we want those options to override it as they currently do.

Reported-by: Rose Kunkel <rose@rosekunkel.me>
Helped-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/submodule--helper.c |  6 ++++++
 t/t5601-clone.sh            | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index d55f6262e9c93b..c8cd2f85249c05 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -686,6 +686,12 @@ static void init_submodule(const char *path, const char *prefix,
 
 		if (git_config_set_gently(sb.buf, upd))
 			die(_("Failed to register update mode for submodule path '%s'"), displaypath);
+
+		if (sub->update_strategy.type == SM_UPDATE_NONE) {
+			strbuf_reset(&sb);
+			strbuf_addf(&sb, "submodule.%s.active", sub->name);
+			git_config_set_gently(sb.buf, "false");
+		}
 	}
 	strbuf_release(&sb);
 	free(displaypath);
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index c0688467e7c099..efe6b13be06355 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -752,6 +752,30 @@ test_expect_success 'batch missing blob request does not inadvertently try to fe
 	git clone --filter=blob:limit=0 "file://$(pwd)/server" client
 '
 
+test_expect_success 'clone with submodule with update=none is not active' '
+	rm -rf server client &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+
+	echo aa >server/a &&
+	echo bb >server/b &&
+	git -C server submodule add --name c "$(pwd)/repo_for_submodule" c &&
+	git -C server config -f .gitmodules submodule.c.update none &&
+	git -C server add a b c .gitmodules &&
+	git -C server commit -m x &&
+
+	git clone --recurse-submodules server client &&
+	git -C client config submodule.c.active >actual &&
+	echo false >expected &&
+	test_cmp actual expected &&
+	# This would fail if the submodule were active, since it is not checked out.
+	git -C client reset --recurse-submodules --hard
+'
+
 . "$TEST_DIRECTORY"/lib-httpd.sh
 start_httpd
 

From 5e0fa8cbbfc89ec1226e9ccc4dc8f714ebb507b6 Mon Sep 17 00:00:00 2001
From: Clemens Fruhwirth <clemens@endorphin.org>
Date: Tue, 6 Jul 2021 18:22:38 +0200
Subject: [PATCH 024/198] fetch: fix segfault on --set-upstream while on a
 detached HEAD

branch_get("HEAD") can return NULL, when HEAD is detached, and cause
the code to segfault when the user calls "git pull --set-upstream".

Catch this case and warn the user to avoid a segfault.

Signed-off-by: Clemens Fruhwirth <clemens@endorphin.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fetch.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index dfde96a4354c14..d8f634be321fcc 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1602,6 +1602,10 @@ static int do_fetch(struct transport *transport,
 		struct ref *rm;
 		struct ref *source_ref = NULL;
 
+		if (!branch) {
+			warning(_("not on a branch to use --set-upstream with"));
+			goto skip;
+		}
 		/*
 		 * We're setting the upstream configuration for the
 		 * current branch. The relevant upstream is the

From 7e378520a6b7460d6b924ec862c5bc94d02f60c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Fri, 9 Jul 2021 13:06:13 +0200
Subject: [PATCH 025/198] revision.h: refactor "disable_stdin" and
 "read_from_stdin"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the two "disable_stdin" and "read_from_stdin" flags to an enum,
in preparation for a subsequent commit adding more flags.

The interaction between these is more subtle than they might appear at
first sight, as noted in a12cbe23ef7. "read_stdin" is not the inverse
of "disable_stdin", rather we read stdin if we see the "--stdin"
option.

The "read" is intended to understood as "I've read it already", not
"you should read it". Let's avoid this confusion by using "consume"
and "consumed" instead, i.e. a word whose present and past tense isn't
the same.

See 8b3dce56508 (Teach --stdin option to "log" family, 2009-11-03)
where we added the "disable_stdin" flag, and a12cbe23ef7 (rev-list:
make empty --stdin not an error, 2018-08-22) for the addition of the
"read_from_stdin" flag.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/am.c        |  4 ++--
 builtin/blame.c     |  2 +-
 builtin/diff-tree.c |  2 +-
 builtin/rev-list.c  |  2 +-
 revision.c          | 13 ++++++++-----
 revision.h          | 23 ++++++++++++++++++++---
 sequencer.c         |  4 ++--
 7 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 0b2d886c81b775..3a6c8455b47f71 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -1355,7 +1355,7 @@ static void write_commit_patch(const struct am_state *state, struct commit *comm
 	repo_init_revisions(the_repository, &rev_info, NULL);
 	rev_info.diff = 1;
 	rev_info.abbrev = 0;
-	rev_info.disable_stdin = 1;
+	rev_info.stdin_handling = REV_INFO_STDIN_IGNORE;
 	rev_info.show_root_diff = 1;
 	rev_info.diffopt.output_format = DIFF_FORMAT_PATCH;
 	rev_info.no_commit_id = 1;
@@ -1390,7 +1390,7 @@ static void write_index_patch(const struct am_state *state)
 	fp = xfopen(am_path(state, "patch"), "w");
 	repo_init_revisions(the_repository, &rev_info, NULL);
 	rev_info.diff = 1;
-	rev_info.disable_stdin = 1;
+	rev_info.stdin_handling = REV_INFO_STDIN_IGNORE;
 	rev_info.no_commit_id = 1;
 	rev_info.diffopt.output_format = DIFF_FORMAT_PATCH;
 	rev_info.diffopt.use_color = 0;
diff --git a/builtin/blame.c b/builtin/blame.c
index 641523ff9af693..c9f66c58c46034 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1061,7 +1061,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 		argv[argc - 1] = "--";
 	}
 
-	revs.disable_stdin = 1;
+	revs.stdin_handling = REV_INFO_STDIN_IGNORE;
 	setup_revisions(argc, argv, &revs, NULL);
 	if (!revs.pending.nr && is_bare_repository()) {
 		struct commit *head_commit;
diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index f33d30d57bff2e..fc548ebe16671c 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -122,7 +122,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
 		die(_("index file corrupt"));
 	opt->abbrev = 0;
 	opt->diff = 1;
-	opt->disable_stdin = 1;
+	opt->stdin_handling = REV_INFO_STDIN_IGNORE;
 	memset(&s_r_opt, 0, sizeof(s_r_opt));
 	s_r_opt.tweak = diff_tree_tweak_rev;
 
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 7677b1af5a45fc..524632ba328d69 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -651,7 +651,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 	if ((!revs.commits && reflog_walk_empty(revs.reflog_info) &&
 	     (!(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
 	      !revs.pending.nr) &&
-	     !revs.rev_input_given && !revs.read_from_stdin) ||
+	     !revs.rev_input_given && !revs.consumed_stdin) ||
 	    revs.diff)
 		usage(rev_list_usage);
 
diff --git a/revision.c b/revision.c
index 8140561b6c7e23..50909339a592aa 100644
--- a/revision.c
+++ b/revision.c
@@ -2741,14 +2741,17 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			}
 
 			if (!strcmp(arg, "--stdin")) {
-				if (revs->disable_stdin) {
+				switch (revs->stdin_handling) {
+				case REV_INFO_STDIN_IGNORE:
 					argv[left++] = arg;
 					continue;
+				case REV_INFO_STDIN_CONSUME_ON_OPTION:
+					if (revs->consumed_stdin)
+						die("--stdin given twice?");
+					read_revisions_from_stdin(revs, &prune_data);
+					revs->consumed_stdin = 1;
+					continue;
 				}
-				if (revs->read_from_stdin++)
-					die("--stdin given twice?");
-				read_revisions_from_stdin(revs, &prune_data);
-				continue;
 			}
 
 			if (!strcmp(arg, "--end-of-options")) {
diff --git a/revision.h b/revision.h
index 93aa012f518eb0..0598220831ce8a 100644
--- a/revision.h
+++ b/revision.h
@@ -86,6 +86,11 @@ struct rev_cmdline_info {
 struct oidset;
 struct topo_walk_info;
 
+enum rev_info_stdin {
+	REV_INFO_STDIN_CONSUME_ON_OPTION,
+	REV_INFO_STDIN_IGNORE,
+};
+
 struct rev_info {
 	/* Starting list */
 	struct commit_list *commits;
@@ -114,9 +119,22 @@ struct rev_info {
 	int rev_input_given;
 
 	/*
-	 * Whether we read from stdin due to the --stdin option.
+	 * How should we handle seeing --stdin?
+	 *
+	 * Defaults to REV_INFO_STDIN_CONSUME_ON_OPTION where we'll
+	 * attempt to read it if we see the --stdin option.
+	 *
+	 * Can be set to REV_INFO_STDIN_IGNORE to ignore the --stdin
+	 * option.
+	 */
+	enum rev_info_stdin stdin_handling;
+
+	/*
+	 * Did we read from stdin due to stdin_handling ==
+	 * REV_INFO_STDIN_CONSUME_ON_OPTION and seeing the --stdin
+	 * option?
 	 */
-	int read_from_stdin;
+	unsigned int consumed_stdin:1;
 
 	/* topo-sort */
 	enum rev_sort_order sort_order;
@@ -216,7 +234,6 @@ struct rev_info {
 			date_mode_explicit:1,
 			preserve_subject:1,
 			encode_email_headers:1;
-	unsigned int	disable_stdin:1;
 	/* --show-linear-break */
 	unsigned int	track_linear:1,
 			track_first_time:1,
diff --git a/sequencer.c b/sequencer.c
index 0bec01cf38e817..4e73bd79d69a3b 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -3377,7 +3377,7 @@ static int make_patch(struct repository *r,
 	log_tree_opt.abbrev = 0;
 	log_tree_opt.diff = 1;
 	log_tree_opt.diffopt.output_format = DIFF_FORMAT_PATCH;
-	log_tree_opt.disable_stdin = 1;
+	log_tree_opt.stdin_handling = REV_INFO_STDIN_IGNORE;
 	log_tree_opt.no_commit_id = 1;
 	log_tree_opt.diffopt.file = fopen(buf.buf, "w");
 	log_tree_opt.diffopt.use_color = GIT_COLOR_NEVER;
@@ -4513,7 +4513,7 @@ static int pick_commits(struct repository *r,
 			log_tree_opt.diff = 1;
 			log_tree_opt.diffopt.output_format =
 				DIFF_FORMAT_DIFFSTAT;
-			log_tree_opt.disable_stdin = 1;
+			log_tree_opt.stdin_handling = REV_INFO_STDIN_IGNORE;
 
 			if (read_oneliner(&buf, rebase_path_orig_head(), 0) &&
 			    !get_oid(buf.buf, &orig) &&

From 92a32304a32caa3b759a3e04c565c21b1cbdc7cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Fri, 9 Jul 2021 13:06:14 +0200
Subject: [PATCH 026/198] revision.[ch]: add a "handle_stdin_line" API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the rev_info stdin parsing API to support hooking into its
read_revisions_from_stdin() function, in the next commit we'll change
the the custom stdin parsing in pack-objects.c to use it..

For that use-case adding API is barely justified. We'll be able to
make the handle_revision_arg() static in exchange for it, and we'll
avoid the duplicate dance around setting "save_warning" and
"warn_on_object_refname_ambiguity", but we could just continue to do
that ourselves in builtin/pack-objects.c

The real reason to add this is for a change not part of this
series. We'll soon teach "git bundle create" to accept
revision/refname pairs on stdin, and thus do away with the limitation
of it being impossible to create bundles with ref tips that don't
correspond to the state of the current repository. I.e. this will
work:

    $ printf "e83c5163316f89bfbde7\trefs/heads/first-git-dot-git-commit\n" |
    git bundle create initial.bundle --stdin

As well as things like:

    $ git for-each-ref 'refs/remotes/origin' |
    sed 's!\trefs/remotes/origin/!\trefs/heads/!' |
    git bundle create origin.bundle --stdin

In order to do that we'll need to modify the lines we consume on stdin
revision.c (which bundle.c uses already), and be assured that that
stripping extra bundle-specific data from them is the only change in
behavior.

That change will be much more complex if bundle.c needs to start doing
its own stdin parsing again outside of revision.c, it was recently
converted to use revision.c's parsing in 5bb0fd2cab5 (bundle:
arguments can be read from stdin, 2021-01-11)

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 revision.c | 22 ++++++++++++++++++++++
 revision.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/revision.c b/revision.c
index 50909339a592aa..3f6ab834aff4a5 100644
--- a/revision.c
+++ b/revision.c
@@ -2119,6 +2119,19 @@ static void read_revisions_from_stdin(struct rev_info *revs,
 		int len = sb.len;
 		if (!len)
 			break;
+
+		if (revs->handle_stdin_line) {
+			enum rev_info_stdin_line ret = revs->handle_stdin_line(
+				revs, &sb, revs->stdin_line_priv);
+
+			switch (ret) {
+			case REV_INFO_STDIN_LINE_PROCESS:
+				break;
+			case REV_INFO_STDIN_LINE_CONTINUE:
+				continue;
+			}
+		}
+
 		if (sb.buf[0] == '-') {
 			if (len == 2 && sb.buf[1] == '-') {
 				seen_dashdash = 1;
@@ -2742,6 +2755,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 
 			if (!strcmp(arg, "--stdin")) {
 				switch (revs->stdin_handling) {
+				case REV_INFO_STDIN_ALWAYS_READ:
 				case REV_INFO_STDIN_IGNORE:
 					argv[left++] = arg;
 					continue;
@@ -2790,6 +2804,14 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 		}
 	}
 
+	/*
+	 * We're asked to ALWAYS_READ from stdin, but no --stdin
+	 * option (or "consumed_stdin" would be set).
+	 */
+	if (!revs->consumed_stdin &&
+	    revs->stdin_handling == REV_INFO_STDIN_ALWAYS_READ)
+		read_revisions_from_stdin(revs, &prune_data);
+
 	if (prune_data.nr) {
 		/*
 		 * If we need to introduce the magic "a lone ':' means no
diff --git a/revision.h b/revision.h
index 0598220831ce8a..93b6a02cf020ce 100644
--- a/revision.h
+++ b/revision.h
@@ -89,8 +89,17 @@ struct topo_walk_info;
 enum rev_info_stdin {
 	REV_INFO_STDIN_CONSUME_ON_OPTION,
 	REV_INFO_STDIN_IGNORE,
+	REV_INFO_STDIN_ALWAYS_READ,
 };
 
+enum rev_info_stdin_line {
+	REV_INFO_STDIN_LINE_PROCESS,
+	REV_INFO_STDIN_LINE_CONTINUE,
+};
+
+typedef enum rev_info_stdin_line (*rev_info_stdin_line_func)(
+	struct rev_info *revs, struct strbuf *line, void *stdin_line_priv);
+
 struct rev_info {
 	/* Starting list */
 	struct commit_list *commits;
@@ -126,6 +135,9 @@ struct rev_info {
 	 *
 	 * Can be set to REV_INFO_STDIN_IGNORE to ignore the --stdin
 	 * option.
+	 *
+	 * Set it to REV_INFO_STDIN_ALWAYS_READ if there's always data
+	 * on stdin to be read, even if no --stdin option is provided.
 	 */
 	enum rev_info_stdin stdin_handling;
 
@@ -136,6 +148,24 @@ struct rev_info {
 	 */
 	unsigned int consumed_stdin:1;
 
+	/*
+	 * When reading from stdin (see "stdin_handling" above) define
+	 * a handle_stdin_line function to consume the lines.
+	 *
+	 * - Return REV_INFO_STDIN_LINE_PROCESS to continue
+	 *   revision.c's normal processing of the line (after
+	 *   possibly munging the provided strbuf).
+	 *
+	 * - Return REV_INFO_STDIN_LINE_CONTINUE to indicate that the
+	 *   line is fully processed, moving onto the next line (if
+	 *   any)
+	 *
+	 * Use the "stdin_line_priv" to optionally pass your own data
+	 * around.
+	 */
+	rev_info_stdin_line_func handle_stdin_line;
+	void *stdin_line_priv;
+
 	/* topo-sort */
 	enum rev_sort_order sort_order;
 

From 7db42aa925edb7212c216479344f50a1e55f7e0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Fri, 9 Jul 2021 13:06:15 +0200
Subject: [PATCH 027/198] pack-objects.c: do stdin parsing via revision.c's API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new "handle_stdin_line" API in revision.c to parse stdin in
pack-objects.c, instead of using custom pack-objects.c-specific code
to do so.

This means that we can remove the "if (len && line[len - 1] == '\n')"
check, it's now redundant to using strbuf_getline(), and we get to
skip the whole "warn_on_object_refname_ambiguity" dance. The
read_revisions_from_stdin() function in revision.c we're now using
does it for us.

The pack-objects.c code being refactored away here was first added in
Linus's c323ac7d9c5 (git-pack-objects: create a packed object
representation., 2005-06-25).

Later on rev-list started doing similar parsing in 42cabc341c4 (Teach
rev-list an option to read revs from the standard input., 2006-09-05).
That code was promoted to a more general API in 1fc561d169a (Move
read_revisions_from_stdin from builtin-rev-list.c to revision.c,
2008-07-05).

Since then the API in revision.c has received improvements that have
been missed here. E.g. the arbitrary limit of 1000 bytes was removed
in 63d564b3002 (read_revision_from_stdin(): use strbuf, 2009-11-20),
and it moved to a more simpler strbuf API in 6e8d46f9d4b (revision:
read --stdin with strbuf_getline(), 2015-10-28).

For now we've just made setup_revisions() loop over stdin for us, but
the callback we define makes no use of REV_INFO_STDIN_LINE_PROCESS. We
still need to call handle_revision_arg() ourselves because we'd like
to call it with different flags.

This very light use of the API will be further refined in a subsequent
commit, for now we're just doing the bare minimum to move this
existing code over to the new callback pattern without any functional
changes, and making it as friendly to "git show -w" and "the
--color-moved-ws=allow-indentation-change" mode as possible.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c | 63 ++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 1fbaa34f91b264..35d5247f85a212 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3745,15 +3745,40 @@ static void mark_bitmap_preferred_tips(void)
 	}
 }
 
+static enum rev_info_stdin_line get_object_list_handle_stdin_line(
+	struct rev_info *revs, struct strbuf *line_sb, void *stdin_line_priv)
+{
+	int *flags = stdin_line_priv;
+	char *line = line_sb->buf;
+
+	if (*line == '-') {
+		if (!strcmp(line, "--not")) {
+			*flags ^= UNINTERESTING;
+			write_bitmap_index = 0;
+			return REV_INFO_STDIN_LINE_CONTINUE;
+		}
+		if (starts_with(line, "--shallow ")) {
+			struct object_id oid;
+			if (get_oid_hex(line + 10, &oid))
+				die("not an object name '%s'", line + 10);
+			register_shallow(the_repository, &oid);
+			use_bitmap_index = 0;
+			return REV_INFO_STDIN_LINE_CONTINUE;
+		}
+		die(_("not a rev '%s'"), line);
+	}
+	if (handle_revision_arg(line, revs, *flags, REVARG_CANNOT_BE_FILENAME))
+			die(_("bad revision '%s'"), line);
+	return REV_INFO_STDIN_LINE_CONTINUE;
+}
+
 static void get_object_list(int ac, const char **av)
 {
 	struct rev_info revs;
 	struct setup_revision_opt s_r_opt = {
 		.allow_exclude_promisor_objects = 1,
 	};
-	char line[1000];
 	int flags = 0;
-	int save_warning;
 
 	repo_init_revisions(the_repository, &revs, NULL);
 	save_commit_buffer = 0;
@@ -3761,39 +3786,11 @@ static void get_object_list(int ac, const char **av)
 	/* make sure shallows are read */
 	is_repository_shallow(the_repository);
 
+	revs.stdin_handling = REV_INFO_STDIN_ALWAYS_READ;
+	revs.handle_stdin_line = get_object_list_handle_stdin_line;
+	revs.stdin_line_priv = &flags;
 	setup_revisions(ac, av, &revs, &s_r_opt);
 
-	save_warning = warn_on_object_refname_ambiguity;
-	warn_on_object_refname_ambiguity = 0;
-
-	while (fgets(line, sizeof(line), stdin) != NULL) {
-		int len = strlen(line);
-		if (len && line[len - 1] == '\n')
-			line[--len] = 0;
-		if (!len)
-			break;
-		if (*line == '-') {
-			if (!strcmp(line, "--not")) {
-				flags ^= UNINTERESTING;
-				write_bitmap_index = 0;
-				continue;
-			}
-			if (starts_with(line, "--shallow ")) {
-				struct object_id oid;
-				if (get_oid_hex(line + 10, &oid))
-					die("not an object name '%s'", line + 10);
-				register_shallow(the_repository, &oid);
-				use_bitmap_index = 0;
-				continue;
-			}
-			die(_("not a rev '%s'"), line);
-		}
-		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
-			die(_("bad revision '%s'"), line);
-	}
-
-	warn_on_object_refname_ambiguity = save_warning;
-
 	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
 		return;
 

From bb0a763e6d3ba4b06e040ecef466b5cdfd91742b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Fri, 9 Jul 2021 13:06:16 +0200
Subject: [PATCH 028/198] pack-objects.c: make use of
 REV_INFO_STDIN_LINE_PROCESS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Continue the work started in the preceding commit of porting
pack-objects.c over to the new handle_stdin_line callback pattern. The
common case for the users of this API is to do some of their own
parsing or munging, and then have handle_revision_arg() handle the
rest.

The existing users of the --stdin parsing always wanted a flag of "0"
to be passed to handle_revision_arg(), but pack-objects.c wants to set
custom flags. Let's support this common case by having a
"revarg_flags" member in the "rev_info" struct.

This allows us to return REV_INFO_STDIN_LINE_PROCESS in the new
get_object_list_handle_stdin_line() instead of
REV_INFO_STDIN_LINE_CONTINUE, as read_revisions_from_stdin() will now
pass down the right flag for us.

I considered making the "--not" parsing be another flag handled by the
revision.c API itself, but since there's only one caller who wants
this, and the "write_bitmap_index = 0" case is more specific than
marking things UNINTERESTING I think it's better to handle it with a
more general mechanism.

These changes means that we can make the handle_revision_arg()
function static. Now that the only external user of the API has been
migrated over to the callback mechanism nothing external to revision.c
needs to call handle_revision_arg() anymore.

That handle_revision_arg() function was made public in a combination
of 5d6f0935e6d (revision.c: allow injecting revision parameters after
setup_revisions()., 2006-09-05) and b5d97e6b0a0 (pack-objects: run
rev-list equivalent internally., 2006-09-04).

This change leaves the briefly-used in preceding commits
"void *stdin_line_priv" without any in-tree user, as
builtin/pack-objects.c could be ported over to our new "revarg_flags"
common case.

I'm leaving that "void *stdin_line_priv" in place anyway for two
reasons:

 1. It's a common pattern to allow such a "void *" to be used for
    callback data, so we might as well follow that pattern here in
    anticipation of a future in-tree user.

 2. I have patches for such an in-tree user already in a series
    that'll be submitted after this one. See the reference to "git
    bundle create --stdin" in the commit that added the "handle_stdin_line"
    API.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c | 35 +++++++++++++++--------------------
 revision.c             |  4 ++--
 revision.h             |  6 ++++--
 3 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 35d5247f85a212..06a085a9a2aa85 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3748,28 +3748,25 @@ static void mark_bitmap_preferred_tips(void)
 static enum rev_info_stdin_line get_object_list_handle_stdin_line(
 	struct rev_info *revs, struct strbuf *line_sb, void *stdin_line_priv)
 {
-	int *flags = stdin_line_priv;
 	char *line = line_sb->buf;
 
-	if (*line == '-') {
-		if (!strcmp(line, "--not")) {
-			*flags ^= UNINTERESTING;
-			write_bitmap_index = 0;
-			return REV_INFO_STDIN_LINE_CONTINUE;
-		}
-		if (starts_with(line, "--shallow ")) {
-			struct object_id oid;
-			if (get_oid_hex(line + 10, &oid))
-				die("not an object name '%s'", line + 10);
-			register_shallow(the_repository, &oid);
-			use_bitmap_index = 0;
-			return REV_INFO_STDIN_LINE_CONTINUE;
-		}
+	if (*line != '-')
+		return REV_INFO_STDIN_LINE_PROCESS;
+
+	if (!strcmp(line, "--not")) {
+		revs->revarg_flags ^= UNINTERESTING;
+		write_bitmap_index = 0;
+		return REV_INFO_STDIN_LINE_CONTINUE;
+	} else if (starts_with(line, "--shallow ")) {
+		struct object_id oid;
+		if (get_oid_hex(line + 10, &oid))
+			die("not an object name '%s'", line + 10);
+		register_shallow(the_repository, &oid);
+		use_bitmap_index = 0;
+		return REV_INFO_STDIN_LINE_CONTINUE;
+	} else {
 		die(_("not a rev '%s'"), line);
 	}
-	if (handle_revision_arg(line, revs, *flags, REVARG_CANNOT_BE_FILENAME))
-			die(_("bad revision '%s'"), line);
-	return REV_INFO_STDIN_LINE_CONTINUE;
 }
 
 static void get_object_list(int ac, const char **av)
@@ -3778,7 +3775,6 @@ static void get_object_list(int ac, const char **av)
 	struct setup_revision_opt s_r_opt = {
 		.allow_exclude_promisor_objects = 1,
 	};
-	int flags = 0;
 
 	repo_init_revisions(the_repository, &revs, NULL);
 	save_commit_buffer = 0;
@@ -3788,7 +3784,6 @@ static void get_object_list(int ac, const char **av)
 
 	revs.stdin_handling = REV_INFO_STDIN_ALWAYS_READ;
 	revs.handle_stdin_line = get_object_list_handle_stdin_line;
-	revs.stdin_line_priv = &flags;
 	setup_revisions(ac, av, &revs, &s_r_opt);
 
 	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
diff --git a/revision.c b/revision.c
index 3f6ab834aff4a5..4164a4fcd1155a 100644
--- a/revision.c
+++ b/revision.c
@@ -2089,7 +2089,7 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl
 	return 0;
 }
 
-int handle_revision_arg(const char *arg, struct rev_info *revs, int flags, unsigned revarg_opt)
+static int handle_revision_arg(const char *arg, struct rev_info *revs, int flags, unsigned revarg_opt)
 {
 	int ret = handle_revision_arg_1(arg, revs, flags, revarg_opt);
 	if (!ret)
@@ -2139,7 +2139,7 @@ static void read_revisions_from_stdin(struct rev_info *revs,
 			}
 			die("options not supported in --stdin mode");
 		}
-		if (handle_revision_arg(sb.buf, revs, 0,
+		if (handle_revision_arg(sb.buf, revs, revs->revarg_flags,
 					REVARG_CANNOT_BE_FILENAME))
 			die("bad revision '%s'", sb.buf);
 	}
diff --git a/revision.h b/revision.h
index 93b6a02cf020ce..4d67c216c8b7fc 100644
--- a/revision.h
+++ b/revision.h
@@ -156,6 +156,9 @@ struct rev_info {
 	 *   revision.c's normal processing of the line (after
 	 *   possibly munging the provided strbuf).
 	 *
+	 *   Change "revarg_flags" to affect the subsequent handling
+	 *   in handle_revision_arg()
+	 *
 	 * - Return REV_INFO_STDIN_LINE_CONTINUE to indicate that the
 	 *   line is fully processed, moving onto the next line (if
 	 *   any)
@@ -164,6 +167,7 @@ struct rev_info {
 	 * around.
 	 */
 	rev_info_stdin_line_func handle_stdin_line;
+	int revarg_flags;
 	void *stdin_line_priv;
 
 	/* topo-sort */
@@ -421,8 +425,6 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx,
 			const char * const usagestr[]);
 #define REVARG_CANNOT_BE_FILENAME 01
 #define REVARG_COMMITTISH 02
-int handle_revision_arg(const char *arg, struct rev_info *revs,
-			int flags, unsigned revarg_opt);
 
 /**
  * Reset the flags used by the revision walking api. You can use this to do

From 4afb5e196b8a687cc43d6295eb73d4d693407f08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:04 +0200
Subject: [PATCH 029/198] fsck tests: refactor one test to use a sub-repo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor one of the fsck tests to use a throwaway repository. It's a
pervasive pattern in t1450-fsck.sh to spend a lot of effort on the
teardown of a tests so we're not leaving corrupt content for the next
test.

We should instead simply use something like this test_create_repo
pattern. It's both less verbose, and makes things easier to debug as a
failing test can have their state left behind under -d without
damaging the state for other tests.

But let's punt on that general refactoring and just change this one
test, I'm going to change it further in subsequent commits.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1450-fsck.sh | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 5071ac63a5b51b..7becab5ba1e815 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -48,24 +48,22 @@ remove_object () {
 	rm "$(sha1_file "$1")"
 }
 
-test_expect_success 'object with bad sha1' '
-	sha=$(echo blob | git hash-object -w --stdin) &&
-	old=$(test_oid_to_path "$sha") &&
-	new=$(dirname $old)/$(test_oid ff_2) &&
-	sha="$(dirname $new)$(basename $new)" &&
-	mv .git/objects/$old .git/objects/$new &&
-	test_when_finished "remove_object $sha" &&
-	git update-index --add --cacheinfo 100644 $sha foo &&
-	test_when_finished "git read-tree -u --reset HEAD" &&
-	tree=$(git write-tree) &&
-	test_when_finished "remove_object $tree" &&
-	cmt=$(echo bogus | git commit-tree $tree) &&
-	test_when_finished "remove_object $cmt" &&
-	git update-ref refs/heads/bogus $cmt &&
-	test_when_finished "git update-ref -d refs/heads/bogus" &&
-
-	test_must_fail git fsck 2>out &&
-	test_i18ngrep "$sha.*corrupt" out
+test_expect_success 'object with hash mismatch' '
+	git init --bare hash-mismatch &&
+	(
+		cd hash-mismatch &&
+		oid=$(echo blob | git hash-object -w --stdin) &&
+		old=$(test_oid_to_path "$oid") &&
+		new=$(dirname $old)/$(test_oid ff_2) &&
+		oid="$(dirname $new)$(basename $new)" &&
+		mv objects/$old objects/$new &&
+		git update-index --add --cacheinfo 100644 $oid foo &&
+		tree=$(git write-tree) &&
+		cmt=$(echo bogus | git commit-tree $tree) &&
+		git update-ref refs/heads/bogus $cmt &&
+		test_must_fail git fsck 2>out &&
+		test_i18ngrep "$oid.*corrupt" out
+	)
 '
 
 test_expect_success 'branch pointing to non-commit' '

From 94fc8edb87c0ab2b7dac46c0341de063c429cbe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:05 +0200
Subject: [PATCH 030/198] fsck tests: add test for fsck-ing an unknown type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a blindspot in the fsck tests by checking what we do when we
encounter an unknown "garbage" type produced with hash-object's
--literally option.

This behavior needs to be improved, which'll be done in subsequent
patches, but for now let's test for the current behavior.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1450-fsck.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 7becab5ba1e815..f10d6f7b7e8c7f 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -863,4 +863,16 @@ test_expect_success 'detect corrupt index file in fsck' '
 	test_i18ngrep "bad index file" errors
 '
 
+test_expect_success 'fsck hard errors on an invalid object type' '
+	git init --bare garbage-type &&
+	empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob </dev/null) &&
+	garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally </dev/null) &&
+	cat >err.expect <<-\EOF &&
+	fatal: invalid object type
+	EOF
+	test_must_fail git -C garbage-type fsck >out.actual 2>err.actual &&
+	test_cmp err.expect err.actual &&
+	test_must_be_empty out.actual
+'
+
 test_done

From e70fb77a00f128229b2511c77a85d64242c5d43e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:06 +0200
Subject: [PATCH 031/198] cat-file tests: test for missing object with -t and
 -s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test for what happens when the -t and -s flags are asked to operate on
a missing object, this extends tests added in 3e370f9faf0 (t1006: add
tests for git cat-file --allow-unknown-type, 2015-05-03). The -t and
-s flags are the only ones that can be combined with
--allow-unknown-type, so let's test with and without that flag.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1006-cat-file.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74ad00..b71ef94329e58f 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -315,6 +315,33 @@ test_expect_success '%(deltabase) reports packed delta bases' '
 	}
 '
 
+missing_oid=$(test_oid deadbeef)
+test_expect_success 'error on type of missing object' '
+	cat >expect.err <<-\EOF &&
+	fatal: git cat-file: could not get object info
+	EOF
+	test_must_fail git cat-file -t $missing_oid >out 2>err &&
+	test_must_be_empty out &&
+	test_cmp expect.err err &&
+
+	test_must_fail git cat-file -t --allow-unknown-type $missing_oid >out 2>err &&
+	test_must_be_empty out &&
+	test_cmp expect.err err
+'
+
+test_expect_success 'error on size of missing object' '
+	cat >expect.err <<-\EOF &&
+	fatal: git cat-file: could not get object info
+	EOF
+	test_must_fail git cat-file -s $missing_oid >out 2>err &&
+	test_must_be_empty out &&
+	test_cmp expect.err err &&
+
+	test_must_fail git cat-file -s --allow-unknown-type $missing_oid >out 2>err &&
+	test_must_be_empty out &&
+	test_cmp expect.err err
+'
+
 bogus_type="bogus"
 bogus_content="bogus"
 bogus_size=$(strlen "$bogus_content")

From 9194dc15a9d9ae3e5e1b2e586ffafef3ae226066 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:07 +0200
Subject: [PATCH 032/198] cat-file tests: test that --allow-unknown-type isn't
 on by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a blindspot in the tests for the --allow-unknown-type feature
added in 39e4ae38804 (cat-file: teach cat-file a
'--allow-unknown-type' option, 2015-05-03). We should check that
--allow-unknown-type isn't on by default.

Before this change all the tests would succeed if --allow-unknown-type
was on by default, let's fix that by asserting that -t and -s die on a
"garbage" type without --allow-unknown-type.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1006-cat-file.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index b71ef94329e58f..dc01d7c4a9a9d5 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -347,6 +347,20 @@ bogus_content="bogus"
 bogus_size=$(strlen "$bogus_content")
 bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
 
+test_expect_success 'die on broken object under -t and -s without --allow-unknown-type' '
+	cat >err.expect <<-\EOF &&
+	fatal: invalid object type
+	EOF
+
+	test_must_fail git cat-file -t $bogus_sha1 >out.actual 2>err.actual &&
+	test_cmp err.expect err.actual &&
+	test_must_be_empty out.actual &&
+
+	test_must_fail git cat-file -s $bogus_sha1 >out.actual 2>err.actual &&
+	test_cmp err.expect err.actual &&
+	test_must_be_empty out.actual
+'
+
 test_expect_success "Type of broken object is correct" '
 	echo $bogus_type >expect &&
 	git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
@@ -363,6 +377,21 @@ bogus_content="bogus"
 bogus_size=$(strlen "$bogus_content")
 bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
 
+test_expect_success 'die on broken object with large type under -t and -s without --allow-unknown-type' '
+	cat >err.expect <<-EOF &&
+	error: unable to unpack $bogus_sha1 header
+	fatal: git cat-file: could not get object info
+	EOF
+
+	test_must_fail git cat-file -t $bogus_sha1 >out.actual 2>err.actual &&
+	test_cmp err.expect err.actual &&
+	test_must_be_empty out.actual &&
+
+	test_must_fail git cat-file -s $bogus_sha1 >out.actual 2>err.actual &&
+	test_cmp err.expect err.actual &&
+	test_must_be_empty out.actual
+'
+
 test_expect_success "Type of broken object is correct when type is large" '
 	echo $bogus_type >expect &&
 	git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&

From 5c8b7ba104a409f341bf3759744a7f480925ac4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:08 +0200
Subject: [PATCH 033/198] rev-list tests: test for behavior with invalid object
 types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a blindspot in the tests for the "rev-list --disk-usage" feature
added in 16950f8384a (rev-list: add --disk-usage option for
calculating disk usage, 2021-02-09) to test for what happens when it's
asked to calculate the disk usage of invalid object types.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t6115-rev-list-du.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/t/t6115-rev-list-du.sh b/t/t6115-rev-list-du.sh
index b4aef32b713ca0..edb2ed5584692b 100755
--- a/t/t6115-rev-list-du.sh
+++ b/t/t6115-rev-list-du.sh
@@ -48,4 +48,15 @@ check_du HEAD
 check_du --objects HEAD
 check_du --objects HEAD^..HEAD
 
+test_expect_success 'setup garbage repository' '
+	git clone --bare . garbage.git &&
+	garbage_oid=$(git -C garbage.git hash-object -t garbage -w --stdin --literally <one.t) &&
+	git -C garbage.git rev-list --objects --all --disk-usage &&
+
+	# Manually create a ref because "update-ref", "tag" etc. have
+	# no corresponding --literally option.
+	echo $garbage_oid >garbage.git/refs/tags/garbage-tag &&
+	test_must_fail git -C garbage.git rev-list --objects --all --disk-usage
+'
+
 test_done

From 46de7a03e36a8880bdc627d7772e7a6ce56a5ed1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:09 +0200
Subject: [PATCH 034/198] cat-file tests: add corrupt loose object test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a blindspot in the tests for "cat-file" (and by proxy, the guts of
object-file.c) by testing that when we can't decode a loose object
with zlib we'll emit an error from zlib.c.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1006-cat-file.sh | 52 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index dc01d7c4a9a9d5..7f10a92f0e46d3 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -404,6 +404,58 @@ test_expect_success "Size of large broken object is correct when type is large"
 	test_cmp expect actual
 '
 
+test_expect_success 'cat-file -t and -s on corrupt loose object' '
+	git init --bare corrupt-loose.git &&
+	(
+		cd corrupt-loose.git &&
+
+		# Setup and create the empty blob and its path
+		empty_path=$(git rev-parse --git-path objects/$(test_oid_to_path "$EMPTY_BLOB")) &&
+		git hash-object -w --stdin </dev/null &&
+
+		# Create another blob and its path
+		echo other >other.blob &&
+		other_blob=$(git hash-object -w --stdin <other.blob) &&
+		other_path=$(git rev-parse --git-path objects/$(test_oid_to_path "$other_blob")) &&
+
+		# Before the swap the size is 0
+		cat >out.expect <<-EOF &&
+		0
+		EOF
+		git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual &&
+		test_must_be_empty err.actual &&
+		test_cmp out.expect out.actual &&
+
+		# Swap the two to corrupt the repository
+		mv -f "$other_path" "$empty_path" &&
+		test_must_fail git fsck 2>err.fsck &&
+		grep "hash mismatch" err.fsck &&
+
+		# confirm that cat-file is reading the new swapped-in
+		# blob...
+		cat >out.expect <<-EOF &&
+		blob
+		EOF
+		git cat-file -t "$EMPTY_BLOB" >out.actual 2>err.actual &&
+		test_must_be_empty err.actual &&
+		test_cmp out.expect out.actual &&
+
+		# ... since it has a different size now.
+		cat >out.expect <<-EOF &&
+		6
+		EOF
+		git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual &&
+		test_must_be_empty err.actual &&
+		test_cmp out.expect out.actual &&
+
+		# So far "cat-file" has been happy to spew the found
+		# content out as-is. Try to make it zlib-invalid.
+		mv -f other.blob "$empty_path" &&
+		test_must_fail git fsck 2>err.fsck &&
+		grep "^error: inflate: data stream error (" err.fsck
+	)
+'
+
 # Tests for git cat-file --follow-symlinks
 test_expect_success 'prep for symlink tests' '
 	echo_without_newline "$hello_content" >morx &&

From 68d78db3dbd38ba49581039bd7ad9c06d83e3be0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:10 +0200
Subject: [PATCH 035/198] cat-file tests: test for current --allow-unknown-type
 behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add more tests for the current --allow-unknown-type behavior. As noted
in [1] I don't think much of this makes sense, but let's test for it
as-is so we can see if the behavior changes in the future.

1. https://lore.kernel.org/git/87r1i4qf4h.fsf@evledraar.gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1006-cat-file.sh | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 7f10a92f0e46d3..86fd2a90ca773d 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -361,6 +361,46 @@ test_expect_success 'die on broken object under -t and -s without --allow-unknow
 	test_must_be_empty out.actual
 '
 
+test_expect_success '-e is OK with a broken object without --allow-unknown-type' '
+	git cat-file -e $bogus_sha1
+'
+
+test_expect_success '-e can not be combined with --allow-unknown-type' '
+	test_expect_code 128 git cat-file -e --allow-unknown-type $bogus_sha1
+'
+
+test_expect_success '-p cannot print a broken object even with --allow-unknown-type' '
+	test_must_fail git cat-file -p $bogus_sha1 &&
+	test_expect_code 128 git cat-file -p --allow-unknown-type $bogus_sha1
+'
+
+test_expect_success '<type> <hash> does not work with objects of broken types' '
+	cat >err.expect <<-\EOF &&
+	fatal: invalid object type "bogus"
+	EOF
+	test_must_fail git cat-file $bogus_type $bogus_sha1 2>err.actual &&
+	test_cmp err.expect err.actual
+'
+
+test_expect_success 'broken types combined with --batch and --batch-check' '
+	echo $bogus_sha1 >bogus-oid &&
+
+	cat >err.expect <<-\EOF &&
+	fatal: invalid object type
+	EOF
+
+	test_must_fail git cat-file --batch <bogus-oid 2>err.actual &&
+	test_cmp err.expect err.actual &&
+
+	test_must_fail git cat-file --batch-check <bogus-oid 2>err.actual &&
+	test_cmp err.expect err.actual
+'
+
+test_expect_success 'the --batch and --batch-check options do not combine with --allow-unknown-type' '
+	test_expect_code 128 git cat-file --batch --allow-unknown-type <bogus-oid &&
+	test_expect_code 128 git cat-file --batch-check --allow-unknown-type <bogus-oid
+'
+
 test_expect_success "Type of broken object is correct" '
 	echo $bogus_type >expect &&
 	git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
@@ -372,6 +412,27 @@ test_expect_success "Size of broken object is correct" '
 	git cat-file -s --allow-unknown-type $bogus_sha1 >actual &&
 	test_cmp expect actual
 '
+
+test_expect_success 'the --allow-unknown-type option does not consider replacement refs' '
+	cat >expect <<-EOF &&
+	$bogus_type
+	EOF
+	git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
+	test_cmp expect actual &&
+
+	# Create it manually, as "git replace" will die on bogus
+	# types.
+	head=$(git rev-parse --verify HEAD) &&
+	mkdir -p .git/refs/replace &&
+	echo $head >.git/refs/replace/$bogus_sha1 &&
+
+	cat >expect <<-EOF &&
+	commit
+	EOF
+	git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
+	test_cmp expect actual
+'
+
 bogus_type="abcdefghijklmnopqrstuvwxyz1234679"
 bogus_content="bogus"
 bogus_size=$(strlen "$bogus_content")

From 91e194f0e5cc9cf13c593c4efefee4550fe54f5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:11 +0200
Subject: [PATCH 036/198] cache.h: move object functions to object-store.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the declaration of some ancient object functions added in
e.g. c4483576b8d (Add "unpack_sha1_header()" helper function,
2005-06-01) from cache.h to object-store.h. This continues work
started in cbd53a2193d (object-store: move object access functions to
object-store.h, 2018-05-15).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h        | 10 ----------
 object-store.h |  9 +++++++++
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index ba04ff8bd36b36..32ea1ea0474fea 100644
--- a/cache.h
+++ b/cache.h
@@ -1302,16 +1302,6 @@ char *xdg_cache_home(const char *filename);
 
 int git_open_cloexec(const char *name, int flags);
 #define git_open(name) git_open_cloexec(name, O_RDONLY)
-int unpack_loose_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
-int parse_loose_header(const char *hdr, unsigned long *sizep);
-
-int check_object_signature(struct repository *r, const struct object_id *oid,
-			   void *buf, unsigned long size, const char *type);
-
-int finalize_object_file(const char *tmpfile, const char *filename);
-
-/* Helper to check and "touch" a file */
-int check_and_freshen_file(const char *fn, int freshen);
 
 extern const signed char hexval_table[256];
 static inline unsigned int hexval(unsigned char c)
diff --git a/object-store.h b/object-store.h
index ec32c23dcb5615..9117115a50c6df 100644
--- a/object-store.h
+++ b/object-store.h
@@ -477,4 +477,13 @@ int for_each_object_in_pack(struct packed_git *p,
 int for_each_packed_object(each_packed_object_fn, void *,
 			   enum for_each_object_flags flags);
 
+int unpack_loose_header(git_zstream *stream, unsigned char *map,
+			unsigned long mapsize, void *buffer,
+			unsigned long bufsiz);
+int parse_loose_header(const char *hdr, unsigned long *sizep);
+int check_object_signature(struct repository *r, const struct object_id *oid,
+			   void *buf, unsigned long size, const char *type);
+int finalize_object_file(const char *tmpfile, const char *filename);
+int check_and_freshen_file(const char *fn, int freshen);
+
 #endif /* OBJECT_STORE_H */

From a4189220254cb0a226046dc4c663cbb6700932c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:12 +0200
Subject: [PATCH 037/198] object-file.c: don't set "typep" when returning
 non-zero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the loose_object_info() function returns an error stop faking up
the "oi->typep" to OBJ_BAD. Let the return value of the function
itself suffice. This code cleanup simplifies subsequent changes.

That we set this at all is a relic from the past. Before
052fe5eaca9 (sha1_loose_object_info: make type lookup optional,
2013-07-12) we would always return the type_from_string(type) via the
parse_sha1_header() function, or -1 (i.e. OBJ_BAD) if we couldn't
parse it.

Then in a combination of 46f034483eb (sha1_file: support reading from
a loose object of unknown type, 2015-05-03) and
b3ea7dd32d6 (sha1_loose_object_info: handle errors from
unpack_sha1_rest, 2017-10-05) our API drifted even further towards
conflating the two again.

Having read the code paths involved carefully I think this is OK. We
are just about to return -1, and we have only one caller:
do_oid_object_info_extended(). That function will in turn go on to
return -1 when we return -1 here.

This might be introducing a subtle bug where a caller of
oid_object_info_extended() would inspect its "typep" and expect a
meaningful value if the function returned -1.

Such a problem would not occur for its simpler oid_object_info()
sister function. That one always returns the "enum object_type", which
in the case of -1 would be the OBJ_BAD.

Having read the code for all the callers of these functions I don't
believe any such bug is being introduced here, and in any case we'd
likely already have such a bug for the "sizep" member (although
blindly checking "typep" first would be a more common case).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/object-file.c b/object-file.c
index f233b440b22c06..9210e2e6fe4738 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1480,8 +1480,6 @@ static int loose_object_info(struct repository *r,
 		git_inflate_end(&stream);
 
 	munmap(map, mapsize);
-	if (status && oi->typep)
-		*oi->typep = status;
 	if (oi->sizep == &size_scratch)
 		oi->sizep = NULL;
 	strbuf_release(&hdrbuf);

From a0ef529a656b99432b275020fade43945f62ad21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:13 +0200
Subject: [PATCH 038/198] object-file.c: make parse_loose_header_extended()
 public
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the parse_loose_header_extended() function public and remove the
parse_loose_header() wrapper. The only direct user of it outside of
object-file.c itself was in streaming.c, that caller can simply pass
the required "struct object-info *" instead.

This change is being done in preparation for teaching
read_loose_object() to accept a flag to pass to
parse_loose_header(). It isn't strictly necessary for that change, we
could simply use parse_loose_header_extended() there, but will leave
the API in a better end state.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c  | 21 ++++++++-------------
 object-store.h |  3 ++-
 streaming.c    |  5 ++++-
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/object-file.c b/object-file.c
index 9210e2e6fe4738..e0ba1842272003 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1340,8 +1340,9 @@ static void *unpack_loose_rest(git_zstream *stream,
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
-				       unsigned int flags)
+int parse_loose_header(const char *hdr,
+		       struct object_info *oi,
+		       unsigned int flags)
 {
 	const char *type_buf = hdr;
 	unsigned long size;
@@ -1401,14 +1402,6 @@ static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
 	return *hdr ? -1 : type;
 }
 
-int parse_loose_header(const char *hdr, unsigned long *sizep)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-
-	oi.sizep = sizep;
-	return parse_loose_header_extended(hdr, &oi, 0);
-}
-
 static int loose_object_info(struct repository *r,
 			     const struct object_id *oid,
 			     struct object_info *oi, int flags)
@@ -1463,10 +1456,10 @@ static int loose_object_info(struct repository *r,
 	if (status < 0)
 		; /* Do nothing */
 	else if (hdrbuf.len) {
-		if ((status = parse_loose_header_extended(hdrbuf.buf, oi, flags)) < 0)
+		if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0)
 			status = error(_("unable to parse %s header with --allow-unknown-type"),
 				       oid_to_hex(oid));
-	} else if ((status = parse_loose_header_extended(hdr, oi, flags)) < 0)
+	} else if ((status = parse_loose_header(hdr, oi, flags)) < 0)
 		status = error(_("unable to parse %s header"), oid_to_hex(oid));
 
 	if (status >= 0 && oi->contentp) {
@@ -2547,6 +2540,8 @@ int read_loose_object(const char *path,
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
+	struct object_info oi = OBJECT_INFO_INIT;
+	oi.sizep = size;
 
 	*contents = NULL;
 
@@ -2561,7 +2556,7 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	*type = parse_loose_header(hdr, size);
+	*type = parse_loose_header(hdr, &oi, 0);
 	if (*type < 0) {
 		error(_("unable to parse header of %s"), path);
 		git_inflate_end(&stream);
diff --git a/object-store.h b/object-store.h
index 9117115a50c6df..d443964447c5ca 100644
--- a/object-store.h
+++ b/object-store.h
@@ -480,7 +480,8 @@ int for_each_packed_object(each_packed_object_fn, void *,
 int unpack_loose_header(git_zstream *stream, unsigned char *map,
 			unsigned long mapsize, void *buffer,
 			unsigned long bufsiz);
-int parse_loose_header(const char *hdr, unsigned long *sizep);
+int parse_loose_header(const char *hdr, struct object_info *oi,
+		       unsigned int flags);
 int check_object_signature(struct repository *r, const struct object_id *oid,
 			   void *buf, unsigned long size, const char *type);
 int finalize_object_file(const char *tmpfile, const char *filename);
diff --git a/streaming.c b/streaming.c
index 5f480ad50c415c..8beac62cbb7103 100644
--- a/streaming.c
+++ b/streaming.c
@@ -223,6 +223,9 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
 			      const struct object_id *oid,
 			      enum object_type *type)
 {
+	struct object_info oi = OBJECT_INFO_INIT;
+	oi.sizep = &st->size;
+
 	st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize);
 	if (!st->u.loose.mapped)
 		return -1;
@@ -231,7 +234,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
 				 st->u.loose.mapsize,
 				 st->u.loose.hdr,
 				 sizeof(st->u.loose.hdr)) < 0) ||
-	    (parse_loose_header(st->u.loose.hdr, &st->size) < 0)) {
+	    (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) {
 		git_inflate_end(&st->z);
 		munmap(st->u.loose.mapped, st->u.loose.mapsize);
 		return -1;

From 9e35d955d8b133610ae8dde528d478b4779e8f86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:14 +0200
Subject: [PATCH 039/198] object-file.c: add missing braces to
 loose_object_info()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the formatting in loose_object_info() to conform with our usual
coding style:

    When there are multiple arms to a conditional and some of them
    require braces, enclose even a single line block in braces for
    consistency -- Documentation/CodingGuidelines

This formatting-only change makes a subsequent commit easier to read.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/object-file.c b/object-file.c
index e0ba1842272003..646ca7f85d6aa8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1450,17 +1450,20 @@ static int loose_object_info(struct repository *r,
 		if (unpack_loose_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
 			status = error(_("unable to unpack %s header with --allow-unknown-type"),
 				       oid_to_hex(oid));
-	} else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+	} else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
 		status = error(_("unable to unpack %s header"),
 			       oid_to_hex(oid));
-	if (status < 0)
-		; /* Do nothing */
-	else if (hdrbuf.len) {
+	}
+
+	if (status < 0) {
+		/* Do nothing */
+	} else if (hdrbuf.len) {
 		if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0)
 			status = error(_("unable to parse %s header with --allow-unknown-type"),
 				       oid_to_hex(oid));
-	} else if ((status = parse_loose_header(hdr, oi, flags)) < 0)
+	} else if ((status = parse_loose_header(hdr, oi, flags)) < 0) {
 		status = error(_("unable to parse %s header"), oid_to_hex(oid));
+	}
 
 	if (status >= 0 && oi->contentp) {
 		*oi->contentp = unpack_loose_rest(&stream, hdr,
@@ -1469,8 +1472,9 @@ static int loose_object_info(struct repository *r,
 			git_inflate_end(&stream);
 			status = -1;
 		}
-	} else
+	} else {
 		git_inflate_end(&stream);
+	}
 
 	munmap(map, mapsize);
 	if (oi->sizep == &size_scratch)

From 4d77c12d20cf1b3708cc1697713a3874a877b5dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:15 +0200
Subject: [PATCH 040/198] object-file.c: simplify unpack_loose_short_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combine the unpack_loose_short_header(),
unpack_loose_header_to_strbuf() and unpack_loose_header() functions
into one.

The unpack_loose_header_to_strbuf() function was added in
46f034483eb (sha1_file: support reading from a loose object of unknown
type, 2015-05-03).

Its code was mostly copy/pasted between it and both of
unpack_loose_header() and unpack_loose_short_header(). We now have a
single unpack_loose_header() function which accepts an optional
"struct strbuf *" instead.

I think the remaining unpack_loose_header() function could be further
simplified, we're carrying some complexity just to be able to emit a
garbage type longer than MAX_HEADER_LEN, we could alternatively just
say "we found a garbage type <first 32 bytes>..." instead. But let's
leave the current behavior in place for now.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c  | 60 ++++++++++++++++++--------------------------------
 object-store.h | 14 +++++++++++-
 streaming.c    |  3 ++-
 3 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/object-file.c b/object-file.c
index 646ca7f85d6aa8..ef3a1517fed220 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1210,11 +1210,12 @@ void *map_loose_object(struct repository *r,
 	return map_loose_object_1(r, NULL, oid, size);
 }
 
-static int unpack_loose_short_header(git_zstream *stream,
-				     unsigned char *map, unsigned long mapsize,
-				     void *buffer, unsigned long bufsiz)
+int unpack_loose_header(git_zstream *stream,
+			unsigned char *map, unsigned long mapsize,
+			void *buffer, unsigned long bufsiz,
+			struct strbuf *header)
 {
-	int ret;
+	int status;
 
 	/* Get the data stream */
 	memset(stream, 0, sizeof(*stream));
@@ -1225,44 +1226,25 @@ static int unpack_loose_short_header(git_zstream *stream,
 
 	git_inflate_init(stream);
 	obj_read_unlock();
-	ret = git_inflate(stream, 0);
+	status = git_inflate(stream, 0);
 	obj_read_lock();
-
-	return ret;
-}
-
-int unpack_loose_header(git_zstream *stream,
-			unsigned char *map, unsigned long mapsize,
-			void *buffer, unsigned long bufsiz)
-{
-	int status = unpack_loose_short_header(stream, map, mapsize,
-					       buffer, bufsiz);
-
 	if (status < Z_OK)
 		return status;
 
-	/* Make sure we have the terminating NUL */
-	if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
-		return -1;
-	return 0;
-}
-
-static int unpack_loose_header_to_strbuf(git_zstream *stream, unsigned char *map,
-					 unsigned long mapsize, void *buffer,
-					 unsigned long bufsiz, struct strbuf *header)
-{
-	int status;
-
-	status = unpack_loose_short_header(stream, map, mapsize, buffer, bufsiz);
-	if (status < Z_OK)
-		return -1;
-
 	/*
 	 * Check if entire header is unpacked in the first iteration.
 	 */
 	if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 		return 0;
 
+	/*
+	 * We have a header longer than MAX_HEADER_LEN. The "header"
+	 * here is only non-NULL when we run "cat-file
+	 * --allow-unknown-type".
+	 */
+	if (!header)
+		return -1;
+
 	/*
 	 * buffer[0..bufsiz] was not large enough.  Copy the partial
 	 * result out to header, and then append the result of further
@@ -1410,9 +1392,11 @@ static int loose_object_info(struct repository *r,
 	unsigned long mapsize;
 	void *map;
 	git_zstream stream;
+	int hdr_ret;
 	char hdr[MAX_HEADER_LEN];
 	struct strbuf hdrbuf = STRBUF_INIT;
 	unsigned long size_scratch;
+	int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
 
 	if (oi->delta_base_oid)
 		oidclr(oi->delta_base_oid);
@@ -1446,11 +1430,10 @@ static int loose_object_info(struct repository *r,
 
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) {
-		if (unpack_loose_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
-			status = error(_("unable to unpack %s header with --allow-unknown-type"),
-				       oid_to_hex(oid));
-	} else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
+
+	hdr_ret = unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
+				      allow_unknown ? &hdrbuf : NULL);
+	if (hdr_ret < 0) {
 		status = error(_("unable to unpack %s header"),
 			       oid_to_hex(oid));
 	}
@@ -2555,7 +2538,8 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
+	if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
+				NULL) < 0) {
 		error(_("unable to unpack header of %s"), path);
 		goto out;
 	}
diff --git a/object-store.h b/object-store.h
index d443964447c5ca..31327a7f6c3271 100644
--- a/object-store.h
+++ b/object-store.h
@@ -477,9 +477,21 @@ int for_each_object_in_pack(struct packed_git *p,
 int for_each_packed_object(each_packed_object_fn, void *,
 			   enum for_each_object_flags flags);
 
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns 0 on success. Returns negative values on error.
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes unless an optional
+ * "hdrbuf" argument is non-NULL. This is intended for use with
+ * OBJECT_INFO_ALLOW_UNKNOWN_TYPE to extract the bad type for (error)
+ * reporting. The full header will be extracted to "hdrbuf" for use
+ * with parse_loose_header().
+ */
 int unpack_loose_header(git_zstream *stream, unsigned char *map,
 			unsigned long mapsize, void *buffer,
-			unsigned long bufsiz);
+			unsigned long bufsiz, struct strbuf *hdrbuf);
 int parse_loose_header(const char *hdr, struct object_info *oi,
 		       unsigned int flags);
 int check_object_signature(struct repository *r, const struct object_id *oid,
diff --git a/streaming.c b/streaming.c
index 8beac62cbb7103..cb3c3cf6ff62f6 100644
--- a/streaming.c
+++ b/streaming.c
@@ -233,7 +233,8 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
 				 st->u.loose.mapped,
 				 st->u.loose.mapsize,
 				 st->u.loose.hdr,
-				 sizeof(st->u.loose.hdr)) < 0) ||
+				 sizeof(st->u.loose.hdr),
+				 NULL) < 0) ||
 	    (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) {
 		git_inflate_end(&st->z);
 		munmap(st->u.loose.mapped, st->u.loose.mapsize);

From 9d89f44165b6c4a6265788be4e88410047eb110e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:16 +0200
Subject: [PATCH 041/198] object-file.c: split up ternary in
 parse_loose_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This minor formatting change serves to make a subsequent patch easier
to read.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/object-file.c b/object-file.c
index ef3a1517fed220..e51cf2ca33efcd 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1381,7 +1381,10 @@ int parse_loose_header(const char *hdr,
 	/*
 	 * The length must be followed by a zero byte
 	 */
-	return *hdr ? -1 : type;
+	if (*hdr)
+		return -1;
+
+	return type;
 }
 
 static int loose_object_info(struct repository *r,

From c8be48542e18a713931919f166613dc749a69cd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:17 +0200
Subject: [PATCH 042/198] object-file.c: stop dying in parse_loose_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Start the libification of parse_loose_header() by making it return
error codes and data instead of invoking die() by itself. For now
we'll move the relevant die() call to loose_object_info() and
read_loose_object() to keep this change smaller, but in subsequent
commits we'll also libify those.

Since the refactoring of parse_loose_header_extended() into
parse_loose_header() in an earlier commit, its interface accepts a
"unsigned long *sizep". Rather it accepts a "struct object_info *",
that structure will be populated with information about the object.

It thus makes sense to further libify the interface so that it stops
calling die() when it encounters OBJ_BAD, and instead rely on its
callers to check the populated "oi->typep".

Because of this we don't need to pass in the "unsigned int flags"
which we used for OBJECT_INFO_ALLOW_UNKNOWN_TYPE, we can instead do
that check in loose_object_info().

This also refactors some confusing control flow around the "status"
variable. In some cases we set it to the return value of "error()",
i.e. -1, and later checked if "status < 0" was true.

In another case added in c84a1f3ed4d (sha1_file: refactor read_object,
2017-06-21) (but the behavior pre-dated that) we did checks of "status
>= 0", because at that point "status" had become the return value of
parse_loose_header(). I.e. a non-negative "enum object_type" (unless
we -1, aka. OBJ_BAD).

Now that parse_loose_header() will return 0 on success instead of the
type (which it'll stick into the "struct object_info") we don't need
to conflate these two cases in its callers.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c  | 53 ++++++++++++++++++++++++++------------------------
 object-store.h | 13 +++++++++++--
 streaming.c    |  4 +++-
 3 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/object-file.c b/object-file.c
index e51cf2ca33efcd..31263335af9f53 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1322,9 +1322,7 @@ static void *unpack_loose_rest(git_zstream *stream,
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_loose_header(const char *hdr,
-		       struct object_info *oi,
-		       unsigned int flags)
+int parse_loose_header(const char *hdr, struct object_info *oi)
 {
 	const char *type_buf = hdr;
 	unsigned long size;
@@ -1346,15 +1344,6 @@ int parse_loose_header(const char *hdr,
 	type = type_from_string_gently(type_buf, type_len, 1);
 	if (oi->type_name)
 		strbuf_add(oi->type_name, type_buf, type_len);
-	/*
-	 * Set type to 0 if its an unknown object and
-	 * we're obtaining the type using '--allow-unknown-type'
-	 * option.
-	 */
-	if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0))
-		type = 0;
-	else if (type < 0)
-		die(_("invalid object type"));
 	if (oi->typep)
 		*oi->typep = type;
 
@@ -1384,7 +1373,11 @@ int parse_loose_header(const char *hdr,
 	if (*hdr)
 		return -1;
 
-	return type;
+	/*
+	 * The format is valid, but the type may still be bogus. The
+	 * Caller needs to check its oi->typep.
+	 */
+	return 0;
 }
 
 static int loose_object_info(struct repository *r,
@@ -1399,6 +1392,8 @@ static int loose_object_info(struct repository *r,
 	char hdr[MAX_HEADER_LEN];
 	struct strbuf hdrbuf = STRBUF_INIT;
 	unsigned long size_scratch;
+	enum object_type type_scratch;
+	int parsed_header = 0;
 	int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
 
 	if (oi->delta_base_oid)
@@ -1430,6 +1425,8 @@ static int loose_object_info(struct repository *r,
 
 	if (!oi->sizep)
 		oi->sizep = &size_scratch;
+	if (!oi->typep)
+		oi->typep = &type_scratch;
 
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
@@ -1440,18 +1437,20 @@ static int loose_object_info(struct repository *r,
 		status = error(_("unable to unpack %s header"),
 			       oid_to_hex(oid));
 	}
-
-	if (status < 0) {
-		/* Do nothing */
-	} else if (hdrbuf.len) {
-		if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0)
-			status = error(_("unable to parse %s header with --allow-unknown-type"),
-				       oid_to_hex(oid));
-	} else if ((status = parse_loose_header(hdr, oi, flags)) < 0) {
-		status = error(_("unable to parse %s header"), oid_to_hex(oid));
+	if (!status) {
+		if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi))
+			/*
+			 * oi->{sizep,typep} are meaningless unless
+			 * parse_loose_header() returns >= 0.
+			 */
+			parsed_header = 1;
+		else
+			status = error(_("unable to parse %s header"), oid_to_hex(oid));
 	}
+	if (!allow_unknown && parsed_header && *oi->typep < 0)
+		die(_("invalid object type"));
 
-	if (status >= 0 && oi->contentp) {
+	if (parsed_header && oi->contentp) {
 		*oi->contentp = unpack_loose_rest(&stream, hdr,
 						  *oi->sizep, oid);
 		if (!*oi->contentp) {
@@ -1466,6 +1465,8 @@ static int loose_object_info(struct repository *r,
 	if (oi->sizep == &size_scratch)
 		oi->sizep = NULL;
 	strbuf_release(&hdrbuf);
+	if (oi->typep == &type_scratch)
+		oi->typep = NULL;
 	oi->whence = OI_LOOSE;
 	return (status < 0) ? status : 0;
 }
@@ -2531,6 +2532,7 @@ int read_loose_object(const char *path,
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
 	struct object_info oi = OBJECT_INFO_INIT;
+	oi.typep = type;
 	oi.sizep = size;
 
 	*contents = NULL;
@@ -2547,12 +2549,13 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	*type = parse_loose_header(hdr, &oi, 0);
-	if (*type < 0) {
+	if (parse_loose_header(hdr, &oi) < 0) {
 		error(_("unable to parse header of %s"), path);
 		git_inflate_end(&stream);
 		goto out;
 	}
+	if (*type < 0)
+		die(_("invalid object type"));
 
 	if (*type == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
diff --git a/object-store.h b/object-store.h
index 31327a7f6c3271..65a8e4dc6a85bc 100644
--- a/object-store.h
+++ b/object-store.h
@@ -492,8 +492,17 @@ int for_each_packed_object(each_packed_object_fn, void *,
 int unpack_loose_header(git_zstream *stream, unsigned char *map,
 			unsigned long mapsize, void *buffer,
 			unsigned long bufsiz, struct strbuf *hdrbuf);
-int parse_loose_header(const char *hdr, struct object_info *oi,
-		       unsigned int flags);
+
+/**
+ * parse_loose_header() parses the starting "<type> <len>\0" of an
+ * object. If it doesn't follow that format -1 is returned. To check
+ * the validity of the <type> populate the "typep" in the "struct
+ * object_info". It will be OBJ_BAD if the object type is unknown. The
+ * parsed <len> can be retrieved via "oi->sizep", and from there
+ * passed to unpack_loose_rest().
+ */
+int parse_loose_header(const char *hdr, struct object_info *oi);
+
 int check_object_signature(struct repository *r, const struct object_id *oid,
 			   void *buf, unsigned long size, const char *type);
 int finalize_object_file(const char *tmpfile, const char *filename);
diff --git a/streaming.c b/streaming.c
index cb3c3cf6ff62f6..c3dc241d6a5c2e 100644
--- a/streaming.c
+++ b/streaming.c
@@ -225,6 +225,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	oi.sizep = &st->size;
+	oi.typep = type;
 
 	st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize);
 	if (!st->u.loose.mapped)
@@ -235,7 +236,8 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
 				 st->u.loose.hdr,
 				 sizeof(st->u.loose.hdr),
 				 NULL) < 0) ||
-	    (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) {
+	    (parse_loose_header(st->u.loose.hdr, &oi) < 0) ||
+	    *type < 0) {
 		git_inflate_end(&st->z);
 		munmap(st->u.loose.mapped, st->u.loose.mapsize);
 		return -1;

From b3f04915beb4c2049157b8c95d9ceb029a5e8b77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:18 +0200
Subject: [PATCH 043/198] object-file.c: guard against future bugs in
 loose_object_info()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An earlier version of the preceding commit had a subtle bug where our
"type_scratch" (later assigned to "oi->typep") would be uninitialized
and used in the "!allow_unknown" case, at which point it would contain
a nonsensical value if we'd failed to call parse_loose_header().

The preceding commit introduced "parsed_header" variable to check for
this case, but I think we can do better, let's carry a "oi_header"
variable initially set to NULL, and only set it to "oi" once we're
past parse_loose_header().

This is functionally the same thing, but hopefully makes it even more
obvious in the future that we must not access the "typep" and
"sizep" (or "type_name") unless parse_loose_header() succeeds, but
that accessing other fields set earlier (such as the "disk_sizep" set
earlier) is OK.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/object-file.c b/object-file.c
index 31263335af9f53..d41f444e6cc96b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1393,7 +1393,7 @@ static int loose_object_info(struct repository *r,
 	struct strbuf hdrbuf = STRBUF_INIT;
 	unsigned long size_scratch;
 	enum object_type type_scratch;
-	int parsed_header = 0;
+	struct object_info *oi_header = NULL;
 	int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
 
 	if (oi->delta_base_oid)
@@ -1441,18 +1441,20 @@ static int loose_object_info(struct repository *r,
 		if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi))
 			/*
 			 * oi->{sizep,typep} are meaningless unless
-			 * parse_loose_header() returns >= 0.
+			 * parse_loose_header() returns >= 0. Let's
+			 * access them as "oi_header" (just an alias
+			 * for "oi") below to make that intent clear.
 			 */
-			parsed_header = 1;
+			oi_header = oi;
 		else
 			status = error(_("unable to parse %s header"), oid_to_hex(oid));
 	}
-	if (!allow_unknown && parsed_header && *oi->typep < 0)
+	if (!allow_unknown && oi_header && *oi_header->typep < 0)
 		die(_("invalid object type"));
 
-	if (parsed_header && oi->contentp) {
+	if (oi_header && oi->contentp) {
 		*oi->contentp = unpack_loose_rest(&stream, hdr,
-						  *oi->sizep, oid);
+						  *oi_header->sizep, oid);
 		if (!*oi->contentp) {
 			git_inflate_end(&stream);
 			status = -1;

From b45bd7a52c9c485d9738db8058c8f445c613edec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:19 +0200
Subject: [PATCH 044/198] object-file.c: return -1, not "status" from
 unpack_loose_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return a -1 when git_inflate() fails instead of whatever Z_* status
we'd get from zlib.c. This makes no difference to any error we report,
but makes it more obvious that we don't care about the specific zlib
error codes here.

See d21f8426907 (unpack_sha1_header(): detect malformed object header,
2016-09-25) for the commit that added the "return status" code. As far
as I can tell there was never a real reason (e.g. different reporting)
for carrying down the "status" as opposed to "-1".

At the time that d21f8426907 was written there was a corresponding
"ret < Z_OK" check right after the unpack_sha1_header() call (the
"unpack_sha1_header()" function was later rename to our current
"unpack_loose_header()").

However, that check was removed in c84a1f3ed4d (sha1_file: refactor
read_object, 2017-06-21) without changing the corresponding return
code.

So let's do the minor cleanup of also changing this function to return
a -1.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/object-file.c b/object-file.c
index d41f444e6cc96b..956ca26051848e 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1229,7 +1229,7 @@ int unpack_loose_header(git_zstream *stream,
 	status = git_inflate(stream, 0);
 	obj_read_lock();
 	if (status < Z_OK)
-		return status;
+		return -1;
 
 	/*
 	 * Check if entire header is unpacked in the first iteration.

From 211a82a896400b306d39de33f37cc89728d9e146 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:20 +0200
Subject: [PATCH 045/198] object-file.c: return -2 on "header too long" in
 unpack_loose_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split up the return code for "header too long" from the generic
negative return value unpack_loose_header() returns, and report via
error() if we exceed MAX_HEADER_LEN.

As a test added earlier in this series in t1006-cat-file.sh shows
we'll correctly emit zlib errors from zlib.c already in this case, so
we have no need to carry those return codes further down the
stack. Let's instead just return -2 saying we ran into the
MAX_HEADER_LEN limit, or other negative values for "unable to unpack
<OID> header".

I tried setting up an enum just for these three return values, but I
think the result was less readable. Let's consider doing that if we
gain even more return values. For now let's do the next best thing and
enumerate our known return values, and BUG() if we encounter one we
don't know about.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c       | 16 +++++++++++++---
 object-store.h      |  6 ++++--
 t/t1006-cat-file.sh |  2 +-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/object-file.c b/object-file.c
index 956ca26051848e..1866115a1c5870 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1243,7 +1243,7 @@ int unpack_loose_header(git_zstream *stream,
 	 * --allow-unknown-type".
 	 */
 	if (!header)
-		return -1;
+		return -2;
 
 	/*
 	 * buffer[0..bufsiz] was not large enough.  Copy the partial
@@ -1264,7 +1264,7 @@ int unpack_loose_header(git_zstream *stream,
 		stream->next_out = buffer;
 		stream->avail_out = bufsiz;
 	} while (status != Z_STREAM_END);
-	return -1;
+	return -2;
 }
 
 static void *unpack_loose_rest(git_zstream *stream,
@@ -1433,9 +1433,19 @@ static int loose_object_info(struct repository *r,
 
 	hdr_ret = unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr),
 				      allow_unknown ? &hdrbuf : NULL);
-	if (hdr_ret < 0) {
+	switch (hdr_ret) {
+	case 0:
+		break;
+	case -1:
 		status = error(_("unable to unpack %s header"),
 			       oid_to_hex(oid));
+		break;
+	case -2:
+		status = error(_("header for %s too long, exceeds %d bytes"),
+			       oid_to_hex(oid), MAX_HEADER_LEN);
+		break;
+	default:
+		BUG("unknown hdr_ret value %d", hdr_ret);
 	}
 	if (!status) {
 		if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi))
diff --git a/object-store.h b/object-store.h
index 65a8e4dc6a85bc..1151ce8e8209a0 100644
--- a/object-store.h
+++ b/object-store.h
@@ -481,13 +481,15 @@ int for_each_packed_object(each_packed_object_fn, void *,
  * unpack_loose_header() initializes the data stream needed to unpack
  * a loose object header.
  *
- * Returns 0 on success. Returns negative values on error.
+ * Returns 0 on success. Returns negative values on error. If the
+ * header exceeds MAX_HEADER_LEN -2 will be returned.
  *
  * It will only parse up to MAX_HEADER_LEN bytes unless an optional
  * "hdrbuf" argument is non-NULL. This is intended for use with
  * OBJECT_INFO_ALLOW_UNKNOWN_TYPE to extract the bad type for (error)
  * reporting. The full header will be extracted to "hdrbuf" for use
- * with parse_loose_header().
+ * with parse_loose_header(), -2 will still be returned from this
+ * function to indicate that the header was too long.
  */
 int unpack_loose_header(git_zstream *stream, unsigned char *map,
 			unsigned long mapsize, void *buffer,
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 86fd2a90ca773d..06d38e1fae6fca 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -440,7 +440,7 @@ bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_t
 
 test_expect_success 'die on broken object with large type under -t and -s without --allow-unknown-type' '
 	cat >err.expect <<-EOF &&
-	error: unable to unpack $bogus_sha1 header
+	error: header for $bogus_sha1 too long, exceeds 32 bytes
 	fatal: git cat-file: could not get object info
 	EOF
 

From 70241b6ebbb48608f360f648e88e523077531fbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:21 +0200
Subject: [PATCH 046/198] fsck: don't hard die on invalid object types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the error fsck emits on invalid object types, such as:

    $ git hash-object --stdin -w -t garbage --literally </dev/null
    <OID>

>From the very ungraceful error of:

    $ git fsck
    fatal: invalid object type
    $

To:

    $ git fsck
    error: hash mismatch for <OID_PATH> (expected <OID>)
    error: <OID>: object corrupt or missing: <OID_PATH>
    [ the rest of the fsck output here, i.e. it didn't hard die ]

We'll still exit with non-zero, but now we'll finish the rest of the
traversal. The tests that's being added here asserts that we'll still
complain about other fsck issues (e.g. an unrelated dangling blob).

To do this we need to pass down the "OBJECT_INFO_ALLOW_UNKNOWN_TYPE"
flag from read_loose_object() through to parse_loose_header(). Since
the read_loose_object() function is only used in builtin/fsck.c we can
simply change it. See f6371f92104 (sha1_file: add read_loose_object()
function, 2017-01-13) for the introduction of read_loose_object().

Why are we complaining about a "hash mismatch" for an object of a type
we don't know about? We shouldn't. This is the bare minimal change
needed to not make fsck hard die on a repository that's been corrupted
in this manner. In subsequent commits we'll teach fsck to recognize
this particular type of corruption and emit a better error message.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fsck.c  |  3 ++-
 object-file.c   | 11 ++++++++---
 object-store.h  |  3 ++-
 t/t1450-fsck.sh | 14 +++++++-------
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 87a99b0108ea63..38b515deb690f3 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -600,7 +600,8 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 	void *contents;
 	int eaten;
 
-	if (read_loose_object(path, oid, &type, &size, &contents) < 0) {
+	if (read_loose_object(path, oid, &type, &size, &contents,
+			      OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) {
 		errors_found |= ERROR_OBJECT;
 		error(_("%s: object corrupt or missing: %s"),
 		      oid_to_hex(oid), path);
diff --git a/object-file.c b/object-file.c
index 1866115a1c5870..8fb55fc6f58f8a 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2536,7 +2536,8 @@ int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
 		      enum object_type *type,
 		      unsigned long *size,
-		      void **contents)
+		      void **contents,
+		      unsigned int oi_flags)
 {
 	int ret = -1;
 	void *map = NULL;
@@ -2544,6 +2545,7 @@ int read_loose_object(const char *path,
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
 	struct object_info oi = OBJECT_INFO_INIT;
+	int allow_unknown = oi_flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
 	oi.typep = type;
 	oi.sizep = size;
 
@@ -2566,8 +2568,11 @@ int read_loose_object(const char *path,
 		git_inflate_end(&stream);
 		goto out;
 	}
-	if (*type < 0)
-		die(_("invalid object type"));
+	if (!allow_unknown && *type < 0) {
+		error(_("header for %s declares an unknown type"), path);
+		git_inflate_end(&stream);
+		goto out;
+	}
 
 	if (*type == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
diff --git a/object-store.h b/object-store.h
index 1151ce8e8209a0..94ff03072c143f 100644
--- a/object-store.h
+++ b/object-store.h
@@ -245,7 +245,8 @@ int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
 		      enum object_type *type,
 		      unsigned long *size,
-		      void **contents);
+		      void **contents,
+		      unsigned int oi_flags);
 
 /* Retry packed storage after checking packed and loose storage */
 #define HAS_OBJECT_RECHECK_PACKED 1
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index f10d6f7b7e8c7f..d8303db9709d49 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -863,16 +863,16 @@ test_expect_success 'detect corrupt index file in fsck' '
 	test_i18ngrep "bad index file" errors
 '
 
-test_expect_success 'fsck hard errors on an invalid object type' '
+test_expect_success 'fsck error and recovery on invalid object type' '
 	git init --bare garbage-type &&
 	empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob </dev/null) &&
 	garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally </dev/null) &&
-	cat >err.expect <<-\EOF &&
-	fatal: invalid object type
-	EOF
-	test_must_fail git -C garbage-type fsck >out.actual 2>err.actual &&
-	test_cmp err.expect err.actual &&
-	test_must_be_empty out.actual
+	test_must_fail git -C garbage-type fsck >out 2>err &&
+	grep -e "^error" -e "^fatal" err >errors &&
+	test_line_count = 2 errors &&
+	grep "error: hash mismatch for" err &&
+	grep "$garbage_blob: object corrupt or missing:" err &&
+	grep "dangling blob $empty_blob" out
 '
 
 test_done

From c2a9c37085070449c28b70e643d0627f4d54b1a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:22 +0200
Subject: [PATCH 047/198] object-store.h: move read_loose_object() below
 'struct object_info'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the declaration of read_loose_object() below "struct
object_info". In the next commit we'll add a "struct object_info *"
parameter to it, moving it will avoid a forward declaration of the
struct.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-store.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/object-store.h b/object-store.h
index 94ff03072c143f..72d668b1674995 100644
--- a/object-store.h
+++ b/object-store.h
@@ -234,20 +234,6 @@ int pretend_object_file(void *, unsigned long, enum object_type,
 
 int force_object_loose(const struct object_id *oid, time_t mtime);
 
-/*
- * Open the loose object at path, check its hash, and return the contents,
- * type, and size. If the object is a blob, then "contents" may return NULL,
- * to allow streaming of large blobs.
- *
- * Returns 0 on success, negative on error (details may be written to stderr).
- */
-int read_loose_object(const char *path,
-		      const struct object_id *expected_oid,
-		      enum object_type *type,
-		      unsigned long *size,
-		      void **contents,
-		      unsigned int oi_flags);
-
 /* Retry packed storage after checking packed and loose storage */
 #define HAS_OBJECT_RECHECK_PACKED 1
 
@@ -388,6 +374,20 @@ int oid_object_info_extended(struct repository *r,
 			     const struct object_id *,
 			     struct object_info *, unsigned flags);
 
+/*
+ * Open the loose object at path, check its hash, and return the contents,
+ * type, and size. If the object is a blob, then "contents" may return NULL,
+ * to allow streaming of large blobs.
+ *
+ * Returns 0 on success, negative on error (details may be written to stderr).
+ */
+int read_loose_object(const char *path,
+		      const struct object_id *expected_oid,
+		      enum object_type *type,
+		      unsigned long *size,
+		      void **contents,
+		      unsigned int oi_flags);
+
 /*
  * Iterate over the files in the loose-object parts of the object
  * directory "path", triggering the following callbacks:

From 87290e0ceeb14e30758936ad922ccc580857c7db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:23 +0200
Subject: [PATCH 048/198] fsck: report invalid types recorded in objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Continue the work in the preceding commit and improve the error on:

    $ git hash-object --stdin -w -t garbage --literally </dev/null
    $ git fsck
    error: hash mismatch for <OID_PATH> (expected <OID>)
    error: <OID>: object corrupt or missing: <OID_PATH>
    [ other fsck output ]

To instead emit:

    $ git fsck
    error: <OID>: object is of unknown type 'garbage': <OID_PATH>
    [ other fsck output ]

The complaint about a "hash mismatch" was simply an emergent property
of how we'd fall though from read_loose_object() into fsck_loose()
when we didn't get the data we expected. Now we'll correctly note that
the object type is invalid.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fsck.c  | 22 ++++++++++++++++++----
 object-file.c   | 13 +++++--------
 object-store.h  |  4 ++--
 t/t1450-fsck.sh | 24 +++++++++++++++++++++---
 4 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 38b515deb690f3..32f11dc1fe65c6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -599,12 +599,26 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 	unsigned long size;
 	void *contents;
 	int eaten;
-
-	if (read_loose_object(path, oid, &type, &size, &contents,
-			      OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) {
-		errors_found |= ERROR_OBJECT;
+	struct strbuf sb = STRBUF_INIT;
+	unsigned int oi_flags = OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
+	struct object_info oi;
+	int found = 0;
+	oi.type_name = &sb;
+	oi.sizep = &size;
+	oi.typep = &type;
+
+	if (read_loose_object(path, oid, &contents, &oi, oi_flags) < 0) {
+		found |= ERROR_OBJECT;
 		error(_("%s: object corrupt or missing: %s"),
 		      oid_to_hex(oid), path);
+	}
+	if (type < 0) {
+		found |= ERROR_OBJECT;
+		error(_("%s: object is of unknown type '%s': %s"),
+		      oid_to_hex(oid), sb.buf, path);
+	}
+	if (found) {
+		errors_found |= ERROR_OBJECT;
 		return 0; /* keep checking other objects */
 	}
 
diff --git a/object-file.c b/object-file.c
index 8fb55fc6f58f8a..e550ea0c7cf6bf 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2534,9 +2534,8 @@ static int check_stream_oid(git_zstream *stream,
 
 int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
-		      enum object_type *type,
-		      unsigned long *size,
 		      void **contents,
+		      struct object_info *oi,
 		      unsigned int oi_flags)
 {
 	int ret = -1;
@@ -2544,10 +2543,9 @@ int read_loose_object(const char *path,
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
-	struct object_info oi = OBJECT_INFO_INIT;
 	int allow_unknown = oi_flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
-	oi.typep = type;
-	oi.sizep = size;
+	enum object_type *type = oi->typep;
+	unsigned long *size = oi->sizep;
 
 	*contents = NULL;
 
@@ -2563,7 +2561,7 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	if (parse_loose_header(hdr, &oi) < 0) {
+	if (parse_loose_header(hdr, oi) < 0) {
 		error(_("unable to parse header of %s"), path);
 		git_inflate_end(&stream);
 		goto out;
@@ -2585,8 +2583,7 @@ int read_loose_object(const char *path,
 			goto out;
 		}
 		if (check_object_signature(the_repository, expected_oid,
-					   *contents, *size,
-					   type_name(*type))) {
+					   *contents, *size, oi->type_name->buf)) {
 			error(_("hash mismatch for %s (expected %s)"), path,
 			      oid_to_hex(expected_oid));
 			free(*contents);
diff --git a/object-store.h b/object-store.h
index 72d668b1674995..96a5970f314914 100644
--- a/object-store.h
+++ b/object-store.h
@@ -376,6 +376,7 @@ int oid_object_info_extended(struct repository *r,
 
 /*
  * Open the loose object at path, check its hash, and return the contents,
+ * use the "oi" argument to assert things about the object, or e.g. populate its
  * type, and size. If the object is a blob, then "contents" may return NULL,
  * to allow streaming of large blobs.
  *
@@ -383,9 +384,8 @@ int oid_object_info_extended(struct repository *r,
  */
 int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
-		      enum object_type *type,
-		      unsigned long *size,
 		      void **contents,
+		      struct object_info *oi,
 		      unsigned int oi_flags);
 
 /*
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index d8303db9709d49..da2658155c7daa 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -66,6 +66,25 @@ test_expect_success 'object with hash mismatch' '
 	)
 '
 
+test_expect_success 'object with hash and type mismatch' '
+	git init --bare hash-type-mismatch &&
+	(
+		cd hash-type-mismatch &&
+		oid=$(echo blob | git hash-object -w --stdin -t garbage --literally) &&
+		old=$(test_oid_to_path "$oid") &&
+		new=$(dirname $old)/$(test_oid ff_2) &&
+		oid="$(dirname $new)$(basename $new)" &&
+		mv objects/$old objects/$new &&
+		git update-index --add --cacheinfo 100644 $oid foo &&
+		tree=$(git write-tree) &&
+		cmt=$(echo bogus | git commit-tree $tree) &&
+		git update-ref refs/heads/bogus $cmt &&
+		test_must_fail git fsck 2>out &&
+		grep "^error: hash mismatch for " out &&
+		grep "^error: $oid: object is of unknown type '"'"'garbage'"'"'" out
+	)
+'
+
 test_expect_success 'branch pointing to non-commit' '
 	git rev-parse HEAD^{tree} >.git/refs/heads/invalid &&
 	test_when_finished "git update-ref -d refs/heads/invalid" &&
@@ -869,9 +888,8 @@ test_expect_success 'fsck error and recovery on invalid object type' '
 	garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally </dev/null) &&
 	test_must_fail git -C garbage-type fsck >out 2>err &&
 	grep -e "^error" -e "^fatal" err >errors &&
-	test_line_count = 2 errors &&
-	grep "error: hash mismatch for" err &&
-	grep "$garbage_blob: object corrupt or missing:" err &&
+	test_line_count = 1 errors &&
+	grep "$garbage_blob: object is of unknown type '"'"'garbage'"'"':" err &&
 	grep "dangling blob $empty_blob" out
 '
 

From 27150d03c4ae81eed1a3d636cae359474c82d1cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Sat, 10 Jul 2021 15:37:24 +0200
Subject: [PATCH 049/198] fsck: report invalid object type-path combinations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Improve the error that's emitted in cases where we find a loose object
we parse, but which isn't at the location we expect it to be.

Before this change we'd prefix the error with a not-a-OID derived from
the path at which the object was found, due to an emergent behavior in
how we'd end up with an "OID" in these codepaths.

Now we'll instead say what object we hashed, and what path it was
found at. Before this patch series e.g.:

    $ git hash-object --stdin -w -t blob </dev/null
    e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
    $ mv objects/e6/ objects/e7

Would emit ("[...]" used to abbreviate the OIDs):

    git fsck
    error: hash mismatch for ./objects/e7/9d[...] (expected e79d[...])
    error: e79d[...]: object corrupt or missing: ./objects/e7/9d[...]

Now we'll instead emit:

    error: e69d[...]: hash-path mismatch, found at: ./objects/e7/9d[...]

Furthermore, we'll do the right thing when the object type and its
location are bad. I.e. this case:

    $ git hash-object --stdin -w -t garbage --literally </dev/null
    8315a83d2acc4c174aed59430f9a9c4ed926440f
    $ mv objects/83 objects/84

As noted in an earlier commits we'd simply die early in those cases,
until preceding commits fixed the hard die on invalid object type:

    $ git fsck
    fatal: invalid object type

Now we'll instead emit sensible error messages:

    $ git fsck
    error: 8315[...]: hash-path mismatch, found at: ./objects/84/15[...]
    error: 8315[...]: object is of unknown type 'garbage': ./objects/84/15[...]

In both fsck.c and object-file.c we're using null_oid as a sentinel
value for checking whether we got far enough to be certain that the
issue was indeed this OID mismatch.

In the case of check_object_signature() I don't really trust all the
moving parts there to behave consistently, in the face of future
refactorings. Getting it wrong would mean that we'd potentially emit
no error at all on a failing check_object_signature(), or worse
misreport whatever issue we encountered. So let's use the new bug()
function to ferry and return code up to fsck_loose() in that case.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fast-export.c |  2 +-
 builtin/fsck.c        | 13 +++++++++----
 builtin/index-pack.c  |  2 +-
 builtin/mktag.c       |  3 ++-
 object-file.c         | 21 ++++++++++++---------
 object-store.h        |  4 +++-
 object.c              |  4 ++--
 pack-check.c          |  3 ++-
 t/t1006-cat-file.sh   |  2 +-
 t/t1450-fsck.sh       |  8 +++++---
 10 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 3c20f164f0f051..48a3b6a7f8f01b 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -312,7 +312,7 @@ static void export_blob(const struct object_id *oid)
 		if (!buf)
 			die("could not read blob %s", oid_to_hex(oid));
 		if (check_object_signature(the_repository, oid, buf, size,
-					   type_name(type)) < 0)
+					   type_name(type), NULL) < 0)
 			die("oid mismatch in blob %s", oid_to_hex(oid));
 		object = parse_object_buffer(the_repository, oid, type,
 					     size, buf, &eaten);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 32f11dc1fe65c6..96df1aadbfaa73 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -602,20 +602,25 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 	struct strbuf sb = STRBUF_INIT;
 	unsigned int oi_flags = OBJECT_INFO_ALLOW_UNKNOWN_TYPE;
 	struct object_info oi;
+	struct object_id real_oid = *null_oid();
 	int found = 0;
 	oi.type_name = &sb;
 	oi.sizep = &size;
 	oi.typep = &type;
 
-	if (read_loose_object(path, oid, &contents, &oi, oi_flags) < 0) {
+	if (read_loose_object(path, oid, &real_oid, &contents, &oi, oi_flags) < 0) {
 		found |= ERROR_OBJECT;
-		error(_("%s: object corrupt or missing: %s"),
-		      oid_to_hex(oid), path);
+		if (!oideq(&real_oid, oid))
+			error(_("%s: hash-path mismatch, found at: %s"),
+			      oid_to_hex(&real_oid), path);
+		else
+			error(_("%s: object corrupt or missing: %s"),
+			      oid_to_hex(oid), path);
 	}
 	if (type < 0) {
 		found |= ERROR_OBJECT;
 		error(_("%s: object is of unknown type '%s': %s"),
-		      oid_to_hex(oid), sb.buf, path);
+		      oid_to_hex(&real_oid), sb.buf, path);
 	}
 	if (found) {
 		errors_found |= ERROR_OBJECT;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3fbc5d70777e50..bf860b6555e2a0 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1421,7 +1421,7 @@ static void fix_unresolved_deltas(struct hashfile *f)
 
 		if (check_object_signature(the_repository, &d->oid,
 					   data, size,
-					   type_name(type)))
+					   type_name(type), NULL))
 			die(_("local object %s is corrupt"), oid_to_hex(&d->oid));
 
 		/*
diff --git a/builtin/mktag.c b/builtin/mktag.c
index dddcccdd368328..3b2dbbb37e6fe1 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -62,7 +62,8 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type)
 
 	repl = lookup_replace_object(the_repository, tagged_oid);
 	ret = check_object_signature(the_repository, repl,
-				     buffer, size, type_name(*tagged_type));
+				     buffer, size, type_name(*tagged_type),
+				     NULL);
 	free(buffer);
 
 	return ret;
diff --git a/object-file.c b/object-file.c
index e550ea0c7cf6bf..923ff759e1914d 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1039,9 +1039,11 @@ void *xmmap(void *start, size_t length,
  * the streaming interface and rehash it to do the same.
  */
 int check_object_signature(struct repository *r, const struct object_id *oid,
-			   void *map, unsigned long size, const char *type)
+			   void *map, unsigned long size, const char *type,
+			   struct object_id *real_oidp)
 {
-	struct object_id real_oid;
+	struct object_id tmp;
+	struct object_id *real_oid = real_oidp ? real_oidp : &tmp;
 	enum object_type obj_type;
 	struct git_istream *st;
 	git_hash_ctx c;
@@ -1049,8 +1051,8 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 	int hdrlen;
 
 	if (map) {
-		hash_object_file(r->hash_algo, map, size, type, &real_oid);
-		return !oideq(oid, &real_oid) ? -1 : 0;
+		hash_object_file(r->hash_algo, map, size, type, real_oid);
+		return !oideq(oid, real_oid) ? -1 : 0;
 	}
 
 	st = open_istream(r, oid, &obj_type, &size, NULL);
@@ -1075,9 +1077,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 			break;
 		r->hash_algo->update_fn(&c, buf, readlen);
 	}
-	r->hash_algo->final_oid_fn(&real_oid, &c);
+	r->hash_algo->final_oid_fn(real_oid, &c);
 	close_istream(st);
-	return !oideq(oid, &real_oid) ? -1 : 0;
+	return !oideq(oid, real_oid) ? -1 : 0;
 }
 
 int git_open_cloexec(const char *name, int flags)
@@ -2534,6 +2536,7 @@ static int check_stream_oid(git_zstream *stream,
 
 int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
+		      struct object_id *real_oid,
 		      void **contents,
 		      struct object_info *oi,
 		      unsigned int oi_flags)
@@ -2583,9 +2586,9 @@ int read_loose_object(const char *path,
 			goto out;
 		}
 		if (check_object_signature(the_repository, expected_oid,
-					   *contents, *size, oi->type_name->buf)) {
-			error(_("hash mismatch for %s (expected %s)"), path,
-			      oid_to_hex(expected_oid));
+					   *contents, *size, oi->type_name->buf, real_oid)) {
+			if (oideq(real_oid, null_oid()))
+				BUG("should only get OID mismatch errors with mapped contents");
 			free(*contents);
 			goto out;
 		}
diff --git a/object-store.h b/object-store.h
index 96a5970f314914..9fc69016361e12 100644
--- a/object-store.h
+++ b/object-store.h
@@ -384,6 +384,7 @@ int oid_object_info_extended(struct repository *r,
  */
 int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
+		      struct object_id *real_oid,
 		      void **contents,
 		      struct object_info *oi,
 		      unsigned int oi_flags);
@@ -507,7 +508,8 @@ int unpack_loose_header(git_zstream *stream, unsigned char *map,
 int parse_loose_header(const char *hdr, struct object_info *oi);
 
 int check_object_signature(struct repository *r, const struct object_id *oid,
-			   void *buf, unsigned long size, const char *type);
+			   void *buf, unsigned long size, const char *type,
+			   struct object_id *real_oidp);
 int finalize_object_file(const char *tmpfile, const char *filename);
 int check_and_freshen_file(const char *fn, int freshen);
 
diff --git a/object.c b/object.c
index 14188453c56706..5467ead3285dd4 100644
--- a/object.c
+++ b/object.c
@@ -261,7 +261,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid)
 	if ((obj && obj->type == OBJ_BLOB && repo_has_object_file(r, oid)) ||
 	    (!obj && repo_has_object_file(r, oid) &&
 	     oid_object_info(r, oid, NULL) == OBJ_BLOB)) {
-		if (check_object_signature(r, repl, NULL, 0, NULL) < 0) {
+		if (check_object_signature(r, repl, NULL, 0, NULL, NULL) < 0) {
 			error(_("hash mismatch %s"), oid_to_hex(oid));
 			return NULL;
 		}
@@ -272,7 +272,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid)
 	buffer = repo_read_object_file(r, oid, &type, &size);
 	if (buffer) {
 		if (check_object_signature(r, repl, buffer, size,
-					   type_name(type)) < 0) {
+					   type_name(type), NULL) < 0) {
 			free(buffer);
 			error(_("hash mismatch %s"), oid_to_hex(repl));
 			return NULL;
diff --git a/pack-check.c b/pack-check.c
index 4b089fe8ec051a..e6aa4442c905d4 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -142,7 +142,8 @@ static int verify_packfile(struct repository *r,
 			err = error("cannot unpack %s from %s at offset %"PRIuMAX"",
 				    oid_to_hex(&oid), p->pack_name,
 				    (uintmax_t)entries[i].offset);
-		else if (check_object_signature(r, &oid, data, size, type_name(type)))
+		else if (check_object_signature(r, &oid, data, size,
+						type_name(type), NULL))
 			err = error("packed %s from %s is corrupt",
 				    oid_to_hex(&oid), p->pack_name);
 		else if (fn) {
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 06d38e1fae6fca..72386cfec0efed 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -490,7 +490,7 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' '
 		# Swap the two to corrupt the repository
 		mv -f "$other_path" "$empty_path" &&
 		test_must_fail git fsck 2>err.fsck &&
-		grep "hash mismatch" err.fsck &&
+		grep "hash-path mismatch" err.fsck &&
 
 		# confirm that cat-file is reading the new swapped-in
 		# blob...
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index da2658155c7daa..7d0d57564b5048 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -53,6 +53,7 @@ test_expect_success 'object with hash mismatch' '
 	(
 		cd hash-mismatch &&
 		oid=$(echo blob | git hash-object -w --stdin) &&
+		oldoid=$oid &&
 		old=$(test_oid_to_path "$oid") &&
 		new=$(dirname $old)/$(test_oid ff_2) &&
 		oid="$(dirname $new)$(basename $new)" &&
@@ -62,7 +63,7 @@ test_expect_success 'object with hash mismatch' '
 		cmt=$(echo bogus | git commit-tree $tree) &&
 		git update-ref refs/heads/bogus $cmt &&
 		test_must_fail git fsck 2>out &&
-		test_i18ngrep "$oid.*corrupt" out
+		grep "$oldoid: hash-path mismatch, found at: .*$new" out
 	)
 '
 
@@ -71,6 +72,7 @@ test_expect_success 'object with hash and type mismatch' '
 	(
 		cd hash-type-mismatch &&
 		oid=$(echo blob | git hash-object -w --stdin -t garbage --literally) &&
+		oldoid=$oid &&
 		old=$(test_oid_to_path "$oid") &&
 		new=$(dirname $old)/$(test_oid ff_2) &&
 		oid="$(dirname $new)$(basename $new)" &&
@@ -80,8 +82,8 @@ test_expect_success 'object with hash and type mismatch' '
 		cmt=$(echo bogus | git commit-tree $tree) &&
 		git update-ref refs/heads/bogus $cmt &&
 		test_must_fail git fsck 2>out &&
-		grep "^error: hash mismatch for " out &&
-		grep "^error: $oid: object is of unknown type '"'"'garbage'"'"'" out
+		grep "^error: $oldoid: hash-path mismatch, found at: .*$new" out &&
+		grep "^error: $oldoid: object is of unknown type '"'"'garbage'"'"'" out
 	)
 '
 

From f6cd080bd2d6d6e721a52661a5da19f8835a01c3 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:22 +0000
Subject: [PATCH 050/198] diff --color-moved: add perf tests

Add some tests so we can monitor changes to the performance of the
move detection code. The tests record the performance of a single
large diff and a sequence of smaller diffs.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/p4002-diff-color-moved.sh | 45 ++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100755 t/perf/p4002-diff-color-moved.sh

diff --git a/t/perf/p4002-diff-color-moved.sh b/t/perf/p4002-diff-color-moved.sh
new file mode 100755
index 00000000000000..ad56bcb71e4cd4
--- /dev/null
+++ b/t/perf/p4002-diff-color-moved.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+test_description='Tests diff --color-moved performance'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+if ! git rev-parse --verify v2.29.0^{commit} >/dev/null
+then
+	skip_all='skipping because tag v2.29.0 was not found'
+	test_done
+fi
+
+GIT_PAGER_IN_USE=1
+test_export GIT_PAGER_IN_USE
+
+test_perf 'diff --no-color-moved --no-color-moved-ws large change' '
+	git diff --no-color-moved --no-color-moved-ws v2.28.0 v2.29.0
+'
+
+test_perf 'diff --color-moved --no-color-moved-ws large change' '
+	git diff --color-moved=zebra --no-color-moved-ws v2.28.0 v2.29.0
+'
+
+test_perf 'diff --color-moved-ws=allow-indentation-change large change' '
+	git diff --color-moved=zebra --color-moved-ws=allow-indentation-change \
+		v2.28.0 v2.29.0
+'
+
+test_perf 'log --no-color-moved --no-color-moved-ws' '
+	git log --no-color-moved --no-color-moved-ws --no-merges --patch \
+		-n1000 v2.29.0
+'
+
+test_perf 'log --color-moved --no-color-moved-ws' '
+	git log --color-moved=zebra --no-color-moved-ws --no-merges --patch \
+		-n1000 v2.29.0
+'
+
+test_perf 'log --color-moved-ws=allow-indentation-change' '
+	git log --color-moved=zebra --color-moved-ws=allow-indentation-change \
+		--no-merges --patch -n1000 v2.29.0
+'
+
+test_done

From 7a07e3ee608167fa9af0adc05a5861125d2d5bf4 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:23 +0000
Subject: [PATCH 051/198] diff --color-moved=zebra: fix alternate coloring

b0a2ba4776 ("diff --color-moved=zebra: be stricter with color
alternation", 2018-11-23) sought to avoid using the alternate colors
unless there are two adjacent moved blocks of the same
sign. Unfortunately it contains two bugs that prevented it from fixing
the problem properly. Firstly `last_symbol` is reset at the start of
each iteration of the loop losing the symbol of the last line and
secondly when deciding whether to use the alternate color it should be
checking if the current line is the same sign of the last line, not a
different sign. The combination of the two errors means that we still
use the alternate color when we should do but we also use it when we
shouldn't. This is most noticable when using
--color-moved-ws=allow-indentation-change with hunks like

-this line gets indented
+    this line gets indented

where the post image is colored with newMovedAlternate rather than
newMoved. While this does not matter much, the next commit will change
the coloring to be correct in this case, so lets fix the bug here to
make it clear why the output is changing and add a regression test.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                     |  4 +--
 t/t4015-diff-whitespace.sh | 72 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 52c791574b71c8..cb068f8258c039 100644
--- a/diff.c
+++ b/diff.c
@@ -1142,6 +1142,7 @@ static void mark_color_as_moved(struct diff_options *o,
 	struct moved_block *pmb = NULL; /* potentially moved blocks */
 	int pmb_nr = 0, pmb_alloc = 0;
 	int n, flipped_block = 0, block_length = 0;
+	enum diff_symbol last_symbol = 0;
 
 
 	for (n = 0; n < o->emitted_symbols->nr; n++) {
@@ -1149,7 +1150,6 @@ static void mark_color_as_moved(struct diff_options *o,
 		struct moved_entry *key;
 		struct moved_entry *match = NULL;
 		struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
-		enum diff_symbol last_symbol = 0;
 
 		switch (l->s) {
 		case DIFF_SYMBOL_PLUS:
@@ -1214,7 +1214,7 @@ static void mark_color_as_moved(struct diff_options *o,
 			}
 
 			if (adjust_last_block(o, n, block_length) &&
-			    pmb_nr && last_symbol != l->s)
+			    pmb_nr && last_symbol == l->s)
 				flipped_block = (flipped_block + 1) % 2;
 			else
 				flipped_block = 0;
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 2c13b62d3c6548..920114cd795c01 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1442,6 +1442,78 @@ test_expect_success 'detect permutations inside moved code -- dimmed-zebra' '
 	test_cmp expected actual
 '
 
+test_expect_success 'zebra alternate color is only used when necessary' '
+	cat >old.txt <<-\EOF &&
+	line 1A should be marked as oldMoved newMovedAlternate
+	line 1B should be marked as oldMoved newMovedAlternate
+	unchanged
+	line 2A should be marked as oldMoved newMovedAlternate
+	line 2B should be marked as oldMoved newMovedAlternate
+	line 3A should be marked as oldMovedAlternate newMoved
+	line 3B should be marked as oldMovedAlternate newMoved
+	unchanged
+	line 4A should be marked as oldMoved newMovedAlternate
+	line 4B should be marked as oldMoved newMovedAlternate
+	line 5A should be marked as oldMovedAlternate newMoved
+	line 5B should be marked as oldMovedAlternate newMoved
+	line 6A should be marked as oldMoved newMoved
+	line 6B should be marked as oldMoved newMoved
+	EOF
+	cat >new.txt <<-\EOF &&
+	  line 1A should be marked as oldMoved newMovedAlternate
+	  line 1B should be marked as oldMoved newMovedAlternate
+	unchanged
+	  line 3A should be marked as oldMovedAlternate newMoved
+	  line 3B should be marked as oldMovedAlternate newMoved
+	  line 2A should be marked as oldMoved newMovedAlternate
+	  line 2B should be marked as oldMoved newMovedAlternate
+	unchanged
+	  line 6A should be marked as oldMoved newMoved
+	  line 6B should be marked as oldMoved newMoved
+	    line 4A should be marked as oldMoved newMovedAlternate
+	    line 4B should be marked as oldMoved newMovedAlternate
+	  line 5A should be marked as oldMovedAlternate newMoved
+	  line 5B should be marked as oldMovedAlternate newMoved
+	EOF
+	test_expect_code 1 git diff --no-index --color --color-moved=zebra \
+		 --color-moved-ws=allow-indentation-change \
+		 old.txt new.txt >output &&
+	grep -v index output | test_decode_color >actual &&
+	cat >expected <<-\EOF &&
+	<BOLD>diff --git a/old.txt b/new.txt<RESET>
+	<BOLD>--- a/old.txt<RESET>
+	<BOLD>+++ b/new.txt<RESET>
+	<CYAN>@@ -1,14 +1,14 @@<RESET>
+	<BOLD;MAGENTA>-line 1A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;MAGENTA>-line 1B should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 1A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 1B should be marked as oldMoved newMovedAlternate<RESET>
+	 unchanged<RESET>
+	<BOLD;MAGENTA>-line 2A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;MAGENTA>-line 2B should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;BLUE>-line 3A should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;BLUE>-line 3B should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 3A should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 3B should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>  line 2A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>  line 2B should be marked as oldMoved newMovedAlternate<RESET>
+	 unchanged<RESET>
+	<BOLD;MAGENTA>-line 4A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;MAGENTA>-line 4B should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;BLUE>-line 5A should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;BLUE>-line 5B should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;MAGENTA>-line 6A should be marked as oldMoved newMoved<RESET>
+	<BOLD;MAGENTA>-line 6B should be marked as oldMoved newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 6A should be marked as oldMoved newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 6B should be marked as oldMoved newMoved<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>    line 4A should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>    line 4B should be marked as oldMoved newMovedAlternate<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 5A should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>  line 5B should be marked as oldMovedAlternate newMoved<RESET>
+	EOF
+	test_cmp expected actual
+'
+
 test_expect_success 'cmd option assumes configured colored-moved' '
 	test_config color.diff.oldMoved "magenta" &&
 	test_config color.diff.newMoved "cyan" &&

From bdbb7734f7a18e41c802552f0125b8e03cd3e4dc Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:24 +0000
Subject: [PATCH 052/198] diff --color-moved: avoid false short line matches
 and bad zerba coloring

When marking moved lines it is possible for a block of potential
matched lines to extend past a change in sign when there is a sequence
of added lines whose text matches the text of a sequence of deleted
and added lines. Most of the time either `match` will be NULL or
`pmb_advance_or_null()` will fail when the loop encounters a change of
sign but there are corner cases where `match` is non-NULL and
`pmb_advance_or_null()` successfully advances the moved block despite
the change in sign.

One consequence of this is highlighting a short line as moved when it
should not be. For example

-moved line  # Correctly highlighted as moved
+short line  # Wrongly highlighted as moved
 context
+moved line  # Correctly highlighted as moved
+short line
 context
-short line

The other consequence is coloring a moved addition following a moved
deletion in the wrong color. In the example below the first "+moved
line 3" should be highlighted as newMoved not newMovedAlternate.

-moved line 1 # Correctly highlighted as oldMoved
-moved line 2 # Correctly highlighted as oldMovedAlternate
+moved line 3 # Wrongly highlighted as newMovedAlternate
 context      # Everything else is highlighted correctly
+moved line 2
+moved line 3
 context
+moved line 1
-moved line 3

These false matches are more likely when using --color-moved-ws with
the exception of --color-moved-ws=allow-indentation-change which ties
the sign of the current whitespace delta to the sign of the line to
avoid this problem. The fix is to check that the sign of the new line
being matched is the same as the sign of the line that started the
block of potential matches.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                     | 17 ++++++----
 t/t4015-diff-whitespace.sh | 65 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/diff.c b/diff.c
index cb068f8258c039..2b51b77fd20fce 100644
--- a/diff.c
+++ b/diff.c
@@ -1142,7 +1142,7 @@ static void mark_color_as_moved(struct diff_options *o,
 	struct moved_block *pmb = NULL; /* potentially moved blocks */
 	int pmb_nr = 0, pmb_alloc = 0;
 	int n, flipped_block = 0, block_length = 0;
-	enum diff_symbol last_symbol = 0;
+	enum diff_symbol moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
 
 
 	for (n = 0; n < o->emitted_symbols->nr; n++) {
@@ -1168,7 +1168,7 @@ static void mark_color_as_moved(struct diff_options *o,
 			flipped_block = 0;
 		}
 
-		if (!match) {
+		if (pmb_nr && (!match || l->s != moved_symbol)) {
 			int i;
 
 			adjust_last_block(o, n, block_length);
@@ -1177,12 +1177,13 @@ static void mark_color_as_moved(struct diff_options *o,
 			pmb_nr = 0;
 			block_length = 0;
 			flipped_block = 0;
-			last_symbol = l->s;
+		}
+		if (!match) {
+			moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
 			continue;
 		}
 
 		if (o->color_moved == COLOR_MOVED_PLAIN) {
-			last_symbol = l->s;
 			l->flags |= DIFF_SYMBOL_MOVED_LINE;
 			continue;
 		}
@@ -1214,11 +1215,16 @@ static void mark_color_as_moved(struct diff_options *o,
 			}
 
 			if (adjust_last_block(o, n, block_length) &&
-			    pmb_nr && last_symbol == l->s)
+			    pmb_nr && moved_symbol == l->s)
 				flipped_block = (flipped_block + 1) % 2;
 			else
 				flipped_block = 0;
 
+			if (pmb_nr)
+				moved_symbol = l->s;
+			else
+				moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
+
 			block_length = 0;
 		}
 
@@ -1228,7 +1234,6 @@ static void mark_color_as_moved(struct diff_options *o,
 			if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS)
 				l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT;
 		}
-		last_symbol = l->s;
 	}
 	adjust_last_block(o, n, block_length);
 
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 920114cd795c01..3119a59f071d6f 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1514,6 +1514,71 @@ test_expect_success 'zebra alternate color is only used when necessary' '
 	test_cmp expected actual
 '
 
+test_expect_success 'short lines of opposite sign do not get marked as moved' '
+	cat >old.txt <<-\EOF &&
+	this line should be marked as moved
+	unchanged
+	unchanged
+	unchanged
+	unchanged
+	too short
+	this line should be marked as oldMoved newMoved
+	this line should be marked as oldMovedAlternate newMoved
+	unchanged 1
+	unchanged 2
+	unchanged 3
+	unchanged 4
+	this line should be marked as oldMoved newMoved/newMovedAlternate
+	EOF
+	cat >new.txt <<-\EOF &&
+	too short
+	unchanged
+	unchanged
+	this line should be marked as moved
+	too short
+	unchanged
+	unchanged
+	this line should be marked as oldMoved newMoved/newMovedAlternate
+	unchanged 1
+	unchanged 2
+	this line should be marked as oldMovedAlternate newMoved
+	this line should be marked as oldMoved newMoved/newMovedAlternate
+	unchanged 3
+	this line should be marked as oldMoved newMoved
+	unchanged 4
+	EOF
+	test_expect_code 1 git diff --no-index --color --color-moved=zebra \
+		old.txt new.txt >output && cat output &&
+	grep -v index output | test_decode_color >actual &&
+	cat >expect <<-\EOF &&
+	<BOLD>diff --git a/old.txt b/new.txt<RESET>
+	<BOLD>--- a/old.txt<RESET>
+	<BOLD>+++ b/new.txt<RESET>
+	<CYAN>@@ -1,13 +1,15 @@<RESET>
+	<BOLD;MAGENTA>-this line should be marked as moved<RESET>
+	<GREEN>+<RESET><GREEN>too short<RESET>
+	 unchanged<RESET>
+	 unchanged<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as moved<RESET>
+	<GREEN>+<RESET><GREEN>too short<RESET>
+	 unchanged<RESET>
+	 unchanged<RESET>
+	<RED>-too short<RESET>
+	<BOLD;MAGENTA>-this line should be marked as oldMoved newMoved<RESET>
+	<BOLD;BLUE>-this line should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
+	 unchanged 1<RESET>
+	 unchanged 2<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMovedAlternate newMoved<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
+	 unchanged 3<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>this line should be marked as oldMoved newMoved<RESET>
+	 unchanged 4<RESET>
+	<BOLD;MAGENTA>-this line should be marked as oldMoved newMoved/newMovedAlternate<RESET>
+	EOF
+	test_cmp expect actual
+'
+
 test_expect_success 'cmd option assumes configured colored-moved' '
 	test_config color.diff.oldMoved "magenta" &&
 	test_config color.diff.newMoved "cyan" &&

From f52feba8ffe256ee048dece20067976abe8e9db1 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:25 +0000
Subject: [PATCH 053/198] diff: simplify allow-indentation-change delta
 calculation

Now that we reliably end a block when the sign changes we don't need
the whitespace delta calculation to rely on the sign.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/diff.c b/diff.c
index 2b51b77fd20fce..77c893b266a785 100644
--- a/diff.c
+++ b/diff.c
@@ -864,23 +864,17 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a,
 	    a_width = a->indent_width,
 	    b_off = b->indent_off,
 	    b_width = b->indent_width;
-	int delta;
 
 	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) {
 		*out = INDENT_BLANKLINE;
 		return 1;
 	}
 
-	if (a->s == DIFF_SYMBOL_PLUS)
-		delta = a_width - b_width;
-	else
-		delta = b_width - a_width;
-
 	if (a_len - a_off != b_len - b_off ||
 	    memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
 		return 0;
 
-	*out = delta;
+	*out = a_width - b_width;
 
 	return 1;
 }
@@ -924,10 +918,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	 * match those of the current block and that the text of 'l' and 'cur'
 	 * after the indentation match.
 	 */
-	if (cur->es->s == DIFF_SYMBOL_PLUS)
-		delta = a_width - c_width;
-	else
-		delta = c_width - a_width;
+	delta = c_width - a_width;
 
 	/*
 	 * If the previous lines of this block were all blank then set its

From 8e809cbb2f8f8194ce22257339821809187dd1d0 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:26 +0000
Subject: [PATCH 054/198] diff --color-moved-ws=allow-indentation-change:
 simplify and optimize

If we already have a block of potentially moved lines then as we move
down the diff we need to check if the next line of each potentially
moved line matches the current line of the diff. The implementation of
--color-moved-ws=allow-indentation-change was needlessly performing
this check on all the lines in the diff that matched the current line
rather than just the current line. To exacerbate the problem finding
all the other lines in the diff that match the current line involves a
fuzzy lookup so we were wasting even more time performing a second
comparison to filter out the non-matching lines. Fixing this reduces
time to run
  git diff --color-moved-ws=allow-indentation-change v2.28.0 v2.29.0
by 93% compared to master and simplifies the code.

Test                                                                  HEAD^              HEAD
---------------------------------------------------------------------------------------------------------------
4002.1: diff --no-color-moved --no-color-moved-ws large change        0.41( 0.38+0.03)   0.41(0.37+0.04)  +0.0%
4002.2: diff --color-moved --no-color-moved-ws large change           0.83( 0.79+0.04)   0.82(0.79+0.02)  -1.2%
4002.3: diff --color-moved-ws=allow-indentation-change large change  13.68(13.59+0.07)   0.92(0.89+0.03) -93.3%
4002.4: log --no-color-moved --no-color-moved-ws                      1.31( 1.22+0.08)   1.31(1.21+0.10)  +0.0%
4002.5: log --color-moved --no-color-moved-ws                         1.47( 1.40+0.07)   1.47(1.36+0.10)  +0.0%
4002.6: log --color-moved-ws=allow-indentation-change                 1.87( 1.77+0.09)   1.50(1.41+0.09) -19.8%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 65 ++++++++++++++++------------------------------------------
 1 file changed, 18 insertions(+), 47 deletions(-)

diff --git a/diff.c b/diff.c
index 77c893b266a785..55384449170e55 100644
--- a/diff.c
+++ b/diff.c
@@ -881,35 +881,20 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a,
 
 static int cmp_in_block_with_wsd(const struct diff_options *o,
 				 const struct moved_entry *cur,
-				 const struct moved_entry *match,
-				 struct moved_block *pmb,
-				 int n)
+				 const struct emitted_diff_symbol *l,
+				 struct moved_block *pmb)
 {
-	struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
-	int al = cur->es->len, bl = match->es->len, cl = l->len;
+	int al = cur->es->len, bl = l->len;
 	const char *a = cur->es->line,
-		   *b = match->es->line,
-		   *c = l->line;
+		   *b = l->line;
 	int a_off = cur->es->indent_off,
 	    a_width = cur->es->indent_width,
-	    c_off = l->indent_off,
-	    c_width = l->indent_width;
+	    b_off = l->indent_off,
+	    b_width = l->indent_width;
 	int delta;
 
-	/*
-	 * We need to check if 'cur' is equal to 'match'.  As those
-	 * are from the same (+/-) side, we do not need to adjust for
-	 * indent changes. However these were found using fuzzy
-	 * matching so we do have to check if they are equal. Here we
-	 * just check the lengths. We delay calling memcmp() to check
-	 * the contents until later as if the length comparison for a
-	 * and c fails we can avoid the call all together.
-	 */
-	if (al != bl)
-		return 1;
-
 	/* If 'l' and 'cur' are both blank then they match. */
-	if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE)
+	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
 		return 0;
 
 	/*
@@ -918,7 +903,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	 * match those of the current block and that the text of 'l' and 'cur'
 	 * after the indentation match.
 	 */
-	delta = c_width - a_width;
+	delta = b_width - a_width;
 
 	/*
 	 * If the previous lines of this block were all blank then set its
@@ -927,9 +912,8 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	if (pmb->wsd == INDENT_BLANKLINE)
 		pmb->wsd = delta;
 
-	return !(delta == pmb->wsd && al - a_off == cl - c_off &&
-		 !memcmp(a, b, al) && !
-		 memcmp(a + a_off, c + c_off, al - a_off));
+	return !(delta == pmb->wsd && al - a_off == bl - b_off &&
+		 !memcmp(a + a_off, b + b_off, al - a_off));
 }
 
 static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
@@ -1030,36 +1014,23 @@ static void pmb_advance_or_null(struct diff_options *o,
 }
 
 static void pmb_advance_or_null_multi_match(struct diff_options *o,
-					    struct moved_entry *match,
-					    struct hashmap *hm,
+					    struct emitted_diff_symbol *l,
 					    struct moved_block *pmb,
-					    int pmb_nr, int n)
+					    int pmb_nr)
 {
 	int i;
-	char *got_match = xcalloc(1, pmb_nr);
-
-	hashmap_for_each_entry_from(hm, match, ent) {
-		for (i = 0; i < pmb_nr; i++) {
-			struct moved_entry *prev = pmb[i].match;
-			struct moved_entry *cur = (prev && prev->next_line) ?
-					prev->next_line : NULL;
-			if (!cur)
-				continue;
-			if (!cmp_in_block_with_wsd(o, cur, match, &pmb[i], n))
-				got_match[i] |= 1;
-		}
-	}
 
 	for (i = 0; i < pmb_nr; i++) {
-		if (got_match[i]) {
+		struct moved_entry *prev = pmb[i].match;
+		struct moved_entry *cur = (prev && prev->next_line) ?
+			prev->next_line : NULL;
+		if (cur && !cmp_in_block_with_wsd(o, cur, l, &pmb[i])) {
 			/* Advance to the next line */
-			pmb[i].match = pmb[i].match->next_line;
+			pmb[i].match = cur;
 		} else {
 			moved_block_clear(&pmb[i]);
 		}
 	}
-
-	free(got_match);
 }
 
 static int shrink_potential_moved_blocks(struct moved_block *pmb,
@@ -1181,7 +1152,7 @@ static void mark_color_as_moved(struct diff_options *o,
 
 		if (o->color_moved_ws_handling &
 		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-			pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n);
+			pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr);
 		else
 			pmb_advance_or_null(o, match, hm, pmb, pmb_nr);
 

From 76c2aacec9b2cd5eb2c089747cfd0c8adbb51de2 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:27 +0000
Subject: [PATCH 055/198] diff --color-moved: call comparison function directly

This change will allow us to easily combine pmb_advance_or_null() and
pmb_advance_or_null_multi_match() in the next commit. Calling
xdiff_compare_lines() directly rather than using a function pointer
from the hash map has little effect on the run time.

Test                                                                  HEAD^             HEAD
-------------------------------------------------------------------------------------------------------------
4002.1: diff --no-color-moved --no-color-moved-ws large change        0.41(0.37+0.04)   0.41(0.39+0.02) +0.0%
4002.2: diff --color-moved --no-color-moved-ws large change           0.82(0.79+0.02)   0.83(0.79+0.03) +1.2%
4002.3: diff --color-moved-ws=allow-indentation-change large change   0.92(0.89+0.03)   0.91(0.85+0.05) -1.1%
4002.4: log --no-color-moved --no-color-moved-ws                      1.31(1.21+0.10)   1.33(1.22+0.10) +1.5%
4002.5: log --color-moved --no-color-moved-ws                         1.47(1.36+0.10)   1.47(1.39+0.08) +0.0%
4002.6: log --color-moved-ws=allow-indentation-change                 1.50(1.41+0.09)   1.51(1.42+0.09) +0.7%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/diff.c b/diff.c
index 55384449170e55..c056d917d0d70b 100644
--- a/diff.c
+++ b/diff.c
@@ -995,17 +995,20 @@ static void add_lines_to_move_detection(struct diff_options *o,
 }
 
 static void pmb_advance_or_null(struct diff_options *o,
-				struct moved_entry *match,
-				struct hashmap *hm,
+				struct emitted_diff_symbol *l,
 				struct moved_block *pmb,
 				int pmb_nr)
 {
 	int i;
+	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
+
 	for (i = 0; i < pmb_nr; i++) {
 		struct moved_entry *prev = pmb[i].match;
 		struct moved_entry *cur = (prev && prev->next_line) ?
 				prev->next_line : NULL;
-		if (cur && !hm->cmpfn(o, &cur->ent, &match->ent, NULL)) {
+		if (cur && xdiff_compare_lines(cur->es->line, cur->es->len,
+						l->line, l->len,
+						flags)) {
 			pmb[i].match = cur;
 		} else {
 			pmb[i].match = NULL;
@@ -1154,7 +1157,7 @@ static void mark_color_as_moved(struct diff_options *o,
 		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
 			pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr);
 		else
-			pmb_advance_or_null(o, match, hm, pmb, pmb_nr);
+			pmb_advance_or_null(o, l, pmb, pmb_nr);
 
 		pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr);
 

From f9cbfe64eeeebe87b5eb50dbf83257de287a7886 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:28 +0000
Subject: [PATCH 056/198] diff --color-moved: unify moved block growth
 functions

After the last two commits pmb_advance_or_null() and
pmb_advance_or_null_multi_match() differ only in the comparison they
perform. Lets simplify the code by combining them into a single
function.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/diff.c b/diff.c
index c056d917d0d70b..b03f79b626cc0f 100644
--- a/diff.c
+++ b/diff.c
@@ -1003,36 +1003,23 @@ static void pmb_advance_or_null(struct diff_options *o,
 	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
 
 	for (i = 0; i < pmb_nr; i++) {
+		int match;
 		struct moved_entry *prev = pmb[i].match;
 		struct moved_entry *cur = (prev && prev->next_line) ?
 				prev->next_line : NULL;
-		if (cur && xdiff_compare_lines(cur->es->line, cur->es->len,
-						l->line, l->len,
-						flags)) {
-			pmb[i].match = cur;
-		} else {
-			pmb[i].match = NULL;
-		}
-	}
-}
 
-static void pmb_advance_or_null_multi_match(struct diff_options *o,
-					    struct emitted_diff_symbol *l,
-					    struct moved_block *pmb,
-					    int pmb_nr)
-{
-	int i;
-
-	for (i = 0; i < pmb_nr; i++) {
-		struct moved_entry *prev = pmb[i].match;
-		struct moved_entry *cur = (prev && prev->next_line) ?
-			prev->next_line : NULL;
-		if (cur && !cmp_in_block_with_wsd(o, cur, l, &pmb[i])) {
-			/* Advance to the next line */
+		if (o->color_moved_ws_handling &
+		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+			match = cur &&
+				!cmp_in_block_with_wsd(o, cur, l, &pmb[i]);
+		else
+			match = cur &&
+				xdiff_compare_lines(cur->es->line, cur->es->len,
+						    l->line, l->len, flags);
+		if (match)
 			pmb[i].match = cur;
-		} else {
+		else
 			moved_block_clear(&pmb[i]);
-		}
 	}
 }
 
@@ -1153,11 +1140,7 @@ static void mark_color_as_moved(struct diff_options *o,
 			continue;
 		}
 
-		if (o->color_moved_ws_handling &
-		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-			pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr);
-		else
-			pmb_advance_or_null(o, l, pmb, pmb_nr);
+		pmb_advance_or_null(o, l, pmb, pmb_nr);
 
 		pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr);
 

From 72e4640f244b012d980f8e007db9ad6790057db9 Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:29 +0000
Subject: [PATCH 057/198] diff --color-moved: shrink potential moved blocks as
 we go

Rather than setting `match` to NULL and then looping over the list of
potential matched blocks for a second time to remove blocks with no
matches just filter out the blocks with no matches as we go.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 42 ++++++------------------------------------
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/diff.c b/diff.c
index b03f79b626cc0f..068473c0be3044 100644
--- a/diff.c
+++ b/diff.c
@@ -997,12 +997,12 @@ static void add_lines_to_move_detection(struct diff_options *o,
 static void pmb_advance_or_null(struct diff_options *o,
 				struct emitted_diff_symbol *l,
 				struct moved_block *pmb,
-				int pmb_nr)
+				int *pmb_nr)
 {
-	int i;
+	int i, j;
 	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
 
-	for (i = 0; i < pmb_nr; i++) {
+	for (i = 0, j = 0; i < *pmb_nr; i++) {
 		int match;
 		struct moved_entry *prev = pmb[i].match;
 		struct moved_entry *cur = (prev && prev->next_line) ?
@@ -1017,37 +1017,9 @@ static void pmb_advance_or_null(struct diff_options *o,
 				xdiff_compare_lines(cur->es->line, cur->es->len,
 						    l->line, l->len, flags);
 		if (match)
-			pmb[i].match = cur;
-		else
-			moved_block_clear(&pmb[i]);
+			pmb[j++].match = cur;
 	}
-}
-
-static int shrink_potential_moved_blocks(struct moved_block *pmb,
-					 int pmb_nr)
-{
-	int lp, rp;
-
-	/* Shrink the set of potential block to the remaining running */
-	for (lp = 0, rp = pmb_nr - 1; lp <= rp;) {
-		while (lp < pmb_nr && pmb[lp].match)
-			lp++;
-		/* lp points at the first NULL now */
-
-		while (rp > -1 && !pmb[rp].match)
-			rp--;
-		/* rp points at the last non-NULL */
-
-		if (lp < pmb_nr && rp > -1 && lp < rp) {
-			pmb[lp] = pmb[rp];
-			memset(&pmb[rp], 0, sizeof(pmb[rp]));
-			rp--;
-			lp++;
-		}
-	}
-
-	/* Remember the number of running sets */
-	return rp + 1;
+	*pmb_nr = j;
 }
 
 /*
@@ -1140,9 +1112,7 @@ static void mark_color_as_moved(struct diff_options *o,
 			continue;
 		}
 
-		pmb_advance_or_null(o, l, pmb, pmb_nr);
-
-		pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr);
+		pmb_advance_or_null(o, l, pmb, &pmb_nr);
 
 		if (pmb_nr == 0) {
 			/*

From b473dcb0de2dc030be804a89df155c69306ab90d Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:30 +0000
Subject: [PATCH 058/198] diff --color-moved: stop clearing potential moved
 blocks

moved_block_clear() was introduced in 74d156f4a1 ("diff
--color-moved-ws: fix double free crash", 2018-10-04) to free the
memory that was allocated when initializing a potential moved
block. However since 21536d077f ("diff --color-moved-ws: modify
allow-indentation-change", 2018-11-23) initializing a potential moved
block no longer allocates any memory. Up until the last commit we were
relying on moved_block_clear() to set the `match` pointer to NULL when
a block stopped matching, but since that commit we do not clear a
moved block that does not match so it does not make sense to clear
them elsewhere.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/diff.c b/diff.c
index 068473c0be3044..4b5776a5a0ac87 100644
--- a/diff.c
+++ b/diff.c
@@ -807,11 +807,6 @@ struct moved_block {
 	int wsd; /* The whitespace delta of this block */
 };
 
-static void moved_block_clear(struct moved_block *b)
-{
-	memset(b, 0, sizeof(*b));
-}
-
 #define INDENT_BLANKLINE INT_MIN
 
 static void fill_es_indent_data(struct emitted_diff_symbol *es)
@@ -1093,11 +1088,7 @@ static void mark_color_as_moved(struct diff_options *o,
 		}
 
 		if (pmb_nr && (!match || l->s != moved_symbol)) {
-			int i;
-
 			adjust_last_block(o, n, block_length);
-			for(i = 0; i < pmb_nr; i++)
-				moved_block_clear(&pmb[i]);
 			pmb_nr = 0;
 			block_length = 0;
 			flipped_block = 0;
@@ -1155,8 +1146,6 @@ static void mark_color_as_moved(struct diff_options *o,
 	}
 	adjust_last_block(o, n, block_length);
 
-	for(n = 0; n < pmb_nr; n++)
-		moved_block_clear(&pmb[n]);
 	free(pmb);
 }
 

From 447e5899921980c9ba84a5972176763283f4f72a Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:31 +0000
Subject: [PATCH 059/198] diff --color-moved-ws=allow-indentation-change:
 improve hash lookups

As libxdiff does not have a whitespace flag to ignore the indentation
the code for --color-moved-ws=allow-indentation-change uses
XDF_IGNORE_WHITESPACE and then filters out any hash lookups where
there are non-indentation changes. This filtering is inefficient as
we have to perform another string comparison.

By using the offset data that we have already computed to skip the
indentation we can avoid using XDF_IGNORE_WHITESPACE and safely remove
the extra checks which improves the performance by 11% and paves the
way for the elimination of string comparisons in the next commit.

This change slightly increases the run time of other --color-moved
modes. This could be avoided by using different comparison functions
for the different modes but after the next two commits there is no
measurable benefit in doing so.

Test                                                                  HEAD^             HEAD
--------------------------------------------------------------------------------------------------------------
4002.1: diff --no-color-moved --no-color-moved-ws large change        0.41(0.38+0.03)   0.41(0.36+0.04) +0.0%
4002.2: diff --color-moved --no-color-moved-ws large change           0.82(0.76+0.05)   0.84(0.79+0.04) +2.4%
4002.3: diff --color-moved-ws=allow-indentation-change large change   0.91(0.88+0.03)   0.81(0.74+0.06) -11.0%
4002.4: log --no-color-moved --no-color-moved-ws                      1.32(1.21+0.10)   1.31(1.19+0.11) -0.8%
4002.5: log --color-moved --no-color-moved-ws                         1.47(1.37+0.10)   1.47(1.36+0.11) +0.0%
4002.6: log --color-moved-ws=allow-indentation-change                 1.51(1.42+0.09)   1.48(1.37+0.10) -2.0%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 66 +++++++++++++++++-----------------------------------------
 1 file changed, 19 insertions(+), 47 deletions(-)

diff --git a/diff.c b/diff.c
index 4b5776a5a0ac87..f899083d028349 100644
--- a/diff.c
+++ b/diff.c
@@ -850,28 +850,15 @@ static void fill_es_indent_data(struct emitted_diff_symbol *es)
 }
 
 static int compute_ws_delta(const struct emitted_diff_symbol *a,
-			    const struct emitted_diff_symbol *b,
-			    int *out)
-{
-	int a_len = a->len,
-	    b_len = b->len,
-	    a_off = a->indent_off,
-	    a_width = a->indent_width,
-	    b_off = b->indent_off,
+			    const struct emitted_diff_symbol *b)
+{
+	int a_width = a->indent_width,
 	    b_width = b->indent_width;
 
-	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) {
-		*out = INDENT_BLANKLINE;
-		return 1;
-	}
-
-	if (a_len - a_off != b_len - b_off ||
-	    memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
-		return 0;
-
-	*out = a_width - b_width;
+	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
+		return INDENT_BLANKLINE;
 
-	return 1;
+	return a_width - b_width;
 }
 
 static int cmp_in_block_with_wsd(const struct diff_options *o,
@@ -917,26 +904,17 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
 			   const void *keydata)
 {
 	const struct diff_options *diffopt = hashmap_cmp_fn_data;
-	const struct moved_entry *a, *b;
+	const struct emitted_diff_symbol *a, *b;
 	unsigned flags = diffopt->color_moved_ws_handling
 			 & XDF_WHITESPACE_FLAGS;
 
-	a = container_of(eptr, const struct moved_entry, ent);
-	b = container_of(entry_or_key, const struct moved_entry, ent);
-
-	if (diffopt->color_moved_ws_handling &
-	    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-		/*
-		 * As there is not specific white space config given,
-		 * we'd need to check for a new block, so ignore all
-		 * white space. The setup of the white space
-		 * configuration for the next block is done else where
-		 */
-		flags |= XDF_IGNORE_WHITESPACE;
+	a = container_of(eptr, const struct moved_entry, ent)->es;
+	b = container_of(entry_or_key, const struct moved_entry, ent)->es;
 
-	return !xdiff_compare_lines(a->es->line, a->es->len,
-				    b->es->line, b->es->len,
-				    flags);
+	return !xdiff_compare_lines(a->line + a->indent_off,
+				    a->len - a->indent_off,
+				    b->line + b->indent_off,
+				    b->len - b->indent_off, flags);
 }
 
 static struct moved_entry *prepare_entry(struct diff_options *o,
@@ -945,7 +923,8 @@ static struct moved_entry *prepare_entry(struct diff_options *o,
 	struct moved_entry *ret = xmalloc(sizeof(*ret));
 	struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
 	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
-	unsigned int hash = xdiff_hash_string(l->line, l->len, flags);
+	unsigned int hash = xdiff_hash_string(l->line + l->indent_off,
+					      l->len - l->indent_off, flags);
 
 	hashmap_entry_init(&ret->ent, hash);
 	ret->es = l;
@@ -1113,14 +1092,11 @@ static void mark_color_as_moved(struct diff_options *o,
 			hashmap_for_each_entry_from(hm, match, ent) {
 				ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
 				if (o->color_moved_ws_handling &
-				    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) {
-					if (compute_ws_delta(l, match->es,
-							     &pmb[pmb_nr].wsd))
-						pmb[pmb_nr++].match = match;
-				} else {
+				    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+					pmb[pmb_nr].wsd = compute_ws_delta(l, match->es);
+				else
 					pmb[pmb_nr].wsd = 0;
-					pmb[pmb_nr++].match = match;
-				}
+				pmb[pmb_nr++].match = match;
 			}
 
 			if (adjust_last_block(o, n, block_length) &&
@@ -6240,10 +6216,6 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 		if (o->color_moved) {
 			struct hashmap add_lines, del_lines;
 
-			if (o->color_moved_ws_handling &
-			    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-				o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE;
-
 			hashmap_init(&del_lines, moved_entry_cmp, o, 0);
 			hashmap_init(&add_lines, moved_entry_cmp, o, 0);
 

From a95081c2471ae8658e0e76abd3a8c6a43a9f09cb Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:32 +0000
Subject: [PATCH 060/198] diff: use designated initializers for
 emitted_diff_symbol

This makes it clearer which fields are being explicitly initialized
and will simplify the next commit where we add a new field to the
struct.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index f899083d028349..31a20a34240a4a 100644
--- a/diff.c
+++ b/diff.c
@@ -1460,7 +1460,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
 			     const char *line, int len, unsigned flags)
 {
-	struct emitted_diff_symbol e = {line, len, flags, 0, 0, s};
+	struct emitted_diff_symbol e = {
+		.line = line, .len = len, .flags = flags, .s = s
+	};
 
 	if (o->emitted_symbols)
 		append_emitted_diff_symbol(o, &e);

From 5458d65b169d2c21d5c87af897e3ac01f31d84dc Mon Sep 17 00:00:00 2001
From: Phillip Wood <phillip.wood@dunelm.org.uk>
Date: Tue, 20 Jul 2021 10:36:33 +0000
Subject: [PATCH 061/198] diff --color-moved: intern strings

Taking inspiration from xdl_classify_record() assign an id to each
addition and deletion such that lines that match for the current
--color-moved-ws mode share the same unique id. This reduces the
number of hash lookups a little (calculating the ids still involves
one hash lookup per line) but the main benefit is that when growing
blocks of potentially moved lines we can replace string comparisons
which involve chasing a pointer with a simple integer comparison.

On a large diff this commit reduces the time to run
   diff --color-moved
by 33% and
    diff --color-moved-ws=allow-indentation-change
by 26%. Compared to master the time to run
    diff --color-moved-ws=allow-indentation-change
is now reduced by 95% and the overhead compared to --no-color-moved is
reduced to 50%.

Compared to the previous commit the time to run
    git log --patch --color-moved
is increased slightly, but compared to master there is no change in
run time.

Test                                                                  HEAD^             HEAD
--------------------------------------------------------------------------------------------------------------
4002.1: diff --no-color-moved --no-color-moved-ws large change        0.41(0.36+0.04)   0.41(0.37+0.03)  +0.0%
4002.2: diff --color-moved --no-color-moved-ws large change           0.83(0.79+0.03)   0.55(0.52+0.03) -33.7%
4002.3: diff --color-moved-ws=allow-indentation-change large change   0.81(0.77+0.04)   0.60(0.55+0.05) -25.9%
4002.4: log --no-color-moved --no-color-moved-ws                      1.30(1.20+0.09)   1.31(1.22+0.08)  +0.8%
4002.5: log --color-moved --no-color-moved-ws                         1.46(1.35+0.11)   1.47(1.30+0.16)  +0.7%
4002.6: log --color-moved-ws=allow-indentation-change                 1.46(1.38+0.07)   1.47(1.34+0.13)  +0.7%

Test                                                                  master            HEAD
--------------------------------------------------------------------------------------------------------------
4002.1: diff --no-color-moved --no-color-moved-ws large change        0.40( 0.36+0.03)  0.41(0.37+0.03)  +2.5%
4002.2: diff --color-moved --no-color-moved-ws large change           0.82( 0.77+0.04)  0.55(0.52+0.03) -32.9%
4002.3: diff --color-moved-ws=allow-indentation-change large change  14.10(14.04+0.04)  0.60(0.55+0.05) -95.7%
4002.4: log --no-color-moved --no-color-moved-ws                      1.31( 1.21+0.09)  1.31(1.22+0.08)  +0.0%
4002.5: log --color-moved --no-color-moved-ws                         1.47( 1.37+0.09)  1.47(1.30+0.16)  +0.0%
4002.6: log --color-moved-ws=allow-indentation-change                 1.86( 1.76+0.10)  1.47(1.34+0.13) -21.0%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 171 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 95 insertions(+), 76 deletions(-)

diff --git a/diff.c b/diff.c
index 31a20a34240a4a..2956c8f7103c2a 100644
--- a/diff.c
+++ b/diff.c
@@ -18,6 +18,7 @@
 #include "submodule-config.h"
 #include "submodule.h"
 #include "hashmap.h"
+#include "mem-pool.h"
 #include "ll-merge.h"
 #include "string-list.h"
 #include "strvec.h"
@@ -772,6 +773,7 @@ struct emitted_diff_symbol {
 	int flags;
 	int indent_off;   /* Offset to first non-whitespace character */
 	int indent_width; /* The visual width of the indentation */
+	unsigned id;
 	enum diff_symbol s;
 };
 #define EMITTED_DIFF_SYMBOL_INIT {NULL}
@@ -797,9 +799,9 @@ static void append_emitted_diff_symbol(struct diff_options *o,
 }
 
 struct moved_entry {
-	struct hashmap_entry ent;
 	const struct emitted_diff_symbol *es;
 	struct moved_entry *next_line;
+	struct moved_entry *next_match;
 };
 
 struct moved_block {
@@ -866,24 +868,24 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 				 const struct emitted_diff_symbol *l,
 				 struct moved_block *pmb)
 {
-	int al = cur->es->len, bl = l->len;
-	const char *a = cur->es->line,
-		   *b = l->line;
-	int a_off = cur->es->indent_off,
-	    a_width = cur->es->indent_width,
-	    b_off = l->indent_off,
-	    b_width = l->indent_width;
+	int a_width = cur->es->indent_width, b_width = l->indent_width;
 	int delta;
 
-	/* If 'l' and 'cur' are both blank then they match. */
-	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
+	/* The text of each line must match */
+	if (cur->es->id != l->id)
+		return 1;
+
+	/*
+	 * If 'l' and 'cur' are both blank then we don't need to check the
+	 * indent. We only need to check cur as we know the strings match.
+	 * */
+	if (a_width == INDENT_BLANKLINE)
 		return 0;
 
 	/*
 	 * The indent changes of the block are known and stored in pmb->wsd;
 	 * however we need to check if the indent changes of the current line
-	 * match those of the current block and that the text of 'l' and 'cur'
-	 * after the indentation match.
+	 * match those of the current block.
 	 */
 	delta = b_width - a_width;
 
@@ -894,22 +896,26 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	if (pmb->wsd == INDENT_BLANKLINE)
 		pmb->wsd = delta;
 
-	return !(delta == pmb->wsd && al - a_off == bl - b_off &&
-		 !memcmp(a + a_off, b + b_off, al - a_off));
+	return delta != pmb->wsd;
 }
 
-static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
-			   const struct hashmap_entry *eptr,
-			   const struct hashmap_entry *entry_or_key,
-			   const void *keydata)
+struct interned_diff_symbol {
+	struct hashmap_entry ent;
+	struct emitted_diff_symbol *es;
+};
+
+static int interned_diff_symbol_cmp(const void *hashmap_cmp_fn_data,
+				    const struct hashmap_entry *eptr,
+				    const struct hashmap_entry *entry_or_key,
+				    const void *keydata)
 {
 	const struct diff_options *diffopt = hashmap_cmp_fn_data;
 	const struct emitted_diff_symbol *a, *b;
 	unsigned flags = diffopt->color_moved_ws_handling
 			 & XDF_WHITESPACE_FLAGS;
 
-	a = container_of(eptr, const struct moved_entry, ent)->es;
-	b = container_of(entry_or_key, const struct moved_entry, ent)->es;
+	a = container_of(eptr, const struct interned_diff_symbol, ent)->es;
+	b = container_of(entry_or_key, const struct interned_diff_symbol, ent)->es;
 
 	return !xdiff_compare_lines(a->line + a->indent_off,
 				    a->len - a->indent_off,
@@ -917,55 +923,81 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
 				    b->len - b->indent_off, flags);
 }
 
-static struct moved_entry *prepare_entry(struct diff_options *o,
-					 int line_no)
+static void prepare_entry(struct diff_options *o, struct emitted_diff_symbol *l,
+			  struct interned_diff_symbol *s)
 {
-	struct moved_entry *ret = xmalloc(sizeof(*ret));
-	struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
 	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
 	unsigned int hash = xdiff_hash_string(l->line + l->indent_off,
 					      l->len - l->indent_off, flags);
 
-	hashmap_entry_init(&ret->ent, hash);
-	ret->es = l;
-	ret->next_line = NULL;
-
-	return ret;
+	hashmap_entry_init(&s->ent, hash);
+	s->es = l;
 }
 
-static void add_lines_to_move_detection(struct diff_options *o,
-					struct hashmap *add_lines,
-					struct hashmap *del_lines)
+struct moved_entry_list {
+	struct moved_entry *add, *del;
+};
+
+static struct moved_entry_list *add_lines_to_move_detection(struct diff_options *o,
+							    struct mem_pool *entry_mem_pool)
 {
 	struct moved_entry *prev_line = NULL;
-
+	struct mem_pool interned_pool;
+	struct hashmap interned_map;
+	struct moved_entry_list *entry_list = NULL;
+	size_t entry_list_alloc = 0;
+	unsigned id = 0;
 	int n;
+
+	hashmap_init(&interned_map, interned_diff_symbol_cmp, o, 8096);
+	mem_pool_init(&interned_pool, 1024 * 1024);
+
 	for (n = 0; n < o->emitted_symbols->nr; n++) {
-		struct hashmap *hm;
-		struct moved_entry *key;
+		struct interned_diff_symbol key;
+		struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
+		struct interned_diff_symbol *s;
+		struct moved_entry *entry;
 
-		switch (o->emitted_symbols->buf[n].s) {
-		case DIFF_SYMBOL_PLUS:
-			hm = add_lines;
-			break;
-		case DIFF_SYMBOL_MINUS:
-			hm = del_lines;
-			break;
-		default:
+		if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) {
 			prev_line = NULL;
 			continue;
 		}
 
 		if (o->color_moved_ws_handling &
 		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-			fill_es_indent_data(&o->emitted_symbols->buf[n]);
-		key = prepare_entry(o, n);
-		if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
-			prev_line->next_line = key;
+			fill_es_indent_data(l);
 
-		hashmap_add(hm, &key->ent);
-		prev_line = key;
+		prepare_entry(o, l, &key);
+		s = hashmap_get_entry(&interned_map, &key, ent, &key.ent);
+		if (s) {
+			l->id = s->es->id;
+		} else {
+			l->id = id;
+			ALLOC_GROW_BY(entry_list, id, 1, entry_list_alloc);
+			hashmap_add(&interned_map,
+				    memcpy(mem_pool_alloc(&interned_pool,
+							  sizeof(key)),
+					   &key, sizeof(key)));
+		}
+		entry = mem_pool_alloc(entry_mem_pool, sizeof(*entry));
+		entry->es = l;
+		entry->next_line = NULL;
+		if (prev_line && prev_line->es->s == l->s)
+			prev_line->next_line = entry;
+		prev_line = entry;
+		if (l->s == DIFF_SYMBOL_PLUS) {
+			entry->next_match = entry_list[l->id].add;
+			entry_list[l->id].add = entry;
+		} else {
+			entry->next_match = entry_list[l->id].del;
+			entry_list[l->id].del = entry;
+		}
 	}
+
+	hashmap_clear(&interned_map);
+	mem_pool_discard(&interned_pool, 0);
+
+	return entry_list;
 }
 
 static void pmb_advance_or_null(struct diff_options *o,
@@ -974,7 +1006,6 @@ static void pmb_advance_or_null(struct diff_options *o,
 				int *pmb_nr)
 {
 	int i, j;
-	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
 
 	for (i = 0, j = 0; i < *pmb_nr; i++) {
 		int match;
@@ -987,9 +1018,8 @@ static void pmb_advance_or_null(struct diff_options *o,
 			match = cur &&
 				!cmp_in_block_with_wsd(o, cur, l, &pmb[i]);
 		else
-			match = cur &&
-				xdiff_compare_lines(cur->es->line, cur->es->len,
-						    l->line, l->len, flags);
+			match = cur && cur->es->id == l->id;
+
 		if (match)
 			pmb[j++].match = cur;
 	}
@@ -1034,8 +1064,7 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length)
 
 /* Find blocks of moved code, delegate actual coloring decision to helper */
 static void mark_color_as_moved(struct diff_options *o,
-				struct hashmap *add_lines,
-				struct hashmap *del_lines)
+				struct moved_entry_list *entry_list)
 {
 	struct moved_block *pmb = NULL; /* potentially moved blocks */
 	int pmb_nr = 0, pmb_alloc = 0;
@@ -1044,23 +1073,15 @@ static void mark_color_as_moved(struct diff_options *o,
 
 
 	for (n = 0; n < o->emitted_symbols->nr; n++) {
-		struct hashmap *hm = NULL;
-		struct moved_entry *key;
 		struct moved_entry *match = NULL;
 		struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
 
 		switch (l->s) {
 		case DIFF_SYMBOL_PLUS:
-			hm = del_lines;
-			key = prepare_entry(o, n);
-			match = hashmap_get_entry(hm, key, ent, NULL);
-			free(key);
+			match = entry_list[l->id].del;
 			break;
 		case DIFF_SYMBOL_MINUS:
-			hm = add_lines;
-			key = prepare_entry(o, n);
-			match = hashmap_get_entry(hm, key, ent, NULL);
-			free(key);
+			match = entry_list[l->id].add;
 			break;
 		default:
 			flipped_block = 0;
@@ -1089,7 +1110,7 @@ static void mark_color_as_moved(struct diff_options *o,
 			 * The current line is the start of a new block.
 			 * Setup the set of potential blocks.
 			 */
-			hashmap_for_each_entry_from(hm, match, ent) {
+			for (; match; match = match->next_match) {
 				ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
 				if (o->color_moved_ws_handling &
 				    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
@@ -6216,20 +6237,18 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 
 	if (o->emitted_symbols) {
 		if (o->color_moved) {
-			struct hashmap add_lines, del_lines;
-
-			hashmap_init(&del_lines, moved_entry_cmp, o, 0);
-			hashmap_init(&add_lines, moved_entry_cmp, o, 0);
+			struct mem_pool entry_pool;
+			struct moved_entry_list *entry_list;
 
-			add_lines_to_move_detection(o, &add_lines, &del_lines);
-			mark_color_as_moved(o, &add_lines, &del_lines);
+			mem_pool_init(&entry_pool, 1024 * 1024);
+			entry_list = add_lines_to_move_detection(o,
+								 &entry_pool);
+			mark_color_as_moved(o, entry_list);
 			if (o->color_moved == COLOR_MOVED_ZEBRA_DIM)
 				dim_moved_lines(o);
 
-			hashmap_clear_and_free(&add_lines, struct moved_entry,
-						ent);
-			hashmap_clear_and_free(&del_lines, struct moved_entry,
-						ent);
+			mem_pool_discard(&entry_pool, 0);
+			free(entry_list);
 		}
 
 		for (i = 0; i < esm.nr; i++)

From 4a633b2f4dbffa903ac2e01bf1a722ad3fd77007 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:24:06 +0200
Subject: [PATCH 062/198] refs/packet: add missing BUG() invocations to reflog
 callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In e0cc8ac8202 (packed_ref_store: make class into a subclass of
`ref_store`, 2017-06-23) a die() was added to packed_create_reflog(),
but not to any of the other reflog callbacks, let's do that.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/packed-backend.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index f8aa97d7998ecb..24a360b719ff25 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1600,6 +1600,7 @@ static int packed_for_each_reflog_ent(struct ref_store *ref_store,
 				      const char *refname,
 				      each_reflog_ent_fn fn, void *cb_data)
 {
+	BUG("packed reference store does not support reflogs");
 	return 0;
 }
 
@@ -1608,12 +1609,14 @@ static int packed_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 					      each_reflog_ent_fn fn,
 					      void *cb_data)
 {
+	BUG("packed reference store does not support reflogs");
 	return 0;
 }
 
 static int packed_reflog_exists(struct ref_store *ref_store,
 			       const char *refname)
 {
+	BUG("packed reference store does not support reflogs");
 	return 0;
 }
 
@@ -1627,6 +1630,7 @@ static int packed_create_reflog(struct ref_store *ref_store,
 static int packed_delete_reflog(struct ref_store *ref_store,
 			       const char *refname)
 {
+	BUG("packed reference store does not support reflogs");
 	return 0;
 }
 
@@ -1638,6 +1642,7 @@ static int packed_reflog_expire(struct ref_store *ref_store,
 				reflog_expiry_cleanup_fn cleanup_fn,
 				void *policy_cb_data)
 {
+	BUG("packed reference store does not support reflogs");
 	return 0;
 }
 

From 2c14b391ddc2c43b4a05868b634c8d78854a96a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 00:57:41 +0200
Subject: [PATCH 063/198] test-lib tests: split up "write and run" into two
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor the function to write and run tests of the test-lib.sh output
into two functions.

When this was added back in 565b6fa87bb (tests: refactor mechanics of
testing in a sub test-lib, 2012-12-16) there was no reason to do this,
but since we started supporting test arguments in
517cd55fd51 (test-lib: self-test that --verbose works, 2013-06-23)
we've started to write out duplicate tests simply to test different
arguments, now we'll be able to re-use them.

This change doesn't consolidate any of those tests yet, it just makes
it possible to do so. All the changes in t0000-basic.sh are a simple
search-replacement.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 45 +++++++++++++++++++++--------
 t/t0000-basic.sh | 74 ++++++++++++++++++++++++------------------------
 2 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 3cfe09911a2f56..58ae316ad5fc25 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -1,3 +1,22 @@
+write_sub_test_lib_test () {
+	name="$1" descr="$2" # stdin is the body of the test code
+	mkdir "$name" &&
+	(
+		cd "$name" &&
+		write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF &&
+		test_description='$descr (run in sub test-lib)
+
+		This is run in a sub test-lib so that we do not get incorrect
+		passing metrics
+		'
+
+		# Point to the t/test-lib.sh, which isn't in ../ as usual
+		. "\$TEST_DIRECTORY"/test-lib.sh
+		EOF
+		cat >>"$name.sh"
+	)
+}
+
 _run_sub_test_lib_test_common () {
 	neg="$1" name="$2" descr="$3" # stdin is the body of the test code
 	shift 3
@@ -18,25 +37,15 @@ _run_sub_test_lib_test_common () {
 		esac
 	done
 
-	mkdir "$name" &&
 	(
+		cd "$name" &&
+
 		# Pretend we're not running under a test harness, whether we
 		# are or not. The test-lib output depends on the setting of
 		# this variable, so we need a stable setting under which to run
 		# the sub-test.
 		sane_unset HARNESS_ACTIVE &&
-		cd "$name" &&
-		write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF &&
-		test_description='$descr (run in sub test-lib)
 
-		This is run in a sub test-lib so that we do not get incorrect
-		passing metrics
-		'
-
-		# Point to the t/test-lib.sh, which isn't in ../ as usual
-		. "\$TEST_DIRECTORY"/test-lib.sh
-		EOF
-		cat >>"$name.sh" &&
 		export TEST_DIRECTORY &&
 		# The child test re-sources GIT-BUILD-OPTIONS and may thus
 		# override the test output directory. We thus pass it as an
@@ -55,6 +64,18 @@ _run_sub_test_lib_test_common () {
 	)
 }
 
+write_and_run_sub_test_lib_test () {
+	name="$1" descr="$2" # stdin is the body of the test code
+	write_sub_test_lib_test "$@" || return 1
+	_run_sub_test_lib_test_common '' "$@"
+}
+
+write_and_run_sub_test_lib_test_err () {
+	name="$1" descr="$2" # stdin is the body of the test code
+	write_sub_test_lib_test "$@" || return 1
+	_run_sub_test_lib_test_common '!' "$@"
+}
+
 run_sub_test_lib_test () {
 	_run_sub_test_lib_test_common '' "$@"
 }
diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index a3865dd77ba5e8..a7c5aaacab6bc5 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -68,7 +68,7 @@ test_expect_success 'success is reported like this' '
 '
 
 test_expect_success 'pretend we have a fully passing test suite' '
-	run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF &&
+	write_and_run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF &&
 	for i in 1 2 3
 	do
 		test_expect_success "passing test #$i" "true"
@@ -85,7 +85,7 @@ test_expect_success 'pretend we have a fully passing test suite' '
 '
 
 test_expect_success 'pretend we have a partially passing test suite' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		partial-pass "2/3 tests passing" <<-\EOF &&
 	test_expect_success "passing test #1" "true"
 	test_expect_success "failing test #2" "false"
@@ -103,7 +103,7 @@ test_expect_success 'pretend we have a partially passing test suite' '
 '
 
 test_expect_success 'pretend we have a known breakage' '
-	run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF &&
+	write_and_run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_failure "pretend we have a known breakage" "false"
 	test_done
@@ -118,7 +118,7 @@ test_expect_success 'pretend we have a known breakage' '
 '
 
 test_expect_success 'pretend we have fixed a known breakage' '
-	run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF &&
+	write_and_run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF &&
 	test_expect_failure "pretend we have fixed a known breakage" "true"
 	test_done
 	EOF
@@ -130,7 +130,7 @@ test_expect_success 'pretend we have fixed a known breakage' '
 '
 
 test_expect_success 'pretend we have fixed one of two known breakages (run in sub test-lib)' '
-	run_sub_test_lib_test partially-passing-todos \
+	write_and_run_sub_test_lib_test partially-passing-todos \
 		"2 TODO tests, one passing" <<-\EOF &&
 	test_expect_failure "pretend we have a known breakage" "false"
 	test_expect_success "pretend we have a passing test" "true"
@@ -149,7 +149,7 @@ test_expect_success 'pretend we have fixed one of two known breakages (run in su
 '
 
 test_expect_success 'pretend we have a pass, fail, and known breakage' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		mixed-results1 "mixed results #1" <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_success "failing test" "false"
@@ -168,7 +168,7 @@ test_expect_success 'pretend we have a pass, fail, and known breakage' '
 '
 
 test_expect_success 'pretend we have a mix of all possible results' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		mixed-results2 "mixed results #2" <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_success "passing test" "true"
@@ -204,7 +204,7 @@ test_expect_success 'pretend we have a mix of all possible results' '
 '
 
 test_expect_success 'test --verbose' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		t1234-verbose "test verbose" --verbose <<-\EOF &&
 	test_expect_success "passing test" true
 	test_expect_success "test with output" "echo foo"
@@ -231,7 +231,7 @@ test_expect_success 'test --verbose' '
 '
 
 test_expect_success 'test --verbose-only' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		t2345-verbose-only-2 "test verbose-only=2" \
 		--verbose-only=2 <<-\EOF &&
 	test_expect_success "passing test" true
@@ -255,7 +255,7 @@ test_expect_success 'test --verbose-only' '
 
 test_expect_success 'GIT_SKIP_TESTS' '
 	(
-		run_sub_test_lib_test git-skip-tests-basic \
+		write_and_run_sub_test_lib_test git-skip-tests-basic \
 			"GIT_SKIP_TESTS" \
 			--skip="git.2" <<-\EOF &&
 		for i in 1 2 3
@@ -276,7 +276,7 @@ test_expect_success 'GIT_SKIP_TESTS' '
 
 test_expect_success 'GIT_SKIP_TESTS several tests' '
 	(
-		run_sub_test_lib_test git-skip-tests-several \
+		write_and_run_sub_test_lib_test git-skip-tests-several \
 			"GIT_SKIP_TESTS several tests" \
 			--skip="git.2 git.5" <<-\EOF &&
 		for i in 1 2 3 4 5 6
@@ -300,7 +300,7 @@ test_expect_success 'GIT_SKIP_TESTS several tests' '
 
 test_expect_success 'GIT_SKIP_TESTS sh pattern' '
 	(
-		run_sub_test_lib_test git-skip-tests-sh-pattern \
+		write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \
 			"GIT_SKIP_TESTS sh pattern" \
 			--skip="git.[2-5]" <<-\EOF &&
 		for i in 1 2 3 4 5 6
@@ -324,7 +324,7 @@ test_expect_success 'GIT_SKIP_TESTS sh pattern' '
 
 test_expect_success 'GIT_SKIP_TESTS entire suite' '
 	(
-		run_sub_test_lib_test git-skip-tests-entire-suite \
+		write_and_run_sub_test_lib_test git-skip-tests-entire-suite \
 			"GIT_SKIP_TESTS entire suite" \
 			--skip="git" <<-\EOF &&
 		for i in 1 2 3
@@ -341,7 +341,7 @@ test_expect_success 'GIT_SKIP_TESTS entire suite' '
 
 test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' '
 	(
-		run_sub_test_lib_test git-skip-tests-unmatched-suite \
+		write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \
 			"GIT_SKIP_TESTS does not skip unmatched suite" \
 			--skip="notgit" <<-\EOF &&
 		for i in 1 2 3
@@ -361,7 +361,7 @@ test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' '
 '
 
 test_expect_success '--run basic' '
-	run_sub_test_lib_test run-basic \
+	write_and_run_sub_test_lib_test run-basic \
 		"--run basic" --run="1,3,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -382,7 +382,7 @@ test_expect_success '--run basic' '
 '
 
 test_expect_success '--run with a range' '
-	run_sub_test_lib_test run-range \
+	write_and_run_sub_test_lib_test run-range \
 		"--run with a range" --run="1-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -403,7 +403,7 @@ test_expect_success '--run with a range' '
 '
 
 test_expect_success '--run with two ranges' '
-	run_sub_test_lib_test run-two-ranges \
+	write_and_run_sub_test_lib_test run-two-ranges \
 		"--run with two ranges" --run="1-2,5-6" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -424,7 +424,7 @@ test_expect_success '--run with two ranges' '
 '
 
 test_expect_success '--run with a left open range' '
-	run_sub_test_lib_test run-left-open-range \
+	write_and_run_sub_test_lib_test run-left-open-range \
 		"--run with a left open range" --run="-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -445,7 +445,7 @@ test_expect_success '--run with a left open range' '
 '
 
 test_expect_success '--run with a right open range' '
-	run_sub_test_lib_test run-right-open-range \
+	write_and_run_sub_test_lib_test run-right-open-range \
 		"--run with a right open range" --run="4-" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -466,7 +466,7 @@ test_expect_success '--run with a right open range' '
 '
 
 test_expect_success '--run with basic negation' '
-	run_sub_test_lib_test run-basic-neg \
+	write_and_run_sub_test_lib_test run-basic-neg \
 		"--run with basic negation" --run="!3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -487,7 +487,7 @@ test_expect_success '--run with basic negation' '
 '
 
 test_expect_success '--run with two negations' '
-	run_sub_test_lib_test run-two-neg \
+	write_and_run_sub_test_lib_test run-two-neg \
 		"--run with two negations" --run="!3,!6" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -508,7 +508,7 @@ test_expect_success '--run with two negations' '
 '
 
 test_expect_success '--run a range and negation' '
-	run_sub_test_lib_test run-range-and-neg \
+	write_and_run_sub_test_lib_test run-range-and-neg \
 		"--run a range and negation" --run="-4,!2" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -529,7 +529,7 @@ test_expect_success '--run a range and negation' '
 '
 
 test_expect_success '--run range negation' '
-	run_sub_test_lib_test run-range-neg \
+	write_and_run_sub_test_lib_test run-range-neg \
 		"--run range negation" --run="!1-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -550,7 +550,7 @@ test_expect_success '--run range negation' '
 '
 
 test_expect_success '--run include, exclude and include' '
-	run_sub_test_lib_test run-inc-neg-inc \
+	write_and_run_sub_test_lib_test run-inc-neg-inc \
 		"--run include, exclude and include" \
 		--run="1-5,!1-3,2" <<-\EOF &&
 	for i in 1 2 3 4 5 6
@@ -572,7 +572,7 @@ test_expect_success '--run include, exclude and include' '
 '
 
 test_expect_success '--run include, exclude and include, comma separated' '
-	run_sub_test_lib_test run-inc-neg-inc-comma \
+	write_and_run_sub_test_lib_test run-inc-neg-inc-comma \
 		"--run include, exclude and include, comma separated" \
 		--run=1-5,!1-3,2 <<-\EOF &&
 	for i in 1 2 3 4 5 6
@@ -594,7 +594,7 @@ test_expect_success '--run include, exclude and include, comma separated' '
 '
 
 test_expect_success '--run exclude and include' '
-	run_sub_test_lib_test run-neg-inc \
+	write_and_run_sub_test_lib_test run-neg-inc \
 		"--run exclude and include" \
 		--run="!3-,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
@@ -616,7 +616,7 @@ test_expect_success '--run exclude and include' '
 '
 
 test_expect_success '--run empty selectors' '
-	run_sub_test_lib_test run-empty-sel \
+	write_and_run_sub_test_lib_test run-empty-sel \
 		"--run empty selectors" \
 		--run="1,,3,,,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
@@ -638,7 +638,7 @@ test_expect_success '--run empty selectors' '
 '
 
 test_expect_success '--run substring selector' '
-	run_sub_test_lib_test run-substring-selector \
+	write_and_run_sub_test_lib_test run-substring-selector \
 		"--run empty selectors" \
 		--run="relevant" <<-\EOF &&
 	test_expect_success "relevant test" "true"
@@ -662,7 +662,7 @@ test_expect_success '--run substring selector' '
 '
 
 test_expect_success '--run keyword selection' '
-	run_sub_test_lib_test_err run-inv-range-start \
+	write_and_run_sub_test_lib_test_err run-inv-range-start \
 		"--run invalid range start" \
 		--run="a-5" <<-\EOF &&
 	test_expect_success "passing test #1" "true"
@@ -677,7 +677,7 @@ test_expect_success '--run keyword selection' '
 '
 
 test_expect_success '--run invalid range end' '
-	run_sub_test_lib_test_err run-inv-range-end \
+	write_and_run_sub_test_lib_test_err run-inv-range-end \
 		"--run invalid range end" \
 		--run="1-z" <<-\EOF &&
 	test_expect_success "passing test #1" "true"
@@ -692,7 +692,7 @@ test_expect_success '--run invalid range end' '
 '
 
 test_expect_success 'tests respect prerequisites' '
-	run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF &&
+	write_and_run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF &&
 
 	test_set_prereq HAVEIT
 	test_expect_success HAVEIT "prereq is satisfied" "true"
@@ -722,7 +722,7 @@ test_expect_success 'tests respect prerequisites' '
 '
 
 test_expect_success 'tests respect lazy prerequisites' '
-	run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF &&
+	write_and_run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF &&
 
 	test_lazy_prereq LAZY_TRUE true
 	test_expect_success LAZY_TRUE "lazy prereq is satisifed" "true"
@@ -746,7 +746,7 @@ test_expect_success 'tests respect lazy prerequisites' '
 '
 
 test_expect_success 'nested lazy prerequisites' '
-	run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF &&
+	write_and_run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF &&
 
 	test_lazy_prereq NESTED_INNER "
 		>inner &&
@@ -772,7 +772,7 @@ test_expect_success 'nested lazy prerequisites' '
 '
 
 test_expect_success 'lazy prereqs do not turn off tracing' '
-	run_sub_test_lib_test lazy-prereq-and-tracing \
+	write_and_run_sub_test_lib_test lazy-prereq-and-tracing \
 		"lazy prereqs and -x" -v -x <<-\EOF &&
 	test_lazy_prereq LAZY true
 
@@ -785,7 +785,7 @@ test_expect_success 'lazy prereqs do not turn off tracing' '
 '
 
 test_expect_success 'tests clean up after themselves' '
-	run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF &&
+	write_and_run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF &&
 	clean=no
 	test_expect_success "do cleanup" "
 		test_when_finished clean=yes
@@ -805,7 +805,7 @@ test_expect_success 'tests clean up after themselves' '
 '
 
 test_expect_success 'tests clean up even on failures' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		failing-cleanup "Failing tests with cleanup commands" <<-\EOF &&
 	test_expect_success "tests clean up even after a failure" "
 		touch clean-after-failure &&
@@ -834,7 +834,7 @@ test_expect_success 'tests clean up even on failures' '
 '
 
 test_expect_success 'test_atexit is run' '
-	run_sub_test_lib_test_err \
+	write_and_run_sub_test_lib_test_err \
 		atexit-cleanup "Run atexit commands" -i <<-\EOF &&
 	test_expect_success "tests clean up even after a failure" "
 		> ../../clean-atexit &&

From 833a626105faade4f5e961f7119f6461de6339e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 00:57:42 +0200
Subject: [PATCH 064/198] test-lib tests: stop using a subshell in
 write_sub_test_lib_test()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that this function doesn't handle running the test anymore we can
do away with the sub-shell, which was used to scope an "unset" and
"export" shell variables.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 58ae316ad5fc25..21fa570d0b25b0 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -1,20 +1,17 @@
 write_sub_test_lib_test () {
 	name="$1" descr="$2" # stdin is the body of the test code
 	mkdir "$name" &&
-	(
-		cd "$name" &&
-		write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF &&
-		test_description='$descr (run in sub test-lib)
+	write_script "$name/$name.sh" "$TEST_SHELL_PATH" <<-EOF &&
+	test_description='$descr (run in sub test-lib)
 
-		This is run in a sub test-lib so that we do not get incorrect
-		passing metrics
-		'
+	This is run in a sub test-lib so that we do not get incorrect
+	passing metrics
+	'
 
-		# Point to the t/test-lib.sh, which isn't in ../ as usual
-		. "\$TEST_DIRECTORY"/test-lib.sh
-		EOF
-		cat >>"$name.sh"
-	)
+	# Point to the t/test-lib.sh, which isn't in ../ as usual
+	. "\$TEST_DIRECTORY"/test-lib.sh
+	EOF
+	cat >>"$name/$name.sh"
 }
 
 _run_sub_test_lib_test_common () {

From 57b10b205a4fcdbf6a0fcf6c56b706837fce8ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 00:57:43 +0200
Subject: [PATCH 065/198] test-lib tests: don't provide a description for the
 sub-tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the $test_description provided for the generated subtests to be
constant, since the only purpose of having it is that test-lib.sh will
barf if it isn't supplied.

The other purpose of having it was to effectively split up the test
description between the argument to test_expect_success and the
argument to "write_and_run_sub_test_lib_test". Let's only use one of
the two.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh |  12 ++--
 t/t0000-basic.sh | 142 +++++++++++++++++++++--------------------------
 2 files changed, 66 insertions(+), 88 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 21fa570d0b25b0..cdbc2d933206bb 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -1,12 +1,8 @@
 write_sub_test_lib_test () {
-	name="$1" descr="$2" # stdin is the body of the test code
+	name="$1" # stdin is the body of the test code
 	mkdir "$name" &&
 	write_script "$name/$name.sh" "$TEST_SHELL_PATH" <<-EOF &&
-	test_description='$descr (run in sub test-lib)
-
-	This is run in a sub test-lib so that we do not get incorrect
-	passing metrics
-	'
+	test_description='A test of test-lib.sh itself'
 
 	# Point to the t/test-lib.sh, which isn't in ../ as usual
 	. "\$TEST_DIRECTORY"/test-lib.sh
@@ -15,8 +11,8 @@ write_sub_test_lib_test () {
 }
 
 _run_sub_test_lib_test_common () {
-	neg="$1" name="$2" descr="$3" # stdin is the body of the test code
-	shift 3
+	neg="$1" name="$2" # stdin is the body of the test code
+	shift 2
 
 	# intercept pseudo-options at the front of the argument list that we
 	# will not pass to child script
diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index a7c5aaacab6bc5..6fdd5f43caecc3 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -67,8 +67,8 @@ test_expect_success 'success is reported like this' '
 	:
 '
 
-test_expect_success 'pretend we have a fully passing test suite' '
-	write_and_run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF &&
+test_expect_success 'subtest: 3 passing tests' '
+	write_and_run_sub_test_lib_test full-pass <<-\EOF &&
 	for i in 1 2 3
 	do
 		test_expect_success "passing test #$i" "true"
@@ -84,9 +84,8 @@ test_expect_success 'pretend we have a fully passing test suite' '
 	EOF
 '
 
-test_expect_success 'pretend we have a partially passing test suite' '
-	write_and_run_sub_test_lib_test_err \
-		partial-pass "2/3 tests passing" <<-\EOF &&
+test_expect_success 'subtest: 2/3 tests passing' '
+	write_and_run_sub_test_lib_test_err partial-pass <<-\EOF &&
 	test_expect_success "passing test #1" "true"
 	test_expect_success "failing test #2" "false"
 	test_expect_success "passing test #3" "true"
@@ -102,8 +101,8 @@ test_expect_success 'pretend we have a partially passing test suite' '
 	EOF
 '
 
-test_expect_success 'pretend we have a known breakage' '
-	write_and_run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF &&
+test_expect_success 'subtest: a failing TODO test' '
+	write_and_run_sub_test_lib_test failing-todo <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_failure "pretend we have a known breakage" "false"
 	test_done
@@ -117,8 +116,8 @@ test_expect_success 'pretend we have a known breakage' '
 	EOF
 '
 
-test_expect_success 'pretend we have fixed a known breakage' '
-	write_and_run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF &&
+test_expect_success 'subtest: a passing TODO test' '
+	write_and_run_sub_test_lib_test passing-todo <<-\EOF &&
 	test_expect_failure "pretend we have fixed a known breakage" "true"
 	test_done
 	EOF
@@ -129,9 +128,8 @@ test_expect_success 'pretend we have fixed a known breakage' '
 	EOF
 '
 
-test_expect_success 'pretend we have fixed one of two known breakages (run in sub test-lib)' '
-	write_and_run_sub_test_lib_test partially-passing-todos \
-		"2 TODO tests, one passing" <<-\EOF &&
+test_expect_success 'subtest: 2 TODO tests, one passin' '
+	write_and_run_sub_test_lib_test partially-passing-todos <<-\EOF &&
 	test_expect_failure "pretend we have a known breakage" "false"
 	test_expect_success "pretend we have a passing test" "true"
 	test_expect_failure "pretend we have fixed another known breakage" "true"
@@ -148,9 +146,8 @@ test_expect_success 'pretend we have fixed one of two known breakages (run in su
 	EOF
 '
 
-test_expect_success 'pretend we have a pass, fail, and known breakage' '
-	write_and_run_sub_test_lib_test_err \
-		mixed-results1 "mixed results #1" <<-\EOF &&
+test_expect_success 'subtest: mixed results: pass, failure and a TODO test' '
+	write_and_run_sub_test_lib_test_err mixed-results1 <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_success "failing test" "false"
 	test_expect_failure "pretend we have a known breakage" "false"
@@ -167,9 +164,8 @@ test_expect_success 'pretend we have a pass, fail, and known breakage' '
 	EOF
 '
 
-test_expect_success 'pretend we have a mix of all possible results' '
-	write_and_run_sub_test_lib_test_err \
-		mixed-results2 "mixed results #2" <<-\EOF &&
+test_expect_success 'subtest: mixed results: a mixture of all possible results' '
+	write_and_run_sub_test_lib_test_err mixed-results2 <<-\EOF &&
 	test_expect_success "passing test" "true"
 	test_expect_success "passing test" "true"
 	test_expect_success "passing test" "true"
@@ -203,9 +199,8 @@ test_expect_success 'pretend we have a mix of all possible results' '
 	EOF
 '
 
-test_expect_success 'test --verbose' '
-	write_and_run_sub_test_lib_test_err \
-		t1234-verbose "test verbose" --verbose <<-\EOF &&
+test_expect_success 'subtest: --verbose option' '
+	write_and_run_sub_test_lib_test_err t1234-verbose --verbose <<-\EOF &&
 	test_expect_success "passing test" true
 	test_expect_success "test with output" "echo foo"
 	test_expect_success "failing test" false
@@ -230,9 +225,9 @@ test_expect_success 'test --verbose' '
 	EOF
 '
 
-test_expect_success 'test --verbose-only' '
+test_expect_success 'subtest: --verbose-only option' '
 	write_and_run_sub_test_lib_test_err \
-		t2345-verbose-only-2 "test verbose-only=2" \
+		t2345-verbose-only-2 \
 		--verbose-only=2 <<-\EOF &&
 	test_expect_success "passing test" true
 	test_expect_success "test with output" "echo foo"
@@ -253,10 +248,9 @@ test_expect_success 'test --verbose-only' '
 	EOF
 '
 
-test_expect_success 'GIT_SKIP_TESTS' '
+test_expect_success 'subtest: skip one with GIT_SKIP_TESTS' '
 	(
 		write_and_run_sub_test_lib_test git-skip-tests-basic \
-			"GIT_SKIP_TESTS" \
 			--skip="git.2" <<-\EOF &&
 		for i in 1 2 3
 		do
@@ -274,10 +268,9 @@ test_expect_success 'GIT_SKIP_TESTS' '
 	)
 '
 
-test_expect_success 'GIT_SKIP_TESTS several tests' '
+test_expect_success 'subtest: skip several with GIT_SKIP_TESTS' '
 	(
 		write_and_run_sub_test_lib_test git-skip-tests-several \
-			"GIT_SKIP_TESTS several tests" \
 			--skip="git.2 git.5" <<-\EOF &&
 		for i in 1 2 3 4 5 6
 		do
@@ -298,10 +291,9 @@ test_expect_success 'GIT_SKIP_TESTS several tests' '
 	)
 '
 
-test_expect_success 'GIT_SKIP_TESTS sh pattern' '
+test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' '
 	(
 		write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \
-			"GIT_SKIP_TESTS sh pattern" \
 			--skip="git.[2-5]" <<-\EOF &&
 		for i in 1 2 3 4 5 6
 		do
@@ -322,10 +314,9 @@ test_expect_success 'GIT_SKIP_TESTS sh pattern' '
 	)
 '
 
-test_expect_success 'GIT_SKIP_TESTS entire suite' '
+test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' '
 	(
 		write_and_run_sub_test_lib_test git-skip-tests-entire-suite \
-			"GIT_SKIP_TESTS entire suite" \
 			--skip="git" <<-\EOF &&
 		for i in 1 2 3
 		do
@@ -339,10 +330,9 @@ test_expect_success 'GIT_SKIP_TESTS entire suite' '
 	)
 '
 
-test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' '
+test_expect_success 'subtest: GIT_SKIP_TESTS does not skip unmatched suite' '
 	(
 		write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \
-			"GIT_SKIP_TESTS does not skip unmatched suite" \
 			--skip="notgit" <<-\EOF &&
 		for i in 1 2 3
 		do
@@ -360,9 +350,8 @@ test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' '
 	)
 '
 
-test_expect_success '--run basic' '
-	write_and_run_sub_test_lib_test run-basic \
-		"--run basic" --run="1,3,5" <<-\EOF &&
+test_expect_success 'subtest: --run basic' '
+	write_and_run_sub_test_lib_test run-basic --run="1,3,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -381,9 +370,9 @@ test_expect_success '--run basic' '
 	EOF
 '
 
-test_expect_success '--run with a range' '
+test_expect_success 'subtest: --run with a range' '
 	write_and_run_sub_test_lib_test run-range \
-		"--run with a range" --run="1-3" <<-\EOF &&
+		--run="1-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -402,9 +391,9 @@ test_expect_success '--run with a range' '
 	EOF
 '
 
-test_expect_success '--run with two ranges' '
+test_expect_success 'subtest: --run with two ranges' '
 	write_and_run_sub_test_lib_test run-two-ranges \
-		"--run with two ranges" --run="1-2,5-6" <<-\EOF &&
+		--run="1-2,5-6" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -423,9 +412,9 @@ test_expect_success '--run with two ranges' '
 	EOF
 '
 
-test_expect_success '--run with a left open range' '
+test_expect_success 'subtest: --run with a left open range' '
 	write_and_run_sub_test_lib_test run-left-open-range \
-		"--run with a left open range" --run="-3" <<-\EOF &&
+		--run="-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -444,9 +433,9 @@ test_expect_success '--run with a left open range' '
 	EOF
 '
 
-test_expect_success '--run with a right open range' '
+test_expect_success 'subtest: --run with a right open range' '
 	write_and_run_sub_test_lib_test run-right-open-range \
-		"--run with a right open range" --run="4-" <<-\EOF &&
+		--run="4-" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -465,9 +454,9 @@ test_expect_success '--run with a right open range' '
 	EOF
 '
 
-test_expect_success '--run with basic negation' '
+test_expect_success 'subtest: --run with basic negation' '
 	write_and_run_sub_test_lib_test run-basic-neg \
-		"--run with basic negation" --run="!3" <<-\EOF &&
+		--run="!3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -486,9 +475,9 @@ test_expect_success '--run with basic negation' '
 	EOF
 '
 
-test_expect_success '--run with two negations' '
+test_expect_success 'subtest: --run with two negations' '
 	write_and_run_sub_test_lib_test run-two-neg \
-		"--run with two negations" --run="!3,!6" <<-\EOF &&
+		--run="!3,!6" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -507,9 +496,9 @@ test_expect_success '--run with two negations' '
 	EOF
 '
 
-test_expect_success '--run a range and negation' '
+test_expect_success 'subtest: --run a range and negation' '
 	write_and_run_sub_test_lib_test run-range-and-neg \
-		"--run a range and negation" --run="-4,!2" <<-\EOF &&
+		--run="-4,!2" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -528,9 +517,9 @@ test_expect_success '--run a range and negation' '
 	EOF
 '
 
-test_expect_success '--run range negation' '
+test_expect_success 'subtest: --run range negation' '
 	write_and_run_sub_test_lib_test run-range-neg \
-		"--run range negation" --run="!1-3" <<-\EOF &&
+		--run="!1-3" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
 		test_expect_success "passing test #$i" "true"
@@ -549,9 +538,8 @@ test_expect_success '--run range negation' '
 	EOF
 '
 
-test_expect_success '--run include, exclude and include' '
+test_expect_success 'subtest: --run include, exclude and include' '
 	write_and_run_sub_test_lib_test run-inc-neg-inc \
-		"--run include, exclude and include" \
 		--run="1-5,!1-3,2" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -571,9 +559,8 @@ test_expect_success '--run include, exclude and include' '
 	EOF
 '
 
-test_expect_success '--run include, exclude and include, comma separated' '
+test_expect_success 'subtest: --run include, exclude and include, comma separated' '
 	write_and_run_sub_test_lib_test run-inc-neg-inc-comma \
-		"--run include, exclude and include, comma separated" \
 		--run=1-5,!1-3,2 <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -593,9 +580,8 @@ test_expect_success '--run include, exclude and include, comma separated' '
 	EOF
 '
 
-test_expect_success '--run exclude and include' '
+test_expect_success 'subtest: --run exclude and include' '
 	write_and_run_sub_test_lib_test run-neg-inc \
-		"--run exclude and include" \
 		--run="!3-,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -615,9 +601,8 @@ test_expect_success '--run exclude and include' '
 	EOF
 '
 
-test_expect_success '--run empty selectors' '
+test_expect_success 'subtest: --run empty selectors' '
 	write_and_run_sub_test_lib_test run-empty-sel \
-		"--run empty selectors" \
 		--run="1,,3,,,5" <<-\EOF &&
 	for i in 1 2 3 4 5 6
 	do
@@ -637,9 +622,8 @@ test_expect_success '--run empty selectors' '
 	EOF
 '
 
-test_expect_success '--run substring selector' '
+test_expect_success 'subtest: --run substring selector' '
 	write_and_run_sub_test_lib_test run-substring-selector \
-		"--run empty selectors" \
 		--run="relevant" <<-\EOF &&
 	test_expect_success "relevant test" "true"
 	for i in 1 2 3 4 5 6
@@ -661,9 +645,8 @@ test_expect_success '--run substring selector' '
 	EOF
 '
 
-test_expect_success '--run keyword selection' '
+test_expect_success 'subtest: --run keyword selection' '
 	write_and_run_sub_test_lib_test_err run-inv-range-start \
-		"--run invalid range start" \
 		--run="a-5" <<-\EOF &&
 	test_expect_success "passing test #1" "true"
 	test_done
@@ -676,9 +659,8 @@ test_expect_success '--run keyword selection' '
 	EOF_ERR
 '
 
-test_expect_success '--run invalid range end' '
+test_expect_success 'subtest: --run invalid range end' '
 	write_and_run_sub_test_lib_test_err run-inv-range-end \
-		"--run invalid range end" \
 		--run="1-z" <<-\EOF &&
 	test_expect_success "passing test #1" "true"
 	test_done
@@ -691,8 +673,8 @@ test_expect_success '--run invalid range end' '
 	EOF_ERR
 '
 
-test_expect_success 'tests respect prerequisites' '
-	write_and_run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF &&
+test_expect_success 'subtest: tests respect prerequisites' '
+	write_and_run_sub_test_lib_test prereqs <<-\EOF &&
 
 	test_set_prereq HAVEIT
 	test_expect_success HAVEIT "prereq is satisfied" "true"
@@ -721,8 +703,8 @@ test_expect_success 'tests respect prerequisites' '
 	EOF
 '
 
-test_expect_success 'tests respect lazy prerequisites' '
-	write_and_run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF &&
+test_expect_success 'subtest: tests respect lazy prerequisites' '
+	write_and_run_sub_test_lib_test lazy-prereqs <<-\EOF &&
 
 	test_lazy_prereq LAZY_TRUE true
 	test_expect_success LAZY_TRUE "lazy prereq is satisifed" "true"
@@ -745,8 +727,8 @@ test_expect_success 'tests respect lazy prerequisites' '
 	EOF
 '
 
-test_expect_success 'nested lazy prerequisites' '
-	write_and_run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF &&
+test_expect_success 'subtest: nested lazy prerequisites' '
+	write_and_run_sub_test_lib_test nested-lazy <<-\EOF &&
 
 	test_lazy_prereq NESTED_INNER "
 		>inner &&
@@ -771,9 +753,9 @@ test_expect_success 'nested lazy prerequisites' '
 	EOF
 '
 
-test_expect_success 'lazy prereqs do not turn off tracing' '
+test_expect_success 'subtest: lazy prereqs do not turn off tracing' '
 	write_and_run_sub_test_lib_test lazy-prereq-and-tracing \
-		"lazy prereqs and -x" -v -x <<-\EOF &&
+		-v -x <<-\EOF &&
 	test_lazy_prereq LAZY true
 
 	test_expect_success lazy "test_have_prereq LAZY && echo trace"
@@ -784,8 +766,8 @@ test_expect_success 'lazy prereqs do not turn off tracing' '
 	grep "echo trace" lazy-prereq-and-tracing/err
 '
 
-test_expect_success 'tests clean up after themselves' '
-	write_and_run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF &&
+test_expect_success 'subtest: tests clean up after themselves' '
+	write_and_run_sub_test_lib_test cleanup <<-\EOF &&
 	clean=no
 	test_expect_success "do cleanup" "
 		test_when_finished clean=yes
@@ -804,9 +786,9 @@ test_expect_success 'tests clean up after themselves' '
 	EOF
 '
 
-test_expect_success 'tests clean up even on failures' '
+test_expect_success 'subtest: tests clean up even on failures' '
 	write_and_run_sub_test_lib_test_err \
-		failing-cleanup "Failing tests with cleanup commands" <<-\EOF &&
+		failing-cleanup <<-\EOF &&
 	test_expect_success "tests clean up even after a failure" "
 		touch clean-after-failure &&
 		test_when_finished rm clean-after-failure &&
@@ -833,9 +815,9 @@ test_expect_success 'tests clean up even on failures' '
 	EOF
 '
 
-test_expect_success 'test_atexit is run' '
+test_expect_success 'subtest: test_atexit is run' '
 	write_and_run_sub_test_lib_test_err \
-		atexit-cleanup "Run atexit commands" -i <<-\EOF &&
+		atexit-cleanup -i <<-\EOF &&
 	test_expect_success "tests clean up even after a failure" "
 		> ../../clean-atexit &&
 		test_atexit rm ../../clean-atexit &&

From 5a1fc39dc7b630900a15e8a9cd5eb42f9539e43a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:54:59 +0200
Subject: [PATCH 066/198] progress.c tests: make start/stop verbs on stdin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the usage of the "test-tool progress" introduced in
2bb74b53a49 (Test the progress display, 2019-09-16) to take command
like "start" and "stop" on stdin, instead of running them implicitly.

This makes for tests that are easier to read, since the recipe will
mirror the API usage, and allows for easily testing invalid usage that
would yield (or should yield) a BUG(), e.g. providing two "start"
calls in a row. A subsequent commit will add such stress tests.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-progress.c    | 43 +++++++++++++++++++--------
 t/t0500-progress-display.sh | 59 +++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c
index 5d05cbe7894097..685c0a7c49a20d 100644
--- a/t/helper/test-progress.c
+++ b/t/helper/test-progress.c
@@ -3,6 +3,9 @@
  *
  * Reads instructions from standard input, one instruction per line:
  *
+ *   "start[ <total>[ <title>]]" - Call start_progress(title, total),
+ *                                 when "start" use a title of
+ *                                 "Working hard" with a total of 0.
  *   "progress <items>" - Call display_progress() with the given item count
  *                        as parameter.
  *   "throughput <bytes> <millis> - Call display_throughput() with the given
@@ -10,6 +13,7 @@
  *                                  specify the time elapsed since the
  *                                  start_progress() call.
  *   "update" - Set the 'progress_update' flag.
+ *   "stop" - Call stop_progress().
  *
  * See 't0500-progress-display.sh' for examples.
  */
@@ -22,31 +26,41 @@
 
 int cmd__progress(int argc, const char **argv)
 {
-	int total = 0;
-	const char *title;
+	const char *default_title = "Working hard";
+	char *detached_title = NULL;
 	struct strbuf line = STRBUF_INIT;
-	struct progress *progress;
+	struct progress *progress = NULL;
 
 	const char *usage[] = {
-		"test-tool progress [--total=<n>] <progress-title>",
+		"test-tool progress <stdin",
 		NULL
 	};
 	struct option options[] = {
-		OPT_INTEGER(0, "total", &total, "total number of items"),
 		OPT_END(),
 	};
 
 	argc = parse_options(argc, argv, NULL, options, usage, 0);
-	if (argc != 1)
-		die("need a title for the progress output");
-	title = argv[0];
+	if (argc)
+		usage_with_options(usage, options);
 
 	progress_testing = 1;
-	progress = start_progress(title, total);
 	while (strbuf_getline(&line, stdin) != EOF) {
 		char *end;
 
-		if (skip_prefix(line.buf, "progress ", (const char **) &end)) {
+		if (!strcmp(line.buf, "start")) {
+			progress = start_progress(default_title, 0);
+		} else if (skip_prefix(line.buf, "start ", (const char **) &end)) {
+			uint64_t total = strtoull(end, &end, 10);
+			if (*end == '\0') {
+				progress = start_progress(default_title, total);
+			} else if (*end == ' ') {
+				free(detached_title);
+				detached_title = strbuf_detach(&line, NULL);
+				progress = start_progress(end + 1, total);
+			} else {
+				die("invalid input: '%s'\n", line.buf);
+			}
+		} else if (skip_prefix(line.buf, "progress ", (const char **) &end)) {
 			uint64_t item_count = strtoull(end, &end, 10);
 			if (*end != '\0')
 				die("invalid input: '%s'\n", line.buf);
@@ -63,12 +77,15 @@ int cmd__progress(int argc, const char **argv)
 				die("invalid input: '%s'\n", line.buf);
 			progress_test_ns = test_ms * 1000 * 1000;
 			display_throughput(progress, byte_count);
-		} else if (!strcmp(line.buf, "update"))
+		} else if (!strcmp(line.buf, "update")) {
 			progress_test_force_update();
-		else
+		} else if (!strcmp(line.buf, "stop")) {
+			stop_progress(&progress);
+		} else {
 			die("invalid input: '%s'\n", line.buf);
+		}
 	}
-	stop_progress(&progress);
+	free(detached_title);
 
 	return 0;
 }
diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh
index 22058b503ac78c..ca96ac1fa556de 100755
--- a/t/t0500-progress-display.sh
+++ b/t/t0500-progress-display.sh
@@ -17,6 +17,7 @@ test_expect_success 'simple progress display' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start 0
 	update
 	progress 1
 	update
@@ -25,8 +26,9 @@ test_expect_success 'simple progress display' '
 	progress 4
 	update
 	progress 5
+	stop
 	EOF
-	test-tool progress "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -41,11 +43,13 @@ test_expect_success 'progress display with total' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start 3
 	progress 1
 	progress 2
 	progress 3
+	stop
 	EOF
-	test-tool progress --total=3 "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -62,14 +66,14 @@ Working hard.......2.........3.........4.........5.........6:
 EOF
 
 	cat >in <<-\EOF &&
+	start 100000 Working hard.......2.........3.........4.........5.........6
 	progress 100
 	progress 1000
 	progress 10000
 	progress 100000
+	stop
 	EOF
-	test-tool progress --total=100000 \
-		"Working hard.......2.........3.........4.........5.........6" \
-		<in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -88,16 +92,15 @@ Working hard.......2.........3.........4.........5.........6:
 EOF
 
 	cat >in <<-\EOF &&
-	update
+	start 100000 Working hard.......2.........3.........4.........5.........6
 	progress 1
 	update
 	progress 2
 	progress 10000
 	progress 100000
+	stop
 	EOF
-	test-tool progress --total=100000 \
-		"Working hard.......2.........3.........4.........5.........6" \
-		<in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -116,14 +119,14 @@ Working hard.......2.........3.........4.........5.........6:
 EOF
 
 	cat >in <<-\EOF &&
+	start 100000 Working hard.......2.........3.........4.........5.........6
 	progress 25000
 	progress 50000
 	progress 75000
 	progress 100000
+	stop
 	EOF
-	test-tool progress --total=100000 \
-		"Working hard.......2.........3.........4.........5.........6" \
-		<in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -140,14 +143,14 @@ Working hard.......2.........3.........4.........5.........6.........7.........:
 EOF
 
 	cat >in <<-\EOF &&
+	start 100000 Working hard.......2.........3.........4.........5.........6.........7.........
 	progress 25000
 	progress 50000
 	progress 75000
 	progress 100000
+	stop
 	EOF
-	test-tool progress --total=100000 \
-		"Working hard.......2.........3.........4.........5.........6.........7........." \
-		<in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -164,12 +167,14 @@ test_expect_success 'progress shortens - crazy caller' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start 1000
 	progress 100
 	progress 200
 	progress 1
 	progress 1000
+	stop
 	EOF
-	test-tool progress --total=1000 "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -185,6 +190,7 @@ test_expect_success 'progress display with throughput' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start
 	throughput 102400 1000
 	update
 	progress 10
@@ -197,8 +203,9 @@ test_expect_success 'progress display with throughput' '
 	throughput 409600 4000
 	update
 	progress 40
+	stop
 	EOF
-	test-tool progress "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -214,6 +221,7 @@ test_expect_success 'progress display with throughput and total' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start 40
 	throughput 102400 1000
 	progress 10
 	throughput 204800 2000
@@ -222,8 +230,9 @@ test_expect_success 'progress display with throughput and total' '
 	progress 30
 	throughput 409600 4000
 	progress 40
+	stop
 	EOF
-	test-tool progress --total=40 "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -239,6 +248,7 @@ test_expect_success 'cover up after throughput shortens' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start
 	throughput 409600 1000
 	update
 	progress 1
@@ -251,8 +261,9 @@ test_expect_success 'cover up after throughput shortens' '
 	throughput 1638400 4000
 	update
 	progress 4
+	stop
 	EOF
-	test-tool progress "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -267,6 +278,7 @@ test_expect_success 'cover up after throughput shortens a lot' '
 	EOF
 
 	cat >in <<-\EOF &&
+	start
 	throughput 1 1000
 	update
 	progress 1
@@ -276,8 +288,9 @@ test_expect_success 'cover up after throughput shortens a lot' '
 	throughput 3145728 3000
 	update
 	progress 3
+	stop
 	EOF
-	test-tool progress "Working hard" <in 2>stderr &&
+	test-tool progress <in 2>stderr &&
 
 	show_cr <stderr >out &&
 	test_cmp expect out
@@ -285,6 +298,7 @@ test_expect_success 'cover up after throughput shortens a lot' '
 
 test_expect_success 'progress generates traces' '
 	cat >in <<-\EOF &&
+	start 40
 	throughput 102400 1000
 	update
 	progress 10
@@ -297,10 +311,11 @@ test_expect_success 'progress generates traces' '
 	throughput 409600 4000
 	update
 	progress 40
+	stop
 	EOF
 
-	GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress --total=40 \
-		"Working hard" <in 2>stderr &&
+	GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress \
+		<in 2>stderr &&
 
 	# t0212/parse_events.perl intentionally omits regions and data.
 	test_region progress "Working hard" trace.event &&

From e3940c1f60ae5d19984848d484858f17e4892ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:00 +0200
Subject: [PATCH 067/198] progress.c tests: test some invalid usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test what happens when we "stop" without a "start", omit the "stop"
after a "start", or try to start two concurrent progress bars. This
extends the trace2 tests added in 98a13647408 (trace2: log progress
time and throughput, 2020-05-12).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t0500-progress-display.sh | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh
index ca96ac1fa556de..ffa819ca1db95a 100755
--- a/t/t0500-progress-display.sh
+++ b/t/t0500-progress-display.sh
@@ -323,4 +323,37 @@ test_expect_success 'progress generates traces' '
 	grep "\"key\":\"total_bytes\",\"value\":\"409600\"" trace.event
 '
 
+test_expect_success 'progress generates traces: stop / start' '
+	cat >in <<-\EOF &&
+	start
+	stop
+	EOF
+
+	GIT_TRACE2_EVENT="$(pwd)/trace-startstop.event" test-tool progress \
+		<in 2>stderr &&
+	test_region progress "Working hard" trace-startstop.event
+'
+
+test_expect_success 'progress generates traces: start without stop' '
+	cat >in <<-\EOF &&
+	start
+	EOF
+
+	GIT_TRACE2_EVENT="$(pwd)/trace-start.event" test-tool progress \
+		<in 2>stderr &&
+	grep region_enter.*progress trace-start.event &&
+	! grep region_leave.*progress trace-start.event
+'
+
+test_expect_success 'progress generates traces: stop without start' '
+	cat >in <<-\EOF &&
+	stop
+	EOF
+
+	GIT_TRACE2_EVENT="$(pwd)/trace-stop.event" test-tool progress \
+		<in 2>stderr &&
+	! grep region_enter.*progress trace-stop.event &&
+	! grep region_leave.*progress trace-stop.event
+'
+
 test_done

From 06b6d15050e105d6b881a5f821cbe027c67c9569 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:01 +0200
Subject: [PATCH 068/198] progress.c: move signal handler functions lower
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the signal handler functions to just before the
start_progress_delay() where they'll be referenced, instead of having
them at the top of the file.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 progress.c | 92 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 44 deletions(-)

diff --git a/progress.c b/progress.c
index 680c6a8bf93b51..893cb0fe56f321 100644
--- a/progress.c
+++ b/progress.c
@@ -53,50 +53,6 @@ static volatile sig_atomic_t progress_update;
  */
 int progress_testing;
 uint64_t progress_test_ns = 0;
-void progress_test_force_update(void)
-{
-	progress_update = 1;
-}
-
-
-static void progress_interval(int signum)
-{
-	progress_update = 1;
-}
-
-static void set_progress_signal(void)
-{
-	struct sigaction sa;
-	struct itimerval v;
-
-	if (progress_testing)
-		return;
-
-	progress_update = 0;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = progress_interval;
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_RESTART;
-	sigaction(SIGALRM, &sa, NULL);
-
-	v.it_interval.tv_sec = 1;
-	v.it_interval.tv_usec = 0;
-	v.it_value = v.it_interval;
-	setitimer(ITIMER_REAL, &v, NULL);
-}
-
-static void clear_progress_signal(void)
-{
-	struct itimerval v = {{0,},};
-
-	if (progress_testing)
-		return;
-
-	setitimer(ITIMER_REAL, &v, NULL);
-	signal(SIGALRM, SIG_IGN);
-	progress_update = 0;
-}
 
 static int is_foreground_fd(int fd)
 {
@@ -249,6 +205,54 @@ void display_progress(struct progress *progress, uint64_t n)
 		display(progress, n, NULL);
 }
 
+static void progress_interval(int signum)
+{
+	progress_update = 1;
+}
+
+/*
+ * The progress_test_force_update() function is intended for testing
+ * the progress output, i.e. exclusively for 'test-tool progress'.
+ */
+void progress_test_force_update(void)
+{
+	progress_update = 1;
+}
+
+static void set_progress_signal(void)
+{
+	struct sigaction sa;
+	struct itimerval v;
+
+	if (progress_testing)
+		return;
+
+	progress_update = 0;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = progress_interval;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_RESTART;
+	sigaction(SIGALRM, &sa, NULL);
+
+	v.it_interval.tv_sec = 1;
+	v.it_interval.tv_usec = 0;
+	v.it_value = v.it_interval;
+	setitimer(ITIMER_REAL, &v, NULL);
+}
+
+static void clear_progress_signal(void)
+{
+	struct itimerval v = {{0,},};
+
+	if (progress_testing)
+		return;
+
+	setitimer(ITIMER_REAL, &v, NULL);
+	signal(SIGALRM, SIG_IGN);
+	progress_update = 0;
+}
+
 static struct progress *start_progress_delay(const char *title, uint64_t total,
 					     unsigned delay, unsigned sparse)
 {

From 7133df11c2f3a20d7511285fba19895adedf475c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:02 +0200
Subject: [PATCH 069/198] progress.c: call progress_interval() from
 progress_test_force_update()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define the progress_test_force_update() function in terms of
progress_interval(). For documentation purposes these two functions
have the same body, but different names. Let's just define the test
function by calling progress_interval() with SIGALRM ourselves.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 progress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/progress.c b/progress.c
index 893cb0fe56f321..7fcc513717a262 100644
--- a/progress.c
+++ b/progress.c
@@ -216,7 +216,7 @@ static void progress_interval(int signum)
  */
 void progress_test_force_update(void)
 {
-	progress_update = 1;
+	progress_interval(SIGALRM);
 }
 
 static void set_progress_signal(void)

From 0b5066cbef96ca33aa807d90191b0780cbdadcc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:03 +0200
Subject: [PATCH 070/198] progress.c: stop eagerly fflush(stderr) when not a
 terminal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's the clear intention of the combination of 137a0d0ef56 (Flush
progress message buffer in display()., 2007-11-19) and
85cb8906f0e (progress: no progress in background, 2015-04-13) to call
fflush(stderr) when we have a stderr in the foreground, but we ended
up always calling fflush(stderr) seemingly by omission. Let's not.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 progress.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/progress.c b/progress.c
index 7fcc513717a262..1fade5808de3fe 100644
--- a/progress.c
+++ b/progress.c
@@ -91,7 +91,8 @@ static void display(struct progress *progress, uint64_t n, const char *done)
 	}
 
 	if (show_update) {
-		if (is_foreground_fd(fileno(stderr)) || done) {
+		int stderr_is_foreground_fd = is_foreground_fd(fileno(stderr));
+		if (stderr_is_foreground_fd || done) {
 			const char *eol = done ? done : "\r";
 			size_t clear_len = counters_sb->len < last_count_len ?
 					last_count_len - counters_sb->len + 1 :
@@ -115,7 +116,8 @@ static void display(struct progress *progress, uint64_t n, const char *done)
 				fprintf(stderr, "%s: %s%*s", progress->title,
 					counters_sb->buf, (int) clear_len, eol);
 			}
-			fflush(stderr);
+			if (stderr_is_foreground_fd)
+				fflush(stderr);
 		}
 		progress_update = 0;
 	}

From 95b87268f6c8fc8c20bcf3940240a6a8ea9a9ace Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:04 +0200
Subject: [PATCH 071/198] progress.c: add temporary variable from progress
 struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a temporary "progress" variable for the dereferenced p_progress
pointer to a "struct progress *". Before 98a13647408 (trace2: log
progress time and throughput, 2020-05-12) we didn't dereference
"p_progress" in this function, now that we do it's easier to read the
code if we work with a "progress" struct pointer like everywhere else,
instead of a pointer to a pointer.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 progress.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/progress.c b/progress.c
index 1fade5808de3fe..1ab7d19deb8cc7 100644
--- a/progress.c
+++ b/progress.c
@@ -331,15 +331,16 @@ void stop_progress(struct progress **p_progress)
 	finish_if_sparse(*p_progress);
 
 	if (*p_progress) {
+		struct progress *progress = *p_progress;
 		trace2_data_intmax("progress", the_repository, "total_objects",
 				   (*p_progress)->total);
 
 		if ((*p_progress)->throughput)
 			trace2_data_intmax("progress", the_repository,
 					   "total_bytes",
-					   (*p_progress)->throughput->curr_total);
+					   progress->throughput->curr_total);
 
-		trace2_region_leave("progress", (*p_progress)->title, the_repository);
+		trace2_region_leave("progress", progress->title, the_repository);
 	}
 
 	stop_progress_msg(p_progress, _("done"));

From 92565d0f5b71de79bfb6624866f70f8f7f277e86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:05 +0200
Subject: [PATCH 072/198] pack-bitmap-write.c: add a missing stop_progress()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a bug that's been here since 7cc8f971085 (pack-objects: implement
bitmap writing, 2013-12-21), we did not call stop_progress() if we
reached the early exit in this function. This will matter in a
subsequent commit where we BUG(...) out if this happens, and matters
now e.g. because we don't have a corresponding "region_end" for the
progress trace2 event.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap-write.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 88d9e696a546a8..6e110e41ea4da3 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -550,6 +550,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 	if (indexed_commits_nr < 100) {
 		for (i = 0; i < indexed_commits_nr; ++i)
 			push_bitmapped_commit(indexed_commits[i]);
+		stop_progress(&writer.progress);
 		return;
 	}
 

From 85996a71a468341ab6b8c10c4542894b23316ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 22 Jul 2021 14:55:06 +0200
Subject: [PATCH 073/198] progress.c: add & assert a "global_progress" variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The progress.c code makes a hard assumption that only one progress bar
be active at a time (see [1] for a bug where this wasn't the case),
but nothing has asserted that that's the case. Let's add a BUG()
that'll trigger if two progress bars are active at the same time.

There's an alternate test-only approach to doing the same thing[2],
but by doing this for all progress bars we'll have a canary to check
if we have any unexpected interaction between the "sig_atomic_t
progress_update" variable and this global struct.

I am then planning on using this scaffolding in the future to fix a
limitation in the progress output, namely the current limitation of
the progress.c bar code that any update must pro-actively go through
the likes of display_progress().

If we e.g. hang forever before the first display_progress(), or in the
middle of a loop that would call display_progress() the user will only
see either no output, or output frozen at the last display_progress()
that would have done an update (e.g. in cases where progress_update
was "1" due to an earlier signal).

This change does not fix that, but sets up the structure for solving
that and other related problems by juggling this "global_progress"
struct. Later changes will make more use of the "global_progress" than
only using it for these assertions.

1. 6f9d5f2fda1 (commit-graph: fix progress of reachable commits, 2020-07-09)
2. https://lore.kernel.org/git/20210620200303.2328957-3-szeder.dev@gmail.com

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 progress.c                  | 17 +++++++++++++----
 t/t0500-progress-display.sh | 11 +++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/progress.c b/progress.c
index 1ab7d19deb8cc7..14a023f4b433d8 100644
--- a/progress.c
+++ b/progress.c
@@ -46,6 +46,7 @@ struct progress {
 };
 
 static volatile sig_atomic_t progress_update;
+static struct progress *global_progress;
 
 /*
  * These are only intended for testing the progress output, i.e. exclusively
@@ -221,11 +222,15 @@ void progress_test_force_update(void)
 	progress_interval(SIGALRM);
 }
 
-static void set_progress_signal(void)
+static void set_progress_signal(struct progress *progress)
 {
 	struct sigaction sa;
 	struct itimerval v;
 
+	if (global_progress)
+		BUG("should have no global_progress in set_progress_signal()");
+	global_progress = progress;
+
 	if (progress_testing)
 		return;
 
@@ -243,10 +248,14 @@ static void set_progress_signal(void)
 	setitimer(ITIMER_REAL, &v, NULL);
 }
 
-static void clear_progress_signal(void)
+static void clear_progress_signal(struct progress *progress)
 {
 	struct itimerval v = {{0,},};
 
+	if (!global_progress)
+		BUG("should have a global_progress in clear_progress_signal()");
+	global_progress = NULL;
+
 	if (progress_testing)
 		return;
 
@@ -270,7 +279,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total,
 	strbuf_init(&progress->counters_sb, 0);
 	progress->title_len = utf8_strwidth(title);
 	progress->split = 0;
-	set_progress_signal();
+	set_progress_signal(progress);
 	trace2_region_enter("progress", title, the_repository);
 	return progress;
 }
@@ -374,7 +383,7 @@ void stop_progress_msg(struct progress **p_progress, const char *msg)
 		display(progress, progress->last_value, buf);
 		free(buf);
 	}
-	clear_progress_signal();
+	clear_progress_signal(progress);
 	strbuf_release(&progress->counters_sb);
 	if (progress->throughput)
 		strbuf_release(&progress->throughput->display);
diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh
index ffa819ca1db95a..124d33c96b3f01 100755
--- a/t/t0500-progress-display.sh
+++ b/t/t0500-progress-display.sh
@@ -296,6 +296,17 @@ test_expect_success 'cover up after throughput shortens a lot' '
 	test_cmp expect out
 '
 
+test_expect_success 'BUG: start two concurrent progress bars' '
+	cat >in <<-\EOF &&
+	start 0 one
+	start 0 two
+	EOF
+
+	test_must_fail test-tool progress \
+		<in 2>stderr &&
+	grep -E "^BUG: .*: should have no global_progress in set_progress_signal\(\)$" stderr
+'
+
 test_expect_success 'progress generates traces' '
 	cat >in <<-\EOF &&
 	start 40

From e6edfe9778b8767e6627d68b12283472dfc31f75 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:24 -0400
Subject: [PATCH 074/198] pack-bitmap.c: harden 'test_bitmap_walk()' to check
 type bitmaps

The special `--test-bitmap` mode of `git rev-list` is used to compare
the result of an object traversal with a bitmap to check its integrity.
This mode does not, however, assert that the types of reachable objects
are stored correctly.

Harden this mode by teaching it to also check that each time an object's
bit is marked, the corresponding bit should be set in exactly one of the
type bitmaps (whose type matches the object's true type).

Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index d90e1d9d8cb526..a1425f30541e0a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1301,10 +1301,52 @@ void count_bitmap_commit_list(struct bitmap_index *bitmap_git,
 struct bitmap_test_data {
 	struct bitmap_index *bitmap_git;
 	struct bitmap *base;
+	struct bitmap *commits;
+	struct bitmap *trees;
+	struct bitmap *blobs;
+	struct bitmap *tags;
 	struct progress *prg;
 	size_t seen;
 };
 
+static void test_bitmap_type(struct bitmap_test_data *tdata,
+			     struct object *obj, int pos)
+{
+	enum object_type bitmap_type = OBJ_NONE;
+	int bitmaps_nr = 0;
+
+	if (bitmap_get(tdata->commits, pos)) {
+		bitmap_type = OBJ_COMMIT;
+		bitmaps_nr++;
+	}
+	if (bitmap_get(tdata->trees, pos)) {
+		bitmap_type = OBJ_TREE;
+		bitmaps_nr++;
+	}
+	if (bitmap_get(tdata->blobs, pos)) {
+		bitmap_type = OBJ_BLOB;
+		bitmaps_nr++;
+	}
+	if (bitmap_get(tdata->tags, pos)) {
+		bitmap_type = OBJ_TAG;
+		bitmaps_nr++;
+	}
+
+	if (bitmap_type == OBJ_NONE)
+		die("object %s not found in type bitmaps",
+		    oid_to_hex(&obj->oid));
+
+	if (bitmaps_nr > 1)
+		die("object %s does not have a unique type",
+		    oid_to_hex(&obj->oid));
+
+	if (bitmap_type != obj->type)
+		die("object %s: real type %s, expected: %s",
+		    oid_to_hex(&obj->oid),
+		    type_name(obj->type),
+		    type_name(bitmap_type));
+}
+
 static void test_show_object(struct object *object, const char *name,
 			     void *data)
 {
@@ -1314,6 +1356,7 @@ static void test_show_object(struct object *object, const char *name,
 	bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
 	if (bitmap_pos < 0)
 		die("Object not in bitmap: %s\n", oid_to_hex(&object->oid));
+	test_bitmap_type(tdata, object, bitmap_pos);
 
 	bitmap_set(tdata->base, bitmap_pos);
 	display_progress(tdata->prg, ++tdata->seen);
@@ -1328,6 +1371,7 @@ static void test_show_commit(struct commit *commit, void *data)
 				     &commit->object.oid);
 	if (bitmap_pos < 0)
 		die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid));
+	test_bitmap_type(tdata, &commit->object, bitmap_pos);
 
 	bitmap_set(tdata->base, bitmap_pos);
 	display_progress(tdata->prg, ++tdata->seen);
@@ -1375,6 +1419,10 @@ void test_bitmap_walk(struct rev_info *revs)
 
 	tdata.bitmap_git = bitmap_git;
 	tdata.base = bitmap_new();
+	tdata.commits = ewah_to_bitmap(bitmap_git->commits);
+	tdata.trees = ewah_to_bitmap(bitmap_git->trees);
+	tdata.blobs = ewah_to_bitmap(bitmap_git->blobs);
+	tdata.tags = ewah_to_bitmap(bitmap_git->tags);
 	tdata.prg = start_progress("Verifying bitmap entries", result_popcnt);
 	tdata.seen = 0;
 

From 8abf388fb56f28494951d907eb383bb7a1df35cc Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:27 -0400
Subject: [PATCH 075/198] pack-bitmap-write.c: gracefully fail to write
 non-closed bitmaps

The set of objects covered by a bitmap must be closed under
reachability, since it must be the case that there is a valid bit
position assigned for every possible reachable object (otherwise the
bitmaps would be incomplete).

Pack bitmaps are never written from 'git repack' unless repacking
all-into-one, and so we never write non-closed bitmaps (except in the
case of partial clones where we aren't guaranteed to have all objects).

But multi-pack bitmaps change this, since it isn't known whether the
set of objects in the MIDX is closed under reachability until walking
them. Plumb through a bit that is set when a reachable object isn't
found.

As soon as a reachable object isn't found in the set of objects to
include in the bitmap, bitmap_writer_build() knows that the set is not
closed, and so it now fails gracefully.

A test is added in t0410 to trigger a bitmap write without full
reachability closure by removing local copies of some reachable objects
from a promisor remote.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c   |  3 +-
 pack-bitmap-write.c      | 76 ++++++++++++++++++++++++++++------------
 pack-bitmap.h            |  2 +-
 t/t0410-partial-clone.sh |  9 ++++-
 4 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index de00adbb9e0d51..8a523624a16eb2 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1256,7 +1256,8 @@ static void write_pack_file(void)
 
 				bitmap_writer_show_progress(progress);
 				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
-				bitmap_writer_build(&to_pack);
+				if (bitmap_writer_build(&to_pack) < 0)
+					die(_("failed to write bitmap index"));
 				bitmap_writer_finish(written_list, nr_written,
 						     tmpname.buf, write_bitmap_options);
 				write_bitmap_index = 0;
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 88d9e696a546a8..d374f7884b4114 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -125,15 +125,20 @@ static inline void push_bitmapped_commit(struct commit *commit)
 	writer.selected_nr++;
 }
 
-static uint32_t find_object_pos(const struct object_id *oid)
+static uint32_t find_object_pos(const struct object_id *oid, int *found)
 {
 	struct object_entry *entry = packlist_find(writer.to_pack, oid);
 
 	if (!entry) {
-		die("Failed to write bitmap index. Packfile doesn't have full closure "
+		if (found)
+			*found = 0;
+		warning("Failed to write bitmap index. Packfile doesn't have full closure "
 			"(object %s is missing)", oid_to_hex(oid));
+		return 0;
 	}
 
+	if (found)
+		*found = 1;
 	return oe_in_pack_pos(writer.to_pack, entry);
 }
 
@@ -331,9 +336,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)
 	bb->commits_nr = bb->commits_alloc = 0;
 }
 
-static void fill_bitmap_tree(struct bitmap *bitmap,
-			     struct tree *tree)
+static int fill_bitmap_tree(struct bitmap *bitmap,
+			    struct tree *tree)
 {
+	int found;
 	uint32_t pos;
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -342,9 +348,11 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	 * If our bit is already set, then there is nothing to do. Both this
 	 * tree and all of its children will be set.
 	 */
-	pos = find_object_pos(&tree->object.oid);
+	pos = find_object_pos(&tree->object.oid, &found);
+	if (!found)
+		return -1;
 	if (bitmap_get(bitmap, pos))
-		return;
+		return 0;
 	bitmap_set(bitmap, pos);
 
 	if (parse_tree(tree) < 0)
@@ -355,11 +363,15 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	while (tree_entry(&desc, &entry)) {
 		switch (object_type(entry.mode)) {
 		case OBJ_TREE:
-			fill_bitmap_tree(bitmap,
-					 lookup_tree(the_repository, &entry.oid));
+			if (fill_bitmap_tree(bitmap,
+					     lookup_tree(the_repository, &entry.oid)) < 0)
+				return -1;
 			break;
 		case OBJ_BLOB:
-			bitmap_set(bitmap, find_object_pos(&entry.oid));
+			pos = find_object_pos(&entry.oid, &found);
+			if (!found)
+				return -1;
+			bitmap_set(bitmap, pos);
 			break;
 		default:
 			/* Gitlink, etc; not reachable */
@@ -368,15 +380,18 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	}
 
 	free_tree_buffer(tree);
+	return 0;
 }
 
-static void fill_bitmap_commit(struct bb_commit *ent,
-			       struct commit *commit,
-			       struct prio_queue *queue,
-			       struct prio_queue *tree_queue,
-			       struct bitmap_index *old_bitmap,
-			       const uint32_t *mapping)
+static int fill_bitmap_commit(struct bb_commit *ent,
+			      struct commit *commit,
+			      struct prio_queue *queue,
+			      struct prio_queue *tree_queue,
+			      struct bitmap_index *old_bitmap,
+			      const uint32_t *mapping)
 {
+	int found;
+	uint32_t pos;
 	if (!ent->bitmap)
 		ent->bitmap = bitmap_new();
 
@@ -401,11 +416,16 @@ static void fill_bitmap_commit(struct bb_commit *ent,
 		 * Mark ourselves and queue our tree. The commit
 		 * walk ensures we cover all parents.
 		 */
-		bitmap_set(ent->bitmap, find_object_pos(&c->object.oid));
+		pos = find_object_pos(&c->object.oid, &found);
+		if (!found)
+			return -1;
+		bitmap_set(ent->bitmap, pos);
 		prio_queue_put(tree_queue, get_commit_tree(c));
 
 		for (p = c->parents; p; p = p->next) {
-			int pos = find_object_pos(&p->item->object.oid);
+			pos = find_object_pos(&p->item->object.oid, &found);
+			if (!found)
+				return -1;
 			if (!bitmap_get(ent->bitmap, pos)) {
 				bitmap_set(ent->bitmap, pos);
 				prio_queue_put(queue, p->item);
@@ -413,8 +433,12 @@ static void fill_bitmap_commit(struct bb_commit *ent,
 		}
 	}
 
-	while (tree_queue->nr)
-		fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue));
+	while (tree_queue->nr) {
+		if (fill_bitmap_tree(ent->bitmap,
+				     prio_queue_get(tree_queue)) < 0)
+			return -1;
+	}
+	return 0;
 }
 
 static void store_selected(struct bb_commit *ent, struct commit *commit)
@@ -432,7 +456,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit)
 	kh_value(writer.bitmaps, hash_pos) = stored;
 }
 
-void bitmap_writer_build(struct packing_data *to_pack)
+int bitmap_writer_build(struct packing_data *to_pack)
 {
 	struct bitmap_builder bb;
 	size_t i;
@@ -441,6 +465,7 @@ void bitmap_writer_build(struct packing_data *to_pack)
 	struct prio_queue tree_queue = { NULL };
 	struct bitmap_index *old_bitmap;
 	uint32_t *mapping;
+	int closed = 1; /* until proven otherwise */
 
 	writer.bitmaps = kh_init_oid_map();
 	writer.to_pack = to_pack;
@@ -463,8 +488,11 @@ void bitmap_writer_build(struct packing_data *to_pack)
 		struct commit *child;
 		int reused = 0;
 
-		fill_bitmap_commit(ent, commit, &queue, &tree_queue,
-				   old_bitmap, mapping);
+		if (fill_bitmap_commit(ent, commit, &queue, &tree_queue,
+				       old_bitmap, mapping) < 0) {
+			closed = 0;
+			break;
+		}
 
 		if (ent->selected) {
 			store_selected(ent, commit);
@@ -499,7 +527,9 @@ void bitmap_writer_build(struct packing_data *to_pack)
 
 	stop_progress(&writer.progress);
 
-	compute_xor_offsets();
+	if (closed)
+		compute_xor_offsets();
+	return closed ? 0 : -1;
 }
 
 /**
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 99d733eb264e9f..020cd8d868f9d3 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -87,7 +87,7 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
 				      struct commit *commit);
 void bitmap_writer_select_commits(struct commit **indexed_commits,
 		unsigned int indexed_commits_nr, int max_bitmaps);
-void bitmap_writer_build(struct packing_data *to_pack);
+int bitmap_writer_build(struct packing_data *to_pack);
 void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
 			  const char *filename,
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 584a039b851dbb..1667450917f3c9 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -536,7 +536,13 @@ test_expect_success 'gc does not repack promisor objects if there are none' '
 repack_and_check () {
 	rm -rf repo2 &&
 	cp -r repo repo2 &&
-	git -C repo2 repack $1 -d &&
+	if test x"$1" = "x--must-fail"
+	then
+		shift
+		test_must_fail git -C repo2 repack $1 -d
+	else
+		git -C repo2 repack $1 -d
+	fi &&
 	git -C repo2 fsck &&
 
 	git -C repo2 cat-file -e $2 &&
@@ -561,6 +567,7 @@ test_expect_success 'repack -d does not irreversibly delete promisor objects' '
 	printf "$THREE\n" | pack_as_from_promisor &&
 	delete_object repo "$ONE" &&
 
+	repack_and_check --must-fail -ab "$TWO" "$THREE" &&
 	repack_and_check -a "$TWO" "$THREE" &&
 	repack_and_check -A "$TWO" "$THREE" &&
 	repack_and_check -l "$TWO" "$THREE"

From 96a4c82c75ea42b97fa31a0197b83f3c9a4604d3 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:29 -0400
Subject: [PATCH 076/198] pack-bitmap-write.c: free existing bitmaps

When writing a new bitmap, the bitmap writer code attempts to read the
existing bitmap (if one is present). This is done in order to quickly
permute the bits of any bitmaps for commits which appear in the existing
bitmap, and were also selected for the new bitmap.

But since this code was added in 341fa34887 (pack-bitmap-write: use
existing bitmaps, 2020-12-08), the resources associated with opening an
existing bitmap were never released.

It's fine to ignore this, but it's bad hygiene. It will also cause a
problem for the multi-pack-index builtin, which will be responsible not
only for writing bitmaps, but also for expiring any old multi-pack
bitmaps.

If an existing bitmap was reused here, it will also be expired. That
will cause a problem on platforms which require file resources to be
closed before unlinking them, like Windows. Avoid this by ensuring we
close reused bitmaps with free_bitmap_index() before removing them.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap-write.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index d374f7884b4114..142fd0adb86e6f 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -520,6 +520,7 @@ int bitmap_writer_build(struct packing_data *to_pack)
 	clear_prio_queue(&queue);
 	clear_prio_queue(&tree_queue);
 	bitmap_builder_clear(&bb);
+	free_bitmap_index(old_bitmap);
 	free(mapping);
 
 	trace2_region_leave("pack-bitmap-write", "building_bitmaps_total",

From a6e6feff88e4ddade96410ea7f59a7624b2a6981 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:32 -0400
Subject: [PATCH 077/198] Documentation: describe MIDX-based bitmaps

Update the technical documentation to describe the multi-pack bitmap
format. This patch merely introduces the new format, and describes its
high-level ideas. Git does not yet know how to read nor write these
multi-pack variants, and so the subsequent patches will:

  - Introduce code to interpret multi-pack bitmaps, according to this
    document.

  - Then, introduce code to write multi-pack bitmaps from the 'git
    multi-pack-index write' sub-command.

Finally, the implementation will gain tests in subsequent patches (as
opposed to inline with the patch teaching Git how to write multi-pack
bitmaps) to avoid a cyclic dependency.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/technical/bitmap-format.txt    | 71 ++++++++++++++++----
 Documentation/technical/multi-pack-index.txt | 10 +--
 2 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt
index f8c18a0f7aec2b..04b3ec2178574a 100644
--- a/Documentation/technical/bitmap-format.txt
+++ b/Documentation/technical/bitmap-format.txt
@@ -1,6 +1,44 @@
 GIT bitmap v1 format
 ====================
 
+== Pack and multi-pack bitmaps
+
+Bitmaps store reachability information about the set of objects in a packfile,
+or a multi-pack index (MIDX). The former is defined obviously, and the latter is
+defined as the union of objects in packs contained in the MIDX.
+
+A bitmap may belong to either one pack, or the repository's multi-pack index (if
+it exists). A repository may have at most one bitmap.
+
+An object is uniquely described by its bit position within a bitmap:
+
+	- If the bitmap belongs to a packfile, the __n__th bit corresponds to
+	the __n__th object in pack order. For a function `offset` which maps
+	objects to their byte offset within a pack, pack order is defined as
+	follows:
+
+		o1 <= o2 <==> offset(o1) <= offset(o2)
+
+	- If the bitmap belongs to a MIDX, the __n__th bit corresponds to the
+	__n__th object in MIDX order. With an additional function `pack` which
+	maps objects to the pack they were selected from by the MIDX, MIDX order
+	is defined as follows:
+
+		o1 <= o2 <==> pack(o1) <= pack(o2) /\ offset(o1) <= offset(o2)
+
+	The ordering between packs is done according to the MIDX's .rev file.
+	Notably, the preferred pack sorts ahead of all other packs.
+
+The on-disk representation (described below) of a bitmap is the same regardless
+of whether or not that bitmap belongs to a packfile or a MIDX. The only
+difference is the interpretation of the bits, which is described above.
+
+Certain bitmap extensions are supported (see: Appendix B). No extensions are
+required for bitmaps corresponding to packfiles. For bitmaps that correspond to
+MIDXs, both the bit-cache and rev-cache extensions are required.
+
+== On-disk format
+
 	- A header appears at the beginning:
 
 		4-byte signature: {'B', 'I', 'T', 'M'}
@@ -14,17 +52,19 @@ GIT bitmap v1 format
 			The following flags are supported:
 
 			- BITMAP_OPT_FULL_DAG (0x1) REQUIRED
-			This flag must always be present. It implies that the bitmap
-			index has been generated for a packfile with full closure
-			(i.e. where every single object in the packfile can find
-			 its parent links inside the same packfile). This is a
-			requirement for the bitmap index format, also present in JGit,
-			that greatly reduces the complexity of the implementation.
+			This flag must always be present. It implies that the
+			bitmap index has been generated for a packfile or
+			multi-pack index (MIDX) with full closure (i.e. where
+			every single object in the packfile/MIDX can find its
+			parent links inside the same packfile/MIDX). This is a
+			requirement for the bitmap index format, also present in
+			JGit, that greatly reduces the complexity of the
+			implementation.
 
 			- BITMAP_OPT_HASH_CACHE (0x4)
 			If present, the end of the bitmap file contains
 			`N` 32-bit name-hash values, one per object in the
-			pack. The format and meaning of the name-hash is
+			pack/MIDX. The format and meaning of the name-hash is
 			described below.
 
 		4-byte entry count (network byte order)
@@ -33,7 +73,8 @@ GIT bitmap v1 format
 
 		20-byte checksum
 
-			The SHA1 checksum of the pack this bitmap index belongs to.
+			The SHA1 checksum of the pack/MIDX this bitmap index
+			belongs to.
 
 	- 4 EWAH bitmaps that act as type indexes
 
@@ -50,7 +91,7 @@ GIT bitmap v1 format
 			- Tags
 
 		In each bitmap, the `n`th bit is set to true if the `n`th object
-		in the packfile is of that type.
+		in the packfile or multi-pack index is of that type.
 
 		The obvious consequence is that the OR of all 4 bitmaps will result
 		in a full set (all bits set), and the AND of all 4 bitmaps will
@@ -62,8 +103,9 @@ GIT bitmap v1 format
 		Each entry contains the following:
 
 		- 4-byte object position (network byte order)
-			The position **in the index for the packfile** where the
-			bitmap for this commit is found.
+			The position **in the index for the packfile or
+			multi-pack index** where the bitmap for this commit is
+			found.
 
 		- 1-byte XOR-offset
 			The xor offset used to compress this bitmap. For an entry
@@ -146,10 +188,11 @@ Name-hash cache
 ---------------
 
 If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains
-a cache of 32-bit values, one per object in the pack. The value at
+a cache of 32-bit values, one per object in the pack/MIDX. The value at
 position `i` is the hash of the pathname at which the `i`th object
-(counting in index order) in the pack can be found.  This can be fed
-into the delta heuristics to compare objects with similar pathnames.
+(counting in index or multi-pack index order) in the pack/MIDX can be found.
+This can be fed into the delta heuristics to compare objects with similar
+pathnames.
 
 The hash algorithm used is:
 
diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt
index fb688976c4c033..1a73c3ee203ea0 100644
--- a/Documentation/technical/multi-pack-index.txt
+++ b/Documentation/technical/multi-pack-index.txt
@@ -71,14 +71,10 @@ Future Work
   still reducing the number of binary searches required for object
   lookups.
 
-- The reachability bitmap is currently paired directly with a single
-  packfile, using the pack-order as the object order to hopefully
-  compress the bitmaps well using run-length encoding. This could be
-  extended to pair a reachability bitmap with a multi-pack-index. If
-  the multi-pack-index is extended to store a "stable object order"
+- If the multi-pack-index is extended to store a "stable object order"
   (a function Order(hash) = integer that is constant for a given hash,
-  even as the multi-pack-index is updated) then a reachability bitmap
-  could point to a multi-pack-index and be updated independently.
+  even as the multi-pack-index is updated) then MIDX bitmaps could be
+  updated independently of the MIDX.
 
 - Packfiles can be marked as "special" using empty files that share
   the initial name but replace ".pack" with ".keep" or ".promisor".

From e523ab28f71c1aea46a0972e16e79b7dbd851491 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:35 -0400
Subject: [PATCH 078/198] midx: clear auxiliary .rev after replacing the MIDX

When writing a new multi-pack index, write_midx_internal() attempts to
clean up any auxiliary files (currently just the MIDX's `.rev` file, but
soon to include a `.bitmap`, too) corresponding to the MIDX it's
replacing.

This step should happen after the new MIDX is written into place, since
doing so beforehand means that the old MIDX could be read without its
corresponding .rev file.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 midx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/midx.c b/midx.c
index 21d6a05e887283..3759263a22f86d 100644
--- a/midx.c
+++ b/midx.c
@@ -1076,10 +1076,11 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 
 	if (flags & MIDX_WRITE_REV_INDEX)
 		write_midx_reverse_index(midx_name, midx_hash, &ctx);
-	clear_midx_files_ext(the_repository, ".rev", midx_hash);
 
 	commit_lock_file(&lk);
 
+	clear_midx_files_ext(the_repository, ".rev", midx_hash);
+
 cleanup:
 	for (i = 0; i < ctx.nr; i++) {
 		if (ctx.info[i].p) {

From 85fec428eaaf050537e7657a91a27da77d6e2af0 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:38 -0400
Subject: [PATCH 079/198] midx: reject empty `--preferred-pack`'s

The soon-to-be-implemented multi-pack bitmap treats object in the first
bit position specially by assuming that all objects in the pack it was
selected from are also represented from that pack in the MIDX. In other
words, the pack from which the first object was selected must also have
all of its other objects selected from that same pack in the MIDX in
case of any duplicates.

But this assumption relies on the fact that there is at least one object
in that pack to begin with; otherwise the object in the first bit
position isn't from a preferred pack, in which case we can no longer
assume that all objects in that pack were also selected from the same
pack.

Guard this assumption by checking the number of objects in the given
preferred pack, and failing if the given pack is empty.

To make sure we can safely perform this check, open any packs which are
contained in an existing MIDX via prepare_midx_pack(). The same is done
for new packs via the add_pack_to_midx() callback, but packs picked up
from a previous MIDX will not yet have these opened.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-multi-pack-index.txt |  6 +++---
 midx.c                                 | 29 ++++++++++++++++++++++++++
 t/t5319-multi-pack-index.sh            | 17 +++++++++++++++
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt
index ffd601bc17b4dc..c9b063d31e1df8 100644
--- a/Documentation/git-multi-pack-index.txt
+++ b/Documentation/git-multi-pack-index.txt
@@ -37,9 +37,9 @@ write::
 --
 	--preferred-pack=<pack>::
 		Optionally specify the tie-breaking pack used when
-		multiple packs contain the same object. If not given,
-		ties are broken in favor of the pack with the lowest
-		mtime.
+		multiple packs contain the same object. `<pack>` must
+		contain at least one object. If not given, ties are
+		broken in favor of the pack with the lowest mtime.
 --
 
 verify::
diff --git a/midx.c b/midx.c
index 3759263a22f86d..774eff028fe46c 100644
--- a/midx.c
+++ b/midx.c
@@ -924,6 +924,25 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 			ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]);
 			ctx.info[ctx.nr].p = NULL;
 			ctx.info[ctx.nr].expired = 0;
+
+			if (flags & MIDX_WRITE_REV_INDEX) {
+				/*
+				 * If generating a reverse index, need to have
+				 * packed_git's loaded to compare their
+				 * mtimes and object count.
+				 */
+				if (prepare_midx_pack(the_repository, ctx.m, i)) {
+					error(_("could not load pack"));
+					result = 1;
+					goto cleanup;
+				}
+
+				if (open_pack_index(ctx.m->packs[i]))
+					die(_("could not open index for %s"),
+					    ctx.m->packs[i]->pack_name);
+				ctx.info[ctx.nr].p = ctx.m->packs[i];
+			}
+
 			ctx.nr++;
 		}
 	}
@@ -951,6 +970,16 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 		}
 	}
 
+	if (ctx.preferred_pack_idx > -1) {
+		struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p;
+		if (!preferred->num_objects) {
+			error(_("cannot select preferred pack %s with no objects"),
+			      preferred->pack_name);
+			result = 1;
+			goto cleanup;
+		}
+	}
+
 	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
 					 ctx.preferred_pack_idx);
 
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 5641d158dfc4c6..909f9bf7b8ee79 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -277,6 +277,23 @@ test_expect_success 'midx picks objects from preferred pack' '
 	)
 '
 
+test_expect_success 'preferred packs must be non-empty' '
+	test_when_finished rm -rf preferred.git &&
+	git init preferred.git &&
+	(
+		cd preferred.git &&
+
+		test_commit base &&
+		git repack -ad &&
+
+		empty="$(git pack-objects $objdir/pack/pack </dev/null)" &&
+
+		test_must_fail git multi-pack-index write \
+			--preferred-pack=pack-$empty.pack 2>err &&
+		grep "with no objects" err
+	)
+'
+
 test_expect_success 'verify multi-pack-index success' '
 	git multi-pack-index verify --object-dir=$objdir
 '

From 60c38a109b815340720c3b85b10c8bea4d54fd5e Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:41 -0400
Subject: [PATCH 080/198] midx: infer preferred pack when not given one

In 9218c6a40c (midx: allow marking a pack as preferred, 2021-03-30), the
multi-pack index code learned how to select a pack which all duplicate
objects are selected from. That is, if an object appears in multiple
packs, select the copy in the preferred pack before breaking ties
according to the other rules like pack mtime and readdir() order.

Not specifying a preferred pack can cause serious problems with
multi-pack reachability bitmaps, because these bitmaps rely on having at
least one pack from which all duplicates are selected. Not having such a
pack causes problems with the code in pack-objects to reuse packs
verbatim (e.g., that code assumes that a delta object in a chunk of pack
sent verbatim will have its base object sent from the same pack).

So why does not marking a pack preferred cause problems here? The reason
is roughly as follows:

  - Ties are broken (when handling duplicate objects) by sorting
    according to midx_oid_compare(), which sorts objects by OID,
    preferred-ness, pack mtime, and finally pack ID (more on that
    later).

  - The psuedo pack-order (described in
    Documentation/technical/pack-format.txt under the section
    "multi-pack-index reverse indexes") is computed by
    midx_pack_order(), and sorts by pack ID and pack offset, with
    preferred packs sorting first.

  - But! Pack IDs come from incrementing the pack count in
    add_pack_to_midx(), which is a callback to
    for_each_file_in_pack_dir(), meaning that pack IDs are assigned in
    readdir() order.

When specifying a preferred pack, all of that works fine, because
duplicate objects are correctly resolved in favor of the copy in the
preferred pack, and the preferred pack sorts first in the object order.

"Sorting first" is critical, because the bitmap code relies on finding
out which pack holds the first object in the MIDX's pseudo pack-order to
determine which pack is preferred.

But if we didn't specify a preferred pack, and the pack which comes
first in readdir() order does not also have the lowest timestamp, then
it's possible that that pack (the one that sorts first in pseudo-pack
order, which the bitmap code will treat as the preferred one) did *not*
have all duplicate objects resolved in its favor, resulting in breakage.

The fix is simple: pick a (semi-arbitrary, non-empty) preferred pack
when none was specified. This forces that pack to have duplicates
resolved in its favor, and (critically) to sort first in pseudo-pack
order.  Unfortunately, testing this behavior portably isn't possible,
since it depends on readdir() order which isn't guaranteed by POSIX.

(Note that multi-pack reachability bitmaps have yet to be implemented;
so in that sense this patch is fixing a bug which does not yet exist.
But by having this patch beforehand, we can prevent the bug from ever
materializing.)

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 midx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/midx.c b/midx.c
index 774eff028fe46c..112197a2849762 100644
--- a/midx.c
+++ b/midx.c
@@ -959,15 +959,57 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
 		goto cleanup;
 
-	ctx.preferred_pack_idx = -1;
 	if (preferred_pack_name) {
+		int found = 0;
 		for (i = 0; i < ctx.nr; i++) {
 			if (!cmp_idx_or_pack_name(preferred_pack_name,
 						  ctx.info[i].pack_name)) {
 				ctx.preferred_pack_idx = i;
+				found = 1;
 				break;
 			}
 		}
+
+		if (!found)
+			warning(_("unknown preferred pack: '%s'"),
+				preferred_pack_name);
+	} else if (ctx.nr && (flags & MIDX_WRITE_REV_INDEX)) {
+		struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p;
+		ctx.preferred_pack_idx = 0;
+
+		if (packs_to_drop && packs_to_drop->nr)
+			BUG("cannot write a MIDX bitmap during expiration");
+
+		/*
+		 * set a preferred pack when writing a bitmap to ensure that
+		 * the pack from which the first object is selected in pseudo
+		 * pack-order has all of its objects selected from that pack
+		 * (and not another pack containing a duplicate)
+		 */
+		for (i = 1; i < ctx.nr; i++) {
+			struct packed_git *p = ctx.info[i].p;
+
+			if (!oldest->num_objects || p->mtime < oldest->mtime) {
+				oldest = p;
+				ctx.preferred_pack_idx = i;
+			}
+		}
+
+		if (!oldest->num_objects) {
+			/*
+			 * If all packs are empty; unset the preferred index.
+			 * This is acceptable since there will be no duplicate
+			 * objects to resolve, so the preferred value doesn't
+			 * matter.
+			 */
+			ctx.preferred_pack_idx = -1;
+		}
+	} else {
+		/*
+		 * otherwise don't mark any pack as preferred to avoid
+		 * interfering with expiration logic below
+		 */
+		ctx.preferred_pack_idx = -1;
 	}
 
 	if (ctx.preferred_pack_idx > -1) {
@@ -1048,11 +1090,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 						      ctx.info, ctx.nr,
 						      sizeof(*ctx.info),
 						      idx_or_pack_name_cmp);
-
-		if (!preferred)
-			warning(_("unknown preferred pack: '%s'"),
-				preferred_pack_name);
-		else {
+		if (preferred) {
 			uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
 			if (perm == PACK_EXPIRED)
 				warning(_("preferred pack '%s' is expired"),

From f714355889ed5085a19f678eb432c87d2e0e8a46 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:43 -0400
Subject: [PATCH 081/198] midx: close linked MIDXs, avoid leaking memory

When a repository has at least one alternate, the MIDX belonging to each
alternate is accessed through the `next` pointer on the main object
store's copy of the MIDX. close_midx() didn't bother to close any
of the linked MIDXs. It likewise didn't free the memory pointed to by
`m`, leaving uninitialized bytes with live pointers to them left around
in the heap.

Clean this up by closing linked MIDXs, and freeing up the memory pointed
to by each of them. When callers call close_midx(), then they can
discard the entire linked list of MIDXs and set their pointer to the
head of that list to NULL.

This isn't strictly required for the upcoming patches, but it makes it
much more difficult (though still possible, for e.g., by calling
`close_midx(m->next)` which leaves `m->next` pointing at uninitialized
bytes) to have pointers to uninitialized memory.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 midx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/midx.c b/midx.c
index 112197a2849762..5007faff1b73d9 100644
--- a/midx.c
+++ b/midx.c
@@ -195,6 +195,8 @@ void close_midx(struct multi_pack_index *m)
 	if (!m)
 		return;
 
+	close_midx(m->next);
+
 	munmap((unsigned char *)m->data, m->data_len);
 
 	for (i = 0; i < m->num_packs; i++) {
@@ -203,6 +205,7 @@ void close_midx(struct multi_pack_index *m)
 	}
 	FREE_AND_NULL(m->packs);
 	FREE_AND_NULL(m->pack_names);
+	free(m);
 }
 
 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)

From c50613c46d35deba7bce0156e87e6d447fed0c1e Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:46 -0400
Subject: [PATCH 082/198] midx: avoid opening multiple MIDXs when writing

Opening multiple instance of the same MIDX can lead to problems like two
separate packed_git structures which represent the same pack being added
to the repository's object store.

The above scenario can happen because prepare_midx_pack() checks if
`m->packs[pack_int_id]` is NULL in order to determine if a pack has been
opened and installed in the repository before. But a caller can
construct two copies of the same MIDX by calling get_multi_pack_index()
and load_multi_pack_index() since the former manipulates the
object store directly but the latter is a lower-level routine which
allocates a new MIDX for each call.

So if prepare_midx_pack() is called on multiple MIDXs with the same
pack_int_id, then that pack will be installed twice in the object
store's packed_git pointer.

This can lead to problems in, for e.g., the pack-bitmap code, which does
something like the following (in pack-bitmap.c:open_pack_bitmap()):

    struct bitmap_index *bitmap_git = ...;
    for (p = get_all_packs(r); p; p = p->next) {
      if (open_pack_bitmap_1(bitmap_git, p) == 0)
        ret = 0;
    }

which is a problem if two copies of the same pack exist in the
packed_git list because pack-bitmap.c:open_pack_bitmap_1() contains a
conditional like the following:

    if (bitmap_git->pack || bitmap_git->midx) {
      /* ignore extra bitmap file; we can only handle one */
      warning("ignoring extra bitmap file: %s", packfile->pack_name);
      close(fd);
      return -1;
    }

Avoid this scenario by not letting write_midx_internal() open a MIDX
that isn't also pointed at by the object store. So long as this is the
case, other routines should prefer to open MIDXs with
get_multi_pack_index() or reprepare_packed_git() instead of creating
instances on their own. Because get_multi_pack_index() returns
`r->object_store->multi_pack_index` if it is non-NULL, we'll only have
one instance of a MIDX open at one time, avoiding these problems.

To encourage this, drop the `struct multi_pack_index *` parameter from
`write_midx_internal()`, and rely instead on the `object_dir` to find
(or initialize) the correct MIDX instance.

Likewise, replace the call to `close_midx()` with
`close_object_store()`, since we're about to replace the MIDX with a new
one and should invalidate the object store's memory of any MIDX that
might have existed beforehand.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 midx.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/midx.c b/midx.c
index 5007faff1b73d9..9893795e298bb1 100644
--- a/midx.c
+++ b/midx.c
@@ -888,7 +888,7 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
 static void clear_midx_files_ext(struct repository *r, const char *ext,
 				 unsigned char *keep_hash);
 
-static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
+static int write_midx_internal(const char *object_dir,
 			       struct string_list *packs_to_drop,
 			       const char *preferred_pack_name,
 			       unsigned flags)
@@ -899,6 +899,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	struct hashfile *f = NULL;
 	struct lock_file lk;
 	struct write_midx_context ctx = { 0 };
+	struct multi_pack_index *cur;
 	int pack_name_concat_len = 0;
 	int dropped_packs = 0;
 	int result = 0;
@@ -909,10 +910,14 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 		die_errno(_("unable to create leading directories of %s"),
 			  midx_name);
 
-	if (m)
-		ctx.m = m;
-	else
-		ctx.m = load_multi_pack_index(object_dir, 1);
+	for (cur = get_multi_pack_index(the_repository); cur; cur = cur->next) {
+		if (!strcmp(object_dir, cur->object_dir)) {
+			ctx.m = cur;
+			break;
+		}
+	}
+	if (!ctx.m)
+		ctx.m = get_local_multi_pack_index(the_repository);
 
 	ctx.nr = 0;
 	ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
@@ -1172,8 +1177,7 @@ int write_midx_file(const char *object_dir,
 		    const char *preferred_pack_name,
 		    unsigned flags)
 {
-	return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
-				   flags);
+	return write_midx_internal(object_dir, NULL, preferred_pack_name, flags);
 }
 
 struct clear_midx_data {
@@ -1447,8 +1451,10 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
 
 	free(count);
 
-	if (packs_to_drop.nr)
-		result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags);
+	if (packs_to_drop.nr) {
+		result = write_midx_internal(object_dir, &packs_to_drop, NULL, flags);
+		m = NULL;
+	}
 
 	string_list_clear(&packs_to_drop, 0);
 	return result;
@@ -1637,7 +1643,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size,
 		goto cleanup;
 	}
 
-	result = write_midx_internal(object_dir, m, NULL, NULL, flags);
+	result = write_midx_internal(object_dir, NULL, NULL, flags);
 	m = NULL;
 
 cleanup:

From 8544b76cf029c3b032d70ca6e9406faf689685fd Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:49 -0400
Subject: [PATCH 083/198] pack-bitmap.c: introduce 'bitmap_num_objects()'

A subsequent patch to support reading MIDX bitmaps will be less noisy
after extracting a generic function to return how many objects are
contained in a bitmap.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index a1425f30541e0a..1eee7f7fbaa080 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -136,6 +136,11 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
 	return b;
 }
 
+static uint32_t bitmap_num_objects(struct bitmap_index *index)
+{
+	return index->pack->num_objects;
+}
+
 static int load_bitmap_header(struct bitmap_index *index)
 {
 	struct bitmap_disk_header *header = (void *)index->map;
@@ -154,7 +159,7 @@ static int load_bitmap_header(struct bitmap_index *index)
 	/* Parse known bitmap format options */
 	{
 		uint32_t flags = ntohs(header->options);
-		size_t cache_size = st_mult(index->pack->num_objects, sizeof(uint32_t));
+		size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
 		unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
 
 		if ((flags & BITMAP_OPT_FULL_DAG) == 0)
@@ -399,7 +404,7 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
 
 	if (pos < kh_end(positions)) {
 		int bitmap_pos = kh_value(positions, pos);
-		return bitmap_pos + bitmap_git->pack->num_objects;
+		return bitmap_pos + bitmap_num_objects(bitmap_git);
 	}
 
 	return -1;
@@ -451,7 +456,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git,
 		bitmap_pos = kh_value(eindex->positions, hash_pos);
 	}
 
-	return bitmap_pos + bitmap_git->pack->num_objects;
+	return bitmap_pos + bitmap_num_objects(bitmap_git);
 }
 
 struct bitmap_show_data {
@@ -650,7 +655,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
 	for (i = 0; i < eindex->count; ++i) {
 		struct object *obj;
 
-		if (!bitmap_get(objects, bitmap_git->pack->num_objects + i))
+		if (!bitmap_get(objects, bitmap_num_objects(bitmap_git) + i))
 			continue;
 
 		obj = eindex->objects[i];
@@ -808,7 +813,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git,
 	 * individually.
 	 */
 	for (i = 0; i < eindex->count; i++) {
-		uint32_t pos = i + bitmap_git->pack->num_objects;
+		uint32_t pos = i + bitmap_num_objects(bitmap_git);
 		if (eindex->objects[i]->type == type &&
 		    bitmap_get(to_filter, pos) &&
 		    !bitmap_get(tips, pos))
@@ -835,7 +840,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 
 	oi.sizep = &size;
 
-	if (pos < pack->num_objects) {
+	if (pos < bitmap_num_objects(bitmap_git)) {
 		off_t ofs = pack_pos_to_offset(pack, pos);
 		if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
 			struct object_id oid;
@@ -845,7 +850,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 		}
 	} else {
 		struct eindex *eindex = &bitmap_git->ext_index;
-		struct object *obj = eindex->objects[pos - pack->num_objects];
+		struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
 		if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
 			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
 	}
@@ -887,7 +892,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
 	}
 
 	for (i = 0; i < eindex->count; i++) {
-		uint32_t pos = i + bitmap_git->pack->num_objects;
+		uint32_t pos = i + bitmap_num_objects(bitmap_git);
 		if (eindex->objects[i]->type == OBJ_BLOB &&
 		    bitmap_get(to_filter, pos) &&
 		    !bitmap_get(tips, pos) &&
@@ -1113,8 +1118,8 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
 	enum object_type type;
 	unsigned long size;
 
-	if (pos >= bitmap_git->pack->num_objects)
-		return; /* not actually in the pack */
+	if (pos >= bitmap_num_objects(bitmap_git))
+		return; /* not actually in the pack or MIDX */
 
 	offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
 	type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
@@ -1180,6 +1185,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 	struct pack_window *w_curs = NULL;
 	size_t i = 0;
 	uint32_t offset;
+	uint32_t objects_nr = bitmap_num_objects(bitmap_git);
 
 	assert(result);
 
@@ -1187,8 +1193,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 		i++;
 
 	/* Don't mark objects not in the packfile */
-	if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD)
-		i = bitmap_git->pack->num_objects / BITS_IN_EWORD;
+	if (i > objects_nr / BITS_IN_EWORD)
+		i = objects_nr / BITS_IN_EWORD;
 
 	reuse = bitmap_word_alloc(i);
 	memset(reuse->words, 0xFF, i * sizeof(eword_t));
@@ -1272,7 +1278,7 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git,
 
 	for (i = 0; i < eindex->count; ++i) {
 		if (eindex->objects[i]->type == type &&
-			bitmap_get(objects, bitmap_git->pack->num_objects + i))
+			bitmap_get(objects, bitmap_num_objects(bitmap_git) + i))
 			count++;
 	}
 
@@ -1493,7 +1499,7 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
 	uint32_t i, num_objects;
 	uint32_t *reposition;
 
-	num_objects = bitmap_git->pack->num_objects;
+	num_objects = bitmap_num_objects(bitmap_git);
 	CALLOC_ARRAY(reposition, num_objects);
 
 	for (i = 0; i < num_objects; ++i) {
@@ -1576,7 +1582,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
 static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
 {
 	struct bitmap *result = bitmap_git->result;
-	struct packed_git *pack = bitmap_git->pack;
 	struct eindex *eindex = &bitmap_git->ext_index;
 	off_t total = 0;
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -1588,7 +1593,7 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
 	for (i = 0; i < eindex->count; i++) {
 		struct object *obj = eindex->objects[i];
 
-		if (!bitmap_get(result, pack->num_objects + i))
+		if (!bitmap_get(result, bitmap_num_objects(bitmap_git) + i))
 			continue;
 
 		if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)

From e765bea088db73f830197ecf15f246e7e208395d Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:51 -0400
Subject: [PATCH 084/198] pack-bitmap.c: introduce 'nth_bitmap_object_oid()'

A subsequent patch to support reading MIDX bitmaps will be less noisy
after extracting a generic function to fetch the nth OID contained in
the bitmap.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index 1eee7f7fbaa080..dd292e669aa553 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -223,6 +223,13 @@ static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos)
 
 #define MAX_XOR_OFFSET 160
 
+static int nth_bitmap_object_oid(struct bitmap_index *index,
+				 struct object_id *oid,
+				 uint32_t n)
+{
+	return nth_packed_object_id(oid, index->pack, n);
+}
+
 static int load_bitmap_entries_v1(struct bitmap_index *index)
 {
 	uint32_t i;
@@ -242,7 +249,7 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
 		xor_offset = read_u8(index->map, &index->map_pos);
 		flags = read_u8(index->map, &index->map_pos);
 
-		if (nth_packed_object_id(&oid, index->pack, commit_idx_pos) < 0)
+		if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0)
 			return error("corrupt ewah bitmap: commit index %u out of range",
 				     (unsigned)commit_idx_pos);
 
@@ -844,8 +851,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 		off_t ofs = pack_pos_to_offset(pack, pos);
 		if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
 			struct object_id oid;
-			nth_packed_object_id(&oid, pack,
-					     pack_pos_to_index(pack, pos));
+			nth_bitmap_object_oid(bitmap_git, &oid,
+					      pack_pos_to_index(pack, pos));
 			die(_("unable to get size of %s"), oid_to_hex(&oid));
 		}
 	} else {

From 0c24be7bc7ad2b6af6fe624cf907189fbcac85d3 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:54 -0400
Subject: [PATCH 085/198] pack-bitmap.c: introduce
 'bitmap_is_preferred_refname()'

In a recent commit, pack-objects learned support for the
'pack.preferBitmapTips' configuration. This patch prepares the
multi-pack bitmap code to respect this configuration, too.

The yet-to-be implemented code will find that it is more efficient to
check whether each reference contains a prefix found in the configured
set of values rather than doing an additional traversal.

Implement a function 'bitmap_is_preferred_refname()' which will perform
that check. Its caller will be added in a subsequent patch.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 16 ++++++++++++++++
 pack-bitmap.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index dd292e669aa553..2b11cf944c4dd6 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1634,3 +1634,19 @@ const struct string_list *bitmap_preferred_tips(struct repository *r)
 {
 	return repo_config_get_value_multi(r, "pack.preferbitmaptips");
 }
+
+int bitmap_is_preferred_refname(struct repository *r, const char *refname)
+{
+	const struct string_list *preferred_tips = bitmap_preferred_tips(r);
+	struct string_list_item *item;
+
+	if (!preferred_tips)
+		return 0;
+
+	for_each_string_list_item(item, preferred_tips) {
+		if (starts_with(refname, item->string))
+			return 1;
+	}
+
+	return 0;
+}
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 020cd8d868f9d3..52ea10de5117ab 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -94,5 +94,6 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint16_t options);
 
 const struct string_list *bitmap_preferred_tips(struct repository *r);
+int bitmap_is_preferred_refname(struct repository *r, const char *refname);
 
 #endif

From bfa5c3ba868f31512f04136505f2208bfef4d171 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:56 -0400
Subject: [PATCH 086/198] pack-bitmap.c: avoid redundant calls to
 try_partial_reuse

try_partial_reuse() is used to mark any bits in the beginning of a
bitmap whose objects can be reused verbatim from the pack they came
from.

Currently this function returns void, and signals nothing to the caller
when bits could not be reused. But multi-pack bitmaps would benefit from
having such a signal, because they may try to pass objects which are in
bounds, but from a pack other than the preferred one.

Any extra calls are noops because of a conditional in
reuse_partial_packfile_from_bitmap(), but those loop iterations can be
avoided by letting try_partial_reuse() indicate when it can't accept any
more bits for reuse, and then listening to that signal.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index 2b11cf944c4dd6..20fa19fba132c8 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1116,22 +1116,26 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
 	return NULL;
 }
 
-static void try_partial_reuse(struct bitmap_index *bitmap_git,
-			      size_t pos,
-			      struct bitmap *reuse,
-			      struct pack_window **w_curs)
+/*
+ * -1 means "stop trying further objects"; 0 means we may or may not have
+ * reused, but you can keep feeding bits.
+ */
+static int try_partial_reuse(struct bitmap_index *bitmap_git,
+			     size_t pos,
+			     struct bitmap *reuse,
+			     struct pack_window **w_curs)
 {
 	off_t offset, header;
 	enum object_type type;
 	unsigned long size;
 
 	if (pos >= bitmap_num_objects(bitmap_git))
-		return; /* not actually in the pack or MIDX */
+		return -1; /* not actually in the pack or MIDX */
 
 	offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
 	type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
 	if (type < 0)
-		return; /* broken packfile, punt */
+		return -1; /* broken packfile, punt */
 
 	if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
 		off_t base_offset;
@@ -1148,9 +1152,9 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
 		base_offset = get_delta_base(bitmap_git->pack, w_curs,
 					     &offset, type, header);
 		if (!base_offset)
-			return;
+			return 0;
 		if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0)
-			return;
+			return 0;
 
 		/*
 		 * We assume delta dependencies always point backwards. This
@@ -1162,7 +1166,7 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
 		 * odd parameters.
 		 */
 		if (base_pos >= pos)
-			return;
+			return 0;
 
 		/*
 		 * And finally, if we're not sending the base as part of our
@@ -1173,13 +1177,14 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
 		 * object_entry code path handle it.
 		 */
 		if (!bitmap_get(reuse, base_pos))
-			return;
+			return 0;
 	}
 
 	/*
 	 * If we got here, then the object is OK to reuse. Mark it.
 	 */
 	bitmap_set(reuse, pos);
+	return 0;
 }
 
 int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
@@ -1215,10 +1220,23 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 				break;
 
 			offset += ewah_bit_ctz64(word >> offset);
-			try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs);
+			if (try_partial_reuse(bitmap_git, pos + offset, reuse,
+					      &w_curs) < 0) {
+				/*
+				 * try_partial_reuse indicated we couldn't reuse
+				 * any bits, so there is no point in trying more
+				 * bits in the current word, or any other words
+				 * in result.
+				 *
+				 * Jump out of both loops to avoid future
+				 * unnecessary calls to try_partial_reuse.
+				 */
+				goto done;
+			}
 		}
 	}
 
+done:
 	unuse_pack(&w_curs);
 
 	*entries = bitmap_popcount(reuse);

From df01d79fbe0c953f00e5d28c09f968bdfb2e5c63 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:19:59 -0400
Subject: [PATCH 087/198] pack-bitmap: read multi-pack bitmaps

This prepares the code in pack-bitmap to interpret the new multi-pack
bitmaps described in Documentation/technical/bitmap-format.txt, which
mostly involves converting bit positions to accommodate looking them up
in a MIDX.

Note that there are currently no writers who write multi-pack bitmaps,
and that this will be implemented in the subsequent commit. Note also
that get_midx_checksum() and get_midx_filename() are made non-static so
they can be called from pack-bitmap.c.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |   5 +
 midx.c                 |   4 +-
 midx.h                 |   2 +
 pack-bitmap-write.c    |   2 +-
 pack-bitmap.c          | 357 ++++++++++++++++++++++++++++++++++++-----
 pack-bitmap.h          |   6 +
 packfile.c             |   2 +-
 7 files changed, 336 insertions(+), 42 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8a523624a16eb2..e11d3ac2e5d2a1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1124,6 +1124,11 @@ static void write_reused_pack(struct hashfile *f)
 				break;
 
 			offset += ewah_bit_ctz64(word >> offset);
+			/*
+			 * Can use bit positions directly, even for MIDX
+			 * bitmaps. See comment in try_partial_reuse()
+			 * for why.
+			 */
 			write_reused_pack_one(pos + offset, f, &w_curs);
 			display_progress(progress_state, ++written);
 		}
diff --git a/midx.c b/midx.c
index 9893795e298bb1..9a971105336a7f 100644
--- a/midx.c
+++ b/midx.c
@@ -48,12 +48,12 @@ static uint8_t oid_version(void)
 	}
 }
 
-static const unsigned char *get_midx_checksum(struct multi_pack_index *m)
+const unsigned char *get_midx_checksum(struct multi_pack_index *m)
 {
 	return m->data + m->data_len - the_hash_algo->rawsz;
 }
 
-static char *get_midx_filename(const char *object_dir)
+char *get_midx_filename(const char *object_dir)
 {
 	return xstrfmt("%s/pack/multi-pack-index", object_dir);
 }
diff --git a/midx.h b/midx.h
index 8684cf0fefe81d..1172df1a711489 100644
--- a/midx.h
+++ b/midx.h
@@ -42,6 +42,8 @@ struct multi_pack_index {
 #define MIDX_PROGRESS     (1 << 0)
 #define MIDX_WRITE_REV_INDEX (1 << 1)
 
+const unsigned char *get_midx_checksum(struct multi_pack_index *m);
+char *get_midx_filename(const char *object_dir);
 char *get_midx_rev_filename(struct multi_pack_index *m);
 
 struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 142fd0adb86e6f..9c55c1531e1f55 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -48,7 +48,7 @@ void bitmap_writer_show_progress(int show)
 }
 
 /**
- * Build the initial type index for the packfile
+ * Build the initial type index for the packfile or multi-pack-index
  */
 void bitmap_writer_build_type_index(struct packing_data *to_pack,
 				    struct pack_idx_entry **index,
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 20fa19fba132c8..9b76c0cd276dba 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -13,6 +13,7 @@
 #include "repository.h"
 #include "object-store.h"
 #include "list-objects-filter-options.h"
+#include "midx.h"
 #include "config.h"
 
 /*
@@ -35,8 +36,15 @@ struct stored_bitmap {
  * the active bitmap index is the largest one.
  */
 struct bitmap_index {
-	/* Packfile to which this bitmap index belongs to */
+	/*
+	 * The pack or multi-pack index (MIDX) that this bitmap index belongs
+	 * to.
+	 *
+	 * Exactly one of these must be non-NULL; this specifies the object
+	 * order used to interpret this bitmap.
+	 */
 	struct packed_git *pack;
+	struct multi_pack_index *midx;
 
 	/*
 	 * Mark the first `reuse_objects` in the packfile as reused:
@@ -71,6 +79,9 @@ struct bitmap_index {
 	/* If not NULL, this is a name-hash cache pointing into map. */
 	uint32_t *hashes;
 
+	/* The checksum of the packfile or MIDX; points into map. */
+	const unsigned char *checksum;
+
 	/*
 	 * Extended index.
 	 *
@@ -138,6 +149,8 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
 
 static uint32_t bitmap_num_objects(struct bitmap_index *index)
 {
+	if (index->midx)
+		return index->midx->num_objects;
 	return index->pack->num_objects;
 }
 
@@ -175,6 +188,7 @@ static int load_bitmap_header(struct bitmap_index *index)
 	}
 
 	index->entry_count = ntohl(header->entry_count);
+	index->checksum = header->checksum;
 	index->map_pos += header_size;
 	return 0;
 }
@@ -227,6 +241,8 @@ static int nth_bitmap_object_oid(struct bitmap_index *index,
 				 struct object_id *oid,
 				 uint32_t n)
 {
+	if (index->midx)
+		return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1;
 	return nth_packed_object_id(oid, index->pack, n);
 }
 
@@ -274,7 +290,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
 	return 0;
 }
 
-static char *pack_bitmap_filename(struct packed_git *p)
+char *midx_bitmap_filename(struct multi_pack_index *midx)
+{
+	return xstrfmt("%s-%s.bitmap",
+		       get_midx_filename(midx->object_dir),
+		       hash_to_hex(get_midx_checksum(midx)));
+}
+
+char *pack_bitmap_filename(struct packed_git *p)
 {
 	size_t len;
 
@@ -283,6 +306,57 @@ static char *pack_bitmap_filename(struct packed_git *p)
 	return xstrfmt("%.*s.bitmap", (int)len, p->pack_name);
 }
 
+static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
+			      struct multi_pack_index *midx)
+{
+	struct stat st;
+	char *idx_name = midx_bitmap_filename(midx);
+	int fd = git_open(idx_name);
+
+	free(idx_name);
+
+	if (fd < 0)
+		return -1;
+
+	if (fstat(fd, &st)) {
+		close(fd);
+		return -1;
+	}
+
+	if (bitmap_git->pack || bitmap_git->midx) {
+		/* ignore extra bitmap file; we can only handle one */
+		warning("ignoring extra bitmap file: %s",
+			get_midx_filename(midx->object_dir));
+		close(fd);
+		return -1;
+	}
+
+	bitmap_git->midx = midx;
+	bitmap_git->map_size = xsize_t(st.st_size);
+	bitmap_git->map_pos = 0;
+	bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ,
+				MAP_PRIVATE, fd, 0);
+	close(fd);
+
+	if (load_bitmap_header(bitmap_git) < 0)
+		goto cleanup;
+
+	if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum))
+		goto cleanup;
+
+	if (load_midx_revindex(bitmap_git->midx) < 0) {
+		warning(_("multi-pack bitmap is missing required reverse index"));
+		goto cleanup;
+	}
+	return 0;
+
+cleanup:
+	munmap(bitmap_git->map, bitmap_git->map_size);
+	bitmap_git->map_size = 0;
+	bitmap_git->map = NULL;
+	return -1;
+}
+
 static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile)
 {
 	int fd;
@@ -304,7 +378,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
 		return -1;
 	}
 
-	if (bitmap_git->pack) {
+	if (bitmap_git->pack || bitmap_git->midx) {
+		/* ignore extra bitmap file; we can only handle one */
 		warning("ignoring extra bitmap file: %s", packfile->pack_name);
 		close(fd);
 		return -1;
@@ -326,13 +401,39 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
 	return 0;
 }
 
-static int load_pack_bitmap(struct bitmap_index *bitmap_git)
+static int load_reverse_index(struct bitmap_index *bitmap_git)
+{
+	if (bitmap_is_midx(bitmap_git)) {
+		uint32_t i;
+		int ret;
+
+		/*
+		 * The multi-pack-index's .rev file is already loaded via
+		 * open_pack_bitmap_1().
+		 *
+		 * But we still need to open the individual pack .rev files,
+		 * since we will need to make use of them in pack-objects.
+		 */
+		for (i = 0; i < bitmap_git->midx->num_packs; i++) {
+			if (prepare_midx_pack(the_repository, bitmap_git->midx, i))
+				die(_("load_reverse_index: could not open pack"));
+			ret = load_pack_revindex(bitmap_git->midx->packs[i]);
+			if (ret)
+				return ret;
+		}
+		return 0;
+	}
+	return load_pack_revindex(bitmap_git->pack);
+}
+
+static int load_bitmap(struct bitmap_index *bitmap_git)
 {
 	assert(bitmap_git->map);
 
 	bitmap_git->bitmaps = kh_init_oid_map();
 	bitmap_git->ext_index.positions = kh_init_oid_pos();
-	if (load_pack_revindex(bitmap_git->pack))
+
+	if (load_reverse_index(bitmap_git))
 		goto failed;
 
 	if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) ||
@@ -376,11 +477,47 @@ static int open_pack_bitmap(struct repository *r,
 	return ret;
 }
 
+static int open_midx_bitmap(struct repository *r,
+			    struct bitmap_index *bitmap_git)
+{
+	struct multi_pack_index *midx;
+
+	assert(!bitmap_git->map);
+
+	for (midx = get_multi_pack_index(r); midx; midx = midx->next) {
+		if (!open_midx_bitmap_1(bitmap_git, midx))
+			return 0;
+	}
+	return -1;
+}
+
+static int open_bitmap(struct repository *r,
+		       struct bitmap_index *bitmap_git)
+{
+	assert(!bitmap_git->map);
+
+	if (!open_midx_bitmap(r, bitmap_git))
+		return 0;
+	return open_pack_bitmap(r, bitmap_git);
+}
+
 struct bitmap_index *prepare_bitmap_git(struct repository *r)
 {
 	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
 
-	if (!open_pack_bitmap(r, bitmap_git) && !load_pack_bitmap(bitmap_git))
+	if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git))
+		return bitmap_git;
+
+	free_bitmap_index(bitmap_git);
+	return NULL;
+}
+
+struct bitmap_index *prepare_midx_bitmap_git(struct repository *r,
+					     struct multi_pack_index *midx)
+{
+	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
+
+	if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git))
 		return bitmap_git;
 
 	free_bitmap_index(bitmap_git);
@@ -430,10 +567,26 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
 	return pos;
 }
 
+static int bitmap_position_midx(struct bitmap_index *bitmap_git,
+				const struct object_id *oid)
+{
+	uint32_t want, got;
+	if (!bsearch_midx(oid, bitmap_git->midx, &want))
+		return -1;
+
+	if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0)
+		return -1;
+	return got;
+}
+
 static int bitmap_position(struct bitmap_index *bitmap_git,
 			   const struct object_id *oid)
 {
-	int pos = bitmap_position_packfile(bitmap_git, oid);
+	int pos;
+	if (bitmap_is_midx(bitmap_git))
+		pos = bitmap_position_midx(bitmap_git, oid);
+	else
+		pos = bitmap_position_packfile(bitmap_git, oid);
 	return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid);
 }
 
@@ -726,6 +879,7 @@ static void show_objects_for_type(
 			continue;
 
 		for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
+			struct packed_git *pack;
 			struct object_id oid;
 			uint32_t hash = 0, index_pos;
 			off_t ofs;
@@ -735,14 +889,28 @@ static void show_objects_for_type(
 
 			offset += ewah_bit_ctz64(word >> offset);
 
-			index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
-			ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
-			nth_packed_object_id(&oid, bitmap_git->pack, index_pos);
+			if (bitmap_is_midx(bitmap_git)) {
+				struct multi_pack_index *m = bitmap_git->midx;
+				uint32_t pack_id;
+
+				index_pos = pack_pos_to_midx(m, pos + offset);
+				ofs = nth_midxed_offset(m, index_pos);
+				nth_midxed_object_oid(&oid, m, index_pos);
+
+				pack_id = nth_midxed_pack_int_id(m, index_pos);
+				pack = bitmap_git->midx->packs[pack_id];
+			} else {
+				index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
+				ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
+				nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
+
+				pack = bitmap_git->pack;
+			}
 
 			if (bitmap_git->hashes)
 				hash = get_be32(bitmap_git->hashes + index_pos);
 
-			show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs);
+			show_reach(&oid, object_type, 0, hash, pack, ofs);
 		}
 	}
 }
@@ -754,8 +922,13 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
 		struct object *object = roots->item;
 		roots = roots->next;
 
-		if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
-			return 1;
+		if (bitmap_is_midx(bitmap_git)) {
+			if (bsearch_midx(&object->oid, bitmap_git->midx, NULL))
+				return 1;
+		} else {
+			if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
+				return 1;
+		}
 	}
 
 	return 0;
@@ -841,14 +1014,26 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 				     uint32_t pos)
 {
-	struct packed_git *pack = bitmap_git->pack;
 	unsigned long size;
 	struct object_info oi = OBJECT_INFO_INIT;
 
 	oi.sizep = &size;
 
 	if (pos < bitmap_num_objects(bitmap_git)) {
-		off_t ofs = pack_pos_to_offset(pack, pos);
+		struct packed_git *pack;
+		off_t ofs;
+
+		if (bitmap_is_midx(bitmap_git)) {
+			uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos);
+			uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
+
+			pack = bitmap_git->midx->packs[pack_id];
+			ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
+		} else {
+			pack = bitmap_git->pack;
+			ofs = pack_pos_to_offset(pack, pos);
+		}
+
 		if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
 			struct object_id oid;
 			nth_bitmap_object_oid(bitmap_git, &oid,
@@ -1029,7 +1214,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
 	/* try to open a bitmapped pack, but don't parse it yet
 	 * because we may not need to use it */
 	CALLOC_ARRAY(bitmap_git, 1);
-	if (open_pack_bitmap(revs->repo, bitmap_git) < 0)
+	if (open_bitmap(revs->repo, bitmap_git) < 0)
 		goto cleanup;
 
 	for (i = 0; i < revs->pending.nr; ++i) {
@@ -1073,7 +1258,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
 	 * from disk. this is the point of no return; after this the rev_list
 	 * becomes invalidated and we must perform the revwalk through bitmaps
 	 */
-	if (load_pack_bitmap(bitmap_git) < 0)
+	if (load_bitmap(bitmap_git) < 0)
 		goto cleanup;
 
 	object_array_clear(&revs->pending);
@@ -1121,19 +1306,43 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
  * reused, but you can keep feeding bits.
  */
 static int try_partial_reuse(struct bitmap_index *bitmap_git,
+			     struct packed_git *pack,
 			     size_t pos,
 			     struct bitmap *reuse,
 			     struct pack_window **w_curs)
 {
-	off_t offset, header;
+	off_t offset, delta_obj_offset;
 	enum object_type type;
 	unsigned long size;
 
-	if (pos >= bitmap_num_objects(bitmap_git))
-		return -1; /* not actually in the pack or MIDX */
+	/*
+	 * try_partial_reuse() is called either on (a) objects in the
+	 * bitmapped pack (in the case of a single-pack bitmap) or (b)
+	 * objects in the preferred pack of a multi-pack bitmap.
+	 * Importantly, the latter can pretend as if only a single pack
+	 * exists because:
+	 *
+	 *   - The first pack->num_objects bits of a MIDX bitmap are
+	 *     reserved for the preferred pack, and
+	 *
+	 *   - Ties due to duplicate objects are always resolved in
+	 *     favor of the preferred pack.
+	 *
+	 * Therefore we do not need to ever ask the MIDX for its copy of
+	 * an object by OID, since it will always select it from the
+	 * preferred pack. Likewise, the selected copy of the base
+	 * object for any deltas will reside in the same pack.
+	 *
+	 * This means that we can reuse pos when looking up the bit in
+	 * the reuse bitmap, too, since bits corresponding to the
+	 * preferred pack precede all bits from other packs.
+	 */
+
+	if (pos >= pack->num_objects)
+		return -1; /* not actually in the pack or MIDX preferred pack */
 
-	offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
-	type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
+	offset = delta_obj_offset = pack_pos_to_offset(pack, pos);
+	type = unpack_object_header(pack, w_curs, &offset, &size);
 	if (type < 0)
 		return -1; /* broken packfile, punt */
 
@@ -1149,11 +1358,11 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
 		 * and the normal slow path will complain about it in
 		 * more detail.
 		 */
-		base_offset = get_delta_base(bitmap_git->pack, w_curs,
-					     &offset, type, header);
+		base_offset = get_delta_base(pack, w_curs, &offset, type,
+					     delta_obj_offset);
 		if (!base_offset)
 			return 0;
-		if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0)
+		if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0)
 			return 0;
 
 		/*
@@ -1187,24 +1396,48 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
 	return 0;
 }
 
+static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
+{
+	struct multi_pack_index *m = bitmap_git->midx;
+	if (!m)
+		BUG("midx_preferred_pack: requires non-empty MIDX");
+	return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0));
+}
+
 int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 				       struct packed_git **packfile_out,
 				       uint32_t *entries,
 				       struct bitmap **reuse_out)
 {
+	struct packed_git *pack;
 	struct bitmap *result = bitmap_git->result;
 	struct bitmap *reuse;
 	struct pack_window *w_curs = NULL;
 	size_t i = 0;
 	uint32_t offset;
-	uint32_t objects_nr = bitmap_num_objects(bitmap_git);
+	uint32_t objects_nr;
 
 	assert(result);
 
+	load_reverse_index(bitmap_git);
+
+	if (bitmap_is_midx(bitmap_git))
+		pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)];
+	else
+		pack = bitmap_git->pack;
+	objects_nr = pack->num_objects;
+
 	while (i < result->word_alloc && result->words[i] == (eword_t)~0)
 		i++;
 
-	/* Don't mark objects not in the packfile */
+	/*
+	 * Don't mark objects not in the packfile or preferred pack. This bitmap
+	 * marks objects eligible for reuse, but the pack-reuse code only
+	 * understands how to reuse a single pack. Since the preferred pack is
+	 * guaranteed to have all bases for its deltas (in a multi-pack bitmap),
+	 * we use it instead of another pack. In single-pack bitmaps, the choice
+	 * is made for us.
+	 */
 	if (i > objects_nr / BITS_IN_EWORD)
 		i = objects_nr / BITS_IN_EWORD;
 
@@ -1220,8 +1453,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 				break;
 
 			offset += ewah_bit_ctz64(word >> offset);
-			if (try_partial_reuse(bitmap_git, pos + offset, reuse,
-					      &w_curs) < 0) {
+			if (try_partial_reuse(bitmap_git, pack, pos + offset,
+					      reuse, &w_curs) < 0) {
 				/*
 				 * try_partial_reuse indicated we couldn't reuse
 				 * any bits, so there is no point in trying more
@@ -1250,7 +1483,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 	 * need to be handled separately.
 	 */
 	bitmap_and_not(result, reuse);
-	*packfile_out = bitmap_git->pack;
+	*packfile_out = pack;
 	*reuse_out = reuse;
 	return 0;
 }
@@ -1524,6 +1757,12 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
 	uint32_t i, num_objects;
 	uint32_t *reposition;
 
+	if (!bitmap_is_midx(bitmap_git))
+		load_reverse_index(bitmap_git);
+	else if (load_midx_revindex(bitmap_git->midx) < 0)
+		BUG("rebuild_existing_bitmaps: missing required rev-cache "
+		    "extension");
+
 	num_objects = bitmap_num_objects(bitmap_git);
 	CALLOC_ARRAY(reposition, num_objects);
 
@@ -1531,8 +1770,13 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
 		struct object_id oid;
 		struct object_entry *oe;
 
-		nth_packed_object_id(&oid, bitmap_git->pack,
-				     pack_pos_to_index(bitmap_git->pack, i));
+		if (bitmap_is_midx(bitmap_git))
+			nth_midxed_object_oid(&oid,
+					      bitmap_git->midx,
+					      pack_pos_to_midx(bitmap_git->midx, i));
+		else
+			nth_packed_object_id(&oid, bitmap_git->pack,
+					     pack_pos_to_index(bitmap_git->pack, i));
 		oe = packlist_find(mapping, &oid);
 
 		if (oe)
@@ -1558,6 +1802,19 @@ void free_bitmap_index(struct bitmap_index *b)
 	free(b->ext_index.hashes);
 	bitmap_free(b->result);
 	bitmap_free(b->haves);
+	if (bitmap_is_midx(b)) {
+		/*
+		 * Multi-pack bitmaps need to have resources associated with
+		 * their on-disk reverse indexes unmapped so that stale .rev and
+		 * .bitmap files can be removed.
+		 *
+		 * Unlike pack-based bitmaps, multi-pack bitmaps can be read and
+		 * written in the same 'git multi-pack-index write --bitmap'
+		 * process. Close resources so they can be removed safely on
+		 * platforms like Windows.
+		 */
+		close_midx_revindex(b->midx);
+	}
 	free(b);
 }
 
@@ -1572,7 +1829,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
 				     enum object_type object_type)
 {
 	struct bitmap *result = bitmap_git->result;
-	struct packed_git *pack = bitmap_git->pack;
 	off_t total = 0;
 	struct ewah_iterator it;
 	eword_t filter;
@@ -1589,15 +1845,35 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
 			continue;
 
 		for (offset = 0; offset < BITS_IN_EWORD; offset++) {
-			size_t pos;
-
 			if ((word >> offset) == 0)
 				break;
 
 			offset += ewah_bit_ctz64(word >> offset);
-			pos = base + offset;
-			total += pack_pos_to_offset(pack, pos + 1) -
-				 pack_pos_to_offset(pack, pos);
+
+			if (bitmap_is_midx(bitmap_git)) {
+				uint32_t pack_pos;
+				uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset);
+				off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos);
+
+				uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
+				struct packed_git *pack = bitmap_git->midx->packs[pack_id];
+
+				if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) {
+					struct object_id oid;
+					nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos);
+
+					die(_("could not find %s in pack %s at offset %"PRIuMAX),
+					    oid_to_hex(&oid),
+					    pack->pack_name,
+					    (uintmax_t)offset);
+				}
+
+				total += pack_pos_to_offset(pack, pack_pos + 1) - offset;
+			} else {
+				size_t pos = base + offset;
+				total += pack_pos_to_offset(bitmap_git->pack, pos + 1) -
+					 pack_pos_to_offset(bitmap_git->pack, pos);
+			}
 		}
 	}
 
@@ -1648,6 +1924,11 @@ off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
 	return total;
 }
 
+int bitmap_is_midx(struct bitmap_index *bitmap_git)
+{
+	return !!bitmap_git->midx;
+}
+
 const struct string_list *bitmap_preferred_tips(struct repository *r)
 {
 	return repo_config_get_value_multi(r, "pack.preferbitmaptips");
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 52ea10de5117ab..81664f933f02a0 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -44,6 +44,8 @@ typedef int (*show_reachable_fn)(
 struct bitmap_index;
 
 struct bitmap_index *prepare_bitmap_git(struct repository *r);
+struct bitmap_index *prepare_midx_bitmap_git(struct repository *r,
+					     struct multi_pack_index *midx);
 void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits,
 			      uint32_t *trees, uint32_t *blobs, uint32_t *tags);
 void traverse_bitmap_commit_list(struct bitmap_index *,
@@ -92,6 +94,10 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
 			  const char *filename,
 			  uint16_t options);
+char *midx_bitmap_filename(struct multi_pack_index *midx);
+char *pack_bitmap_filename(struct packed_git *p);
+
+int bitmap_is_midx(struct bitmap_index *bitmap_git);
 
 const struct string_list *bitmap_preferred_tips(struct repository *r);
 int bitmap_is_preferred_refname(struct repository *r, const char *refname);
diff --git a/packfile.c b/packfile.c
index 755aa7aec5efbf..e855b932082bdc 100644
--- a/packfile.c
+++ b/packfile.c
@@ -860,7 +860,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
 	if (!strcmp(file_name, "multi-pack-index"))
 		return;
 	if (starts_with(file_name, "multi-pack-index") &&
-	    ends_with(file_name, ".rev"))
+	    (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
 		return;
 	if (ends_with(file_name, ".idx") ||
 	    ends_with(file_name, ".rev") ||

From c287aeb9a1a95da846d6750df399af0b4af626c5 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:02 -0400
Subject: [PATCH 088/198] pack-bitmap: write multi-pack bitmaps

Write multi-pack bitmaps in the format described by
Documentation/technical/bitmap-format.txt, inferring their presence with
the absence of '--bitmap'.

To write a multi-pack bitmap, this patch attempts to reuse as much of
the existing machinery from pack-objects as possible. Specifically, the
MIDX code prepares a packing_data struct that pretends as if a single
packfile has been generated containing all of the objects contained
within the MIDX.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-multi-pack-index.txt |  12 +-
 builtin/multi-pack-index.c             |   2 +
 midx.c                                 | 208 ++++++++++++++++++++++++-
 midx.h                                 |   1 +
 4 files changed, 214 insertions(+), 9 deletions(-)

diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt
index c9b063d31e1df8..ed52459a9d5bd0 100644
--- a/Documentation/git-multi-pack-index.txt
+++ b/Documentation/git-multi-pack-index.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress]
-	[--preferred-pack=<pack>] <subcommand>
+	[--preferred-pack=<pack>] [--[no-]bitmap] <subcommand>
 
 DESCRIPTION
 -----------
@@ -40,6 +40,9 @@ write::
 		multiple packs contain the same object. `<pack>` must
 		contain at least one object. If not given, ties are
 		broken in favor of the pack with the lowest mtime.
+
+	--[no-]bitmap::
+		Control whether or not a multi-pack bitmap is written.
 --
 
 verify::
@@ -81,6 +84,13 @@ EXAMPLES
 $ git multi-pack-index write
 -----------------------------------------------
 
+* Write a MIDX file for the packfiles in the current .git folder with a
+corresponding bitmap.
++
+-------------------------------------------------------------
+$ git multi-pack-index write --preferred-pack=<pack> --bitmap
+-------------------------------------------------------------
+
 * Write a MIDX file for the packfiles in an alternate object store.
 +
 -----------------------------------------------
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
index 5d3ea445fdb3a8..bf6fa982e3f29c 100644
--- a/builtin/multi-pack-index.c
+++ b/builtin/multi-pack-index.c
@@ -68,6 +68,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv)
 		OPT_STRING(0, "preferred-pack", &opts.preferred_pack,
 			   N_("preferred-pack"),
 			   N_("pack for reuse when computing a multi-pack bitmap")),
+		OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"),
+			MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX),
 		OPT_END(),
 	};
 
diff --git a/midx.c b/midx.c
index 9a971105336a7f..9028af2a9d8ade 100644
--- a/midx.c
+++ b/midx.c
@@ -13,6 +13,10 @@
 #include "repository.h"
 #include "chunk-format.h"
 #include "pack.h"
+#include "pack-bitmap.h"
+#include "refs.h"
+#include "revision.h"
+#include "list-objects.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -888,6 +892,166 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
 static void clear_midx_files_ext(struct repository *r, const char *ext,
 				 unsigned char *keep_hash);
 
+static void prepare_midx_packing_data(struct packing_data *pdata,
+				      struct write_midx_context *ctx)
+{
+	uint32_t i;
+
+	memset(pdata, 0, sizeof(struct packing_data));
+	prepare_packing_data(the_repository, pdata);
+
+	for (i = 0; i < ctx->entries_nr; i++) {
+		struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]];
+		struct object_entry *to = packlist_alloc(pdata, &from->oid);
+
+		oe_set_in_pack(pdata, to,
+			       ctx->info[ctx->pack_perm[from->pack_int_id]].p);
+	}
+}
+
+static int add_ref_to_pending(const char *refname,
+			      const struct object_id *oid,
+			      int flag, void *cb_data)
+{
+	struct rev_info *revs = (struct rev_info*)cb_data;
+	struct object *object;
+
+	if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
+		warning("symbolic ref is dangling: %s", refname);
+		return 0;
+	}
+
+	object = parse_object_or_die(oid, refname);
+	if (object->type != OBJ_COMMIT)
+		return 0;
+
+	add_pending_object(revs, object, "");
+	if (bitmap_is_preferred_refname(revs->repo, refname))
+		object->flags |= NEEDS_BITMAP;
+	return 0;
+}
+
+struct bitmap_commit_cb {
+	struct commit **commits;
+	size_t commits_nr, commits_alloc;
+
+	struct write_midx_context *ctx;
+};
+
+static const struct object_id *bitmap_oid_access(size_t index,
+						 const void *_entries)
+{
+	const struct pack_midx_entry *entries = _entries;
+	return &entries[index].oid;
+}
+
+static void bitmap_show_commit(struct commit *commit, void *_data)
+{
+	struct bitmap_commit_cb *data = _data;
+	int pos = oid_pos(&commit->object.oid, data->ctx->entries,
+			  data->ctx->entries_nr,
+			  bitmap_oid_access);
+	if (pos < 0)
+		return;
+
+	ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc);
+	data->commits[data->commits_nr++] = commit;
+}
+
+static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p,
+						    struct write_midx_context *ctx)
+{
+	struct rev_info revs;
+	struct bitmap_commit_cb cb = {0};
+
+	cb.ctx = ctx;
+
+	repo_init_revisions(the_repository, &revs, NULL);
+	setup_revisions(0, NULL, &revs, NULL);
+	for_each_ref(add_ref_to_pending, &revs);
+
+	/*
+	 * Skipping promisor objects here is intentional, since it only excludes
+	 * them from the list of reachable commits that we want to select from
+	 * when computing the selection of MIDX'd commits to receive bitmaps.
+	 *
+	 * Reachability bitmaps do require that their objects be closed under
+	 * reachability, but fetching any objects missing from promisors at this
+	 * point is too late. But, if one of those objects can be reached from
+	 * an another object that is included in the bitmap, then we will
+	 * complain later that we don't have reachability closure (and fail
+	 * appropriately).
+	 */
+	fetch_if_missing = 0;
+	revs.exclude_promisor_objects = 1;
+
+	if (prepare_revision_walk(&revs))
+		die(_("revision walk setup failed"));
+
+	traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb);
+	if (indexed_commits_nr_p)
+		*indexed_commits_nr_p = cb.commits_nr;
+
+	return cb.commits;
+}
+
+static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash,
+			     struct write_midx_context *ctx,
+			     unsigned flags)
+{
+	struct packing_data pdata;
+	struct pack_idx_entry **index;
+	struct commit **commits = NULL;
+	uint32_t i, commits_nr;
+	char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash));
+	int ret;
+
+	prepare_midx_packing_data(&pdata, ctx);
+
+	commits = find_commits_for_midx_bitmap(&commits_nr, ctx);
+
+	/*
+	 * Build the MIDX-order index based on pdata.objects (which is already
+	 * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
+	 * this order).
+	 */
+	ALLOC_ARRAY(index, pdata.nr_objects);
+	for (i = 0; i < pdata.nr_objects; i++)
+		index[i] = &pdata.objects[i].idx;
+
+	bitmap_writer_show_progress(flags & MIDX_PROGRESS);
+	bitmap_writer_build_type_index(&pdata, index, pdata.nr_objects);
+
+	/*
+	 * bitmap_writer_finish expects objects in lex order, but pack_order
+	 * gives us exactly that. use it directly instead of re-sorting the
+	 * array.
+	 *
+	 * This changes the order of objects in 'index' between
+	 * bitmap_writer_build_type_index and bitmap_writer_finish.
+	 *
+	 * The same re-ordering takes place in the single-pack bitmap code via
+	 * write_idx_file(), which is called by finish_tmp_packfile(), which
+	 * happens between bitmap_writer_build_type_index() and
+	 * bitmap_writer_finish().
+	 */
+	for (i = 0; i < pdata.nr_objects; i++)
+		index[ctx->pack_order[i]] = &pdata.objects[i].idx;
+
+	bitmap_writer_select_commits(commits, commits_nr, -1);
+	ret = bitmap_writer_build(&pdata);
+	if (ret < 0)
+		goto cleanup;
+
+	bitmap_writer_set_checksum(midx_hash);
+	bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, 0);
+
+cleanup:
+	free(index);
+	free(bitmap_name);
+	return ret;
+}
+
 static int write_midx_internal(const char *object_dir,
 			       struct string_list *packs_to_drop,
 			       const char *preferred_pack_name,
@@ -930,7 +1094,7 @@ static int write_midx_internal(const char *object_dir,
 
 			ctx.info[ctx.nr].orig_pack_int_id = i;
 			ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]);
-			ctx.info[ctx.nr].p = NULL;
+			ctx.info[ctx.nr].p = ctx.m->packs[i];
 			ctx.info[ctx.nr].expired = 0;
 
 			if (flags & MIDX_WRITE_REV_INDEX) {
@@ -964,8 +1128,26 @@ static int write_midx_internal(const char *object_dir,
 	for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
 	stop_progress(&ctx.progress);
 
-	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
-		goto cleanup;
+	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) {
+		struct bitmap_index *bitmap_git;
+		int bitmap_exists;
+		int want_bitmap = flags & MIDX_WRITE_BITMAP;
+
+		bitmap_git = prepare_midx_bitmap_git(the_repository, ctx.m);
+		bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
+		free_bitmap_index(bitmap_git);
+
+		if (bitmap_exists || !want_bitmap) {
+			/*
+			 * The correct MIDX already exists, and so does a
+			 * corresponding bitmap (or one wasn't requested).
+			 */
+			if (!want_bitmap)
+				clear_midx_files_ext(the_repository, ".bitmap",
+						     NULL);
+			goto cleanup;
+		}
+	}
 
 	if (preferred_pack_name) {
 		int found = 0;
@@ -981,7 +1163,8 @@ static int write_midx_internal(const char *object_dir,
 		if (!found)
 			warning(_("unknown preferred pack: '%s'"),
 				preferred_pack_name);
-	} else if (ctx.nr && (flags & MIDX_WRITE_REV_INDEX)) {
+	} else if (ctx.nr &&
+		   (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
 		struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p;
 		ctx.preferred_pack_idx = 0;
 
@@ -1113,9 +1296,6 @@ static int write_midx_internal(const char *object_dir,
 	hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
 	f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
 
-	if (ctx.m)
-		close_midx(ctx.m);
-
 	if (ctx.nr - dropped_packs == 0) {
 		error(_("no pack files to index."));
 		result = 1;
@@ -1146,14 +1326,24 @@ static int write_midx_internal(const char *object_dir,
 	finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
 	free_chunkfile(cf);
 
-	if (flags & MIDX_WRITE_REV_INDEX)
+	if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
 		ctx.pack_order = midx_pack_order(&ctx);
 
 	if (flags & MIDX_WRITE_REV_INDEX)
 		write_midx_reverse_index(midx_name, midx_hash, &ctx);
+	if (flags & MIDX_WRITE_BITMAP) {
+		if (write_midx_bitmap(midx_name, midx_hash, &ctx, flags) < 0) {
+			error(_("could not write multi-pack bitmap"));
+			result = 1;
+			goto cleanup;
+		}
+	}
+
+	close_midx(ctx.m);
 
 	commit_lock_file(&lk);
 
+	clear_midx_files_ext(the_repository, ".bitmap", midx_hash);
 	clear_midx_files_ext(the_repository, ".rev", midx_hash);
 
 cleanup:
@@ -1170,6 +1360,7 @@ static int write_midx_internal(const char *object_dir,
 	free(ctx.pack_perm);
 	free(ctx.pack_order);
 	free(midx_name);
+
 	return result;
 }
 
@@ -1230,6 +1421,7 @@ void clear_midx_file(struct repository *r)
 	if (remove_path(midx))
 		die(_("failed to clear multi-pack-index at %s"), midx);
 
+	clear_midx_files_ext(r, ".bitmap", NULL);
 	clear_midx_files_ext(r, ".rev", NULL);
 
 	free(midx);
diff --git a/midx.h b/midx.h
index 1172df1a711489..350f4d0a7b4c20 100644
--- a/midx.h
+++ b/midx.h
@@ -41,6 +41,7 @@ struct multi_pack_index {
 
 #define MIDX_PROGRESS     (1 << 0)
 #define MIDX_WRITE_REV_INDEX (1 << 1)
+#define MIDX_WRITE_BITMAP (1 << 2)
 
 const unsigned char *get_midx_checksum(struct multi_pack_index *m);
 char *get_midx_filename(const char *object_dir);

From f359671f63ee27715ed049c681df6efb7d7280c7 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:04 -0400
Subject: [PATCH 089/198] t5310: move some tests to lib-bitmap.sh

We'll soon be adding a test script that will cover many of the same
bitmap concepts as t5310, but for MIDX bitmaps. Let's pull out as many
of the applicable tests as we can so we don't have to rewrite them.

There should be no functional change to t5310; we still run the same
operations in the same order.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-bitmap.sh         | 236 ++++++++++++++++++++++++++++++++++++++++
 t/t5310-pack-bitmaps.sh | 229 +-------------------------------------
 2 files changed, 241 insertions(+), 224 deletions(-)

diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh
index fe3f98be24f302..ecb5d0e05d44ae 100644
--- a/t/lib-bitmap.sh
+++ b/t/lib-bitmap.sh
@@ -1,3 +1,6 @@
+# Helpers for scripts testing bitamp functionality; see t5310 for
+# example usage.
+
 # Compare a file containing rev-list bitmap traversal output to its non-bitmap
 # counterpart. You can't just use test_cmp for this, because the two produce
 # subtly different output:
@@ -24,3 +27,236 @@ test_bitmap_traversal () {
 	test_cmp "$1.normalized" "$2.normalized" &&
 	rm -f "$1.normalized" "$2.normalized"
 }
+
+# To ensure the logic for "maximal commits" is exercised, make
+# the repository a bit more complicated.
+#
+#    other                         second
+#      *                             *
+# (99 commits)                  (99 commits)
+#      *                             *
+#      |\                           /|
+#      | * octo-other  octo-second * |
+#      |/|\_________  ____________/|\|
+#      | \          \/  __________/  |
+#      |  | ________/\ /             |
+#      *  |/          * merge-right  *
+#      | _|__________/ \____________ |
+#      |/ |                         \|
+# (l1) *  * merge-left               * (r1)
+#      | / \________________________ |
+#      |/                           \|
+# (l2) *                             * (r2)
+#       \___________________________ |
+#                                   \|
+#                                    * (base)
+#
+# We only push bits down the first-parent history, which
+# makes some of these commits unimportant!
+#
+# The important part for the maximal commit algorithm is how
+# the bitmasks are extended. Assuming starting bit positions
+# for second (bit 0) and other (bit 1), the bitmasks at the
+# end should be:
+#
+#      second: 1       (maximal, selected)
+#       other: 01      (maximal, selected)
+#      (base): 11 (maximal)
+#
+# This complicated history was important for a previous
+# version of the walk that guarantees never walking a
+# commit multiple times. That goal might be important
+# again, so preserve this complicated case. For now, this
+# test will guarantee that the bitmaps are computed
+# correctly, even with the repeat calculations.
+setup_bitmap_history() {
+	test_expect_success 'setup repo with moderate-sized history' '
+		test_commit_bulk --id=file 10 &&
+		git branch -M second &&
+		git checkout -b other HEAD~5 &&
+		test_commit_bulk --id=side 10 &&
+
+		# add complicated history setup, including merges and
+		# ambiguous merge-bases
+
+		git checkout -b merge-left other~2 &&
+		git merge second~2 -m "merge-left" &&
+
+		git checkout -b merge-right second~1 &&
+		git merge other~1 -m "merge-right" &&
+
+		git checkout -b octo-second second &&
+		git merge merge-left merge-right -m "octopus-second" &&
+
+		git checkout -b octo-other other &&
+		git merge merge-left merge-right -m "octopus-other" &&
+
+		git checkout other &&
+		git merge octo-other -m "pull octopus" &&
+
+		git checkout second &&
+		git merge octo-second -m "pull octopus" &&
+
+		# Remove these branches so they are not selected
+		# as bitmap tips
+		git branch -D merge-left &&
+		git branch -D merge-right &&
+		git branch -D octo-other &&
+		git branch -D octo-second &&
+
+		# add padding to make these merges less interesting
+		# and avoid having them selected for bitmaps
+		test_commit_bulk --id=file 100 &&
+		git checkout other &&
+		test_commit_bulk --id=side 100 &&
+		git checkout second &&
+
+		bitmaptip=$(git rev-parse second) &&
+		blob=$(echo tagged-blob | git hash-object -w --stdin) &&
+		git tag tagged-blob $blob
+	'
+}
+
+rev_list_tests_head () {
+	test_expect_success "counting commits via bitmap ($state, $branch)" '
+		git rev-list --count $branch >expect &&
+		git rev-list --use-bitmap-index --count $branch >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "counting partial commits via bitmap ($state, $branch)" '
+		git rev-list --count $branch~5..$branch >expect &&
+		git rev-list --use-bitmap-index --count $branch~5..$branch >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "counting commits with limit ($state, $branch)" '
+		git rev-list --count -n 1 $branch >expect &&
+		git rev-list --use-bitmap-index --count -n 1 $branch >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "counting non-linear history ($state, $branch)" '
+		git rev-list --count other...second >expect &&
+		git rev-list --use-bitmap-index --count other...second >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "counting commits with limiting ($state, $branch)" '
+		git rev-list --count $branch -- 1.t >expect &&
+		git rev-list --use-bitmap-index --count $branch -- 1.t >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "counting objects via bitmap ($state, $branch)" '
+		git rev-list --count --objects $branch >expect &&
+		git rev-list --use-bitmap-index --count --objects $branch >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success "enumerate commits ($state, $branch)" '
+		git rev-list --use-bitmap-index $branch >actual &&
+		git rev-list $branch >expect &&
+		test_bitmap_traversal --no-confirm-bitmaps expect actual
+	'
+
+	test_expect_success "enumerate --objects ($state, $branch)" '
+		git rev-list --objects --use-bitmap-index $branch >actual &&
+		git rev-list --objects $branch >expect &&
+		test_bitmap_traversal expect actual
+	'
+
+	test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" '
+		git rev-list --objects --use-bitmap-index $branch tagged-blob >actual &&
+		grep $blob actual
+	'
+}
+
+rev_list_tests () {
+	state=$1
+
+	for branch in "second" "other"
+	do
+		rev_list_tests_head
+	done
+}
+
+basic_bitmap_tests () {
+	tip="$1"
+	test_expect_success 'rev-list --test-bitmap verifies bitmaps' "
+		git rev-list --test-bitmap "${tip:-HEAD}"
+	"
+
+	rev_list_tests 'full bitmap'
+
+	test_expect_success 'clone from bitmapped repository' '
+		rm -fr clone.git &&
+		git clone --no-local --bare . clone.git &&
+		git rev-parse HEAD >expect &&
+		git --git-dir=clone.git rev-parse HEAD >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success 'partial clone from bitmapped repository' '
+		test_config uploadpack.allowfilter true &&
+		rm -fr partial-clone.git &&
+		git clone --no-local --bare --filter=blob:none . partial-clone.git &&
+		(
+			cd partial-clone.git &&
+			pack=$(echo objects/pack/*.pack) &&
+			git verify-pack -v "$pack" >have &&
+			awk "/blob/ { print \$1 }" <have >blobs &&
+			# we expect this single blob because of the direct ref
+			git rev-parse refs/tags/tagged-blob >expect &&
+			test_cmp expect blobs
+		)
+	'
+
+	test_expect_success 'setup further non-bitmapped commits' '
+		test_commit_bulk --id=further 10
+	'
+
+	rev_list_tests 'partial bitmap'
+
+	test_expect_success 'fetch (partial bitmap)' '
+		git --git-dir=clone.git fetch origin second:second &&
+		git rev-parse HEAD >expect &&
+		git --git-dir=clone.git rev-parse HEAD >actual &&
+		test_cmp expect actual
+	'
+
+	test_expect_success 'enumerating progress counts pack-reused objects' '
+		count=$(git rev-list --objects --all --count) &&
+		git repack -adb &&
+
+		# check first with only reused objects; confirm that our
+		# progress showed the right number, and also that we did
+		# pack-reuse as expected.  Check only the final "done"
+		# line of the meter (there may be an arbitrary number of
+		# intermediate lines ending with CR).
+		GIT_PROGRESS_DELAY=0 \
+			git pack-objects --all --stdout --progress \
+			</dev/null >/dev/null 2>stderr &&
+		grep "Enumerating objects: $count, done" stderr &&
+		grep "pack-reused $count" stderr &&
+
+		# now the same but with one non-reused object
+		git commit --allow-empty -m "an extra commit object" &&
+		GIT_PROGRESS_DELAY=0 \
+			git pack-objects --all --stdout --progress \
+			</dev/null >/dev/null 2>stderr &&
+		grep "Enumerating objects: $((count+1)), done" stderr &&
+		grep "pack-reused $count" stderr
+	'
+}
+
+# have_delta <obj> <expected_base>
+#
+# Note that because this relies on cat-file, it might find _any_ copy of an
+# object in the repository. The caller is responsible for making sure
+# there's only one (e.g., via "repack -ad", or having just fetched a copy).
+have_delta () {
+	echo $2 >expect &&
+	echo $1 | git cat-file --batch-check="%(deltabase)" >actual &&
+	test_cmp expect actual
+}
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index b02838750e49a3..4318f84d534516 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -25,93 +25,10 @@ has_any () {
 	grep -Ff "$1" "$2"
 }
 
-# To ensure the logic for "maximal commits" is exercised, make
-# the repository a bit more complicated.
-#
-#    other                         second
-#      *                             *
-# (99 commits)                  (99 commits)
-#      *                             *
-#      |\                           /|
-#      | * octo-other  octo-second * |
-#      |/|\_________  ____________/|\|
-#      | \          \/  __________/  |
-#      |  | ________/\ /             |
-#      *  |/          * merge-right  *
-#      | _|__________/ \____________ |
-#      |/ |                         \|
-# (l1) *  * merge-left               * (r1)
-#      | / \________________________ |
-#      |/                           \|
-# (l2) *                             * (r2)
-#       \___________________________ |
-#                                   \|
-#                                    * (base)
-#
-# We only push bits down the first-parent history, which
-# makes some of these commits unimportant!
-#
-# The important part for the maximal commit algorithm is how
-# the bitmasks are extended. Assuming starting bit positions
-# for second (bit 0) and other (bit 1), the bitmasks at the
-# end should be:
-#
-#      second: 1       (maximal, selected)
-#       other: 01      (maximal, selected)
-#      (base): 11 (maximal)
-#
-# This complicated history was important for a previous
-# version of the walk that guarantees never walking a
-# commit multiple times. That goal might be important
-# again, so preserve this complicated case. For now, this
-# test will guarantee that the bitmaps are computed
-# correctly, even with the repeat calculations.
-
-test_expect_success 'setup repo with moderate-sized history' '
-	test_commit_bulk --id=file 10 &&
-	git branch -M second &&
-	git checkout -b other HEAD~5 &&
-	test_commit_bulk --id=side 10 &&
-
-	# add complicated history setup, including merges and
-	# ambiguous merge-bases
-
-	git checkout -b merge-left other~2 &&
-	git merge second~2 -m "merge-left" &&
-
-	git checkout -b merge-right second~1 &&
-	git merge other~1 -m "merge-right" &&
-
-	git checkout -b octo-second second &&
-	git merge merge-left merge-right -m "octopus-second" &&
-
-	git checkout -b octo-other other &&
-	git merge merge-left merge-right -m "octopus-other" &&
-
-	git checkout other &&
-	git merge octo-other -m "pull octopus" &&
-
-	git checkout second &&
-	git merge octo-second -m "pull octopus" &&
-
-	# Remove these branches so they are not selected
-	# as bitmap tips
-	git branch -D merge-left &&
-	git branch -D merge-right &&
-	git branch -D octo-other &&
-	git branch -D octo-second &&
-
-	# add padding to make these merges less interesting
-	# and avoid having them selected for bitmaps
-	test_commit_bulk --id=file 100 &&
-	git checkout other &&
-	test_commit_bulk --id=side 100 &&
-	git checkout second &&
-
-	bitmaptip=$(git rev-parse second) &&
-	blob=$(echo tagged-blob | git hash-object -w --stdin) &&
-	git tag tagged-blob $blob &&
-	git config repack.writebitmaps true
+setup_bitmap_history
+
+test_expect_success 'setup writing bitmaps during repack' '
+	git config repack.writeBitmaps true
 '
 
 test_expect_success 'full repack creates bitmaps' '
@@ -123,109 +40,7 @@ test_expect_success 'full repack creates bitmaps' '
 	grep "\"key\":\"num_maximal_commits\",\"value\":\"107\"" trace
 '
 
-test_expect_success 'rev-list --test-bitmap verifies bitmaps' '
-	git rev-list --test-bitmap HEAD
-'
-
-rev_list_tests_head () {
-	test_expect_success "counting commits via bitmap ($state, $branch)" '
-		git rev-list --count $branch >expect &&
-		git rev-list --use-bitmap-index --count $branch >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "counting partial commits via bitmap ($state, $branch)" '
-		git rev-list --count $branch~5..$branch >expect &&
-		git rev-list --use-bitmap-index --count $branch~5..$branch >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "counting commits with limit ($state, $branch)" '
-		git rev-list --count -n 1 $branch >expect &&
-		git rev-list --use-bitmap-index --count -n 1 $branch >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "counting non-linear history ($state, $branch)" '
-		git rev-list --count other...second >expect &&
-		git rev-list --use-bitmap-index --count other...second >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "counting commits with limiting ($state, $branch)" '
-		git rev-list --count $branch -- 1.t >expect &&
-		git rev-list --use-bitmap-index --count $branch -- 1.t >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "counting objects via bitmap ($state, $branch)" '
-		git rev-list --count --objects $branch >expect &&
-		git rev-list --use-bitmap-index --count --objects $branch >actual &&
-		test_cmp expect actual
-	'
-
-	test_expect_success "enumerate commits ($state, $branch)" '
-		git rev-list --use-bitmap-index $branch >actual &&
-		git rev-list $branch >expect &&
-		test_bitmap_traversal --no-confirm-bitmaps expect actual
-	'
-
-	test_expect_success "enumerate --objects ($state, $branch)" '
-		git rev-list --objects --use-bitmap-index $branch >actual &&
-		git rev-list --objects $branch >expect &&
-		test_bitmap_traversal expect actual
-	'
-
-	test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" '
-		git rev-list --objects --use-bitmap-index $branch tagged-blob >actual &&
-		grep $blob actual
-	'
-}
-
-rev_list_tests () {
-	state=$1
-
-	for branch in "second" "other"
-	do
-		rev_list_tests_head
-	done
-}
-
-rev_list_tests 'full bitmap'
-
-test_expect_success 'clone from bitmapped repository' '
-	git clone --no-local --bare . clone.git &&
-	git rev-parse HEAD >expect &&
-	git --git-dir=clone.git rev-parse HEAD >actual &&
-	test_cmp expect actual
-'
-
-test_expect_success 'partial clone from bitmapped repository' '
-	test_config uploadpack.allowfilter true &&
-	git clone --no-local --bare --filter=blob:none . partial-clone.git &&
-	(
-		cd partial-clone.git &&
-		pack=$(echo objects/pack/*.pack) &&
-		git verify-pack -v "$pack" >have &&
-		awk "/blob/ { print \$1 }" <have >blobs &&
-		# we expect this single blob because of the direct ref
-		git rev-parse refs/tags/tagged-blob >expect &&
-		test_cmp expect blobs
-	)
-'
-
-test_expect_success 'setup further non-bitmapped commits' '
-	test_commit_bulk --id=further 10
-'
-
-rev_list_tests 'partial bitmap'
-
-test_expect_success 'fetch (partial bitmap)' '
-	git --git-dir=clone.git fetch origin second:second &&
-	git rev-parse HEAD >expect &&
-	git --git-dir=clone.git rev-parse HEAD >actual &&
-	test_cmp expect actual
-'
+basic_bitmap_tests
 
 test_expect_success 'incremental repack fails when bitmaps are requested' '
 	test_commit more-1 &&
@@ -461,40 +276,6 @@ test_expect_success 'truncated bitmap fails gracefully (cache)' '
 	test_i18ngrep corrupted.bitmap.index stderr
 '
 
-test_expect_success 'enumerating progress counts pack-reused objects' '
-	count=$(git rev-list --objects --all --count) &&
-	git repack -adb &&
-
-	# check first with only reused objects; confirm that our progress
-	# showed the right number, and also that we did pack-reuse as expected.
-	# Check only the final "done" line of the meter (there may be an
-	# arbitrary number of intermediate lines ending with CR).
-	GIT_PROGRESS_DELAY=0 \
-		git pack-objects --all --stdout --progress \
-		</dev/null >/dev/null 2>stderr &&
-	grep "Enumerating objects: $count, done" stderr &&
-	grep "pack-reused $count" stderr &&
-
-	# now the same but with one non-reused object
-	git commit --allow-empty -m "an extra commit object" &&
-	GIT_PROGRESS_DELAY=0 \
-		git pack-objects --all --stdout --progress \
-		</dev/null >/dev/null 2>stderr &&
-	grep "Enumerating objects: $((count+1)), done" stderr &&
-	grep "pack-reused $count" stderr
-'
-
-# have_delta <obj> <expected_base>
-#
-# Note that because this relies on cat-file, it might find _any_ copy of an
-# object in the repository. The caller is responsible for making sure
-# there's only one (e.g., via "repack -ad", or having just fetched a copy).
-have_delta () {
-	echo $2 >expect &&
-	echo $1 | git cat-file --batch-check="%(deltabase)" >actual &&
-	test_cmp expect actual
-}
-
 # Create a state of history with these properties:
 #
 #  - refs that allow a client to fetch some new history, while sharing some old

From 9efd417210f87441ec2ad4ffaffcc2c8b21dcb2f Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:07 -0400
Subject: [PATCH 090/198] t/helper/test-read-midx.c: add --checksum mode

Subsequent tests will want to check for the existence of a multi-pack
bitmap which matches the multi-pack-index stored in the pack directory.

The multi-pack bitmap includes the hex checksum of the MIDX it
corresponds to in its filename (for example,
'$packdir/multi-pack-index-<checksum>.bitmap'). As a result, some tests
want a way to learn what '<checksum>' is.

This helper addresses that need by printing the checksum of the
repository's multi-pack-index.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-read-midx.c | 16 +++++++++++++++-
 t/lib-bitmap.sh           |  4 ++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index 7c2eb11a8e70ff..cb0d27049a07e1 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -60,12 +60,26 @@ static int read_midx_file(const char *object_dir, int show_objects)
 	return 0;
 }
 
+static int read_midx_checksum(const char *object_dir)
+{
+	struct multi_pack_index *m;
+
+	setup_git_directory();
+	m = load_multi_pack_index(object_dir, 1);
+	if (!m)
+		return 1;
+	printf("%s\n", hash_to_hex(get_midx_checksum(m)));
+	return 0;
+}
+
 int cmd__read_midx(int argc, const char **argv)
 {
 	if (!(argc == 2 || argc == 3))
-		usage("read-midx [--show-objects] <object-dir>");
+		usage("read-midx [--show-objects|--checksum] <object-dir>");
 
 	if (!strcmp(argv[1], "--show-objects"))
 		return read_midx_file(argv[2], 1);
+	else if (!strcmp(argv[1], "--checksum"))
+		return read_midx_checksum(argv[2]);
 	return read_midx_file(argv[1], 0);
 }
diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh
index ecb5d0e05d44ae..09cd036f4d50de 100644
--- a/t/lib-bitmap.sh
+++ b/t/lib-bitmap.sh
@@ -260,3 +260,7 @@ have_delta () {
 	echo $1 | git cat-file --batch-check="%(deltabase)" >actual &&
 	test_cmp expect actual
 }
+
+midx_checksum () {
+	test-tool read-midx --checksum "${1:-.git/objects}"
+}

From 884c7c77949fbbaba338cb7cce12032543ed6dd5 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:10 -0400
Subject: [PATCH 091/198] t5326: test multi-pack bitmap behavior

This patch introduces a new test, t5326, which tests the basic
functionality of multi-pack bitmaps.

Some trivial behavior is tested, such as:

  - Whether bitmaps can be generated with more than one pack.
  - Whether clones can be served with all objects in the bitmap.
  - Whether follow-up fetches can be served with some objects outside of
    the server's bitmap

These use lib-bitmap's tests (which in turn were pulled from t5310), and
we cover cases where the MIDX represents both a single pack and multiple
packs.

In addition, some non-trivial and MIDX-specific behavior is tested, too,
including:

  - Whether multi-pack bitmaps behave correctly with respect to the
    pack-reuse machinery when the base for some object is selected from
    a different pack than the delta.
  - Whether multi-pack bitmaps correctly respect the
    pack.preferBitmapTips configuration.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5326-multi-pack-bitmaps.sh | 277 ++++++++++++++++++++++++++++++++++
 1 file changed, 277 insertions(+)
 create mode 100755 t/t5326-multi-pack-bitmaps.sh

diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh
new file mode 100755
index 00000000000000..c1b7d633e2338c
--- /dev/null
+++ b/t/t5326-multi-pack-bitmaps.sh
@@ -0,0 +1,277 @@
+#!/bin/sh
+
+test_description='exercise basic multi-pack bitmap functionality'
+. ./test-lib.sh
+. "${TEST_DIRECTORY}/lib-bitmap.sh"
+
+# We'll be writing our own midx and bitmaps, so avoid getting confused by the
+# automatic ones.
+GIT_TEST_MULTI_PACK_INDEX=0
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+
+objdir=.git/objects
+midx=$objdir/pack/multi-pack-index
+
+# midx_pack_source <obj>
+midx_pack_source () {
+	test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2
+}
+
+setup_bitmap_history
+
+test_expect_success 'enable core.multiPackIndex' '
+	git config core.multiPackIndex true
+'
+
+test_expect_success 'create single-pack midx with bitmaps' '
+	git repack -ad &&
+	git multi-pack-index write --bitmap &&
+	test_path_is_file $midx &&
+	test_path_is_file $midx-$(midx_checksum $objdir).bitmap
+'
+
+basic_bitmap_tests
+
+test_expect_success 'create new additional packs' '
+	for i in $(test_seq 1 16)
+	do
+		test_commit "$i" &&
+		git repack -d
+	done &&
+
+	git checkout -b other2 HEAD~8 &&
+	for i in $(test_seq 1 8)
+	do
+		test_commit "side-$i" &&
+		git repack -d
+	done &&
+	git checkout second
+'
+
+test_expect_success 'create multi-pack midx with bitmaps' '
+	git multi-pack-index write --bitmap &&
+
+	ls $objdir/pack/pack-*.pack >packs &&
+	test_line_count = 25 packs &&
+
+	test_path_is_file $midx &&
+	test_path_is_file $midx-$(midx_checksum $objdir).bitmap
+'
+
+basic_bitmap_tests
+
+test_expect_success '--no-bitmap is respected when bitmaps exist' '
+	git multi-pack-index write --bitmap &&
+
+	test_commit respect--no-bitmap &&
+	GIT_TEST_MULTI_PACK_INDEX=0 git repack -d &&
+
+	test_path_is_file $midx &&
+	test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
+
+	git multi-pack-index write --no-bitmap &&
+
+	test_path_is_file $midx &&
+	test_path_is_missing $midx-$(midx_checksum $objdir).bitmap
+'
+
+test_expect_success 'setup midx with base from later pack' '
+	# Write a and b so that "a" is a delta on top of base "b", since Git
+	# prefers to delete contents out of a base rather than add to a shorter
+	# object.
+	test_seq 1 128 >a &&
+	test_seq 1 130 >b &&
+
+	git add a b &&
+	git commit -m "initial commit" &&
+
+	a=$(git rev-parse HEAD:a) &&
+	b=$(git rev-parse HEAD:b) &&
+
+	# In the first pack, "a" is stored as a delta to "b".
+	p1=$(git pack-objects .git/objects/pack/pack <<-EOF
+	$a
+	$b
+	EOF
+	) &&
+
+	# In the second pack, "a" is missing, and "b" is not a delta nor base to
+	# any other object.
+	p2=$(git pack-objects .git/objects/pack/pack <<-EOF
+	$b
+	$(git rev-parse HEAD)
+	$(git rev-parse HEAD^{tree})
+	EOF
+	) &&
+
+	git prune-packed &&
+	# Use the second pack as the preferred source, so that "b" occurs
+	# earlier in the MIDX object order, rendering "a" unusable for pack
+	# reuse.
+	git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx &&
+
+	have_delta $a $b &&
+	test $(midx_pack_source $a) != $(midx_pack_source $b)
+'
+
+rev_list_tests 'full bitmap with backwards delta'
+
+test_expect_success 'clone with bitmaps enabled' '
+	git clone --no-local --bare . clone-reverse-delta.git &&
+	test_when_finished "rm -fr clone-reverse-delta.git" &&
+
+	git rev-parse HEAD >expect &&
+	git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual &&
+	test_cmp expect actual
+'
+
+bitmap_reuse_tests() {
+	from=$1
+	to=$2
+
+	test_expect_success "setup pack reuse tests ($from -> $to)" '
+		rm -fr repo &&
+		git init repo &&
+		(
+			cd repo &&
+			test_commit_bulk 16 &&
+			git tag old-tip &&
+
+			git config core.multiPackIndex true &&
+			if test "MIDX" = "$from"
+			then
+				GIT_TEST_MULTI_PACK_INDEX=0 git repack -Ad &&
+				git multi-pack-index write --bitmap
+			else
+				GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb
+			fi
+		)
+	'
+
+	test_expect_success "build bitmap from existing ($from -> $to)" '
+		(
+			cd repo &&
+			test_commit_bulk --id=further 16 &&
+			git tag new-tip &&
+
+			if test "MIDX" = "$to"
+			then
+				GIT_TEST_MULTI_PACK_INDEX=0 git repack -d &&
+				git multi-pack-index write --bitmap
+			else
+				GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb
+			fi
+		)
+	'
+
+	test_expect_success "verify resulting bitmaps ($from -> $to)" '
+		(
+			cd repo &&
+			git for-each-ref &&
+			git rev-list --test-bitmap refs/tags/old-tip &&
+			git rev-list --test-bitmap refs/tags/new-tip
+		)
+	'
+}
+
+bitmap_reuse_tests 'pack' 'MIDX'
+bitmap_reuse_tests 'MIDX' 'pack'
+bitmap_reuse_tests 'MIDX' 'MIDX'
+
+test_expect_success 'missing object closure fails gracefully' '
+	rm -fr repo &&
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+
+		test_commit loose &&
+		test_commit packed &&
+
+		# Do not pass "--revs"; we want a pack without the "loose"
+		# commit.
+		git pack-objects $objdir/pack/pack <<-EOF &&
+		$(git rev-parse packed)
+		EOF
+
+		test_must_fail git multi-pack-index write --bitmap 2>err &&
+		grep "doesn.t have full closure" err &&
+		test_path_is_missing $midx
+	)
+'
+
+test_expect_success 'setup partial bitmaps' '
+	test_commit packed &&
+	git repack &&
+	test_commit loose &&
+	git multi-pack-index write --bitmap 2>err &&
+	test_path_is_file $midx &&
+	test_path_is_file $midx-$(midx_checksum $objdir).bitmap
+'
+
+basic_bitmap_tests HEAD~
+
+test_expect_success 'removing a MIDX clears stale bitmaps' '
+	rm -fr repo &&
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+		test_commit base &&
+		git repack &&
+		git multi-pack-index write --bitmap &&
+
+		# Write a MIDX and bitmap; remove the MIDX but leave the bitmap.
+		stale_bitmap=$midx-$(midx_checksum $objdir).bitmap &&
+		rm $midx &&
+
+		# Then write a new MIDX.
+		test_commit new &&
+		git repack &&
+		git multi-pack-index write --bitmap &&
+
+		test_path_is_file $midx &&
+		test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
+		test_path_is_missing $stale_bitmap
+	)
+'
+
+test_expect_success 'pack.preferBitmapTips' '
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+
+		test_commit_bulk --message="%s" 103 &&
+
+		git log --format="%H" >commits.raw &&
+		sort <commits.raw >commits &&
+
+		git log --format="create refs/tags/%s %H" HEAD >refs &&
+		git update-ref --stdin <refs &&
+
+		git multi-pack-index write --bitmap &&
+		test_path_is_file $midx &&
+		test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
+
+		test-tool bitmap list-commits | sort >bitmaps &&
+		comm -13 bitmaps commits >before &&
+		test_line_count = 1 before &&
+
+		perl -ne "printf(\"create refs/tags/include/%d \", $.); print" \
+			<before | git update-ref --stdin &&
+
+		rm -fr $midx-$(midx_checksum $objdir).bitmap &&
+		rm -fr $midx-$(midx_checksum $objdir).rev &&
+		rm -fr $midx &&
+
+		git -c pack.preferBitmapTips=refs/tags/include \
+			multi-pack-index write --bitmap &&
+		test-tool bitmap list-commits | sort >bitmaps &&
+		comm -13 bitmaps commits >after &&
+
+		! test_cmp before after
+	)
+'
+
+test_done

From 96b5fb98267775c7d0fa209e8a61dddfb2c857ba Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 27 Jul 2021 17:20:12 -0400
Subject: [PATCH 092/198] t0410: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP

Generating a MIDX bitmap causes tests which repack in a partial clone to
fail because they are missing objects. Missing objects is an expected
component of tests in t0410, so disable this knob altogether. Graceful
degradation when writing a bitmap with missing objects is tested in
t5326.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t0410-partial-clone.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 1667450917f3c9..4fd8e83da1f5d2 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -4,6 +4,9 @@ test_description='partial clone'
 
 . ./test-lib.sh
 
+# missing promisor objects cause repacks which write bitmaps to fail
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+
 delete_object () {
 	rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|')
 }

From 491850d49ff9f2fbe0eb6b2b439cda5bc4c7960e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 27 Jul 2021 17:20:15 -0400
Subject: [PATCH 093/198] t5310: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP

Generating a MIDX bitmap confuses many of the tests in t5310, which
expect to control whether and how bitmaps are written. Since the
relevant MIDX-bitmap tests here are covered already in t5326, let's just
disable the flag for the whole t5310 script.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5310-pack-bitmaps.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index 4318f84d534516..673baa5c3ccc8a 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -8,6 +8,10 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . "$TEST_DIRECTORY"/lib-bundle.sh
 . "$TEST_DIRECTORY"/lib-bitmap.sh
 
+# t5310 deals only with single-pack bitmaps, so don't write MIDX bitmaps in
+# their place.
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+
 objpath () {
 	echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')"
 }

From b301a8737e1179e46f0cf8e1a73c528a2f91f549 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:18 -0400
Subject: [PATCH 094/198] t5319: don't write MIDX bitmaps in t5319

This test is specifically about generating a midx still respecting a
pack-based bitmap file. Generating a MIDX bitmap would confuse the test.
Let's override the 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' variable to
make sure we don't do so.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5319-multi-pack-index.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 909f9bf7b8ee79..a30095c9d761e2 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -491,7 +491,8 @@ test_expect_success 'repack preserves multi-pack-index when creating packs' '
 compare_results_with_midx "after repack"
 
 test_expect_success 'multi-pack-index and pack-bitmap' '
-	git -c repack.writeBitmaps=true repack -ad &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -c repack.writeBitmaps=true repack -ad &&
 	git multi-pack-index write &&
 	git rev-list --test-bitmap HEAD
 '

From 2b872a8859f1f3988cf84ef730bc98e364d9f180 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:20 -0400
Subject: [PATCH 095/198] t7700: update to work with MIDX bitmap test knob

A number of these tests are focused only on pack-based bitmaps and need
to be updated to disable 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' where
necessary.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7700-repack.sh | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh
index 25b235c0630385..98eda3bfeb56c4 100755
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@@ -63,13 +63,14 @@ test_expect_success 'objects in packs marked .keep are not repacked' '
 
 test_expect_success 'writing bitmaps via command-line can duplicate .keep objects' '
 	# build on $oid, $packid, and .keep state from previous
-	git repack -Adbl &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 git repack -Adbl &&
 	test_has_duplicate_object true
 '
 
 test_expect_success 'writing bitmaps via config can duplicate .keep objects' '
 	# build on $oid, $packid, and .keep state from previous
-	git -c repack.writebitmaps=true repack -Adl &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -c repack.writebitmaps=true repack -Adl &&
 	test_has_duplicate_object true
 '
 
@@ -189,7 +190,9 @@ test_expect_success 'repack --keep-pack' '
 
 test_expect_success 'bitmaps are created by default in bare repos' '
 	git clone --bare .git bare.git &&
-	git -C bare.git repack -ad &&
+	rm -f bare.git/objects/pack/*.bitmap &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -C bare.git repack -ad &&
 	bitmap=$(ls bare.git/objects/pack/*.bitmap) &&
 	test_path_is_file "$bitmap"
 '
@@ -200,7 +203,8 @@ test_expect_success 'incremental repack does not complain' '
 '
 
 test_expect_success 'bitmaps can be disabled on bare repos' '
-	git -c repack.writeBitmaps=false -C bare.git repack -ad &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -c repack.writeBitmaps=false -C bare.git repack -ad &&
 	bitmap=$(ls bare.git/objects/pack/*.bitmap || :) &&
 	test -z "$bitmap"
 '
@@ -211,7 +215,8 @@ test_expect_success 'no bitmaps created if .keep files present' '
 	keep=${pack%.pack}.keep &&
 	test_when_finished "rm -f \"\$keep\"" &&
 	>"$keep" &&
-	git -C bare.git repack -ad 2>stderr &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -C bare.git repack -ad 2>stderr &&
 	test_must_be_empty stderr &&
 	find bare.git/objects/pack/ -type f -name "*.bitmap" >actual &&
 	test_must_be_empty actual
@@ -222,7 +227,8 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
 	blob=$(test-tool genrandom big $((1024*1024)) |
 	       git -C bare.git hash-object -w --stdin) &&
 	git -C bare.git update-ref refs/tags/big $blob &&
-	git -C bare.git repack -ad 2>stderr &&
+	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
+		git -C bare.git repack -ad 2>stderr &&
 	test_must_be_empty stderr &&
 	find bare.git/objects/pack -type f -name "*.bitmap" >actual &&
 	test_must_be_empty actual

From 1786e92e75fe2f161cd48fa11616961d519cb17c Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:23 -0400
Subject: [PATCH 096/198] midx: respect
 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP'

Introduce a new 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' environment
variable to also write a multi-pack bitmap when
'GIT_TEST_MULTI_PACK_INDEX' is set.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/repack.c          | 12 ++++++++++--
 ci/run-build-and-tests.sh |  1 +
 midx.h                    |  2 ++
 t/README                  |  4 ++++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/builtin/repack.c b/builtin/repack.c
index 5f9bc74adc05af..82ab6682726f7a 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -515,6 +515,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		if (!(pack_everything & ALL_INTO_ONE) ||
 		    !is_bare_repository())
 			write_bitmaps = 0;
+	} else if (write_bitmaps &&
+		   git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0) &&
+		   git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) {
+		write_bitmaps = 0;
 	}
 	if (pack_kept_objects < 0)
 		pack_kept_objects = write_bitmaps > 0;
@@ -725,8 +729,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		update_server_info(0);
 	remove_temporary_files();
 
-	if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
-		write_midx_file(get_object_directory(), NULL, 0);
+	if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) {
+		unsigned flags = 0;
+		if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0))
+			flags |= MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX;
+		write_midx_file(get_object_directory(), NULL, flags);
+	}
 
 	string_list_clear(&names, 0);
 	string_list_clear(&rollback, 0);
diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh
index 3ce81ffee941b2..7ee9ba9325fb3a 100755
--- a/ci/run-build-and-tests.sh
+++ b/ci/run-build-and-tests.sh
@@ -23,6 +23,7 @@ linux-gcc)
 	export GIT_TEST_COMMIT_GRAPH=1
 	export GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=1
 	export GIT_TEST_MULTI_PACK_INDEX=1
+	export GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=1
 	export GIT_TEST_ADD_I_USE_BUILTIN=1
 	export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
 	export GIT_TEST_WRITE_REV_INDEX=1
diff --git a/midx.h b/midx.h
index 350f4d0a7b4c20..aa3da557bb0ae9 100644
--- a/midx.h
+++ b/midx.h
@@ -8,6 +8,8 @@ struct pack_entry;
 struct repository;
 
 #define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX"
+#define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \
+	"GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP"
 
 struct multi_pack_index {
 	struct multi_pack_index *next;
diff --git a/t/README b/t/README
index 1a2072b2c8a282..1311b8e17ad65a 100644
--- a/t/README
+++ b/t/README
@@ -425,6 +425,10 @@ GIT_TEST_MULTI_PACK_INDEX=<boolean>, when true, forces the multi-pack-
 index to be written after every 'git repack' command, and overrides the
 'core.multiPackIndex' setting to true.
 
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=<boolean>, when true, sets the
+'--bitmap' option on all invocations of 'git multi-pack-index write',
+and ignores pack-objects' '--write-bitmap-index'.
+
 GIT_TEST_SIDEBAND_ALL=<boolean>, when true, overrides the
 'uploadpack.allowSidebandAll' setting to true, and when false, forces
 fetch-pack to not request sideband-all (even if the server advertises

From 2195bea25eb6e6ef181a1274209ddf1ffe92fca0 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:26 -0400
Subject: [PATCH 097/198] p5310: extract full and partial bitmap tests

A new p5326 introduced by the next patch will want these same tests,
interjecting its own setup in between. Move them out so that both perf
tests can reuse them.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/lib-bitmap.sh         | 69 ++++++++++++++++++++++++++++++++++++
 t/perf/p5310-pack-bitmaps.sh | 65 ++-------------------------------
 2 files changed, 72 insertions(+), 62 deletions(-)
 create mode 100644 t/perf/lib-bitmap.sh

diff --git a/t/perf/lib-bitmap.sh b/t/perf/lib-bitmap.sh
new file mode 100644
index 00000000000000..63d3bc7cece599
--- /dev/null
+++ b/t/perf/lib-bitmap.sh
@@ -0,0 +1,69 @@
+# Helper functions for testing bitmap performance; see p5310.
+
+test_full_bitmap () {
+	test_perf 'simulated clone' '
+		git pack-objects --stdout --all </dev/null >/dev/null
+	'
+
+	test_perf 'simulated fetch' '
+		have=$(git rev-list HEAD~100 -1) &&
+		{
+			echo HEAD &&
+			echo ^$have
+		} | git pack-objects --revs --stdout >/dev/null
+	'
+
+	test_perf 'pack to file (bitmap)' '
+		git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null
+	'
+
+	test_perf 'rev-list (commits)' '
+		git rev-list --all --use-bitmap-index >/dev/null
+	'
+
+	test_perf 'rev-list (objects)' '
+		git rev-list --all --use-bitmap-index --objects >/dev/null
+	'
+
+	test_perf 'rev-list with tag negated via --not --all (objects)' '
+		git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null
+	'
+
+	test_perf 'rev-list with negative tag (objects)' '
+		git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null
+	'
+
+	test_perf 'rev-list count with blob:none' '
+		git rev-list --use-bitmap-index --count --objects --all \
+			--filter=blob:none >/dev/null
+	'
+
+	test_perf 'rev-list count with blob:limit=1k' '
+		git rev-list --use-bitmap-index --count --objects --all \
+			--filter=blob:limit=1k >/dev/null
+	'
+
+	test_perf 'rev-list count with tree:0' '
+		git rev-list --use-bitmap-index --count --objects --all \
+			--filter=tree:0 >/dev/null
+	'
+
+	test_perf 'simulated partial clone' '
+		git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null
+	'
+}
+
+test_partial_bitmap () {
+	test_perf 'clone (partial bitmap)' '
+		git pack-objects --stdout --all </dev/null >/dev/null
+	'
+
+	test_perf 'pack to file (partial bitmap)' '
+		git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null
+	'
+
+	test_perf 'rev-list with tree filter (partial bitmap)' '
+		git rev-list --use-bitmap-index --count --objects --all \
+			--filter=tree:0 >/dev/null
+	'
+}
diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index 452be01056c6b4..7ad4f237bc37ff 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -2,6 +2,7 @@
 
 test_description='Tests pack performance using bitmaps'
 . ./perf-lib.sh
+. "${TEST_DIRECTORY}/perf/lib-bitmap.sh"
 
 test_perf_large_repo
 
@@ -25,56 +26,7 @@ test_perf 'repack to disk' '
 	git repack -ad
 '
 
-test_perf 'simulated clone' '
-	git pack-objects --stdout --all </dev/null >/dev/null
-'
-
-test_perf 'simulated fetch' '
-	have=$(git rev-list HEAD~100 -1) &&
-	{
-		echo HEAD &&
-		echo ^$have
-	} | git pack-objects --revs --stdout >/dev/null
-'
-
-test_perf 'pack to file (bitmap)' '
-	git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null
-'
-
-test_perf 'rev-list (commits)' '
-	git rev-list --all --use-bitmap-index >/dev/null
-'
-
-test_perf 'rev-list (objects)' '
-	git rev-list --all --use-bitmap-index --objects >/dev/null
-'
-
-test_perf 'rev-list with tag negated via --not --all (objects)' '
-	git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null
-'
-
-test_perf 'rev-list with negative tag (objects)' '
-	git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null
-'
-
-test_perf 'rev-list count with blob:none' '
-	git rev-list --use-bitmap-index --count --objects --all \
-		--filter=blob:none >/dev/null
-'
-
-test_perf 'rev-list count with blob:limit=1k' '
-	git rev-list --use-bitmap-index --count --objects --all \
-		--filter=blob:limit=1k >/dev/null
-'
-
-test_perf 'rev-list count with tree:0' '
-	git rev-list --use-bitmap-index --count --objects --all \
-		--filter=tree:0 >/dev/null
-'
-
-test_perf 'simulated partial clone' '
-	git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null
-'
+test_full_bitmap
 
 test_expect_success 'create partial bitmap state' '
 	# pick a commit to represent the repo tip in the past
@@ -97,17 +49,6 @@ test_expect_success 'create partial bitmap state' '
 	git update-ref HEAD $orig_tip
 '
 
-test_perf 'clone (partial bitmap)' '
-	git pack-objects --stdout --all </dev/null >/dev/null
-'
-
-test_perf 'pack to file (partial bitmap)' '
-	git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null
-'
-
-test_perf 'rev-list with tree filter (partial bitmap)' '
-	git rev-list --use-bitmap-index --count --objects --all \
-		--filter=tree:0 >/dev/null
-'
+test_partial_bitmap
 
 test_done

From 2f168e6f496cfeb40dbffbce1c62af1bcd2bfdbd Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Tue, 27 Jul 2021 17:20:28 -0400
Subject: [PATCH 098/198] p5326: perf tests for MIDX bitmaps

These new performance tests demonstrate effectively the same behavior as
p5310, but use a multi-pack bitmap instead of a single-pack one.

Notably, p5326 does not create a MIDX bitmap with multiple packs. This
is so we can measure a direct comparison between it and p5310. Any
difference between the two is measuring just the overhead of using MIDX
bitmaps.

Here are the results of p5310 and p5326 together, measured at the same
time and on the same machine (using a Xenon W-2255 CPU):

    Test                                                  HEAD
    ------------------------------------------------------------------------
    5310.2: repack to disk                                96.78(93.39+11.33)
    5310.3: simulated clone                               9.98(9.79+0.19)
    5310.4: simulated fetch                               1.75(4.26+0.19)
    5310.5: pack to file (bitmap)                         28.20(27.87+8.70)
    5310.6: rev-list (commits)                            0.41(0.36+0.05)
    5310.7: rev-list (objects)                            1.61(1.54+0.07)
    5310.8: rev-list count with blob:none                 0.25(0.21+0.04)
    5310.9: rev-list count with blob:limit=1k             2.65(2.54+0.10)
    5310.10: rev-list count with tree:0                   0.23(0.19+0.04)
    5310.11: simulated partial clone                      4.34(4.21+0.12)
    5310.13: clone (partial bitmap)                       11.05(12.21+0.48)
    5310.14: pack to file (partial bitmap)                31.25(34.22+3.70)
    5310.15: rev-list with tree filter (partial bitmap)   0.26(0.22+0.04)

versus the same tests (this time using a multi-pack index):

    Test                                                  HEAD
    ------------------------------------------------------------------------
    5326.2: setup multi-pack index                        78.99(75.29+11.58)
    5326.3: simulated clone                               11.78(11.56+0.22)
    5326.4: simulated fetch                               1.70(4.49+0.13)
    5326.5: pack to file (bitmap)                         28.02(27.72+8.76)
    5326.6: rev-list (commits)                            0.42(0.36+0.06)
    5326.7: rev-list (objects)                            1.65(1.58+0.06)
    5326.8: rev-list count with blob:none                 0.26(0.21+0.05)
    5326.9: rev-list count with blob:limit=1k             2.97(2.86+0.10)
    5326.10: rev-list count with tree:0                   0.25(0.20+0.04)
    5326.11: simulated partial clone                      5.65(5.49+0.16)
    5326.13: clone (partial bitmap)                       12.22(13.43+0.38)
    5326.14: pack to file (partial bitmap)                30.05(31.57+7.25)
    5326.15: rev-list with tree filter (partial bitmap)   0.24(0.20+0.04)

There is slight overhead in "simulated clone", "simulated partial
clone", and "clone (partial bitmap)". Unsurprisingly, that overhead is
due to using the MIDX's reverse index to map between bit positions and
MIDX positions.

This can be reproduced by running "git repack -adb" along with "git
multi-pack-index write --bitmap" in a large-ish repository. Then run:

    $ perf record -o pack.perf git -c core.multiPackIndex=false \
      pack-objects --all --stdout >/dev/null </dev/null
    $ perf record -o midx.perf git -c core.multiPackIndex=true \
      pack-objects --all --stdout >/dev/null </dev/null

and compare the two with "perf diff -c delta -o 1 pack.perf midx.perf".
The most notable results are below (the next largest positive delta is
+0.14%):

    # Event 'cycles'
    #
    # Baseline    Delta  Shared Object       Symbol
    # ........  .......  ..................  ..........................
    #
                 +5.86%  git                 [.] nth_midxed_offset
                 +5.24%  git                 [.] nth_midxed_pack_int_id
         3.45%   +0.97%  git                 [.] offset_to_pack_pos
         3.30%   +0.57%  git                 [.] pack_pos_to_offset
                 +0.30%  git                 [.] pack_pos_to_midx

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/p5326-multi-pack-bitmaps.sh | 43 ++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100755 t/perf/p5326-multi-pack-bitmaps.sh

diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh
new file mode 100755
index 00000000000000..5845109ac787ac
--- /dev/null
+++ b/t/perf/p5326-multi-pack-bitmaps.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+test_description='Tests performance using midx bitmaps'
+. ./perf-lib.sh
+. "${TEST_DIRECTORY}/perf/lib-bitmap.sh"
+
+test_perf_large_repo
+
+test_expect_success 'enable multi-pack index' '
+	git config core.multiPackIndex true
+'
+
+test_perf 'setup multi-pack index' '
+	git repack -ad &&
+	git multi-pack-index write --bitmap
+'
+
+test_full_bitmap
+
+test_expect_success 'create partial bitmap state' '
+	# pick a commit to represent the repo tip in the past
+	cutoff=$(git rev-list HEAD~100 -1) &&
+	orig_tip=$(git rev-parse HEAD) &&
+
+	# now pretend we have just one tip
+	rm -rf .git/logs .git/refs/* .git/packed-refs &&
+	git update-ref HEAD $cutoff &&
+
+	# and then repack, which will leave us with a nice
+	# big bitmap pack of the "old" history, and all of
+	# the new history will be loose, as if it had been pushed
+	# up incrementally and exploded via unpack-objects
+	git repack -Ad &&
+	git multi-pack-index write --bitmap &&
+
+	# and now restore our original tip, as if the pushes
+	# had happened
+	git update-ref HEAD $orig_tip
+'
+
+test_partial_bitmap
+
+test_done

From 8dc56c3c62fc8537764a1284baae485d288995c7 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Wed, 28 Jul 2021 10:55:07 -0700
Subject: [PATCH 099/198] ll-merge: teach ll_binary_merge() a trivial three-way
 merge

The low-level binary merge code assumed that the caller will not
feed trivial merges that would have been resolved at the tree level;
because of this, ll_binary_merge() assumes the ancestor is different
from either side, always failing the merge in conflict unless -Xours
or -Xtheirs is in effect.

But "git apply --3way" codepath could ask us to perform three-way
merge between two binaries A and B using A as the ancestor version.
The current code always fails such an application, but when given a
binary patch that turns A into B and asked to apply it to A, there
is no reason to fail such a request---we can trivially tell that the
result must be B.

Arguably, this fix may belong to one level higher at ll_merge()
function, which dispatches to lower-level merge drivers, possibly
even before it renormalizes the three input buffers.  But let's
first see how this goes.

Signed-off-by: Jerry Zhang <jerry@skydio.com>
[jc: stolen new tests from Jerry's patch]
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 ll-merge.c                | 56 +++++++++++++++++++++++++++------------
 t/t4108-apply-threeway.sh | 45 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/ll-merge.c b/ll-merge.c
index 261657578c756c..301e244971697e 100644
--- a/ll-merge.c
+++ b/ll-merge.c
@@ -46,6 +46,13 @@ void reset_merge_attributes(void)
 	merge_attributes = NULL;
 }
 
+static int same_mmfile(mmfile_t *a, mmfile_t *b)
+{
+	if (a->size != b->size)
+		return 0;
+	return !memcmp(a->ptr, b->ptr, a->size);
+}
+
 /*
  * Built-in low-levels
  */
@@ -58,9 +65,18 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused,
 			   const struct ll_merge_options *opts,
 			   int marker_size)
 {
+	int status;
 	mmfile_t *stolen;
 	assert(opts);
 
+	/*
+	 * With -Xtheirs or -Xours, we have cleanly merged;
+	 * otherwise we got a conflict, unless 3way trivially
+	 * resolves.
+	 */
+	status = (opts->variant == XDL_MERGE_FAVOR_OURS ||
+		  opts->variant == XDL_MERGE_FAVOR_THEIRS) ? 0 : 1;
+
 	/*
 	 * The tentative merge result is the common ancestor for an
 	 * internal merge.  For the final merge, it is "ours" by
@@ -68,18 +84,30 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused,
 	 */
 	if (opts->virtual_ancestor) {
 		stolen = orig;
+		status = 0;
 	} else {
-		switch (opts->variant) {
-		default:
-			warning("Cannot merge binary files: %s (%s vs. %s)",
-				path, name1, name2);
-			/* fallthru */
-		case XDL_MERGE_FAVOR_OURS:
-			stolen = src1;
-			break;
-		case XDL_MERGE_FAVOR_THEIRS:
+		if (same_mmfile(orig, src1)) {
 			stolen = src2;
-			break;
+			status = 0;
+		} else if (same_mmfile(orig, src2)) {
+			stolen = src1;
+			status = 0;
+		} else if (same_mmfile(src1, src2)) {
+			stolen = src1;
+			status = 0;
+		} else {
+			switch (opts->variant) {
+			default:
+				warning("Cannot merge binary files: %s (%s vs. %s)",
+					path, name1, name2);
+				/* fallthru */
+			case XDL_MERGE_FAVOR_OURS:
+				stolen = src1;
+				break;
+			case XDL_MERGE_FAVOR_THEIRS:
+				stolen = src2;
+				break;
+			}
 		}
 	}
 
@@ -87,13 +115,7 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused,
 	result->size = stolen->size;
 	stolen->ptr = NULL;
 
-	/*
-	 * With -Xtheirs or -Xours, we have cleanly merged;
-	 * otherwise we got a conflict.
-	 */
-	return opts->variant == XDL_MERGE_FAVOR_OURS ||
-	       opts->variant == XDL_MERGE_FAVOR_THEIRS ?
-	       0 : 1;
+	return status;
 }
 
 static int ll_xdl_merge(const struct ll_merge_driver *drv_unused,
diff --git a/t/t4108-apply-threeway.sh b/t/t4108-apply-threeway.sh
index 65147efdea9a00..cc3aa3314a3448 100755
--- a/t/t4108-apply-threeway.sh
+++ b/t/t4108-apply-threeway.sh
@@ -230,4 +230,49 @@ test_expect_success 'apply with --3way --cached and conflicts' '
 	test_cmp expect.diff actual.diff
 '
 
+test_expect_success 'apply binary file patch' '
+	git reset --hard main &&
+	cp "$TEST_DIRECTORY/test-binary-1.png" bin.png &&
+	git add bin.png &&
+	git commit -m "add binary file" &&
+
+	cp "$TEST_DIRECTORY/test-binary-2.png" bin.png &&
+
+	git diff --binary >bin.diff &&
+	git reset --hard &&
+
+	# Apply must succeed.
+	git apply bin.diff
+'
+
+test_expect_success 'apply binary file patch with 3way' '
+	git reset --hard main &&
+	cp "$TEST_DIRECTORY/test-binary-1.png" bin.png &&
+	git add bin.png &&
+	git commit -m "add binary file" &&
+
+	cp "$TEST_DIRECTORY/test-binary-2.png" bin.png &&
+
+	git diff --binary >bin.diff &&
+	git reset --hard &&
+
+	# Apply must succeed.
+	git apply --3way --index bin.diff
+'
+
+test_expect_success 'apply full-index patch with 3way' '
+	git reset --hard main &&
+	cp "$TEST_DIRECTORY/test-binary-1.png" bin.png &&
+	git add bin.png &&
+	git commit -m "add binary file" &&
+
+	cp "$TEST_DIRECTORY/test-binary-2.png" bin.png &&
+
+	git diff --full-index >bin.diff &&
+	git reset --hard &&
+
+	# Apply must succeed.
+	git apply --3way --index bin.diff
+'
+
 test_done

From 1578215dab1650b1793b154935d1e8a9bdcec24e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:24:07 +0200
Subject: [PATCH 100/198] refs/files: remove unused REF_DELETING in
 lock_ref_oid_basic()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lock_ref_oid_basic() function has gradually been replaced by
most callers no longer performing a low-level "acquire lock,
update and release", and instead using the ref transaction API.
So there are only 4 remaining callers of lock_ref_oid_basic().

None of those callers pass REF_DELETING anymore, the last caller went
away in 92b1551b1d (refs: resolve symbolic refs first,
2016-04-25).

Before that we'd refactored and moved this code in:

 - 8df4e511387 (struct ref_update: move "have_old" into "flags",
   2015-02-17)

 - 7bd9bcf372d (refs: split filesystem-based refs code into a new
   file, 2015-11-09)

 - 165056b2fc (lock_ref_for_update(): new function, 2016-04-24)

We then finally stopped using it in 92b1551b1d (noted above). So let's
remove the handling of this parameter.

By itself this change doesn't benefit us much, but it's the start of
even more removal of unused code in and around this function in
subsequent commits.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 677b7e4cdd2d05..326f0224218beb 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -934,8 +934,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 
 	if (mustexist)
 		resolve_flags |= RESOLVE_REF_READING;
-	if (flags & REF_DELETING)
-		resolve_flags |= RESOLVE_REF_ALLOW_BAD_NAME;
 
 	files_ref_path(refs, &ref_file, refname);
 	resolved = !!refs_resolve_ref_unsafe(&refs->base,

From 9e31fdd015bea033a7210f77b3e892e997588484 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:24:08 +0200
Subject: [PATCH 101/198] refs/files: remove unused "extras/skip" in
 lock_ref_oid_basic()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lock_ref_oid_basic() function has gradually been replaced by use
of the file transaction API, there are only 4 remaining callers of
it.

None of those callers pass non-NULL "extras" and "skip" parameters,
the last such caller went away in 92b1551b1d4 (refs: resolve symbolic
refs first, 2016-04-25), so let's remove the parameters.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 326f0224218beb..a59823d667ecff 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -915,8 +915,6 @@ static int create_reflock(const char *path, void *cb)
 static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 					   const char *refname,
 					   const struct object_id *old_oid,
-					   const struct string_list *extras,
-					   const struct string_list *skip,
 					   unsigned int flags, int *type,
 					   struct strbuf *err)
 {
@@ -950,7 +948,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 			last_errno = errno;
 			if (!refs_verify_refname_available(
 					    &refs->base,
-					    refname, extras, skip, err))
+					    refname, NULL, NULL, err))
 				strbuf_addf(err, "there are still refs under '%s'",
 					    refname);
 			goto error_return;
@@ -963,7 +961,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 		last_errno = errno;
 		if (last_errno != ENOTDIR ||
 		    !refs_verify_refname_available(&refs->base, refname,
-						   extras, skip, err))
+						   NULL, NULL, err))
 			strbuf_addf(err, "unable to resolve reference '%s': %s",
 				    refname, strerror(last_errno));
 
@@ -978,7 +976,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	 */
 	if (is_null_oid(&lock->old_oid) &&
 	    refs_verify_refname_available(refs->packed_ref_store, refname,
-					  extras, skip, err)) {
+					  NULL, NULL, err)) {
 		last_errno = ENOTDIR;
 		goto error_return;
 	}
@@ -1413,8 +1411,8 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 
 	logmoved = log;
 
-	lock = lock_ref_oid_basic(refs, newrefname, NULL, NULL, NULL,
-				  REF_NO_DEREF, NULL, &err);
+	lock = lock_ref_oid_basic(refs, newrefname, NULL, REF_NO_DEREF, NULL,
+				  &err);
 	if (!lock) {
 		if (copy)
 			error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf);
@@ -1436,7 +1434,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 	goto out;
 
  rollback:
-	lock = lock_ref_oid_basic(refs, oldrefname, NULL, NULL, NULL,
+	lock = lock_ref_oid_basic(refs, oldrefname, NULL,
 				  REF_NO_DEREF, NULL, &err);
 	if (!lock) {
 		error("unable to lock %s for rollback: %s", oldrefname, err.buf);
@@ -1845,7 +1843,7 @@ static int files_create_symref(struct ref_store *ref_store,
 	int ret;
 
 	lock = lock_ref_oid_basic(refs, refname, NULL,
-				  NULL, NULL, REF_NO_DEREF, NULL,
+				  REF_NO_DEREF, NULL,
 				  &err);
 	if (!lock) {
 		error("%s", err.buf);
@@ -3064,7 +3062,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 	 * reference if --updateref was specified:
 	 */
 	lock = lock_ref_oid_basic(refs, refname, oid,
-				  NULL, NULL, REF_NO_DEREF,
+				  REF_NO_DEREF,
 				  &type, &err);
 	if (!lock) {
 		error("cannot lock ref '%s': %s", refname, err.buf);

From d7373ed033d019db864d58ed675e4eb2fd649ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:24:09 +0200
Subject: [PATCH 102/198] refs/files: remove unused "skip" in lock_raw_ref()
 too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the unused "skip" parameter to lock_raw_ref(), it was never
used. We do use it when passing "skip" to the
refs_rename_ref_available() function in files_copy_or_rename_ref(),
but not here.

This is part of a larger series that modifies lock_ref_oid_basic()
extensively, there will be no more modifications of this function in
this series, but since the preceding commit removed this unused
parameter from lock_ref_oid_basic(), let's do it here too for
consistency.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index a59823d667ecff..af332fa8fe4fe3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -531,7 +531,6 @@ static void unlock_ref(struct ref_lock *lock)
 static int lock_raw_ref(struct files_ref_store *refs,
 			const char *refname, int mustexist,
 			const struct string_list *extras,
-			const struct string_list *skip,
 			struct ref_lock **lock_p,
 			struct strbuf *referent,
 			unsigned int *type,
@@ -568,7 +567,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 		 * reason to expect this error to be transitory.
 		 */
 		if (refs_verify_refname_available(&refs->base, refname,
-						  extras, skip, err)) {
+						  extras, NULL, err)) {
 			if (mustexist) {
 				/*
 				 * To the user the relevant error is
@@ -673,7 +672,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 							  REMOVE_DIR_EMPTY_ONLY)) {
 				if (refs_verify_refname_available(
 						    &refs->base, refname,
-						    extras, skip, err)) {
+						    extras, NULL, err)) {
 					/*
 					 * The error message set by
 					 * verify_refname_available() is OK.
@@ -710,7 +709,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 		 */
 		if (refs_verify_refname_available(
 				    refs->packed_ref_store, refname,
-				    extras, skip, err))
+				    extras, NULL, err))
 			goto error_return;
 	}
 
@@ -2412,7 +2411,7 @@ static int lock_ref_for_update(struct files_ref_store *refs,
 	}
 
 	ret = lock_raw_ref(refs, update->refname, mustexist,
-			   affected_refnames, NULL,
+			   affected_refnames,
 			   &lock, &referent,
 			   &update->type, err);
 	if (ret) {

From ada77feb5fdaf2c96e939ebcb88b6a0ed9b1cc6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:24:10 +0200
Subject: [PATCH 103/198] refs/debug: re-indent argument list for "prepare"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Re-indent this argument list that's been mis-indented since it was
added in 34c319970d1 (refs/debug: trace into reflog expiry too,
2021-04-23). This makes a subsequent change smaller.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/refs/debug.c b/refs/debug.c
index 7db4abccc341b0..449ac3e6cc8e68 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -364,8 +364,8 @@ struct debug_reflog_expiry_should_prune {
 };
 
 static void debug_reflog_expiry_prepare(const char *refname,
-				    const struct object_id *oid,
-				    void *cb_data)
+					const struct object_id *oid,
+					void *cb_data)
 {
 	struct debug_reflog_expiry_should_prune *prune = cb_data;
 	trace_printf_key(&trace_refs, "reflog_expire_prepare: %s\n", refname);

From ab3bfeb93eb099a768224a2dc28be3ef254a7f08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:22 +0200
Subject: [PATCH 104/198] refs: make repo_dwim_log() accept a NULL oid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the repo_dwim_log() function initially added as dwim_log() in
eb3a48221fd (log --reflog: use dwim_log, 2007-02-09) to accept a NULL
oid parameter. The refs_resolve_ref_unsafe() function it invokes
already deals with it, but it didn't.

This allows for a bit more clarity in a reflog-walk.c codepath added
in f2eba66d4d1 (Enable HEAD@{...} and make it independent from the
current branch, 2007-02-03). We'll shortly use this in
builtin/reflog.c as well.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 reflog-walk.c | 3 +--
 refs.c        | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/reflog-walk.c b/reflog-walk.c
index e9cd3283694dec..8ac4b284b6b685 100644
--- a/reflog-walk.c
+++ b/reflog-walk.c
@@ -158,10 +158,9 @@ int add_reflog_for_walk(struct reflog_walk_info *info,
 		}
 		reflogs = read_complete_reflog(branch);
 		if (!reflogs || reflogs->nr == 0) {
-			struct object_id oid;
 			char *b;
 			int ret = dwim_log(branch, strlen(branch),
-					   &oid, &b);
+					   NULL, &b);
 			if (ret > 1)
 				free(b);
 			else if (ret == 1) {
diff --git a/refs.c b/refs.c
index 8b9f7c3a80a0f6..d963543675992a 100644
--- a/refs.c
+++ b/refs.c
@@ -698,7 +698,7 @@ int repo_dwim_log(struct repository *r, const char *str, int len,
 		strbuf_addf(&path, *p, len, str);
 		ref = refs_resolve_ref_unsafe(refs, path.buf,
 					      RESOLVE_REF_READING,
-					      &hash, NULL);
+					      oid ? &hash : NULL, NULL);
 		if (!ref)
 			continue;
 		if (refs_reflog_exists(refs, path.buf))
@@ -710,7 +710,8 @@ int repo_dwim_log(struct repository *r, const char *str, int len,
 			continue;
 		if (!logs_found++) {
 			*log = xstrdup(it);
-			oidcpy(oid, &hash);
+			if (oid)
+				oidcpy(oid, &hash);
 		}
 		if (!warn_ambiguous_refs)
 			break;

From 74271b968f29ebf01fd595e1e8dae97da1fa02ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:23 +0200
Subject: [PATCH 105/198] refs/files: add a comment about refs_reflog_exists()
 call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a comment about why it is that we need to check for the the
existence of a reflog we're deleting after we've successfully acquired
the lock in files_reflog_expire(). As noted in [1] the lock protocol
for reflogs is somewhat intuitive.

This early exit code the comment applies to dates all the way back to
4264dc15e19 (git reflog expire, 2006-12-19).

1. https://lore.kernel.org/git/54DCDA42.2060800@alum.mit.edu/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index af332fa8fe4fe3..54153064166d9b 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3068,6 +3068,19 @@ static int files_reflog_expire(struct ref_store *ref_store,
 		strbuf_release(&err);
 		return -1;
 	}
+
+	/*
+	 * When refs are deleted, their reflog is deleted before the
+	 * ref itself is deleted. This is because there is no separate
+	 * lock for reflog; instead we take a lock on the ref with
+	 * lock_ref_oid_basic().
+	 *
+	 * If a race happens and the reflog doesn't exist after we've
+	 * acquired the lock that's OK. We've got nothing more to do;
+	 * We were asked to delete the reflog, but someone else
+	 * deleted it! The caller doesn't care that we deleted it,
+	 * just that it is deleted. So we can return successfully.
+	 */
 	if (!refs_reflog_exists(ref_store, refname)) {
 		unlock_ref(lock);
 		return 0;

From b3736601d0cc489757eac92545e75f9e75341638 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 20 Jul 2021 12:33:24 +0200
Subject: [PATCH 106/198] refs file backend: move raceproof_create_file() here
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the raceproof_create_file() API added to cache.h and
object-file.c in 177978f56ad (raceproof_create_file(): new function,
2017-01-06) to its only user, refs/files-backend.c.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h              |  43 -----------------
 object-file.c        |  68 ---------------------------
 refs/files-backend.c | 109 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 111 deletions(-)

diff --git a/cache.h b/cache.h
index ba04ff8bd36b36..eb4dfe6381f548 100644
--- a/cache.h
+++ b/cache.h
@@ -1202,49 +1202,6 @@ enum scld_error safe_create_leading_directories(char *path);
 enum scld_error safe_create_leading_directories_const(const char *path);
 enum scld_error safe_create_leading_directories_no_share(char *path);
 
-/*
- * Callback function for raceproof_create_file(). This function is
- * expected to do something that makes dirname(path) permanent despite
- * the fact that other processes might be cleaning up empty
- * directories at the same time. Usually it will create a file named
- * path, but alternatively it could create another file in that
- * directory, or even chdir() into that directory. The function should
- * return 0 if the action was completed successfully. On error, it
- * should return a nonzero result and set errno.
- * raceproof_create_file() treats two errno values specially:
- *
- * - ENOENT -- dirname(path) does not exist. In this case,
- *             raceproof_create_file() tries creating dirname(path)
- *             (and any parent directories, if necessary) and calls
- *             the function again.
- *
- * - EISDIR -- the file already exists and is a directory. In this
- *             case, raceproof_create_file() removes the directory if
- *             it is empty (and recursively any empty directories that
- *             it contains) and calls the function again.
- *
- * Any other errno causes raceproof_create_file() to fail with the
- * callback's return value and errno.
- *
- * Obviously, this function should be OK with being called again if it
- * fails with ENOENT or EISDIR. In other scenarios it will not be
- * called again.
- */
-typedef int create_file_fn(const char *path, void *cb);
-
-/*
- * Create a file in dirname(path) by calling fn, creating leading
- * directories if necessary. Retry a few times in case we are racing
- * with another process that is trying to clean up the directory that
- * contains path. See the documentation for create_file_fn for more
- * details.
- *
- * Return the value and set the errno that resulted from the most
- * recent call of fn. fn is always called at least once, and will be
- * called more than once if it returns ENOENT or EISDIR.
- */
-int raceproof_create_file(const char *path, create_file_fn fn, void *cb);
-
 int mkdir_in_gitdir(const char *path);
 char *expand_user_path(const char *path, int real_home);
 const char *enter_repo(const char *path, int strict);
diff --git a/object-file.c b/object-file.c
index ecca5a8da00f14..231a02997baced 100644
--- a/object-file.c
+++ b/object-file.c
@@ -414,74 +414,6 @@ enum scld_error safe_create_leading_directories_const(const char *path)
 	return result;
 }
 
-int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
-{
-	/*
-	 * The number of times we will try to remove empty directories
-	 * in the way of path. This is only 1 because if another
-	 * process is racily creating directories that conflict with
-	 * us, we don't want to fight against them.
-	 */
-	int remove_directories_remaining = 1;
-
-	/*
-	 * The number of times that we will try to create the
-	 * directories containing path. We are willing to attempt this
-	 * more than once, because another process could be trying to
-	 * clean up empty directories at the same time as we are
-	 * trying to create them.
-	 */
-	int create_directories_remaining = 3;
-
-	/* A scratch copy of path, filled lazily if we need it: */
-	struct strbuf path_copy = STRBUF_INIT;
-
-	int ret, save_errno;
-
-	/* Sanity check: */
-	assert(*path);
-
-retry_fn:
-	ret = fn(path, cb);
-	save_errno = errno;
-	if (!ret)
-		goto out;
-
-	if (errno == EISDIR && remove_directories_remaining-- > 0) {
-		/*
-		 * A directory is in the way. Maybe it is empty; try
-		 * to remove it:
-		 */
-		if (!path_copy.len)
-			strbuf_addstr(&path_copy, path);
-
-		if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
-			goto retry_fn;
-	} else if (errno == ENOENT && create_directories_remaining-- > 0) {
-		/*
-		 * Maybe the containing directory didn't exist, or
-		 * maybe it was just deleted by a process that is
-		 * racing with us to clean up empty directories. Try
-		 * to create it:
-		 */
-		enum scld_error scld_result;
-
-		if (!path_copy.len)
-			strbuf_addstr(&path_copy, path);
-
-		do {
-			scld_result = safe_create_leading_directories(path_copy.buf);
-			if (scld_result == SCLD_OK)
-				goto retry_fn;
-		} while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
-	}
-
-out:
-	strbuf_release(&path_copy);
-	errno = save_errno;
-	return ret;
-}
-
 static void fill_loose_path(struct strbuf *buf, const struct object_id *oid)
 {
 	int i;
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 5d12003471e586..913868b85d33fe 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -852,6 +852,115 @@ static struct ref_iterator *files_ref_iterator_begin(
 	return ref_iterator;
 }
 
+/*
+ * Callback function for raceproof_create_file(). This function is
+ * expected to do something that makes dirname(path) permanent despite
+ * the fact that other processes might be cleaning up empty
+ * directories at the same time. Usually it will create a file named
+ * path, but alternatively it could create another file in that
+ * directory, or even chdir() into that directory. The function should
+ * return 0 if the action was completed successfully. On error, it
+ * should return a nonzero result and set errno.
+ * raceproof_create_file() treats two errno values specially:
+ *
+ * - ENOENT -- dirname(path) does not exist. In this case,
+ *             raceproof_create_file() tries creating dirname(path)
+ *             (and any parent directories, if necessary) and calls
+ *             the function again.
+ *
+ * - EISDIR -- the file already exists and is a directory. In this
+ *             case, raceproof_create_file() removes the directory if
+ *             it is empty (and recursively any empty directories that
+ *             it contains) and calls the function again.
+ *
+ * Any other errno causes raceproof_create_file() to fail with the
+ * callback's return value and errno.
+ *
+ * Obviously, this function should be OK with being called again if it
+ * fails with ENOENT or EISDIR. In other scenarios it will not be
+ * called again.
+ */
+typedef int create_file_fn(const char *path, void *cb);
+
+/*
+ * Create a file in dirname(path) by calling fn, creating leading
+ * directories if necessary. Retry a few times in case we are racing
+ * with another process that is trying to clean up the directory that
+ * contains path. See the documentation for create_file_fn for more
+ * details.
+ *
+ * Return the value and set the errno that resulted from the most
+ * recent call of fn. fn is always called at least once, and will be
+ * called more than once if it returns ENOENT or EISDIR.
+ */
+static int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
+{
+	/*
+	 * The number of times we will try to remove empty directories
+	 * in the way of path. This is only 1 because if another
+	 * process is racily creating directories that conflict with
+	 * us, we don't want to fight against them.
+	 */
+	int remove_directories_remaining = 1;
+
+	/*
+	 * The number of times that we will try to create the
+	 * directories containing path. We are willing to attempt this
+	 * more than once, because another process could be trying to
+	 * clean up empty directories at the same time as we are
+	 * trying to create them.
+	 */
+	int create_directories_remaining = 3;
+
+	/* A scratch copy of path, filled lazily if we need it: */
+	struct strbuf path_copy = STRBUF_INIT;
+
+	int ret, save_errno;
+
+	/* Sanity check: */
+	assert(*path);
+
+retry_fn:
+	ret = fn(path, cb);
+	save_errno = errno;
+	if (!ret)
+		goto out;
+
+	if (errno == EISDIR && remove_directories_remaining-- > 0) {
+		/*
+		 * A directory is in the way. Maybe it is empty; try
+		 * to remove it:
+		 */
+		if (!path_copy.len)
+			strbuf_addstr(&path_copy, path);
+
+		if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
+			goto retry_fn;
+	} else if (errno == ENOENT && create_directories_remaining-- > 0) {
+		/*
+		 * Maybe the containing directory didn't exist, or
+		 * maybe it was just deleted by a process that is
+		 * racing with us to clean up empty directories. Try
+		 * to create it:
+		 */
+		enum scld_error scld_result;
+
+		if (!path_copy.len)
+			strbuf_addstr(&path_copy, path);
+
+		do {
+			scld_result = safe_create_leading_directories(path_copy.buf);
+			if (scld_result == SCLD_OK)
+				goto retry_fn;
+		} while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
+	}
+
+out:
+	strbuf_release(&path_copy);
+	errno = save_errno;
+	return ret;
+}
+
 static int remove_empty_directories(struct strbuf *path)
 {
 	/*

From 466d936b9db984588689cf881ec49354928b1ceb Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:25 +0200
Subject: [PATCH 107/198] refs: remove EINVAL errno output from specification
 of read_raw_ref_fn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit does not change code; it documents the fact that an alternate ref
backend does not need to return EINVAL from read_raw_ref_fn to function
properly.

This is correct, because refs_read_raw_ref is only called from;

* resolve_ref_unsafe(), which does not care for the EINVAL errno result.

* refs_verify_refname_available(), which does not inspect errno.

* files-backend.c, where errno is overwritten on failure.

* packed-backend.c (is_packed_transaction_needed), which calls it for the
  packed ref backend, which never emits EINVAL.

A grep for EINVAL */*c reveals that no code checks errno against EINVAL after
reading references. In addition, the refs.h file does not mention errno at all.

A grep over resolve_ref_unsafe() turned up the following callers that inspect
errno:

* sequencer.c::print_commit_summary, which uses it for die_errno

* lock_ref_oid_basic(), which only treats EISDIR and ENOTDIR specially.

The files ref backend does use EINVAL. The files backend does not call into
the generic API (refs_read_raw), but into the files-specific function
(files_read_raw_ref), which we are not changing in this commit.

As the errno sideband is unintuitive and error-prone, remove EINVAL
value, as a step towards getting rid of the errno sideband altogether.

Spotted by Ævar Arnfjörð Bjarmason <avarab@gmail.com>.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/refs-internal.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 3155708345fcbc..dc0d826c3acc83 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -622,9 +622,9 @@ typedef int reflog_expire_fn(struct ref_store *ref_store,
  *
  * Return 0 on success. If the ref doesn't exist, set errno to ENOENT
  * and return -1. If the ref exists but is neither a symbolic ref nor
- * an object ID, it is broken; set REF_ISBROKEN in type, set errno to
- * EINVAL, and return -1. If there is another error reading the ref,
- * set errno appropriately and return -1.
+ * an object ID, it is broken; set REF_ISBROKEN in type, and return -1
+ * (errno should not be ENOENT) If there is another error reading the
+ * ref, set errno appropriately and return -1.
  *
  * Backend-specific flags might be set in type as well, regardless of
  * outcome.

From ffe2bc99bb77674b69ee7212e597963f2abf150d Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:26 +0200
Subject: [PATCH 108/198] refs/files-backend: stop setting errno from
 lock_ref_oid_basic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refs/files-backend.c::lock_ref_oid_basic() tries to signal how it failed
to its callers using errno.

It is safe to stop setting errno here, because the callers of this
file-scope static function are

* files_copy_or_rename_ref()
* files_create_symref()
* files_reflog_expire()

None of them looks at errno after seeing a negative return from
lock_ref_oid_basic() to make any decision, and no caller of these three
functions looks at errno after they signal a failure by returning a
negative value. In particular,

* files_copy_or_rename_ref() - here, calls are followed by error()
(which performs I/O) or write_ref_to_lockfile() (which calls
parse_object() which may perform I/O)

* files_create_symref() - here, calls are followed by error() or
create_symref_locked() (which performs I/O and does not inspect
errno)

* files_reflog_expire() - here, calls are followed by error() or
refs_reflog_exists() (which calls a function in a vtable that is not
documented to use and/or preserve errno)

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 913868b85d33fe..635e7653c29f64 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -982,7 +982,6 @@ static int create_reflock(const char *path, void *cb)
 
 /*
  * Locks a ref returning the lock on success and NULL on failure.
- * On failure errno is set to something meaningful.
  */
 static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 					   const char *refname,
@@ -991,7 +990,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 {
 	struct strbuf ref_file = STRBUF_INIT;
 	struct ref_lock *lock;
-	int last_errno = 0;
 
 	files_assert_main_repository(refs, "lock_ref_oid_basic");
 	assert(err);
@@ -1002,11 +1000,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	if (!refs_resolve_ref_unsafe(&refs->base, refname,
 				     RESOLVE_REF_NO_RECURSE,
 				     &lock->old_oid, type)) {
-		last_errno = errno;
 		if (!refs_verify_refname_available(&refs->base, refname,
 						   NULL, NULL, err))
 			strbuf_addf(err, "unable to resolve reference '%s': %s",
-				    refname, strerror(last_errno));
+				    refname, strerror(errno));
 
 		goto error_return;
 	}
@@ -1019,15 +1016,12 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	 */
 	if (is_null_oid(&lock->old_oid) &&
 	    refs_verify_refname_available(refs->packed_ref_store, refname,
-					  NULL, NULL, err)) {
-		last_errno = ENOTDIR;
+					  NULL, NULL, err))
 		goto error_return;
-	}
 
 	lock->ref_name = xstrdup(refname);
 
 	if (raceproof_create_file(ref_file.buf, create_reflock, &lock->lk)) {
-		last_errno = errno;
 		unable_to_lock_message(ref_file.buf, errno, err);
 		goto error_return;
 	}
@@ -1044,7 +1038,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 
  out:
 	strbuf_release(&ref_file);
-	errno = last_errno;
 	return lock;
 }
 

From 8bb2a971949c50787809f14ccf1d2a5d5324f4e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:24 +0200
Subject: [PATCH 109/198] reflog expire: don't lock reflogs using previously
 seen OID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During reflog expiry, the cmd_reflog_expire() function first iterates
over all reflogs in logs/*, and then one-by-one acquires the lock for
each one and expires it. This behavior has been with us since this
command was implemented in 4264dc15e1 ("git reflog expire",
2006-12-19).

Change this to stop calling lock_ref_oid_basic() with the OID we saw
when we looped over the logs, instead have it pass the OID it managed
to lock.

This mostly mitigates a race condition where e.g. "git gc" will fail
in a concurrently updated repository because the branch moved since
"git reflog expire --all" was started. I.e. with:

    error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B>

This behavior of passing in an "oid" was needed for an edge-case that
I've untangled in this and preceding commits though, namely that we
needed this OID because we'd:

 1. Lookup the reflog name/OID via dwim_log()
 2. With that OID, lock the reflog
 3. Later in builtin/reflog.c we use the OID we looked as input to
    lookup_commit_reference_gently(), assured that it's equal to the
    OID we got from dwim_log().

We can be sure that this change is safe to make because between
dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other
logic relevant to the OID or expiry run in the cmd_reflog_expire()
caller.

We can thus treat that code as a black box, before and after this
change it would get an OID that's been locked, the only difference is
that now we mostly won't be failing to get the lock due to the TOCTOU
race[0]. That failure was purely an implementation detail in how the
"current OID" was looked up, it was divorced from the locking
mechanism.

What do we mean with "mostly"? It mostly mitigates it because we'll
still run into cases where the ref is locked and being updated as we
want to expire it, and other git processes wanting to update the refs
will in turn race with us as we expire the reflog.

That remaining race can in turn be mitigated with the
core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry
acquiring reference locks for 100ms", 2017-08-21). In practice if that
value is high enough we'll probably never have ref updates or reflog
expiry failing, since the clients involved will retry for far longer
than the time any of those operations could take.

See [1] for an initial report of how this impacted "git gc" and a
large discussion about this change in early 2019. In particular patch
looked good to Michael Haggerty, see his[2]. That message seems to not
have made it to the ML archive, its content is quoted in full in my
[3].

I'm leaving behind now-unused code the refs API etc. that takes the
now-NULL "unused_oid" argument, and other code that can be simplified now
that we never have on OID in that context, that'll be cleaned up in
subsequent commits, but for now let's narrowly focus on fixing the
"git gc" issue. As the modified assert() shows we always pass a NULL
oid to reflog_expire() now.

Unfortunately this sort of probabilistic contention is hard to turn
into a test. I've tested this by running the following three subshells
in concurrent terminals:

    (
        rm -rf /tmp/git &&
        git init /tmp/git &&
        while true
        do
            head -c 10 /dev/urandom | hexdump >/tmp/git/out &&
            git -C /tmp/git add out &&
            git -C /tmp/git commit -m"out"
        done
    )

    (
	rm -rf /tmp/git-clone &&
        git clone file:///tmp/git /tmp/git-clone &&
        while git -C /tmp/git-clone pull
        do
            date
        done
    )

    (
        while git -C /tmp/git-clone reflog expire --all
        do
            date
        done
    )

Before this change the "reflog expire" would fail really quickly with
the "but expected" error noted above.

After this change both the "pull" and "reflog expire" will run for a
while, but eventually fail because I get unlucky with
core.filesRefLockTimeout (the "reflog expire" is in a really tight
loop). As noted above that can in turn be mitigated with higher values
of core.filesRefLockTimeout than the 100ms default.

As noted in the commentary added in the preceding commit there's also
the case of branches being racily deleted, that can be tested by
adding this to the above:

    (
        while git -C /tmp/git-clone branch topic master &&
	      git -C /tmp/git-clone branch -D topic
        do
            date
        done
    )

With core.filesRefLockTimeout set to 10 seconds (it can probably be a
lot lower) I managed to run all four of these concurrently for about
an hour, and accumulated ~125k commits, auto-gc's and all, and didn't
have a single failure. The loops visibly stall while waiting for the
lock, but that's expected and desired behavior.

0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use
1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/
2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu
3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/reflog.c     | 13 ++++++-------
 refs.h               |  2 +-
 refs/files-backend.c |  7 +++++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/builtin/reflog.c b/builtin/reflog.c
index 09541d1c80483c..61795f22d53f3d 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -629,8 +629,9 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
 		free_worktrees(worktrees);
 		for (i = 0; i < collected.nr; i++) {
 			struct collected_reflog *e = collected.e[i];
+
 			set_reflog_expiry_param(&cb.cmd, explicit_expiry, e->reflog);
-			status |= reflog_expire(e->reflog, &e->oid, flags,
+			status |= reflog_expire(e->reflog, NULL, flags,
 						reflog_expiry_prepare,
 						should_expire_reflog_ent,
 						reflog_expiry_cleanup,
@@ -642,13 +643,12 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
 
 	for (; i < argc; i++) {
 		char *ref;
-		struct object_id oid;
-		if (!dwim_log(argv[i], strlen(argv[i]), &oid, &ref)) {
+		if (!dwim_log(argv[i], strlen(argv[i]), NULL, &ref)) {
 			status |= error(_("%s points nowhere!"), argv[i]);
 			continue;
 		}
 		set_reflog_expiry_param(&cb.cmd, explicit_expiry, ref);
-		status |= reflog_expire(ref, &oid, flags,
+		status |= reflog_expire(ref, NULL, flags,
 					reflog_expiry_prepare,
 					should_expire_reflog_ent,
 					reflog_expiry_cleanup,
@@ -700,7 +700,6 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix)
 
 	for ( ; i < argc; i++) {
 		const char *spec = strstr(argv[i], "@{");
-		struct object_id oid;
 		char *ep, *ref;
 		int recno;
 
@@ -709,7 +708,7 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix)
 			continue;
 		}
 
-		if (!dwim_log(argv[i], spec - argv[i], &oid, &ref)) {
+		if (!dwim_log(argv[i], spec - argv[i], NULL, &ref)) {
 			status |= error(_("no reflog for '%s'"), argv[i]);
 			continue;
 		}
@@ -724,7 +723,7 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix)
 			cb.cmd.expire_total = 0;
 		}
 
-		status |= reflog_expire(ref, &oid, flags,
+		status |= reflog_expire(ref, NULL, flags,
 					reflog_expiry_prepare,
 					should_expire_reflog_ent,
 					reflog_expiry_cleanup,
diff --git a/refs.h b/refs.h
index 48970dfc7e0f0d..ddbf15f1c2119d 100644
--- a/refs.h
+++ b/refs.h
@@ -796,7 +796,7 @@ enum expire_reflog_flags {
  * expiration policy that is desired.
  *
  * reflog_expiry_prepare_fn -- Called once after the reference is
- *     locked.
+ *     locked. Called with the OID of the locked reference.
  *
  * reflog_expiry_should_prune_fn -- Called once for each entry in the
  *     existing reflog. It should return true iff that entry should be
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 54153064166d9b..ccdf45504985b3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3032,7 +3032,7 @@ static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid,
 }
 
 static int files_reflog_expire(struct ref_store *ref_store,
-			       const char *refname, const struct object_id *oid,
+			       const char *refname, const struct object_id *unused_oid,
 			       unsigned int flags,
 			       reflog_expiry_prepare_fn prepare_fn,
 			       reflog_expiry_should_prune_fn should_prune_fn,
@@ -3049,6 +3049,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 	int status = 0;
 	int type;
 	struct strbuf err = STRBUF_INIT;
+	const struct object_id *oid;
 
 	memset(&cb, 0, sizeof(cb));
 	cb.flags = flags;
@@ -3060,7 +3061,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 	 * reference itself, plus we might need to update the
 	 * reference if --updateref was specified:
 	 */
-	lock = lock_ref_oid_basic(refs, refname, oid,
+	lock = lock_ref_oid_basic(refs, refname, NULL,
 				  REF_NO_DEREF,
 				  &type, &err);
 	if (!lock) {
@@ -3068,6 +3069,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 		strbuf_release(&err);
 		return -1;
 	}
+	oid = &lock->old_oid;
 
 	/*
 	 * When refs are deleted, their reflog is deleted before the
@@ -3111,6 +3113,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 		}
 	}
 
+	assert(!unused_oid);
 	(*prepare_fn)(refname, oid, cb.policy_cb);
 	refs_for_each_reflog_ent(ref_store, refname, expire_reflog_ent, &cb);
 	(*cleanup_fn)(cb.policy_cb);

From 5794236350422402e16f34a161b82cc1004b7a4d Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:27 +0200
Subject: [PATCH 110/198] refs: make errno output explicit for read_raw_ref_fn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes it explicit how alternative ref backends should report errors in
read_raw_ref_fn.

read_raw_ref_fn needs to supply a credible errno for a number of cases. These
are primarily:

1) The files backend calls read_raw_ref from lock_raw_ref, and uses the
resulting error codes to create/remove directories as needed.

2) ENOENT should be translated in a zero OID, optionally with REF_ISBROKEN set,
returning the last successfully resolved symref. This is necessary so
read_raw_ref("HEAD") on an empty repo returns refs/heads/main (or the default branch
du-jour), and we know on which branch to create the first commit.

Make this information flow explicit by adding a failure_errno to the signature
of read_raw_ref. All errnos from the files backend are still propagated
unchanged, even though inspection suggests only ENOTDIR, EISDIR and ENOENT are
relevant.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c                |  2 +-
 refs/debug.c          |  4 ++--
 refs/files-backend.c  | 29 +++++++++++++++--------------
 refs/packed-backend.c |  8 ++++----
 refs/refs-internal.h  | 20 ++++++++++++--------
 5 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/refs.c b/refs.c
index d963543675992a..fc6c0ddffaed41 100644
--- a/refs.c
+++ b/refs.c
@@ -1682,7 +1682,7 @@ int refs_read_raw_ref(struct ref_store *ref_store,
 	}
 
 	return ref_store->be->read_raw_ref(ref_store, refname, oid, referent,
-					   type);
+					   type, &errno);
 }
 
 /* This function needs to return a meaningful errno on failure */
diff --git a/refs/debug.c b/refs/debug.c
index 449ac3e6cc8e68..70a78b516abaf2 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -238,7 +238,7 @@ debug_ref_iterator_begin(struct ref_store *ref_store, const char *prefix,
 
 static int debug_read_raw_ref(struct ref_store *ref_store, const char *refname,
 			      struct object_id *oid, struct strbuf *referent,
-			      unsigned int *type)
+			      unsigned int *type, int *failure_errno)
 {
 	struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
 	int res = 0;
@@ -246,7 +246,7 @@ static int debug_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	oidcpy(oid, null_oid());
 	errno = 0;
 	res = drefs->refs->be->read_raw_ref(drefs->refs, refname, oid, referent,
-					    type);
+					    type, failure_errno);
 
 	if (res == 0) {
 		trace_printf_key(&trace_refs, "read_raw_ref: %s: %s (=> %s) type %x: %d\n",
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 635e7653c29f64..ee37c2e6c99806 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -341,9 +341,9 @@ static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs)
 	return refs->loose;
 }
 
-static int files_read_raw_ref(struct ref_store *ref_store,
-			      const char *refname, struct object_id *oid,
-			      struct strbuf *referent, unsigned int *type)
+static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
+			      struct object_id *oid, struct strbuf *referent,
+			      unsigned int *type, int *failure_errno)
 {
 	struct files_ref_store *refs =
 		files_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
@@ -354,7 +354,6 @@ static int files_read_raw_ref(struct ref_store *ref_store,
 	struct stat st;
 	int fd;
 	int ret = -1;
-	int save_errno;
 	int remaining_retries = 3;
 
 	*type = 0;
@@ -459,10 +458,9 @@ static int files_read_raw_ref(struct ref_store *ref_store,
 	ret = parse_loose_ref_contents(buf, oid, referent, type);
 
 out:
-	save_errno = errno;
+	*failure_errno = errno;
 	strbuf_release(&sb_path);
 	strbuf_release(&sb_contents);
-	errno = save_errno;
 	return ret;
 }
 
@@ -540,6 +538,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 	struct strbuf ref_file = STRBUF_INIT;
 	int attempts_remaining = 3;
 	int ret = TRANSACTION_GENERIC_ERROR;
+	int failure_errno;
 
 	assert(err);
 	files_assert_main_repository(refs, "lock_raw_ref");
@@ -610,7 +609,9 @@ static int lock_raw_ref(struct files_ref_store *refs,
 	if (hold_lock_file_for_update_timeout(
 			    &lock->lk, ref_file.buf, LOCK_NO_DEREF,
 			    get_files_ref_lock_timeout_ms()) < 0) {
-		if (errno == ENOENT && --attempts_remaining > 0) {
+		int myerr = errno;
+		errno = 0;
+		if (myerr == ENOENT && --attempts_remaining > 0) {
 			/*
 			 * Maybe somebody just deleted one of the
 			 * directories leading to ref_file.  Try
@@ -618,7 +619,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 			 */
 			goto retry;
 		} else {
-			unable_to_lock_message(ref_file.buf, errno, err);
+			unable_to_lock_message(ref_file.buf, myerr, err);
 			goto error_return;
 		}
 	}
@@ -628,9 +629,9 @@ static int lock_raw_ref(struct files_ref_store *refs,
 	 * fear that its value will change.
 	 */
 
-	if (files_read_raw_ref(&refs->base, refname,
-			       &lock->old_oid, referent, type)) {
-		if (errno == ENOENT) {
+	if (files_read_raw_ref(&refs->base, refname, &lock->old_oid, referent,
+			       type, &failure_errno)) {
+		if (failure_errno == ENOENT) {
 			if (mustexist) {
 				/* Garden variety missing reference. */
 				strbuf_addf(err, "unable to resolve reference '%s'",
@@ -654,7 +655,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
 				 *   reference named "refs/foo/bar/baz".
 				 */
 			}
-		} else if (errno == EISDIR) {
+		} else if (failure_errno == EISDIR) {
 			/*
 			 * There is a directory in the way. It might have
 			 * contained references that have been deleted. If
@@ -692,13 +693,13 @@ static int lock_raw_ref(struct files_ref_store *refs,
 					goto error_return;
 				}
 			}
-		} else if (errno == EINVAL && (*type & REF_ISBROKEN)) {
+		} else if (failure_errno == EINVAL && (*type & REF_ISBROKEN)) {
 			strbuf_addf(err, "unable to resolve reference '%s': "
 				    "reference broken", refname);
 			goto error_return;
 		} else {
 			strbuf_addf(err, "unable to resolve reference '%s': %s",
-				    refname, strerror(errno));
+				    refname, strerror(failure_errno));
 			goto error_return;
 		}
 
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 24a360b719ff25..159ac7762401b8 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -724,9 +724,9 @@ static struct snapshot *get_snapshot(struct packed_ref_store *refs)
 	return refs->snapshot;
 }
 
-static int packed_read_raw_ref(struct ref_store *ref_store,
-			       const char *refname, struct object_id *oid,
-			       struct strbuf *referent, unsigned int *type)
+static int packed_read_raw_ref(struct ref_store *ref_store, const char *refname,
+			       struct object_id *oid, struct strbuf *referent,
+			       unsigned int *type, int *failure_errno)
 {
 	struct packed_ref_store *refs =
 		packed_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
@@ -739,7 +739,7 @@ static int packed_read_raw_ref(struct ref_store *ref_store,
 
 	if (!rec) {
 		/* refname is not a packed reference. */
-		errno = ENOENT;
+		*failure_errno = ENOENT;
 		return -1;
 	}
 
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dc0d826c3acc83..33a31d5c2366c5 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -620,11 +620,15 @@ typedef int reflog_expire_fn(struct ref_store *ref_store,
  * properly-formatted or even safe reference name. NEITHER INPUT NOR
  * OUTPUT REFERENCE NAMES ARE VALIDATED WITHIN THIS FUNCTION.
  *
- * Return 0 on success. If the ref doesn't exist, set errno to ENOENT
- * and return -1. If the ref exists but is neither a symbolic ref nor
- * an object ID, it is broken; set REF_ISBROKEN in type, and return -1
- * (errno should not be ENOENT) If there is another error reading the
- * ref, set errno appropriately and return -1.
+ * Return 0 on success, or -1 on failure. If the ref exists but is neither a
+ * symbolic ref nor an object ID, it is broken. In this case set REF_ISBROKEN in
+ * type, and return -1 (failure_errno should not be ENOENT)
+ *
+ * failure_errno provides errno codes that are interpreted beyond error
+ * reporting. The following error codes have special meaning:
+ *    * ENOENT: the ref doesn't exist
+ *    * EISDIR: ref name is a directory
+ *    * ENOTDIR: ref prefix is not a directory
  *
  * Backend-specific flags might be set in type as well, regardless of
  * outcome.
@@ -638,9 +642,9 @@ typedef int reflog_expire_fn(struct ref_store *ref_store,
  * - in all other cases, referent will be untouched, and therefore
  *   refname will still be valid and unchanged.
  */
-typedef int read_raw_ref_fn(struct ref_store *ref_store,
-			    const char *refname, struct object_id *oid,
-			    struct strbuf *referent, unsigned int *type);
+typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
+			    struct object_id *oid, struct strbuf *referent,
+			    unsigned int *type, int *failure_errno);
 
 struct ref_storage_be {
 	struct ref_storage_be *next;

From 066c64af8603953234c191e64c5b50bd226ad198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:25 +0200
Subject: [PATCH 111/198] refs/files: remove unused "oid" in
 lock_ref_oid_basic()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the preceding commit the last caller that passed a non-NULL OID was
changed to pass NULL to lock_ref_oid_basic(). As noted in preceding
commits use of this API has been going away (we should use ref
transactions, or lock_raw_ref()), so we're unlikely to gain new
callers that want to pass the "oid".

So let's remove it, doing so means we can remove the "mustexist"
condition, and therefore anything except the "flags =
RESOLVE_REF_NO_RECURSE" case.

Furthermore, since the verify_lock() function we called did most of
its work when the "oid" was passed (as "old_oid") we can inline the
trivial part of it that remains in its only remaining caller. Without
a NULL "oid" passed it was equivalent to calling refs_read_ref_full()
followed by oidclr().

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 72 ++++++++------------------------------------
 1 file changed, 12 insertions(+), 60 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index ccdf45504985b3..98d9aa1601fa7b 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -852,42 +852,6 @@ static struct ref_iterator *files_ref_iterator_begin(
 	return ref_iterator;
 }
 
-/*
- * Verify that the reference locked by lock has the value old_oid
- * (unless it is NULL).  Fail if the reference doesn't exist and
- * mustexist is set. Return 0 on success. On error, write an error
- * message to err, set errno, and return a negative value.
- */
-static int verify_lock(struct ref_store *ref_store, struct ref_lock *lock,
-		       const struct object_id *old_oid, int mustexist,
-		       struct strbuf *err)
-{
-	assert(err);
-
-	if (refs_read_ref_full(ref_store, lock->ref_name,
-			       mustexist ? RESOLVE_REF_READING : 0,
-			       &lock->old_oid, NULL)) {
-		if (old_oid) {
-			int save_errno = errno;
-			strbuf_addf(err, "can't verify ref '%s'", lock->ref_name);
-			errno = save_errno;
-			return -1;
-		} else {
-			oidclr(&lock->old_oid);
-			return 0;
-		}
-	}
-	if (old_oid && !oideq(&lock->old_oid, old_oid)) {
-		strbuf_addf(err, "ref '%s' is at %s but expected %s",
-			    lock->ref_name,
-			    oid_to_hex(&lock->old_oid),
-			    oid_to_hex(old_oid));
-		errno = EBUSY;
-		return -1;
-	}
-	return 0;
-}
-
 static int remove_empty_directories(struct strbuf *path)
 {
 	/*
@@ -913,15 +877,12 @@ static int create_reflock(const char *path, void *cb)
  */
 static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 					   const char *refname,
-					   const struct object_id *old_oid,
 					   unsigned int flags, int *type,
 					   struct strbuf *err)
 {
 	struct strbuf ref_file = STRBUF_INIT;
 	struct ref_lock *lock;
 	int last_errno = 0;
-	int mustexist = (old_oid && !is_null_oid(old_oid));
-	int resolve_flags = RESOLVE_REF_NO_RECURSE;
 	int resolved;
 
 	files_assert_main_repository(refs, "lock_ref_oid_basic");
@@ -929,12 +890,9 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 
 	CALLOC_ARRAY(lock, 1);
 
-	if (mustexist)
-		resolve_flags |= RESOLVE_REF_READING;
-
 	files_ref_path(refs, &ref_file, refname);
-	resolved = !!refs_resolve_ref_unsafe(&refs->base,
-					     refname, resolve_flags,
+	resolved = !!refs_resolve_ref_unsafe(&refs->base, refname,
+					     RESOLVE_REF_NO_RECURSE,
 					     &lock->old_oid, type);
 	if (!resolved && errno == EISDIR) {
 		/*
@@ -952,8 +910,8 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 					    refname);
 			goto error_return;
 		}
-		resolved = !!refs_resolve_ref_unsafe(&refs->base,
-						     refname, resolve_flags,
+		resolved = !!refs_resolve_ref_unsafe(&refs->base, refname,
+						     RESOLVE_REF_NO_RECURSE,
 						     &lock->old_oid, type);
 	}
 	if (!resolved) {
@@ -988,10 +946,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 		goto error_return;
 	}
 
-	if (verify_lock(&refs->base, lock, old_oid, mustexist, err)) {
-		last_errno = errno;
-		goto error_return;
-	}
+	if (refs_read_ref_full(&refs->base, lock->ref_name,
+			       0,
+			       &lock->old_oid, NULL))
+		oidclr(&lock->old_oid);
 	goto out;
 
  error_return:
@@ -1410,8 +1368,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 
 	logmoved = log;
 
-	lock = lock_ref_oid_basic(refs, newrefname, NULL, REF_NO_DEREF, NULL,
-				  &err);
+	lock = lock_ref_oid_basic(refs, newrefname, REF_NO_DEREF, NULL, &err);
 	if (!lock) {
 		if (copy)
 			error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf);
@@ -1433,8 +1390,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 	goto out;
 
  rollback:
-	lock = lock_ref_oid_basic(refs, oldrefname, NULL,
-				  REF_NO_DEREF, NULL, &err);
+	lock = lock_ref_oid_basic(refs, oldrefname, REF_NO_DEREF, NULL, &err);
 	if (!lock) {
 		error("unable to lock %s for rollback: %s", oldrefname, err.buf);
 		strbuf_release(&err);
@@ -1841,9 +1797,7 @@ static int files_create_symref(struct ref_store *ref_store,
 	struct ref_lock *lock;
 	int ret;
 
-	lock = lock_ref_oid_basic(refs, refname, NULL,
-				  REF_NO_DEREF, NULL,
-				  &err);
+	lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, NULL, &err);
 	if (!lock) {
 		error("%s", err.buf);
 		strbuf_release(&err);
@@ -3061,9 +3015,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 	 * reference itself, plus we might need to update the
 	 * reference if --updateref was specified:
 	 */
-	lock = lock_ref_oid_basic(refs, refname, NULL,
-				  REF_NO_DEREF,
-				  &type, &err);
+	lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, &type, &err);
 	if (!lock) {
 		error("cannot lock ref '%s': %s", refname, err.buf);
 		strbuf_release(&err);

From 0dac16d8eb9c2bb9ed394639327cd9c5cd8f5235 Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:28 +0200
Subject: [PATCH 112/198] refs: add failure_errno to refs_read_raw_ref()
 signature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This lets us use the explicit errno output parameter in refs_resolve_ref_unsafe.

Some of our callers explicitly do not care about the errno, rather
than understanding NULL let's have them declare that they don't care
by passing in an "ignore_errno". There's only three of them, and using
that pattern will make it more obvious that they want to throw away
data, let's also add a comment to one of the callers about why we'd
like to ignore the errno.

Let's not extend that to refs_resolve_ref_unsafe() itself for now, it
has a large set of legacy callers, so we're faking up the old "errno"
behavior for it. We can convert those callers to
refs_resolve_ref_unsafe_with_errno() later.

We are leaving out out the refs_read_special_head() in
refs_read_raw_ref() for now, as noted in the next commit moving it to
"failure_errno" will require some special consideration.

We're intentionally mis-indenting the argument list of the new
refs_resolve_ref_unsafe_with_errno(), it will be non-static in a
subsequent commit, doing it this way makes that diff smaller.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c                | 61 ++++++++++++++++++++++++++++++-------------
 refs/files-backend.c  | 10 ++++---
 refs/packed-backend.c |  7 ++---
 refs/refs-internal.h  |  6 ++---
 4 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/refs.c b/refs.c
index fc6c0ddffaed41..728285c922000e 100644
--- a/refs.c
+++ b/refs.c
@@ -1672,30 +1672,33 @@ static int refs_read_special_head(struct ref_store *ref_store,
 	return result;
 }
 
-int refs_read_raw_ref(struct ref_store *ref_store,
-		      const char *refname, struct object_id *oid,
-		      struct strbuf *referent, unsigned int *type)
+int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
+		      struct object_id *oid, struct strbuf *referent,
+		      unsigned int *type, int *failure_errno)
 {
+	assert(failure_errno);
 	if (!strcmp(refname, "FETCH_HEAD") || !strcmp(refname, "MERGE_HEAD")) {
 		return refs_read_special_head(ref_store, refname, oid, referent,
 					      type);
 	}
 
 	return ref_store->be->read_raw_ref(ref_store, refname, oid, referent,
-					   type, &errno);
+					   type, failure_errno);
 }
 
-/* This function needs to return a meaningful errno on failure */
-const char *refs_resolve_ref_unsafe(struct ref_store *refs,
-				    const char *refname,
-				    int resolve_flags,
-				    struct object_id *oid, int *flags)
+static const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs,
+					       const char *refname,
+					       int resolve_flags,
+					       struct object_id *oid,
+					       int *flags, int *failure_errno)
 {
 	static struct strbuf sb_refname = STRBUF_INIT;
 	struct object_id unused_oid;
 	int unused_flags;
 	int symref_count;
 
+	assert(failure_errno);
+
 	if (!oid)
 		oid = &unused_oid;
 	if (!flags)
@@ -1706,7 +1709,7 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 	if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
 		if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
 		    !refname_is_safe(refname)) {
-			errno = EINVAL;
+			*failure_errno = EINVAL;
 			return NULL;
 		}
 
@@ -1724,8 +1727,8 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 	for (symref_count = 0; symref_count < SYMREF_MAXDEPTH; symref_count++) {
 		unsigned int read_flags = 0;
 
-		if (refs_read_raw_ref(refs, refname,
-				      oid, &sb_refname, &read_flags)) {
+		if (refs_read_raw_ref(refs, refname, oid, &sb_refname,
+				      &read_flags, failure_errno)) {
 			*flags |= read_flags;
 
 			/* In reading mode, refs must eventually resolve */
@@ -1737,9 +1740,9 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 			 * may show errors besides ENOENT if there are
 			 * similarly-named refs.
 			 */
-			if (errno != ENOENT &&
-			    errno != EISDIR &&
-			    errno != ENOTDIR)
+			if (*failure_errno != ENOENT &&
+			    *failure_errno != EISDIR &&
+			    *failure_errno != ENOTDIR)
 				return NULL;
 
 			oidclr(oid);
@@ -1766,7 +1769,7 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 		if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
 			if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
 			    !refname_is_safe(refname)) {
-				errno = EINVAL;
+				*failure_errno = EINVAL;
 				return NULL;
 			}
 
@@ -1774,10 +1777,24 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 		}
 	}
 
-	errno = ELOOP;
+	*failure_errno = ELOOP;
 	return NULL;
 }
 
+const char *refs_resolve_ref_unsafe(struct ref_store *refs, const char *refname,
+				    int resolve_flags, struct object_id *oid,
+				    int *flags)
+{
+	int failure_errno = 0;
+	const char *refn;
+	refn = refs_resolve_ref_unsafe_with_errno(refs, refname, resolve_flags,
+						  oid, flags, &failure_errno);
+	if (!refn)
+		/* For unmigrated legacy callers */
+		errno = failure_errno;
+	return refn;
+}
+
 /* backend functions */
 int refs_init_db(struct strbuf *err)
 {
@@ -2228,6 +2245,13 @@ int refs_verify_refname_available(struct ref_store *refs,
 
 	strbuf_grow(&dirname, strlen(refname) + 1);
 	for (slash = strchr(refname, '/'); slash; slash = strchr(slash + 1, '/')) {
+		/*
+		 * Just saying "Is a directory" when we e.g. can't
+		 * lock some multi-level ref isn't very informative,
+		 * the user won't be told *what* is a directory, so
+		 * let's not use strerror() below.
+		 */
+		int ignore_errno;
 		/* Expand dirname to the new prefix, not including the trailing slash: */
 		strbuf_add(&dirname, refname + dirname.len, slash - refname - dirname.len);
 
@@ -2239,7 +2263,8 @@ int refs_verify_refname_available(struct ref_store *refs,
 		if (skip && string_list_has_string(skip, dirname.buf))
 			continue;
 
-		if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent, &type)) {
+		if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
+				       &type, &ignore_errno)) {
 			strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
 				    dirname.buf, refname);
 			goto cleanup;
diff --git a/refs/files-backend.c b/refs/files-backend.c
index ee37c2e6c99806..7191f036ba70f5 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -381,10 +381,11 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 		goto out;
 
 	if (lstat(path, &st) < 0) {
+		int ignore_errno;
 		if (errno != ENOENT)
 			goto out;
-		if (refs_read_raw_ref(refs->packed_ref_store, refname,
-				      oid, referent, type)) {
+		if (refs_read_raw_ref(refs->packed_ref_store, refname, oid,
+				      referent, type, &ignore_errno)) {
 			errno = ENOENT;
 			goto out;
 		}
@@ -418,13 +419,14 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 
 	/* Is it a directory? */
 	if (S_ISDIR(st.st_mode)) {
+		int ignore_errno;
 		/*
 		 * Even though there is a directory where the loose
 		 * ref is supposed to be, there could still be a
 		 * packed ref:
 		 */
-		if (refs_read_raw_ref(refs->packed_ref_store, refname,
-				      oid, referent, type)) {
+		if (refs_read_raw_ref(refs->packed_ref_store, refname, oid,
+				      referent, type, &ignore_errno)) {
 			errno = EISDIR;
 			goto out;
 		}
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 159ac7762401b8..923b9ad89de314 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1347,6 +1347,7 @@ int is_packed_transaction_needed(struct ref_store *ref_store,
 	ret = 0;
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *update = transaction->updates[i];
+		int failure_errno;
 		unsigned int type;
 		struct object_id oid;
 
@@ -1357,9 +1358,9 @@ int is_packed_transaction_needed(struct ref_store *ref_store,
 			 */
 			continue;
 
-		if (!refs_read_raw_ref(ref_store, update->refname,
-				       &oid, &referent, &type) ||
-		    errno != ENOENT) {
+		if (!refs_read_raw_ref(ref_store, update->refname, &oid,
+				       &referent, &type, &failure_errno) ||
+		    failure_errno != ENOENT) {
 			/*
 			 * We have to actually delete that reference
 			 * -> this transaction is needed.
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 33a31d5c2366c5..7beb38f79ccd81 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -149,9 +149,9 @@ struct ref_update {
 	const char refname[FLEX_ARRAY];
 };
 
-int refs_read_raw_ref(struct ref_store *ref_store,
-		      const char *refname, struct object_id *oid,
-		      struct strbuf *referent, unsigned int *type);
+int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
+		      struct object_id *oid, struct strbuf *referent,
+		      unsigned int *type, int *failure_errno);
 
 /*
  * Write an error to `err` and return a nonzero value iff the same

From ac7337ee0d2cd84fcbde733ce9ffcbc1dcc768b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:26 +0200
Subject: [PATCH 113/198] refs/files: remove unused "errno == EISDIR" code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we lock a reference like "foo" we need to handle the case where
"foo" exists, but is an empty directory. That's what this code added
in bc7127ef0f (ref locking: allow 'foo' when 'foo/bar' used to exist
but not anymore., 2006-09-30) seems like it should be dealing with.

Except it doesn't, and we never take this branch. The reason is that
when bc7127ef0f was written this looked like:

	ref = resolve_ref([...]);
	if (!ref && errno == EISDIR) {
	[...]

And in resolve_ref() we had this code:

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

I.e. we would attempt to read "foo" with open(), which would fail with
EISDIR and we'd return NULL. We'd then take this branch, call
remove_empty_directories() and continue.

Since a1c1d8170d (refs_resolve_ref_unsafe: handle d/f conflicts for
writes, 2017-10-06) we don't. E.g. in the case of
files_copy_or_rename_ref() our callstack will look something like:

	[...] ->
	files_copy_or_rename_ref() ->
	lock_ref_oid_basic() ->
	refs_resolve_ref_unsafe()

At that point the first (now only) refs_resolve_ref_unsafe() call in
lock_ref_oid_basic() would do the equivalent of this in the resulting
call to refs_read_raw_ref() in refs_resolve_ref_unsafe():

	/* Via refs_read_raw_ref() */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		/* get errno == EISDIR */
	/* later, in refs_resolve_ref_unsafe() */
	if ([...] && errno != EISDIR)
		return NULL;
	[...]
	/* returns the refs/heads/foo to the caller, even though it's a directory */
	return refname;

I.e. even though we got an "errno == EISDIR" we won't take this
branch, since in cases of EISDIR "resolved" is always
non-NULL. I.e. we pretend at this point as though everything's OK and
there is no "foo" directory.

We then proceed with the entire ref update and don't call
remove_empty_directories() until we call commit_ref_update(). See
5387c0d883 (commit_ref(): if there is an empty dir in the way, delete
it, 2016-05-05) for the addition of that code, and
a1c1d8170db (refs_resolve_ref_unsafe: handle d/f conflicts for writes,
2017-10-06) for the commit that changed the original codepath added in
bc7127ef0f to use this "EISDIR" handling.

Further historical commentary:

Before the two preceding commits the caller in files_reflog_expire()
was the only one out of our 4 callers that would pass non-NULL as an
oid. We would then set a (now gone) "resolve_flags" to
"RESOLVE_REF_READING" and just before that "errno != EISDIR" check do:

	if (resolve_flags & RESOLVE_REF_READING)
		return NULL;

There may have been some case where this ended up mattering and we
couldn't safely make this change before we removed the "oid"
parameter, but I don't think there was, see [1] for some discussion on
that.

In any case, now that we've removed the "oid" parameter in a preceding
commit we can be sure that this code is redundant, so let's remove it.

1. http://lore.kernel.org/git/871r801yp6.fsf@evledraar.gmail.com

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 98d9aa1601fa7b..28cd8853f5266b 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -883,7 +883,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	struct strbuf ref_file = STRBUF_INIT;
 	struct ref_lock *lock;
 	int last_errno = 0;
-	int resolved;
 
 	files_assert_main_repository(refs, "lock_ref_oid_basic");
 	assert(err);
@@ -891,30 +890,9 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	CALLOC_ARRAY(lock, 1);
 
 	files_ref_path(refs, &ref_file, refname);
-	resolved = !!refs_resolve_ref_unsafe(&refs->base, refname,
-					     RESOLVE_REF_NO_RECURSE,
-					     &lock->old_oid, type);
-	if (!resolved && errno == EISDIR) {
-		/*
-		 * we are trying to lock foo but we used to
-		 * have foo/bar which now does not exist;
-		 * it is normal for the empty directory 'foo'
-		 * to remain.
-		 */
-		if (remove_empty_directories(&ref_file)) {
-			last_errno = errno;
-			if (!refs_verify_refname_available(
-					    &refs->base,
-					    refname, NULL, NULL, err))
-				strbuf_addf(err, "there are still refs under '%s'",
-					    refname);
-			goto error_return;
-		}
-		resolved = !!refs_resolve_ref_unsafe(&refs->base, refname,
-						     RESOLVE_REF_NO_RECURSE,
-						     &lock->old_oid, type);
-	}
-	if (!resolved) {
+	if (!refs_resolve_ref_unsafe(&refs->base, refname,
+				     RESOLVE_REF_NO_RECURSE,
+				     &lock->old_oid, type)) {
 		last_errno = errno;
 		if (last_errno != ENOTDIR ||
 		    !refs_verify_refname_available(&refs->base, refname,

From a073125011b776bd7bd1deedfd0ec898915f6763 Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:29 +0200
Subject: [PATCH 114/198] refs: explicitly return failure_errno from
 parse_loose_ref_contents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EINVAL error from parse_loose_ref_contents is used in files-backend
to create a custom error message.

In untangling this we discovered a tricky edge case. The
refs_read_special_head() function was relying on
parse_loose_ref_contents() setting EINVAL.

By converting it to use "saved_errno" we can migrate away from "errno"
in this part of the code entirely, and do away with an existing
"save_errno" pattern, its only purpose was to not clobber the "errno"
we previously needed at the end of files_read_raw_ref().

Let's assert that we can do that by not having files_read_raw_ref()
itself operate on *failure_errno in addition to passing it on. Instead
we'll assert that if we return non-zero we actually do set errno, thus
assuring ourselves and callers that they can trust the resulting
"failure_errno".

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c               |  8 +++++---
 refs/files-backend.c | 30 +++++++++++++++++++-----------
 refs/refs-internal.h |  6 ++++--
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/refs.c b/refs.c
index 728285c922000e..b31dbdd0fa2be5 100644
--- a/refs.c
+++ b/refs.c
@@ -1654,7 +1654,8 @@ int for_each_fullref_in_prefixes(const char *namespace,
 
 static int refs_read_special_head(struct ref_store *ref_store,
 				  const char *refname, struct object_id *oid,
-				  struct strbuf *referent, unsigned int *type)
+				  struct strbuf *referent, unsigned int *type,
+				  int *failure_errno)
 {
 	struct strbuf full_path = STRBUF_INIT;
 	struct strbuf content = STRBUF_INIT;
@@ -1664,7 +1665,8 @@ static int refs_read_special_head(struct ref_store *ref_store,
 	if (strbuf_read_file(&content, full_path.buf, 0) < 0)
 		goto done;
 
-	result = parse_loose_ref_contents(content.buf, oid, referent, type);
+	result = parse_loose_ref_contents(content.buf, oid, referent, type,
+					  failure_errno);
 
 done:
 	strbuf_release(&full_path);
@@ -1679,7 +1681,7 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	assert(failure_errno);
 	if (!strcmp(refname, "FETCH_HEAD") || !strcmp(refname, "MERGE_HEAD")) {
 		return refs_read_special_head(ref_store, refname, oid, referent,
-					      type);
+					      type, failure_errno);
 	}
 
 	return ref_store->be->read_raw_ref(ref_store, refname, oid, referent,
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 7191f036ba70f5..be9e2c548272f3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -355,6 +355,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	int fd;
 	int ret = -1;
 	int remaining_retries = 3;
+	int myerr = 0;
 
 	*type = 0;
 	strbuf_reset(&sb_path);
@@ -382,11 +383,13 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 
 	if (lstat(path, &st) < 0) {
 		int ignore_errno;
-		if (errno != ENOENT)
+		myerr = errno;
+		errno = 0;
+		if (myerr != ENOENT)
 			goto out;
 		if (refs_read_raw_ref(refs->packed_ref_store, refname, oid,
 				      referent, type, &ignore_errno)) {
-			errno = ENOENT;
+			myerr = ENOENT;
 			goto out;
 		}
 		ret = 0;
@@ -397,7 +400,9 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	if (S_ISLNK(st.st_mode)) {
 		strbuf_reset(&sb_contents);
 		if (strbuf_readlink(&sb_contents, path, st.st_size) < 0) {
-			if (errno == ENOENT || errno == EINVAL)
+			myerr = errno;
+			errno = 0;
+			if (myerr == ENOENT || myerr == EINVAL)
 				/* inconsistent with lstat; retry */
 				goto stat_ref;
 			else
@@ -427,7 +432,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 		 */
 		if (refs_read_raw_ref(refs->packed_ref_store, refname, oid,
 				      referent, type, &ignore_errno)) {
-			errno = EISDIR;
+			myerr = EISDIR;
 			goto out;
 		}
 		ret = 0;
@@ -440,7 +445,8 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	 */
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		if (errno == ENOENT && !S_ISLNK(st.st_mode))
+		myerr = errno;
+		if (myerr == ENOENT && !S_ISLNK(st.st_mode))
 			/* inconsistent with lstat; retry */
 			goto stat_ref;
 		else
@@ -448,26 +454,28 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
 	}
 	strbuf_reset(&sb_contents);
 	if (strbuf_read(&sb_contents, fd, 256) < 0) {
-		int save_errno = errno;
 		close(fd);
-		errno = save_errno;
 		goto out;
 	}
 	close(fd);
 	strbuf_rtrim(&sb_contents);
 	buf = sb_contents.buf;
 
-	ret = parse_loose_ref_contents(buf, oid, referent, type);
+	ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
 
 out:
-	*failure_errno = errno;
+	if (ret && !myerr)
+		BUG("returning non-zero %d, should have set myerr!", ret);
+	*failure_errno = myerr;
+
 	strbuf_release(&sb_path);
 	strbuf_release(&sb_contents);
 	return ret;
 }
 
 int parse_loose_ref_contents(const char *buf, struct object_id *oid,
-			     struct strbuf *referent, unsigned int *type)
+			     struct strbuf *referent, unsigned int *type,
+			     int *failure_errno)
 {
 	const char *p;
 	if (skip_prefix(buf, "ref:", &buf)) {
@@ -486,7 +494,7 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
 	if (parse_oid_hex(buf, oid, &p) ||
 	    (*p != '\0' && !isspace(*p))) {
 		*type |= REF_ISBROKEN;
-		errno = EINVAL;
+		*failure_errno = EINVAL;
 		return -1;
 	}
 	return 0;
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 7beb38f79ccd81..9aa4af8183692d 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -692,10 +692,12 @@ struct ref_store {
 };
 
 /*
- * Parse contents of a loose ref file.
+ * Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
+ * invalid contents.
  */
 int parse_loose_ref_contents(const char *buf, struct object_id *oid,
-			     struct strbuf *referent, unsigned int *type);
+			     struct strbuf *referent, unsigned int *type,
+			     int *failure_errno);
 
 /*
  * Fill in the generic part of refs and add it to our collection of

From af5d6e2c1fbd2d09424ea0898ad6c5e35b7545be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 27 Jul 2021 01:44:27 +0200
Subject: [PATCH 115/198] refs/files: remove unused "errno != ENOTDIR"
 condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a follow-up to the preceding commit where we removed the adjacent
"errno == EISDIR" condition in the same function, remove the
"last_errno != ENOTDIR" condition here.

It's not possible for us to hit this condition added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11). Since a1c1d8170db (refs_resolve_ref_unsafe:
handle d/f conflicts for writes, 2017-10-06) we've explicitly caught
these in refs_resolve_ref_unsafe() before returning NULL:

	if (errno != ENOENT &&
	    errno != EISDIR &&
	    errno != ENOTDIR)
		return NULL;

We'd then always return the refname from refs_resolve_ref_unsafe()
even if we were in a broken state as explained in the preceding
commit. The elided context here is a call to refs_resolve_ref_unsafe().

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 28cd8853f5266b..5d12003471e586 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -894,8 +894,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 				     RESOLVE_REF_NO_RECURSE,
 				     &lock->old_oid, type)) {
 		last_errno = errno;
-		if (last_errno != ENOTDIR ||
-		    !refs_verify_refname_available(&refs->base, refname,
+		if (!refs_verify_refname_available(&refs->base, refname,
 						   NULL, NULL, err))
 			strbuf_addf(err, "unable to resolve reference '%s': %s",
 				    refname, strerror(last_errno));

From 9f6b82ec2801fdde1d09abc6b651c48aba381567 Mon Sep 17 00:00:00 2001
From: Han-Wen Nienhuys <hanwen@google.com>
Date: Tue, 20 Jul 2021 12:33:30 +0200
Subject: [PATCH 116/198] refs: make errno output explicit for
 refs_resolve_ref_unsafe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This introduces refs_resolve_ref_unsafe_with_errno(), which makes the API
contract for the errno output explicit. The implementation still relies on
the global errno variable to ensure no side effects of this refactoring.

lock_ref_oid_basic() in files-backend.c is the only caller of refs_resolve_ref()
that needs error information to make logic decisions, so update that caller

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c               |  2 +-
 refs.h               | 11 +++++++++++
 refs/files-backend.c | 10 ++++++----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/refs.c b/refs.c
index b31dbdd0fa2be5..9497b51e0a1aab 100644
--- a/refs.c
+++ b/refs.c
@@ -1688,7 +1688,7 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
 					   type, failure_errno);
 }
 
-static const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs,
+const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs,
 					       const char *refname,
 					       int resolve_flags,
 					       struct object_id *oid,
diff --git a/refs.h b/refs.h
index ddbf15f1c2119d..9a5b4763ee6a78 100644
--- a/refs.h
+++ b/refs.h
@@ -68,6 +68,17 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs,
 				    int resolve_flags,
 				    struct object_id *oid,
 				    int *flags);
+/**
+ * refs_resolve_ref_unsafe_with_errno() is like
+ * refs_resolve_ref_unsafe(), but provide access to errno code that
+ * lead to a failure. We guarantee that errno is set to a meaningful
+ * value on non-zero return.
+ */
+const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs,
+					       const char *refname,
+					       int resolve_flags,
+					       struct object_id *oid,
+					       int *flags, int *failure_errno);
 const char *resolve_ref_unsafe(const char *refname, int resolve_flags,
 			       struct object_id *oid, int *flags);
 
diff --git a/refs/files-backend.c b/refs/files-backend.c
index be9e2c548272f3..598a487db9e07e 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1001,6 +1001,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 {
 	struct strbuf ref_file = STRBUF_INIT;
 	struct ref_lock *lock;
+	int resolve_errno = 0;
 
 	files_assert_main_repository(refs, "lock_ref_oid_basic");
 	assert(err);
@@ -1008,13 +1009,14 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
 	CALLOC_ARRAY(lock, 1);
 
 	files_ref_path(refs, &ref_file, refname);
-	if (!refs_resolve_ref_unsafe(&refs->base, refname,
-				     RESOLVE_REF_NO_RECURSE,
-				     &lock->old_oid, type)) {
+	if (!refs_resolve_ref_unsafe_with_errno(&refs->base, refname,
+						RESOLVE_REF_NO_RECURSE,
+						&lock->old_oid, type,
+						&resolve_errno)) {
 		if (!refs_verify_refname_available(&refs->base, refname,
 						   NULL, NULL, err))
 			strbuf_addf(err, "unable to resolve reference '%s': %s",
-				    refname, strerror(errno));
+				    refname, strerror(resolve_errno));
 
 		goto error_return;
 	}

From 326f261b5e79c372ff5138d30d4d814591b604f0 Mon Sep 17 00:00:00 2001
From: ZheNing Hu <adlternative@gmail.com>
Date: Tue, 3 Aug 2021 01:16:21 +0000
Subject: [PATCH 117/198] cherry-pick: fix bug when used with
 GIT_CHERRY_PICK_HELP

GIT_CHERRY_PICK_HELP is an environment variable, as the
implementation detail of some porcelain in git to help realize
the rebasing steps. E.g. `git rebase -p` set GIT_CHERRY_PICK_HELP
value in `git-rebase--preserve-merges.sh`, `git rebase --merge` set
GIT_CHERRY_PICK_HELP value in run_specific_rebase(). But If we set
the value of GIT_CHERRY_PICK_HELP when using `git cherry-pick`,
CHERRY_PICK_HEAD will be deleted, then we will get an error when we
try to use `git cherry-pick --continue` or other cherr-pick command.

Introduce new "hidden" option `--delete-cherry-pick-head` for git
cherry-pick which indicates that CHERRY_PICK_HEAD will be deleted when
conflict occurs, which provided for some porcelain commands of git like
`git-rebase--preserve-merges.sh`. After `git rebase -p` completely
abolished, this option should be removed. At the same time, add the flag
`delete_cherry_pick_head` to `struct replay_opts`, We can decide whether
to delete CHERRY_PICK_HEAD by setting and checking this flag bit.

Then we split print_advice() into two part: Firstly, print_advice()
will only be responsible for outputting content; Secondly, check if
we set the `delete_cherry_pick_head` flag; if set, delete CHERRY_PICK_HEAD.
In this way, the steps of printing advice and deleting CHERRY_PICK_HEAD
are decoupled. Finally, let `git-rebase--preserve-merges.sh` use the
`--delete-cherry-pick-head` option when it executes git cherry-pick, and
set the `delete_cherry_pick_head` flag in get_replay_opts() when we
are using `git rebase --merge`, which can fix this breakage.

It is worth mentioning that now we use advice() to print the content
of GIT_CHERRY_PICK_HELP in print_advice(), each line of output will
start with "hint: ".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by Hariom Verma <hariom18599@gmail.com>:
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Hepled-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/rebase.c                |  1 +
 builtin/revert.c                |  2 ++
 git-rebase--preserve-merges.sh  |  2 +-
 sequencer.c                     | 28 +++++++++++++---------------
 sequencer.h                     |  1 +
 t/t3507-cherry-pick-conflict.sh | 31 +++++++++++++++++++++----------
 6 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 12f093121d9ed3..08ba437c6a0a33 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -152,6 +152,7 @@ static struct replay_opts get_replay_opts(const struct rebase_options *opts)
 		oidcpy(&replay.squash_onto, opts->squash_onto);
 		replay.have_squash_onto = 1;
 	}
+	replay.delete_cherry_pick_head = 1;
 
 	return replay;
 }
diff --git a/builtin/revert.c b/builtin/revert.c
index 237f2f18d4c035..15a4b6fe4eeba5 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -127,6 +127,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 			OPT_BOOL(0, "allow-empty", &opts->allow_empty, N_("preserve initially empty commits")),
 			OPT_BOOL(0, "allow-empty-message", &opts->allow_empty_message, N_("allow commits with empty messages")),
 			OPT_BOOL(0, "keep-redundant-commits", &opts->keep_redundant_commits, N_("keep redundant, empty commits")),
+			OPT_BOOL_F(0, "delete-cherry-pick-head", &opts->delete_cherry_pick_head,
+				   N_("delete CHERRY_PICK_HEAD when conflict occurs"), PARSE_OPT_HIDDEN),
 			OPT_END(),
 		};
 		options = parse_options_concat(options, cp_extra);
diff --git a/git-rebase--preserve-merges.sh b/git-rebase--preserve-merges.sh
index b9c71d2a71bdd3..eaa8f9de2c5a16 100644
--- a/git-rebase--preserve-merges.sh
+++ b/git-rebase--preserve-merges.sh
@@ -444,7 +444,7 @@ pick_one_preserving_merges () {
 			output eval git cherry-pick $allow_rerere_autoupdate \
 				$allow_empty_message \
 				${gpg_sign_opt:+$(git rev-parse --sq-quote "$gpg_sign_opt")} \
-				"$strategy_args" "$@" ||
+				"$strategy_args" --delete-cherry-pick-head "$@" ||
 				die_with_patch $sha1 "$(eval_gettext "Could not pick \$sha1")"
 			;;
 		esac
diff --git a/sequencer.c b/sequencer.c
index 0bec01cf38e817..83cf6a5da3c24e 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -397,24 +397,13 @@ static void free_message(struct commit *commit, struct commit_message *msg)
 	unuse_commit_buffer(commit, msg->message);
 }
 
-static void print_advice(struct repository *r, int show_hint,
-			 struct replay_opts *opts)
+static void print_advice(struct replay_opts *opts, int show_hint)
 {
 	char *msg = getenv("GIT_CHERRY_PICK_HELP");
 
 	if (msg) {
-		fprintf(stderr, "%s\n", msg);
-		/*
-		 * A conflict has occurred but the porcelain
-		 * (typically rebase --interactive) wants to take care
-		 * of the commit itself so remove CHERRY_PICK_HEAD
-		 */
-		refs_delete_ref(get_main_ref_store(r), "", "CHERRY_PICK_HEAD",
-				NULL, 0);
-		return;
-	}
-
-	if (show_hint) {
+		advise("%s\n", msg);
+	} else if (show_hint) {
 		if (opts->no_commit)
 			advise(_("after resolving the conflicts, mark the corrected paths\n"
 				 "with 'git add <paths>' or 'git rm <paths>'"));
@@ -2265,7 +2254,16 @@ static int do_pick_commit(struct repository *r,
 		      ? _("could not revert %s... %s")
 		      : _("could not apply %s... %s"),
 		      short_commit_name(commit), msg.subject);
-		print_advice(r, res == 1, opts);
+		print_advice(opts, res == 1);
+		if (opts->delete_cherry_pick_head) {
+			/*
+			 * A conflict has occurred but the porcelain
+			 * (typically rebase --interactive) wants to take care
+			 * of the commit itself so remove CHERRY_PICK_HEAD
+			 */
+			refs_delete_ref(get_main_ref_store(r), "", "CHERRY_PICK_HEAD",
+					NULL, 0);
+		}
 		repo_rerere(r, opts->allow_rerere_auto);
 		goto leave;
 	}
diff --git a/sequencer.h b/sequencer.h
index d57d8ea23d7a22..76fb4af56fd464 100644
--- a/sequencer.h
+++ b/sequencer.h
@@ -49,6 +49,7 @@ struct replay_opts {
 	int reschedule_failed_exec;
 	int committer_date_is_author_date;
 	int ignore_date;
+	int delete_cherry_pick_head;
 
 	int mainline;
 
diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh
index 014001b8f325c0..af5678d981ab59 100755
--- a/t/t3507-cherry-pick-conflict.sh
+++ b/t/t3507-cherry-pick-conflict.sh
@@ -76,12 +76,33 @@ test_expect_success 'advice from failed cherry-pick --no-commit' "
 	test_cmp expected actual
 "
 
+test_expect_success 'advice from failed cherry-pick with GIT_CHERRY_PICK_HELP' "
+	pristine_detach initial &&
+	(
+		picked=\$(git rev-parse --short picked) &&
+		cat <<-EOF >expected &&
+		error: could not apply \$picked... picked
+		hint: and then do something else
+		EOF
+		GIT_CHERRY_PICK_HELP='and then do something else' &&
+		export GIT_CHERRY_PICK_HELP &&
+		test_must_fail git cherry-pick picked 2>actual &&
+		test_cmp expected actual
+	)
+"
+
 test_expect_success 'failed cherry-pick sets CHERRY_PICK_HEAD' '
 	pristine_detach initial &&
 	test_must_fail git cherry-pick picked &&
 	test_cmp_rev picked CHERRY_PICK_HEAD
 '
 
+test_expect_success 'failed cherry-pick with --delete-cherry-pick-head does not set CHERRY_PICK_HEAD' '
+	pristine_detach initial &&
+	test_must_fail git cherry-pick --delete-cherry-pick-head picked &&
+	test_must_fail git rev-parse --verify CHERRY_PICK_HEAD
+'
+
 test_expect_success 'successful cherry-pick does not set CHERRY_PICK_HEAD' '
 	pristine_detach initial &&
 	git cherry-pick base &&
@@ -109,16 +130,6 @@ test_expect_success \
 	test_must_fail git rev-parse --verify CHERRY_PICK_HEAD
 '
 
-test_expect_success 'GIT_CHERRY_PICK_HELP suppresses CHERRY_PICK_HEAD' '
-	pristine_detach initial &&
-	(
-		GIT_CHERRY_PICK_HELP="and then do something else" &&
-		export GIT_CHERRY_PICK_HELP &&
-		test_must_fail git cherry-pick picked
-	) &&
-	test_must_fail git rev-parse --verify CHERRY_PICK_HEAD
-'
-
 test_expect_success 'git reset clears CHERRY_PICK_HEAD' '
 	pristine_detach initial &&
 

From 86215558032cea84dcc14da1fa58286c333d58f8 Mon Sep 17 00:00:00 2001
From: ZheNing Hu <adlternative@gmail.com>
Date: Tue, 3 Aug 2021 01:16:22 +0000
Subject: [PATCH 118/198] cherry-pick: use better advice message

In the past, git cherry-pick would print such advice when
there was a conflict:

hint: after resolving the conflicts, mark the corrected paths
hint: with 'git add <paths>' or 'git rm <paths>'
hint: and commit the result with 'git commit'

But in fact, when we want to cherry-pick multiple commits,
we should not use "git commit" after resolving conflicts, which
will make Git generate some errors. We should recommend users to
use `git cherry-pick --continue`, `git cherry-pick --abort`, just
like git rebase does.

This is the improved advice:

hint: Resolve all conflicts manually, mark them as resolved with
hint: "git add/rm <conflicted_files>", then run
hint: "git cherry-pick --continue".
hint: You can instead skip this commit: run "git cherry-pick --skip".
hint: To abort and get back to the state before "git cherry-pick",
hint: run "git cherry-pick --abort".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by Hariom Verma <hariom18599@gmail.com>:
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Hepled-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sequencer.c                     | 10 +++++++++-
 t/t3507-cherry-pick-conflict.sh | 17 ++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 83cf6a5da3c24e..bf7dbea81ddffb 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -404,7 +404,15 @@ static void print_advice(struct replay_opts *opts, int show_hint)
 	if (msg) {
 		advise("%s\n", msg);
 	} else if (show_hint) {
-		if (opts->no_commit)
+		if (opts->action == REPLAY_PICK) {
+			advise(_("Resolve all conflicts manually, mark them as resolved with\n"
+				 "\"git add/rm <conflicted_files>\", then run\n"
+				 "\"git cherry-pick --continue\".\n"
+				 "You can instead skip this commit: run \"git cherry-pick --skip\".\n"
+				 "To abort and get back to the state before \"git cherry-pick\",\n"
+				 "run \"git cherry-pick --abort\"."));
+
+		} else if (opts->no_commit)
 			advise(_("after resolving the conflicts, mark the corrected paths\n"
 				 "with 'git add <paths>' or 'git rm <paths>'"));
 		else
diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh
index af5678d981ab59..e953b54e54d346 100755
--- a/t/t3507-cherry-pick-conflict.sh
+++ b/t/t3507-cherry-pick-conflict.sh
@@ -53,9 +53,12 @@ test_expect_success 'advice from failed cherry-pick' "
 	picked=\$(git rev-parse --short picked) &&
 	cat <<-EOF >expected &&
 	error: could not apply \$picked... picked
-	hint: after resolving the conflicts, mark the corrected paths
-	hint: with 'git add <paths>' or 'git rm <paths>'
-	hint: and commit the result with 'git commit'
+	hint: Resolve all conflicts manually, mark them as resolved with
+	hint: \"git add/rm <conflicted_files>\", then run
+	hint: \"git cherry-pick --continue\".
+	hint: You can instead skip this commit: run \"git cherry-pick --skip\".
+	hint: To abort and get back to the state before \"git cherry-pick\",
+	hint: run \"git cherry-pick --abort\".
 	EOF
 	test_must_fail git cherry-pick picked 2>actual &&
 
@@ -68,8 +71,12 @@ test_expect_success 'advice from failed cherry-pick --no-commit' "
 	picked=\$(git rev-parse --short picked) &&
 	cat <<-EOF >expected &&
 	error: could not apply \$picked... picked
-	hint: after resolving the conflicts, mark the corrected paths
-	hint: with 'git add <paths>' or 'git rm <paths>'
+	hint: Resolve all conflicts manually, mark them as resolved with
+	hint: \"git add/rm <conflicted_files>\", then run
+	hint: \"git cherry-pick --continue\".
+	hint: You can instead skip this commit: run \"git cherry-pick --skip\".
+	hint: To abort and get back to the state before \"git cherry-pick\",
+	hint: run \"git cherry-pick --abort\".
 	EOF
 	test_must_fail git cherry-pick --no-commit picked 2>actual &&
 

From c2223851649c88a4723b79e6c2fe87efe6feffc1 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:50 +0000
Subject: [PATCH 119/198] ssh signing: preliminary refactoring and clean-up

Openssh v8.2p1 added some new options to ssh-keygen for signature
creation and verification. These allow us to use ssh keys for git
signatures easily.

In our corporate environment we use PIV x509 Certs on Yubikeys for email
signing/encryption and ssh keys which I think is quite common
(at least for the email part). This way we can establish the correct
trust for the SSH Keys without setting up a separate GPG Infrastructure
(which is still quite painful for users) or implementing x509 signing
support for git (which lacks good forwarding mechanisms).
Using ssh agent forwarding makes this feature easily usable in todays
development environments where code is often checked out in remote VMs / containers.
In such a setup the keyring & revocationKeyring can be centrally
generated from the x509 CA information and distributed to the users.

To be able to implement new signing formats this commit:
 - makes the sigc structure more generic by renaming "gpg_output" to
   "output"
 - introduces function pointers in the gpg_format structure to call
   format specific signing and verification functions
 - moves format detection from verify_signed_buffer into the check_signature
   api function and calls the format specific verify
 - renames and wraps sign_buffer to handle format specific signing logic
   as well

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fmt-merge-msg.c |   6 +--
 gpg-interface.c | 104 +++++++++++++++++++++++++++++-------------------
 gpg-interface.h |   2 +-
 log-tree.c      |   8 ++--
 pretty.c        |   4 +-
 5 files changed, 74 insertions(+), 50 deletions(-)

diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c
index 0f66818e0f839f..fb300bb4b67861 100644
--- a/fmt-merge-msg.c
+++ b/fmt-merge-msg.c
@@ -526,11 +526,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out)
 			buf = payload.buf;
 			len = payload.len;
 			if (check_signature(payload.buf, payload.len, sig.buf,
-					 sig.len, &sigc) &&
-				!sigc.gpg_output)
+					    sig.len, &sigc) &&
+			    !sigc.output)
 				strbuf_addstr(&sig, "gpg verification failed.\n");
 			else
-				strbuf_addstr(&sig, sigc.gpg_output);
+				strbuf_addstr(&sig, sigc.output);
 		}
 		signature_check_clear(&sigc);
 
diff --git a/gpg-interface.c b/gpg-interface.c
index 127aecfc2b071f..db54b05416257d 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -15,6 +15,12 @@ struct gpg_format {
 	const char *program;
 	const char **verify_args;
 	const char **sigs;
+	int (*verify_signed_buffer)(struct signature_check *sigc,
+				    struct gpg_format *fmt, const char *payload,
+				    size_t payload_size, const char *signature,
+				    size_t signature_size);
+	int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature,
+			   const char *signing_key);
 };
 
 static const char *openpgp_verify_args[] = {
@@ -35,14 +41,29 @@ static const char *x509_sigs[] = {
 	NULL
 };
 
+static int verify_gpg_signed_buffer(struct signature_check *sigc,
+				    struct gpg_format *fmt, const char *payload,
+				    size_t payload_size, const char *signature,
+				    size_t signature_size);
+static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
+			   const char *signing_key);
+
 static struct gpg_format gpg_format[] = {
-	{ .name = "openpgp", .program = "gpg",
-	  .verify_args = openpgp_verify_args,
-	  .sigs = openpgp_sigs
+	{
+		.name = "openpgp",
+		.program = "gpg",
+		.verify_args = openpgp_verify_args,
+		.sigs = openpgp_sigs,
+		.verify_signed_buffer = verify_gpg_signed_buffer,
+		.sign_buffer = sign_buffer_gpg,
 	},
-	{ .name = "x509", .program = "gpgsm",
-	  .verify_args = x509_verify_args,
-	  .sigs = x509_sigs
+	{
+		.name = "x509",
+		.program = "gpgsm",
+		.verify_args = x509_verify_args,
+		.sigs = x509_sigs,
+		.verify_signed_buffer = verify_gpg_signed_buffer,
+		.sign_buffer = sign_buffer_gpg,
 	},
 };
 
@@ -72,7 +93,7 @@ static struct gpg_format *get_format_by_sig(const char *sig)
 void signature_check_clear(struct signature_check *sigc)
 {
 	FREE_AND_NULL(sigc->payload);
-	FREE_AND_NULL(sigc->gpg_output);
+	FREE_AND_NULL(sigc->output);
 	FREE_AND_NULL(sigc->gpg_status);
 	FREE_AND_NULL(sigc->signer);
 	FREE_AND_NULL(sigc->key);
@@ -257,16 +278,16 @@ static void parse_gpg_output(struct signature_check *sigc)
 	FREE_AND_NULL(sigc->key);
 }
 
-static int verify_signed_buffer(const char *payload, size_t payload_size,
-				const char *signature, size_t signature_size,
-				struct strbuf *gpg_output,
-				struct strbuf *gpg_status)
+static int verify_gpg_signed_buffer(struct signature_check *sigc,
+				    struct gpg_format *fmt, const char *payload,
+				    size_t payload_size, const char *signature,
+				    size_t signature_size)
 {
 	struct child_process gpg = CHILD_PROCESS_INIT;
-	struct gpg_format *fmt;
 	struct tempfile *temp;
 	int ret;
-	struct strbuf buf = STRBUF_INIT;
+	struct strbuf gpg_stdout = STRBUF_INIT;
+	struct strbuf gpg_stderr = STRBUF_INIT;
 
 	temp = mks_tempfile_t(".git_vtag_tmpXXXXXX");
 	if (!temp)
@@ -279,10 +300,6 @@ static int verify_signed_buffer(const char *payload, size_t payload_size,
 		return -1;
 	}
 
-	fmt = get_format_by_sig(signature);
-	if (!fmt)
-		BUG("bad signature '%s'", signature);
-
 	strvec_push(&gpg.args, fmt->program);
 	strvec_pushv(&gpg.args, fmt->verify_args);
 	strvec_pushl(&gpg.args,
@@ -290,18 +307,22 @@ static int verify_signed_buffer(const char *payload, size_t payload_size,
 		     "--verify", temp->filename.buf, "-",
 		     NULL);
 
-	if (!gpg_status)
-		gpg_status = &buf;
-
 	sigchain_push(SIGPIPE, SIG_IGN);
-	ret = pipe_command(&gpg, payload, payload_size,
-			   gpg_status, 0, gpg_output, 0);
+	ret = pipe_command(&gpg, payload, payload_size, &gpg_stdout, 0,
+			   &gpg_stderr, 0);
 	sigchain_pop(SIGPIPE);
 
 	delete_tempfile(&temp);
 
-	ret |= !strstr(gpg_status->buf, "\n[GNUPG:] GOODSIG ");
-	strbuf_release(&buf); /* no matter it was used or not */
+	ret |= !strstr(gpg_stdout.buf, "\n[GNUPG:] GOODSIG ");
+	sigc->payload = xmemdupz(payload, payload_size);
+	sigc->output = strbuf_detach(&gpg_stderr, NULL);
+	sigc->gpg_status = strbuf_detach(&gpg_stdout, NULL);
+
+	parse_gpg_output(sigc);
+
+	strbuf_release(&gpg_stdout);
+	strbuf_release(&gpg_stderr);
 
 	return ret;
 }
@@ -309,35 +330,32 @@ static int verify_signed_buffer(const char *payload, size_t payload_size,
 int check_signature(const char *payload, size_t plen, const char *signature,
 	size_t slen, struct signature_check *sigc)
 {
-	struct strbuf gpg_output = STRBUF_INIT;
-	struct strbuf gpg_status = STRBUF_INIT;
+	struct gpg_format *fmt;
 	int status;
 
 	sigc->result = 'N';
 	sigc->trust_level = -1;
 
-	status = verify_signed_buffer(payload, plen, signature, slen,
-				      &gpg_output, &gpg_status);
-	if (status && !gpg_output.len)
-		goto out;
-	sigc->payload = xmemdupz(payload, plen);
-	sigc->gpg_output = strbuf_detach(&gpg_output, NULL);
-	sigc->gpg_status = strbuf_detach(&gpg_status, NULL);
-	parse_gpg_output(sigc);
+	fmt = get_format_by_sig(signature);
+	if (!fmt)
+		die(_("bad/incompatible signature '%s'"), signature);
+
+	status = fmt->verify_signed_buffer(sigc, fmt, payload, plen, signature,
+					   slen);
+
+	if (status && !sigc->output)
+		return !!status;
+
 	status |= sigc->result != 'G';
 	status |= sigc->trust_level < configured_min_trust_level;
 
- out:
-	strbuf_release(&gpg_status);
-	strbuf_release(&gpg_output);
-
 	return !!status;
 }
 
 void print_signature_buffer(const struct signature_check *sigc, unsigned flags)
 {
-	const char *output = flags & GPG_VERIFY_RAW ?
-		sigc->gpg_status : sigc->gpg_output;
+	const char *output = flags & GPG_VERIFY_RAW ? sigc->gpg_status :
+							    sigc->output;
 
 	if (flags & GPG_VERIFY_VERBOSE && sigc->payload)
 		fputs(sigc->payload, stdout);
@@ -441,6 +459,12 @@ const char *get_signing_key(void)
 }
 
 int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const char *signing_key)
+{
+	return use_format->sign_buffer(buffer, signature, signing_key);
+}
+
+static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
+			  const char *signing_key)
 {
 	struct child_process gpg = CHILD_PROCESS_INIT;
 	int ret;
diff --git a/gpg-interface.h b/gpg-interface.h
index 80567e4894868d..feac4decf8b79f 100644
--- a/gpg-interface.h
+++ b/gpg-interface.h
@@ -17,7 +17,7 @@ enum signature_trust_level {
 
 struct signature_check {
 	char *payload;
-	char *gpg_output;
+	char *output;
 	char *gpg_status;
 
 	/*
diff --git a/log-tree.c b/log-tree.c
index 7b823786c2cba7..20af9bd1c82575 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -513,10 +513,10 @@ static void show_signature(struct rev_info *opt, struct commit *commit)
 
 	status = check_signature(payload.buf, payload.len, signature.buf,
 				 signature.len, &sigc);
-	if (status && !sigc.gpg_output)
+	if (status && !sigc.output)
 		show_sig_lines(opt, status, "No signature\n");
 	else
-		show_sig_lines(opt, status, sigc.gpg_output);
+		show_sig_lines(opt, status, sigc.output);
 	signature_check_clear(&sigc);
 
  out:
@@ -583,8 +583,8 @@ static int show_one_mergetag(struct commit *commit,
 		/* could have a good signature */
 		status = check_signature(payload.buf, payload.len,
 					 signature.buf, signature.len, &sigc);
-		if (sigc.gpg_output)
-			strbuf_addstr(&verify_message, sigc.gpg_output);
+		if (sigc.output)
+			strbuf_addstr(&verify_message, sigc.output);
 		else
 			strbuf_addstr(&verify_message, "No signature\n");
 		signature_check_clear(&sigc);
diff --git a/pretty.c b/pretty.c
index b1ecd039cef29e..daa71394efd749 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1432,8 +1432,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
 			check_commit_signature(c->commit, &(c->signature_check));
 		switch (placeholder[1]) {
 		case 'G':
-			if (c->signature_check.gpg_output)
-				strbuf_addstr(sb, c->signature_check.gpg_output);
+			if (c->signature_check.output)
+				strbuf_addstr(sb, c->signature_check.output);
 			break;
 		case '?':
 			switch (c->signature_check.result) {

From 3a3fdc0b4ea60f884f3bdb18058111e47014c998 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:51 +0000
Subject: [PATCH 120/198] ssh signing: add test prereqs

Generate some ssh keys and a allowedSignersFile for testing

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-gpg.sh | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/t/lib-gpg.sh b/t/lib-gpg.sh
index 9fc5241228e800..f99ef3e859dd43 100644
--- a/t/lib-gpg.sh
+++ b/t/lib-gpg.sh
@@ -87,6 +87,34 @@ test_lazy_prereq RFC1991 '
 	echo | gpg --homedir "${GNUPGHOME}" -b --rfc1991 >/dev/null
 '
 
+GPGSSH_KEY_PRIMARY="${GNUPGHOME}/ed25519_ssh_signing_key"
+GPGSSH_KEY_SECONDARY="${GNUPGHOME}/rsa_2048_ssh_signing_key"
+GPGSSH_KEY_UNTRUSTED="${GNUPGHOME}/untrusted_ssh_signing_key"
+GPGSSH_KEY_WITH_PASSPHRASE="${GNUPGHOME}/protected_ssh_signing_key"
+GPGSSH_KEY_PASSPHRASE="super_secret"
+GPGSSH_ALLOWED_SIGNERS="${GNUPGHOME}/ssh.all_valid.allowedSignersFile"
+
+GPGSSH_GOOD_SIGNATURE_TRUSTED='Good "git" signature for'
+GPGSSH_GOOD_SIGNATURE_UNTRUSTED='Good "git" signature with'
+GPGSSH_KEY_NOT_TRUSTED="No principal matched"
+GPGSSH_BAD_SIGNATURE="Signature verification failed"
+
+test_lazy_prereq GPGSSH '
+	ssh_version=$(ssh-keygen -Y find-principals -n "git" 2>&1)
+	test $? != 127 || exit 1
+	echo $ssh_version | grep -q "find-principals:missing signature file"
+	test $? = 0 || exit 1;
+	mkdir -p "${GNUPGHOME}" &&
+	chmod 0700 "${GNUPGHOME}" &&
+	ssh-keygen -t ed25519 -N "" -C "git ed25519 key" -f "${GPGSSH_KEY_PRIMARY}" >/dev/null &&
+	echo "\"principal with number 1\" $(cat "${GPGSSH_KEY_PRIMARY}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" &&
+	ssh-keygen -t rsa -b 2048 -N "" -C "git rsa2048 key" -f "${GPGSSH_KEY_SECONDARY}" >/dev/null &&
+	echo "\"principal with number 2\" $(cat "${GPGSSH_KEY_SECONDARY}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" &&
+	ssh-keygen -t ed25519 -N "${GPGSSH_KEY_PASSPHRASE}" -C "git ed25519 encrypted key" -f "${GPGSSH_KEY_WITH_PASSPHRASE}" >/dev/null &&
+	echo "\"principal with number 3\" $(cat "${GPGSSH_KEY_WITH_PASSPHRASE}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" &&
+	ssh-keygen -t ed25519 -N "" -f "${GPGSSH_KEY_UNTRUSTED}" >/dev/null
+'
+
 sanitize_pgp() {
 	perl -ne '
 		/^-----END PGP/ and $in_pgp = 0;

From c7e2d30efec488ff34afa5560c3f0fa364d04fbf Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:52 +0000
Subject: [PATCH 121/198] ssh signing: add ssh key format and signing code

Implements the actual sign_buffer_ssh operation and move some shared
cleanup code into a strbuf function

Set gpg.format = ssh and user.signingkey to either a ssh public key
string (like from an authorized_keys file), or a ssh key file.
If the key file or the config value itself contains only a public key
then the private key needs to be available via ssh-agent.

gpg.ssh.program can be set to an alternative location of ssh-keygen.
A somewhat recent openssh version (8.2p1+) of ssh-keygen is needed for
this feature. Since only ssh-keygen is needed it can this way be
installed seperately without upgrading your system openssh packages.

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/gpg.txt  |   4 +-
 Documentation/config/user.txt |   5 ++
 gpg-interface.c               | 138 ++++++++++++++++++++++++++++++++--
 3 files changed, 137 insertions(+), 10 deletions(-)

diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt
index d94025cb3684d8..88531b15f0ff32 100644
--- a/Documentation/config/gpg.txt
+++ b/Documentation/config/gpg.txt
@@ -11,13 +11,13 @@ gpg.program::
 
 gpg.format::
 	Specifies which key format to use when signing with `--gpg-sign`.
-	Default is "openpgp" and another possible value is "x509".
+	Default is "openpgp". Other possible values are "x509", "ssh".
 
 gpg.<format>.program::
 	Use this to customize the program used for the signing format you
 	chose. (see `gpg.program` and `gpg.format`) `gpg.program` can still
 	be used as a legacy synonym for `gpg.openpgp.program`. The default
-	value for `gpg.x509.program` is "gpgsm".
+	value for `gpg.x509.program` is "gpgsm" and `gpg.ssh.program` is "ssh-keygen".
 
 gpg.minTrustLevel::
 	Specifies a minimum trust level for signature verification.  If
diff --git a/Documentation/config/user.txt b/Documentation/config/user.txt
index 59aec7c3aed32a..2155128957c93e 100644
--- a/Documentation/config/user.txt
+++ b/Documentation/config/user.txt
@@ -36,3 +36,8 @@ user.signingKey::
 	commit, you can override the default selection with this variable.
 	This option is passed unchanged to gpg's --local-user parameter,
 	so you may specify a key using any method that gpg supports.
+	If gpg.format is set to "ssh" this can contain the literal ssh public
+	key (e.g.: "ssh-rsa XXXXXX identifier") or a file which contains it and
+	corresponds to the private key used for signing. The private key
+	needs to be available via ssh-agent. Alternatively it can be set to
+	a file containing a private key directly.
diff --git a/gpg-interface.c b/gpg-interface.c
index db54b05416257d..7ca682ac6d6849 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -41,12 +41,20 @@ static const char *x509_sigs[] = {
 	NULL
 };
 
+static const char *ssh_verify_args[] = { NULL };
+static const char *ssh_sigs[] = {
+	"-----BEGIN SSH SIGNATURE-----",
+	NULL
+};
+
 static int verify_gpg_signed_buffer(struct signature_check *sigc,
 				    struct gpg_format *fmt, const char *payload,
 				    size_t payload_size, const char *signature,
 				    size_t signature_size);
 static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
 			   const char *signing_key);
+static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature,
+			   const char *signing_key);
 
 static struct gpg_format gpg_format[] = {
 	{
@@ -65,6 +73,14 @@ static struct gpg_format gpg_format[] = {
 		.verify_signed_buffer = verify_gpg_signed_buffer,
 		.sign_buffer = sign_buffer_gpg,
 	},
+	{
+		.name = "ssh",
+		.program = "ssh-keygen",
+		.verify_args = ssh_verify_args,
+		.sigs = ssh_sigs,
+		.verify_signed_buffer = NULL, /* TODO */
+		.sign_buffer = sign_buffer_ssh
+	},
 };
 
 static struct gpg_format *use_format = &gpg_format[0];
@@ -443,6 +459,9 @@ int git_gpg_config(const char *var, const char *value, void *cb)
 	if (!strcmp(var, "gpg.x509.program"))
 		fmtname = "x509";
 
+	if (!strcmp(var, "gpg.ssh.program"))
+		fmtname = "ssh";
+
 	if (fmtname) {
 		fmt = get_format_by_name(fmtname);
 		return git_config_string(&fmt->program, var, value);
@@ -463,12 +482,30 @@ int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const char *sig
 	return use_format->sign_buffer(buffer, signature, signing_key);
 }
 
+/*
+ * Strip CR from the line endings, in case we are on Windows.
+ * NEEDSWORK: make it trim only CRs before LFs and rename
+ */
+static void remove_cr_after(struct strbuf *buffer, size_t offset)
+{
+	size_t i, j;
+
+	for (i = j = offset; i < buffer->len; i++) {
+		if (buffer->buf[i] != '\r') {
+			if (i != j)
+				buffer->buf[j] = buffer->buf[i];
+			j++;
+		}
+	}
+	strbuf_setlen(buffer, j);
+}
+
 static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
 			  const char *signing_key)
 {
 	struct child_process gpg = CHILD_PROCESS_INIT;
 	int ret;
-	size_t i, j, bottom;
+	size_t bottom;
 	struct strbuf gpg_status = STRBUF_INIT;
 
 	strvec_pushl(&gpg.args,
@@ -494,13 +531,98 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
 		return error(_("gpg failed to sign the data"));
 
 	/* Strip CR from the line endings, in case we are on Windows. */
-	for (i = j = bottom; i < signature->len; i++)
-		if (signature->buf[i] != '\r') {
-			if (i != j)
-				signature->buf[j] = signature->buf[i];
-			j++;
-		}
-	strbuf_setlen(signature, j);
+	remove_cr_after(signature, bottom);
 
 	return 0;
 }
+
+static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature,
+			   const char *signing_key)
+{
+	struct child_process signer = CHILD_PROCESS_INIT;
+	int ret = -1;
+	size_t bottom, keylen;
+	struct strbuf signer_stderr = STRBUF_INIT;
+	struct tempfile *key_file = NULL, *buffer_file = NULL;
+	char *ssh_signing_key_file = NULL;
+	struct strbuf ssh_signature_filename = STRBUF_INIT;
+
+	if (!signing_key || signing_key[0] == '\0')
+		return error(
+			_("user.signingkey needs to be set for ssh signing"));
+
+	if (starts_with(signing_key, "ssh-")) {
+		/* A literal ssh key */
+		key_file = mks_tempfile_t(".git_signing_key_tmpXXXXXX");
+		if (!key_file)
+			return error_errno(
+				_("could not create temporary file"));
+		keylen = strlen(signing_key);
+		if (write_in_full(key_file->fd, signing_key, keylen) < 0 ||
+		    close_tempfile_gently(key_file) < 0) {
+			error_errno(_("failed writing ssh signing key to '%s'"),
+				    key_file->filename.buf);
+			goto out;
+		}
+		ssh_signing_key_file = strbuf_detach(&key_file->filename, NULL);
+	} else {
+		/* We assume a file */
+		ssh_signing_key_file = expand_user_path(signing_key, 1);
+	}
+
+	buffer_file = mks_tempfile_t(".git_signing_buffer_tmpXXXXXX");
+	if (!buffer_file) {
+		error_errno(_("could not create temporary file"));
+		goto out;
+	}
+
+	if (write_in_full(buffer_file->fd, buffer->buf, buffer->len) < 0 ||
+	    close_tempfile_gently(buffer_file) < 0) {
+		error_errno(_("failed writing ssh signing key buffer to '%s'"),
+			    buffer_file->filename.buf);
+		goto out;
+	}
+
+	strvec_pushl(&signer.args, use_format->program,
+		     "-Y", "sign",
+		     "-n", "git",
+		     "-f", ssh_signing_key_file,
+		     buffer_file->filename.buf,
+		     NULL);
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+	ret = pipe_command(&signer, NULL, 0, NULL, 0, &signer_stderr, 0);
+	sigchain_pop(SIGPIPE);
+
+	if (ret) {
+		if (strstr(signer_stderr.buf, "usage:"))
+			error(_("ssh-keygen -Y sign is needed for ssh signing (available in openssh version 8.2p1+)"));
+
+		error("%s", signer_stderr.buf);
+		goto out;
+	}
+
+	bottom = signature->len;
+
+	strbuf_addbuf(&ssh_signature_filename, &buffer_file->filename);
+	strbuf_addstr(&ssh_signature_filename, ".sig");
+	if (strbuf_read_file(signature, ssh_signature_filename.buf, 0) < 0) {
+		error_errno(
+			_("failed reading ssh signing data buffer from '%s'"),
+			ssh_signature_filename.buf);
+	}
+	unlink_or_warn(ssh_signature_filename.buf);
+
+	/* Strip CR from the line endings, in case we are on Windows. */
+	remove_cr_after(signature, bottom);
+
+out:
+	if (key_file)
+		delete_tempfile(&key_file);
+	if (buffer_file)
+		delete_tempfile(&buffer_file);
+	strbuf_release(&signer_stderr);
+	strbuf_release(&ssh_signature_filename);
+	FREE_AND_NULL(ssh_signing_key_file);
+	return ret;
+}

From 54937221225569332b92f6cd7876d4fa08dc3277 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:53 +0000
Subject: [PATCH 122/198] ssh signing: retrieve a default key from ssh-agent

If user.signingkey is not set and a ssh signature is requested we call
gpg.ssh.defaultKeyCommand (typically "ssh-add -L") and use the first key we get

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/gpg.txt  |  6 +++
 Documentation/config/user.txt |  4 +-
 gpg-interface.c               | 70 ++++++++++++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt
index 88531b15f0ff32..9b95dd280c3749 100644
--- a/Documentation/config/gpg.txt
+++ b/Documentation/config/gpg.txt
@@ -33,3 +33,9 @@ gpg.minTrustLevel::
 * `marginal`
 * `fully`
 * `ultimate`
+
+gpg.ssh.defaultKeyCommand:
+	This command that will be run when user.signingkey is not set and a ssh
+	signature is requested. On successful exit a valid ssh public key is
+	expected in the	first line of its output. To automatically use the first
+	available key from your ssh-agent set this to "ssh-add -L".
diff --git a/Documentation/config/user.txt b/Documentation/config/user.txt
index 2155128957c93e..ad78dce9ecbfc6 100644
--- a/Documentation/config/user.txt
+++ b/Documentation/config/user.txt
@@ -40,4 +40,6 @@ user.signingKey::
 	key (e.g.: "ssh-rsa XXXXXX identifier") or a file which contains it and
 	corresponds to the private key used for signing. The private key
 	needs to be available via ssh-agent. Alternatively it can be set to
-	a file containing a private key directly.
+	a file containing a private key directly. If not set git will call
+	gpg.ssh.defaultKeyCommand (e.g.: "ssh-add -L") and try to use the first
+	key available.
diff --git a/gpg-interface.c b/gpg-interface.c
index 7ca682ac6d6849..3a0cca1b1d2109 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -6,8 +6,10 @@
 #include "gpg-interface.h"
 #include "sigchain.h"
 #include "tempfile.h"
+#include "alias.h"
 
 static char *configured_signing_key;
+static const char *ssh_default_key_command;
 static enum signature_trust_level configured_min_trust_level = TRUST_UNDEFINED;
 
 struct gpg_format {
@@ -21,6 +23,7 @@ struct gpg_format {
 				    size_t signature_size);
 	int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature,
 			   const char *signing_key);
+	const char *(*get_default_key)(void);
 };
 
 static const char *openpgp_verify_args[] = {
@@ -56,6 +59,8 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
 static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature,
 			   const char *signing_key);
 
+static const char *get_default_ssh_signing_key(void);
+
 static struct gpg_format gpg_format[] = {
 	{
 		.name = "openpgp",
@@ -64,6 +69,7 @@ static struct gpg_format gpg_format[] = {
 		.sigs = openpgp_sigs,
 		.verify_signed_buffer = verify_gpg_signed_buffer,
 		.sign_buffer = sign_buffer_gpg,
+		.get_default_key = NULL,
 	},
 	{
 		.name = "x509",
@@ -72,6 +78,7 @@ static struct gpg_format gpg_format[] = {
 		.sigs = x509_sigs,
 		.verify_signed_buffer = verify_gpg_signed_buffer,
 		.sign_buffer = sign_buffer_gpg,
+		.get_default_key = NULL,
 	},
 	{
 		.name = "ssh",
@@ -79,7 +86,8 @@ static struct gpg_format gpg_format[] = {
 		.verify_args = ssh_verify_args,
 		.sigs = ssh_sigs,
 		.verify_signed_buffer = NULL, /* TODO */
-		.sign_buffer = sign_buffer_ssh
+		.sign_buffer = sign_buffer_ssh,
+		.get_default_key = get_default_ssh_signing_key,
 	},
 };
 
@@ -453,6 +461,12 @@ int git_gpg_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, "gpg.ssh.defaultkeycommand")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return git_config_string(&ssh_default_key_command, var, value);
+	}
+
 	if (!strcmp(var, "gpg.program") || !strcmp(var, "gpg.openpgp.program"))
 		fmtname = "openpgp";
 
@@ -470,11 +484,63 @@ int git_gpg_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
+/* Returns the first public key from an ssh-agent to use for signing */
+static const char *get_default_ssh_signing_key(void)
+{
+	struct child_process ssh_default_key = CHILD_PROCESS_INIT;
+	int ret = -1;
+	struct strbuf key_stdout = STRBUF_INIT, key_stderr = STRBUF_INIT;
+	struct strbuf **keys;
+	char *key_command = NULL;
+	const char **argv;
+	int n;
+	char *default_key = NULL;
+
+	if (!ssh_default_key_command)
+		die(_("either user.signingkey or gpg.ssh.defaultKeyCommand needs to be configured"));
+
+	key_command = xstrdup(ssh_default_key_command);
+	n = split_cmdline(key_command, &argv);
+
+	if (n < 0)
+		die("malformed build-time gpg.ssh.defaultKeyCommand: %s",
+		    split_cmdline_strerror(n));
+
+	strvec_pushv(&ssh_default_key.args, argv);
+	ret = pipe_command(&ssh_default_key, NULL, 0, &key_stdout, 0,
+			   &key_stderr, 0);
+
+	if (!ret) {
+		keys = strbuf_split_max(&key_stdout, '\n', 2);
+		if (keys[0] && starts_with(keys[0]->buf, "ssh-")) {
+			default_key = strbuf_detach(keys[0], NULL);
+		} else {
+			warning(_("gpg.ssh.defaultKeycommand succeeded but returned no keys: %s %s"),
+				key_stderr.buf, key_stdout.buf);
+		}
+
+		strbuf_list_free(keys);
+	} else {
+		warning(_("gpg.ssh.defaultKeyCommand failed: %s %s"),
+			key_stderr.buf, key_stdout.buf);
+	}
+
+	free(key_command);
+	free(argv);
+	strbuf_release(&key_stdout);
+
+	return default_key;
+}
+
 const char *get_signing_key(void)
 {
 	if (configured_signing_key)
 		return configured_signing_key;
-	return git_committer_info(IDENT_STRICT|IDENT_NO_DATE);
+	if (use_format->get_default_key) {
+		return use_format->get_default_key();
+	}
+
+	return git_committer_info(IDENT_STRICT | IDENT_NO_DATE);
 }
 
 int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const char *signing_key)

From 6869f1f60c1e9c9fa226d8d67ce11df904aaa963 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:54 +0000
Subject: [PATCH 123/198] ssh signing: provide a textual signing_key_id

For ssh the user.signingkey can be a filename/path or even a literal ssh pubkey.
In push certs and textual output we prefer the ssh fingerprint instead.

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 gpg-interface.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
 gpg-interface.h |  6 ++++++
 send-pack.c     |  8 +++----
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/gpg-interface.c b/gpg-interface.c
index 3a0cca1b1d2109..0f1c6a02e53257 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -24,6 +24,7 @@ struct gpg_format {
 	int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature,
 			   const char *signing_key);
 	const char *(*get_default_key)(void);
+	const char *(*get_key_id)(void);
 };
 
 static const char *openpgp_verify_args[] = {
@@ -61,6 +62,8 @@ static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature,
 
 static const char *get_default_ssh_signing_key(void);
 
+static const char *get_ssh_key_id(void);
+
 static struct gpg_format gpg_format[] = {
 	{
 		.name = "openpgp",
@@ -70,6 +73,7 @@ static struct gpg_format gpg_format[] = {
 		.verify_signed_buffer = verify_gpg_signed_buffer,
 		.sign_buffer = sign_buffer_gpg,
 		.get_default_key = NULL,
+		.get_key_id = NULL,
 	},
 	{
 		.name = "x509",
@@ -79,6 +83,7 @@ static struct gpg_format gpg_format[] = {
 		.verify_signed_buffer = verify_gpg_signed_buffer,
 		.sign_buffer = sign_buffer_gpg,
 		.get_default_key = NULL,
+		.get_key_id = NULL,
 	},
 	{
 		.name = "ssh",
@@ -88,6 +93,7 @@ static struct gpg_format gpg_format[] = {
 		.verify_signed_buffer = NULL, /* TODO */
 		.sign_buffer = sign_buffer_ssh,
 		.get_default_key = get_default_ssh_signing_key,
+		.get_key_id = get_ssh_key_id,
 	},
 };
 
@@ -484,6 +490,41 @@ int git_gpg_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
+static char *get_ssh_key_fingerprint(const char *signing_key)
+{
+	struct child_process ssh_keygen = CHILD_PROCESS_INIT;
+	int ret = -1;
+	struct strbuf fingerprint_stdout = STRBUF_INIT;
+	struct strbuf **fingerprint;
+
+	/*
+	 * With SSH Signing this can contain a filename or a public key
+	 * For textual representation we usually want a fingerprint
+	 */
+	if (starts_with(signing_key, "ssh-")) {
+		strvec_pushl(&ssh_keygen.args, "ssh-keygen", "-lf", "-", NULL);
+		ret = pipe_command(&ssh_keygen, signing_key,
+				   strlen(signing_key), &fingerprint_stdout, 0,
+				   NULL, 0);
+	} else {
+		strvec_pushl(&ssh_keygen.args, "ssh-keygen", "-lf",
+			     configured_signing_key, NULL);
+		ret = pipe_command(&ssh_keygen, NULL, 0, &fingerprint_stdout, 0,
+				   NULL, 0);
+	}
+
+	if (!!ret)
+		die_errno(_("failed to get the ssh fingerprint for key '%s'"),
+			  signing_key);
+
+	fingerprint = strbuf_split_max(&fingerprint_stdout, ' ', 3);
+	if (!fingerprint[1])
+		die_errno(_("failed to get the ssh fingerprint for key '%s'"),
+			  signing_key);
+
+	return strbuf_detach(fingerprint[1], NULL);
+}
+
 /* Returns the first public key from an ssh-agent to use for signing */
 static const char *get_default_ssh_signing_key(void)
 {
@@ -532,6 +573,21 @@ static const char *get_default_ssh_signing_key(void)
 	return default_key;
 }
 
+static const char *get_ssh_key_id(void) {
+	return get_ssh_key_fingerprint(get_signing_key());
+}
+
+/* Returns a textual but unique representation of the signing key */
+const char *get_signing_key_id(void)
+{
+	if (use_format->get_key_id) {
+		return use_format->get_key_id();
+	}
+
+	/* GPG/GPGSM only store a key id on this variable */
+	return get_signing_key();
+}
+
 const char *get_signing_key(void)
 {
 	if (configured_signing_key)
diff --git a/gpg-interface.h b/gpg-interface.h
index feac4decf8b79f..beefacbb1e9025 100644
--- a/gpg-interface.h
+++ b/gpg-interface.h
@@ -64,6 +64,12 @@ int sign_buffer(struct strbuf *buffer, struct strbuf *signature,
 int git_gpg_config(const char *, const char *, void *);
 void set_signing_key(const char *);
 const char *get_signing_key(void);
+
+/*
+ * Returns a textual unique representation of the signing key in use
+ * Either a GPG KeyID or a SSH Key Fingerprint
+ */
+const char *get_signing_key_id(void);
 int check_signature(const char *payload, size_t plen,
 		    const char *signature, size_t slen,
 		    struct signature_check *sigc);
diff --git a/send-pack.c b/send-pack.c
index 5a79e0e7110319..50cca7e439b34b 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -341,13 +341,13 @@ static int generate_push_cert(struct strbuf *req_buf,
 {
 	const struct ref *ref;
 	struct string_list_item *item;
-	char *signing_key = xstrdup(get_signing_key());
+	char *signing_key_id = xstrdup(get_signing_key_id());
 	const char *cp, *np;
 	struct strbuf cert = STRBUF_INIT;
 	int update_seen = 0;
 
 	strbuf_addstr(&cert, "certificate version 0.1\n");
-	strbuf_addf(&cert, "pusher %s ", signing_key);
+	strbuf_addf(&cert, "pusher %s ", signing_key_id);
 	datestamp(&cert);
 	strbuf_addch(&cert, '\n');
 	if (args->url && *args->url) {
@@ -374,7 +374,7 @@ static int generate_push_cert(struct strbuf *req_buf,
 	if (!update_seen)
 		goto free_return;
 
-	if (sign_buffer(&cert, &cert, signing_key))
+	if (sign_buffer(&cert, &cert, get_signing_key()))
 		die(_("failed to sign the push certificate"));
 
 	packet_buf_write(req_buf, "push-cert%c%s", 0, cap_string);
@@ -386,7 +386,7 @@ static int generate_push_cert(struct strbuf *req_buf,
 	packet_buf_write(req_buf, "push-cert-end\n");
 
 free_return:
-	free(signing_key);
+	free(signing_key_id);
 	strbuf_release(&cert);
 	return update_seen;
 }

From 9048bb3c9b829cb19298532e960a1e735b2f1b18 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:55 +0000
Subject: [PATCH 124/198] ssh signing: verify signatures using ssh-keygen

To verify a ssh signature we first call ssh-keygen -Y find-principal to
look up the signing principal by their public key from the
allowedSignersFile. If the key is found then we do a verify. Otherwise
we only validate the signature but can not verify the signers identity.

Verification uses the gpg.ssh.allowedSignersFile (see ssh-keygen(1) "ALLOWED
SIGNERS") which contains valid public keys and a principal (usually
user@domain). Depending on the environment this file can be managed by
the individual developer or for example generated by the central
repository server from known ssh keys with push access. This file is usually
stored outside the repository, but if the repository only allows signed
commits/pushes, the user might choose to store it in the repository.

To revoke a key put the public key without the principal prefix into
gpg.ssh.revocationKeyring or generate a KRL (see ssh-keygen(1)
"KEY REVOCATION LISTS"). The same considerations about who to trust for
verification as with the allowedSignersFile apply.

Using SSH CA Keys with these files is also possible. Add
"cert-authority" as key option between the principal and the key to mark
it as a CA and all keys signed by it as valid for this CA.
See "CERTIFICATES" in ssh-keygen(1).

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/gpg.txt |  35 ++++++
 builtin/receive-pack.c       |   4 +
 gpg-interface.c              | 209 ++++++++++++++++++++++++++++++++++-
 3 files changed, 246 insertions(+), 2 deletions(-)

diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt
index 9b95dd280c3749..51a756b2f15f85 100644
--- a/Documentation/config/gpg.txt
+++ b/Documentation/config/gpg.txt
@@ -39,3 +39,38 @@ gpg.ssh.defaultKeyCommand:
 	signature is requested. On successful exit a valid ssh public key is
 	expected in the	first line of its output. To automatically use the first
 	available key from your ssh-agent set this to "ssh-add -L".
+
+gpg.ssh.allowedSignersFile::
+	A file containing ssh public keys which you are willing to trust.
+	The file consists of one or more lines of principals followed by an ssh
+	public key.
+	e.g.: user1@example.com,user2@example.com ssh-rsa AAAAX1...
+	See ssh-keygen(1) "ALLOWED SIGNERS" for details.
+	The principal is only used to identify the key and is available when
+	verifying a signature.
++
+SSH has no concept of trust levels like gpg does. To be able to differentiate
+between valid signatures and trusted signatures the trust level of a signature
+verification is set to `fully` when the public key is present in the allowedSignersFile.
+Therefore to only mark fully trusted keys as verified set gpg.minTrustLevel to `fully`.
+Otherwise valid but untrusted signatures will still verify but show no principal
+name of the signer.
++
+This file can be set to a location outside of the repository and every developer
+maintains their own trust store. A central repository server could generate this
+file automatically from ssh keys with push access to verify the code against.
+In a corporate setting this file is probably generated at a global location
+from automation that already handles developer ssh keys.
++
+A repository that only allows signed commits can store the file
+in the repository itself using a path relative to the top-level of the working tree.
+This way only committers with an already valid key can add or change keys in the keyring.
++
+Using a SSH CA key with the cert-authority option
+(see ssh-keygen(1) "CERTIFICATES") is also valid.
+
+gpg.ssh.revocationFile::
+	Either a SSH KRL or a list of revoked public keys (without the principal prefix).
+	See ssh-keygen(1) for details.
+	If a public key is found in this file then it will always be treated
+	as having trust level "never" and signatures will show as invalid.
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index a34742513aca7e..f17c7d2246b031 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -131,6 +131,10 @@ static int receive_pack_config(const char *var, const char *value, void *cb)
 {
 	int status = parse_hide_refs_config(var, value, "receive");
 
+	if (status)
+		return status;
+
+	status = git_gpg_config(var, value, NULL);
 	if (status)
 		return status;
 
diff --git a/gpg-interface.c b/gpg-interface.c
index 0f1c6a02e53257..9c1ef11a563f3f 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -3,13 +3,14 @@
 #include "config.h"
 #include "run-command.h"
 #include "strbuf.h"
+#include "dir.h"
 #include "gpg-interface.h"
 #include "sigchain.h"
 #include "tempfile.h"
 #include "alias.h"
 
 static char *configured_signing_key;
-static const char *ssh_default_key_command;
+static const char *ssh_default_key_command, *ssh_allowed_signers, *ssh_revocation_file;
 static enum signature_trust_level configured_min_trust_level = TRUST_UNDEFINED;
 
 struct gpg_format {
@@ -55,6 +56,10 @@ static int verify_gpg_signed_buffer(struct signature_check *sigc,
 				    struct gpg_format *fmt, const char *payload,
 				    size_t payload_size, const char *signature,
 				    size_t signature_size);
+static int verify_ssh_signed_buffer(struct signature_check *sigc,
+				    struct gpg_format *fmt, const char *payload,
+				    size_t payload_size, const char *signature,
+				    size_t signature_size);
 static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature,
 			   const char *signing_key);
 static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature,
@@ -90,7 +95,7 @@ static struct gpg_format gpg_format[] = {
 		.program = "ssh-keygen",
 		.verify_args = ssh_verify_args,
 		.sigs = ssh_sigs,
-		.verify_signed_buffer = NULL, /* TODO */
+		.verify_signed_buffer = verify_ssh_signed_buffer,
 		.sign_buffer = sign_buffer_ssh,
 		.get_default_key = get_default_ssh_signing_key,
 		.get_key_id = get_ssh_key_id,
@@ -357,6 +362,194 @@ static int verify_gpg_signed_buffer(struct signature_check *sigc,
 	return ret;
 }
 
+static void parse_ssh_output(struct signature_check *sigc)
+{
+	const char *line, *principal, *search;
+	char *key = NULL;
+
+	/*
+	 * ssh-keygen output should be:
+	 * Good "git" signature for PRINCIPAL with RSA key SHA256:FINGERPRINT
+	 *
+	 * or for valid but unknown keys:
+	 * Good "git" signature with RSA key SHA256:FINGERPRINT
+	 *
+	 * Note that "PRINCIPAL" can contain whitespace, "RSA" and
+	 * "SHA256" part could be a different token that names of
+	 * the algorithms used, and "FINGERPRINT" is a hexadecimal
+	 * string.  By finding the last occurence of " with ", we can
+	 * reliably parse out the PRINCIPAL.
+	 */
+	sigc->result = 'B';
+	sigc->trust_level = TRUST_NEVER;
+
+	line = xmemdupz(sigc->output, strcspn(sigc->output, "\n"));
+
+	if (skip_prefix(line, "Good \"git\" signature for ", &line)) {
+		/* Valid signature and known principal */
+		sigc->result = 'G';
+		sigc->trust_level = TRUST_FULLY;
+
+		/* Search for the last "with" to get the full principal */
+		principal = line;
+		do {
+			search = strstr(line, " with ");
+			if (search)
+				line = search + 1;
+		} while (search != NULL);
+		sigc->signer = xmemdupz(principal, line - principal - 1);
+	} else if (skip_prefix(line, "Good \"git\" signature with ", &line)) {
+		/* Valid signature, but key unknown */
+		sigc->result = 'G';
+		sigc->trust_level = TRUST_UNDEFINED;
+	} else {
+		return;
+	}
+
+	key = strstr(line, "key");
+	if (key) {
+		sigc->fingerprint = xstrdup(strstr(line, "key") + 4);
+		sigc->key = xstrdup(sigc->fingerprint);
+	} else {
+		/*
+		 * Output did not match what we expected
+		 * Treat the signature as bad
+		 */
+		sigc->result = 'B';
+	}
+}
+
+static int verify_ssh_signed_buffer(struct signature_check *sigc,
+				    struct gpg_format *fmt, const char *payload,
+				    size_t payload_size, const char *signature,
+				    size_t signature_size)
+{
+	struct child_process ssh_keygen = CHILD_PROCESS_INIT;
+	struct tempfile *buffer_file;
+	int ret = -1;
+	const char *line;
+	size_t trust_size;
+	char *principal;
+	struct strbuf ssh_keygen_out = STRBUF_INIT;
+	struct strbuf ssh_keygen_err = STRBUF_INIT;
+
+	if (!ssh_allowed_signers) {
+		error(_("gpg.ssh.allowedSignersFile needs to be configured and exist for ssh signature verification"));
+		return -1;
+	}
+
+	buffer_file = mks_tempfile_t(".git_vtag_tmpXXXXXX");
+	if (!buffer_file)
+		return error_errno(_("could not create temporary file"));
+	if (write_in_full(buffer_file->fd, signature, signature_size) < 0 ||
+	    close_tempfile_gently(buffer_file) < 0) {
+		error_errno(_("failed writing detached signature to '%s'"),
+			    buffer_file->filename.buf);
+		delete_tempfile(&buffer_file);
+		return -1;
+	}
+
+	/* Find the principal from the signers */
+	strvec_pushl(&ssh_keygen.args, fmt->program,
+		     "-Y", "find-principals",
+		     "-f", ssh_allowed_signers,
+		     "-s", buffer_file->filename.buf,
+		     NULL);
+	ret = pipe_command(&ssh_keygen, NULL, 0, &ssh_keygen_out, 0,
+			   &ssh_keygen_err, 0);
+	if (ret && strstr(ssh_keygen_err.buf, "usage:")) {
+		error(_("ssh-keygen -Y find-principals/verify is needed for ssh signature verification (available in openssh version 8.2p1+)"));
+		goto out;
+	}
+	if (ret || !ssh_keygen_out.len) {
+		/*
+		 * We did not find a matching principal in the allowedSigners
+		 * Check without validation
+		 */
+		child_process_init(&ssh_keygen);
+		strvec_pushl(&ssh_keygen.args, fmt->program,
+			     "-Y", "check-novalidate",
+			     "-n", "git",
+			     "-s", buffer_file->filename.buf,
+			     NULL);
+		pipe_command(&ssh_keygen, payload, payload_size,
+				   &ssh_keygen_out, 0, &ssh_keygen_err, 0);
+
+		/*
+		 * Fail on unknown keys
+		 * we still call check-novalidate to display the signature info
+		 */
+		ret = -1;
+	} else {
+		/* Check every principal we found (one per line) */
+		for (line = ssh_keygen_out.buf; *line;
+		     line = strchrnul(line + 1, '\n')) {
+			while (*line == '\n')
+				line++;
+			if (!*line)
+				break;
+
+			trust_size = strcspn(line, "\n");
+			principal = xmemdupz(line, trust_size);
+
+			child_process_init(&ssh_keygen);
+			strbuf_release(&ssh_keygen_out);
+			strbuf_release(&ssh_keygen_err);
+			strvec_push(&ssh_keygen.args, fmt->program);
+			/*
+			 * We found principals
+			 * Try with each until we find a match
+			 */
+			strvec_pushl(&ssh_keygen.args, "-Y", "verify",
+				     "-n", "git",
+				     "-f", ssh_allowed_signers,
+				     "-I", principal,
+				     "-s", buffer_file->filename.buf,
+				     NULL);
+
+			if (ssh_revocation_file) {
+				if (file_exists(ssh_revocation_file)) {
+					strvec_pushl(&ssh_keygen.args, "-r",
+						     ssh_revocation_file, NULL);
+				} else {
+					warning(_("ssh signing revocation file configured but not found: %s"),
+						ssh_revocation_file);
+				}
+			}
+
+			sigchain_push(SIGPIPE, SIG_IGN);
+			ret = pipe_command(&ssh_keygen, payload, payload_size,
+					   &ssh_keygen_out, 0, &ssh_keygen_err, 0);
+			sigchain_pop(SIGPIPE);
+
+			FREE_AND_NULL(principal);
+
+			if (!ret)
+				ret = !starts_with(ssh_keygen_out.buf, "Good");
+
+			if (!ret)
+				break;
+		}
+	}
+
+	sigc->payload = xmemdupz(payload, payload_size);
+	strbuf_stripspace(&ssh_keygen_out, 0);
+	strbuf_stripspace(&ssh_keygen_err, 0);
+	strbuf_add(&ssh_keygen_out, ssh_keygen_err.buf, ssh_keygen_err.len);
+	sigc->output = strbuf_detach(&ssh_keygen_out, NULL);
+	sigc->gpg_status = xstrdup(sigc->output);
+
+	parse_ssh_output(sigc);
+
+out:
+	if (buffer_file)
+		delete_tempfile(&buffer_file);
+	strbuf_release(&ssh_keygen_out);
+	strbuf_release(&ssh_keygen_err);
+
+	return ret;
+}
+
 int check_signature(const char *payload, size_t plen, const char *signature,
 	size_t slen, struct signature_check *sigc)
 {
@@ -473,6 +666,18 @@ int git_gpg_config(const char *var, const char *value, void *cb)
 		return git_config_string(&ssh_default_key_command, var, value);
 	}
 
+	if (!strcmp(var, "gpg.ssh.allowedsignersfile")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return git_config_string(&ssh_allowed_signers, var, value);
+	}
+
+	if (!strcmp(var, "gpg.ssh.revocationfile")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return git_config_string(&ssh_revocation_file, var, value);
+	}
+
 	if (!strcmp(var, "gpg.program") || !strcmp(var, "gpg.openpgp.program"))
 		fmtname = "openpgp";
 

From 587967698ab2d679a53f79162a35fda6e2eaac08 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:56 +0000
Subject: [PATCH 125/198] ssh signing: duplicate t7510 tests for commits

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7528-signed-commit-ssh.sh | 398 +++++++++++++++++++++++++++++++++++
 1 file changed, 398 insertions(+)
 create mode 100755 t/t7528-signed-commit-ssh.sh

diff --git a/t/t7528-signed-commit-ssh.sh b/t/t7528-signed-commit-ssh.sh
new file mode 100755
index 00000000000000..3e093168eef571
--- /dev/null
+++ b/t/t7528-signed-commit-ssh.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+
+test_description='ssh signed commit tests'
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+GNUPGHOME_NOT_USED=$GNUPGHOME
+. "$TEST_DIRECTORY/lib-gpg.sh"
+
+test_expect_success GPGSSH 'create signed commits' '
+	test_oid_cache <<-\EOF &&
+	header sha1:gpgsig
+	header sha256:gpgsig-sha256
+	EOF
+
+	test_when_finished "test_unconfig commit.gpgsign" &&
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+
+	echo 1 >file && git add file &&
+	test_tick && git commit -S -m initial &&
+	git tag initial &&
+	git branch side &&
+
+	echo 2 >file && test_tick && git commit -a -S -m second &&
+	git tag second &&
+
+	git checkout side &&
+	echo 3 >elif && git add elif &&
+	test_tick && git commit -m "third on side" &&
+
+	git checkout main &&
+	test_tick && git merge -S side &&
+	git tag merge &&
+
+	echo 4 >file && test_tick && git commit -a -m "fourth unsigned" &&
+	git tag fourth-unsigned &&
+
+	test_tick && git commit --amend -S -m "fourth signed" &&
+	git tag fourth-signed &&
+
+	git config commit.gpgsign true &&
+	echo 5 >file && test_tick && git commit -a -m "fifth signed" &&
+	git tag fifth-signed &&
+
+	git config commit.gpgsign false &&
+	echo 6 >file && test_tick && git commit -a -m "sixth" &&
+	git tag sixth-unsigned &&
+
+	git config commit.gpgsign true &&
+	echo 7 >file && test_tick && git commit -a -m "seventh" --no-gpg-sign &&
+	git tag seventh-unsigned &&
+
+	test_tick && git rebase -f HEAD^^ && git tag sixth-signed HEAD^ &&
+	git tag seventh-signed &&
+
+	echo 8 >file && test_tick && git commit -a -m eighth -S"${GPGSSH_KEY_UNTRUSTED}" &&
+	git tag eighth-signed-alt &&
+
+	# commit.gpgsign is still on but this must not be signed
+	echo 9 | git commit-tree HEAD^{tree} >oid &&
+	test_line_count = 1 oid &&
+	git tag ninth-unsigned $(cat oid) &&
+	# explicit -S of course must sign.
+	echo 10 | git commit-tree -S HEAD^{tree} >oid &&
+	test_line_count = 1 oid &&
+	git tag tenth-signed $(cat oid) &&
+
+	# --gpg-sign[=<key-id>] must sign.
+	echo 11 | git commit-tree --gpg-sign HEAD^{tree} >oid &&
+	test_line_count = 1 oid &&
+	git tag eleventh-signed $(cat oid) &&
+	echo 12 | git commit-tree --gpg-sign="${GPGSSH_KEY_UNTRUSTED}" HEAD^{tree} >oid &&
+	test_line_count = 1 oid &&
+	git tag twelfth-signed-alt $(cat oid)
+'
+
+test_expect_success GPGSSH 'verify and show signatures' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	test_config gpg.mintrustlevel UNDEFINED &&
+	(
+		for commit in initial second merge fourth-signed \
+			fifth-signed sixth-signed seventh-signed tenth-signed \
+			eleventh-signed
+		do
+			git verify-commit $commit &&
+			git show --pretty=short --show-signature $commit >actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $commit OK || exit 1
+		done
+	) &&
+	(
+		for commit in merge^2 fourth-unsigned sixth-unsigned \
+			seventh-unsigned ninth-unsigned
+		do
+			test_must_fail git verify-commit $commit &&
+			git show --pretty=short --show-signature $commit >actual &&
+			! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $commit OK || exit 1
+		done
+	) &&
+	(
+		for commit in eighth-signed-alt twelfth-signed-alt
+		do
+			git show --pretty=short --show-signature $commit >actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			grep "${KEY_NOT_TRUSTED}" actual &&
+			echo $commit OK || exit 1
+		done
+	)
+'
+
+test_expect_success GPGSSH 'verify-commit exits failure on untrusted signature' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	test_must_fail git verify-commit eighth-signed-alt 2>actual &&
+	grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual &&
+	! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+	grep "${KEY_NOT_TRUSTED}" actual
+'
+
+test_expect_success GPGSSH 'verify-commit exits success with matching minTrustLevel' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	test_config gpg.minTrustLevel fully &&
+	git verify-commit sixth-signed
+'
+
+test_expect_success GPGSSH 'verify-commit exits success with low minTrustLevel' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	test_config gpg.minTrustLevel marginal &&
+	git verify-commit sixth-signed
+'
+
+test_expect_success GPGSSH 'verify-commit exits failure with high minTrustLevel' '
+	test_config gpg.minTrustLevel ultimate &&
+	test_must_fail git verify-commit eighth-signed-alt
+'
+
+test_expect_success GPGSSH 'verify signatures with --raw' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	(
+		for commit in initial second merge fourth-signed fifth-signed sixth-signed seventh-signed
+		do
+			git verify-commit --raw $commit 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $commit OK || exit 1
+		done
+	) &&
+	(
+		for commit in merge^2 fourth-unsigned sixth-unsigned seventh-unsigned
+		do
+			test_must_fail git verify-commit --raw $commit 2>actual &&
+			! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $commit OK || exit 1
+		done
+	) &&
+	(
+		for commit in eighth-signed-alt
+		do
+			test_must_fail git verify-commit --raw $commit 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $commit OK || exit 1
+		done
+	)
+'
+
+test_expect_success GPGSSH 'proper header is used for hash algorithm' '
+	git cat-file commit fourth-signed >output &&
+	grep "^$(test_oid header) -----BEGIN SSH SIGNATURE-----" output
+'
+
+test_expect_success GPGSSH 'show signed commit with signature' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git show -s initial >commit &&
+	git show -s --show-signature initial >show &&
+	git verify-commit -v initial >verify.1 2>verify.2 &&
+	git cat-file commit initial >cat &&
+	grep -v -e "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" -e "Warning: " show >show.commit &&
+	grep -e "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" -e "Warning: " show >show.gpg &&
+	grep -v "^ " cat | grep -v "^gpgsig.* " >cat.commit &&
+	test_cmp show.commit commit &&
+	test_cmp show.gpg verify.2 &&
+	test_cmp cat.commit verify.1
+'
+
+test_expect_success GPGSSH 'detect fudged signature' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git cat-file commit seventh-signed >raw &&
+	sed -e "s/^seventh/7th forged/" raw >forged1 &&
+	git hash-object -w -t commit forged1 >forged1.commit &&
+	test_must_fail git verify-commit $(cat forged1.commit) &&
+	git show --pretty=short --show-signature $(cat forged1.commit) >actual1 &&
+	grep "${GPGSSH_BAD_SIGNATURE}" actual1 &&
+	! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual1 &&
+	! grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual1
+'
+
+test_expect_success GPGSSH 'detect fudged signature with NUL' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git cat-file commit seventh-signed >raw &&
+	cat raw >forged2 &&
+	echo Qwik | tr "Q" "\000" >>forged2 &&
+	git hash-object -w -t commit forged2 >forged2.commit &&
+	test_must_fail git verify-commit $(cat forged2.commit) &&
+	git show --pretty=short --show-signature $(cat forged2.commit) >actual2 &&
+	grep "${GPGSSH_BAD_SIGNATURE}" actual2 &&
+	! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual2
+'
+
+test_expect_success GPGSSH 'amending already signed commit' '
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git checkout fourth-signed^0 &&
+	git commit --amend -S --no-edit &&
+	git verify-commit HEAD &&
+	git show -s --show-signature HEAD >actual &&
+	grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+	! grep "${GPGSSH_BAD_SIGNATURE}" actual
+'
+
+test_expect_success GPGSSH 'show good signature with custom format' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") &&
+	cat >expect.tmpl <<-\EOF &&
+	G
+	FINGERPRINT
+	principal with number 1
+	FINGERPRINT
+
+	EOF
+	sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect &&
+	git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" sixth-signed >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'show bad signature with custom format' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	cat >expect <<-\EOF &&
+	B
+
+
+
+
+	EOF
+	git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" $(cat forged1.commit) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'show untrusted signature with custom format' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	cat >expect.tmpl <<-\EOF &&
+	U
+	FINGERPRINT
+
+	FINGERPRINT
+
+	EOF
+	git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" eighth-signed-alt >actual &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_UNTRUSTED}" | awk "{print \$2;}") &&
+	sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'show untrusted signature with undefined trust level' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	cat >expect.tmpl <<-\EOF &&
+	undefined
+	FINGERPRINT
+
+	FINGERPRINT
+
+	EOF
+	git log -1 --format="%GT%n%GK%n%GS%n%GF%n%GP" eighth-signed-alt >actual &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_UNTRUSTED}" | awk "{print \$2;}") &&
+	sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'show untrusted signature with ultimate trust level' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	cat >expect.tmpl <<-\EOF &&
+	fully
+	FINGERPRINT
+	principal with number 1
+	FINGERPRINT
+
+	EOF
+	git log -1 --format="%GT%n%GK%n%GS%n%GF%n%GP" sixth-signed >actual &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") &&
+	sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'show lack of signature with custom format' '
+	cat >expect <<-\EOF &&
+	N
+
+
+
+
+	EOF
+	git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" seventh-unsigned >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'log.showsignature behaves like --show-signature' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	test_config log.showsignature true &&
+	git show initial >actual &&
+	grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual
+'
+
+test_expect_success GPGSSH 'check config gpg.format values' '
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+	test_config gpg.format ssh &&
+	git commit -S --amend -m "success" &&
+	test_config gpg.format OpEnPgP &&
+	test_must_fail git commit -S --amend -m "fail"
+'
+
+test_expect_failure GPGSSH 'detect fudged commit with double signature (TODO)' '
+	sed -e "/gpgsig/,/END PGP/d" forged1 >double-base &&
+	sed -n -e "/gpgsig/,/END PGP/p" forged1 | \
+		sed -e "s/^$(test_oid header)//;s/^ //" | gpg --dearmor >double-sig1.sig &&
+	gpg -o double-sig2.sig -u 29472784 --detach-sign double-base &&
+	cat double-sig1.sig double-sig2.sig | gpg --enarmor >double-combined.asc &&
+	sed -e "s/^\(-.*\)ARMORED FILE/\1SIGNATURE/;1s/^/$(test_oid header) /;2,\$s/^/ /" \
+		double-combined.asc > double-gpgsig &&
+	sed -e "/committer/r double-gpgsig" double-base >double-commit &&
+	git hash-object -w -t commit double-commit >double-commit.commit &&
+	test_must_fail git verify-commit $(cat double-commit.commit) &&
+	git show --pretty=short --show-signature $(cat double-commit.commit) >double-actual &&
+	grep "BAD signature from" double-actual &&
+	grep "Good signature from" double-actual
+'
+
+test_expect_failure GPGSSH 'show double signature with custom format (TODO)' '
+	cat >expect <<-\EOF &&
+	E
+
+
+
+
+	EOF
+	git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" $(cat double-commit.commit) >actual &&
+	test_cmp expect actual
+'
+
+
+test_expect_failure GPGSSH 'verify-commit verifies multiply signed commits (TODO)' '
+	git init multiply-signed &&
+	cd multiply-signed &&
+	test_commit first &&
+	echo 1 >second &&
+	git add second &&
+	tree=$(git write-tree) &&
+	parent=$(git rev-parse HEAD^{commit}) &&
+	git commit --gpg-sign -m second &&
+	git cat-file commit HEAD &&
+	# Avoid trailing whitespace.
+	sed -e "s/^Q//" -e "s/^Z/ /" >commit <<-EOF &&
+	Qtree $tree
+	Qparent $parent
+	Qauthor A U Thor <author@example.com> 1112912653 -0700
+	Qcommitter C O Mitter <committer@example.com> 1112912653 -0700
+	Qgpgsig -----BEGIN PGP SIGNATURE-----
+	QZ
+	Q iHQEABECADQWIQRz11h0S+chaY7FTocTtvUezd5DDQUCX/uBDRYcY29tbWl0dGVy
+	Q QGV4YW1wbGUuY29tAAoJEBO29R7N3kMNd+8AoK1I8mhLHviPH+q2I5fIVgPsEtYC
+	Q AKCTqBh+VabJceXcGIZuF0Ry+udbBQ==
+	Q =tQ0N
+	Q -----END PGP SIGNATURE-----
+	Qgpgsig-sha256 -----BEGIN PGP SIGNATURE-----
+	QZ
+	Q iHQEABECADQWIQRz11h0S+chaY7FTocTtvUezd5DDQUCX/uBIBYcY29tbWl0dGVy
+	Q QGV4YW1wbGUuY29tAAoJEBO29R7N3kMN/NEAn0XO9RYSBj2dFyozi0JKSbssYMtO
+	Q AJwKCQ1BQOtuwz//IjU8TiS+6S4iUw==
+	Q =pIwP
+	Q -----END PGP SIGNATURE-----
+	Q
+	Qsecond
+	EOF
+	head=$(git hash-object -t commit -w commit) &&
+	git reset --hard $head &&
+	git verify-commit $head 2>actual &&
+	grep "Good signature from" actual &&
+	! grep "BAD signature from" actual
+'
+
+test_done

From 52ac6bd36f7435c9f2b68be6691eacfcf5a80538 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:57 +0000
Subject: [PATCH 126/198] ssh signing: tests for logs, tags & push certs

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4202-log.sh                   |  23 +++++
 t/t5534-push-signed.sh           | 101 +++++++++++++++++++
 t/t7031-verify-tag-signed-ssh.sh | 161 +++++++++++++++++++++++++++++++
 3 files changed, 285 insertions(+)
 create mode 100755 t/t7031-verify-tag-signed-ssh.sh

diff --git a/t/t4202-log.sh b/t/t4202-log.sh
index 39e746fbcbe228..79702716549a14 100755
--- a/t/t4202-log.sh
+++ b/t/t4202-log.sh
@@ -1616,6 +1616,16 @@ test_expect_success GPGSM 'setup signed branch x509' '
 	git commit -S -m signed_commit
 '
 
+test_expect_success GPGSSH 'setup sshkey signed branch' '
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+	test_when_finished "git reset --hard && git checkout main" &&
+	git checkout -b signed-ssh main &&
+	echo foo >foo &&
+	git add foo &&
+	git commit -S -m signed_commit
+'
+
 test_expect_success GPGSM 'log x509 fingerprint' '
 	echo "F8BF62E0693D0694816377099909C779FA23FD65 | " >expect &&
 	git log -n1 --format="%GF | %GP" signed-x509 >actual &&
@@ -1628,6 +1638,13 @@ test_expect_success GPGSM 'log OpenPGP fingerprint' '
 	test_cmp expect actual
 '
 
+test_expect_success GPGSSH 'log ssh key fingerprint' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	ssh-keygen -lf  "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2\" | \"}" >expect &&
+	git log -n1 --format="%GF | %GP" signed-ssh >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success GPG 'log --graph --show-signature' '
 	git log --graph --show-signature -n1 signed >actual &&
 	grep "^| gpg: Signature made" actual &&
@@ -1640,6 +1657,12 @@ test_expect_success GPGSM 'log --graph --show-signature x509' '
 	grep "^| gpgsm: Good signature" actual
 '
 
+test_expect_success GPGSSH 'log --graph --show-signature ssh' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git log --graph --show-signature -n1 signed-ssh >actual &&
+	grep "${GOOD_SIGNATURE_TRUSTED}" actual
+'
+
 test_expect_success GPG 'log --graph --show-signature for merged tag' '
 	test_when_finished "git reset --hard && git checkout main" &&
 	git checkout -b plain main &&
diff --git a/t/t5534-push-signed.sh b/t/t5534-push-signed.sh
index bba768f5ded1fc..24d374adbae884 100755
--- a/t/t5534-push-signed.sh
+++ b/t/t5534-push-signed.sh
@@ -137,6 +137,53 @@ test_expect_success GPG 'signed push sends push certificate' '
 	test_cmp expect dst/push-cert-status
 '
 
+test_expect_success GPGSSH 'ssh signed push sends push certificate' '
+	prepare_dst &&
+	mkdir -p dst/.git/hooks &&
+	git -C dst config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git -C dst config receive.certnonceseed sekrit &&
+	write_script dst/.git/hooks/post-receive <<-\EOF &&
+	# discard the update list
+	cat >/dev/null
+	# record the push certificate
+	if test -n "${GIT_PUSH_CERT-}"
+	then
+		git cat-file blob $GIT_PUSH_CERT >../push-cert
+	fi &&
+
+	cat >../push-cert-status <<E_O_F
+	SIGNER=${GIT_PUSH_CERT_SIGNER-nobody}
+	KEY=${GIT_PUSH_CERT_KEY-nokey}
+	STATUS=${GIT_PUSH_CERT_STATUS-nostatus}
+	NONCE_STATUS=${GIT_PUSH_CERT_NONCE_STATUS-nononcestatus}
+	NONCE=${GIT_PUSH_CERT_NONCE-nononce}
+	E_O_F
+
+	EOF
+
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") &&
+	git push --signed dst noop ff +noff &&
+
+	(
+		cat <<-\EOF &&
+		SIGNER=principal with number 1
+		KEY=FINGERPRINT
+		STATUS=G
+		NONCE_STATUS=OK
+		EOF
+		sed -n -e "s/^nonce /NONCE=/p" -e "/^$/q" dst/push-cert
+	) | sed -e "s|FINGERPRINT|$FINGERPRINT|" >expect &&
+
+	noop=$(git rev-parse noop) &&
+	ff=$(git rev-parse ff) &&
+	noff=$(git rev-parse noff) &&
+	grep "$noop $ff refs/heads/ff" dst/push-cert &&
+	grep "$noop $noff refs/heads/noff" dst/push-cert &&
+	test_cmp expect dst/push-cert-status
+'
+
 test_expect_success GPG 'inconsistent push options in signed push not allowed' '
 	# First, invoke receive-pack with dummy input to obtain its preamble.
 	prepare_dst &&
@@ -276,6 +323,60 @@ test_expect_success GPGSM 'fail without key and heed user.signingkey x509' '
 	test_cmp expect dst/push-cert-status
 '
 
+test_expect_success GPGSSH 'fail without key and heed user.signingkey ssh' '
+	test_config gpg.format ssh &&
+	prepare_dst &&
+	mkdir -p dst/.git/hooks &&
+	git -C dst config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git -C dst config receive.certnonceseed sekrit &&
+	write_script dst/.git/hooks/post-receive <<-\EOF &&
+	# discard the update list
+	cat >/dev/null
+	# record the push certificate
+	if test -n "${GIT_PUSH_CERT-}"
+	then
+		git cat-file blob $GIT_PUSH_CERT >../push-cert
+	fi &&
+
+	cat >../push-cert-status <<E_O_F
+	SIGNER=${GIT_PUSH_CERT_SIGNER-nobody}
+	KEY=${GIT_PUSH_CERT_KEY-nokey}
+	STATUS=${GIT_PUSH_CERT_STATUS-nostatus}
+	NONCE_STATUS=${GIT_PUSH_CERT_NONCE_STATUS-nononcestatus}
+	NONCE=${GIT_PUSH_CERT_NONCE-nononce}
+	E_O_F
+
+	EOF
+
+	test_config user.email hasnokey@nowhere.com &&
+	test_config gpg.format ssh &&
+	test_config user.signingkey "" &&
+	(
+		sane_unset GIT_COMMITTER_EMAIL &&
+		test_must_fail git push --signed dst noop ff +noff
+	) &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+	FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") &&
+	git push --signed dst noop ff +noff &&
+
+	(
+		cat <<-\EOF &&
+		SIGNER=principal with number 1
+		KEY=FINGERPRINT
+		STATUS=G
+		NONCE_STATUS=OK
+		EOF
+		sed -n -e "s/^nonce /NONCE=/p" -e "/^$/q" dst/push-cert
+	) | sed -e "s|FINGERPRINT|$FINGERPRINT|" >expect &&
+
+	noop=$(git rev-parse noop) &&
+	ff=$(git rev-parse ff) &&
+	noff=$(git rev-parse noff) &&
+	grep "$noop $ff refs/heads/ff" dst/push-cert &&
+	grep "$noop $noff refs/heads/noff" dst/push-cert &&
+	test_cmp expect dst/push-cert-status
+'
+
 test_expect_success GPG 'failed atomic push does not execute GPG' '
 	prepare_dst &&
 	git -C dst config receive.certnonceseed sekrit &&
diff --git a/t/t7031-verify-tag-signed-ssh.sh b/t/t7031-verify-tag-signed-ssh.sh
new file mode 100755
index 00000000000000..06c9dd6c9339f2
--- /dev/null
+++ b/t/t7031-verify-tag-signed-ssh.sh
@@ -0,0 +1,161 @@
+#!/bin/sh
+
+test_description='signed tag tests'
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-gpg.sh"
+
+test_expect_success GPGSSH 'create signed tags ssh' '
+	test_when_finished "test_unconfig commit.gpgsign" &&
+	test_config gpg.format ssh &&
+	test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" &&
+
+	echo 1 >file && git add file &&
+	test_tick && git commit -m initial &&
+	git tag -s -m initial initial &&
+	git branch side &&
+
+	echo 2 >file && test_tick && git commit -a -m second &&
+	git tag -s -m second second &&
+
+	git checkout side &&
+	echo 3 >elif && git add elif &&
+	test_tick && git commit -m "third on side" &&
+
+	git checkout main &&
+	test_tick && git merge -S side &&
+	git tag -s -m merge merge &&
+
+	echo 4 >file && test_tick && git commit -a -S -m "fourth unsigned" &&
+	git tag -a -m fourth-unsigned fourth-unsigned &&
+
+	test_tick && git commit --amend -S -m "fourth signed" &&
+	git tag -s -m fourth fourth-signed &&
+
+	echo 5 >file && test_tick && git commit -a -m "fifth" &&
+	git tag fifth-unsigned &&
+
+	git config commit.gpgsign true &&
+	echo 6 >file && test_tick && git commit -a -m "sixth" &&
+	git tag -a -m sixth sixth-unsigned &&
+
+	test_tick && git rebase -f HEAD^^ && git tag -s -m 6th sixth-signed HEAD^ &&
+	git tag -m seventh -s seventh-signed &&
+
+	echo 8 >file && test_tick && git commit -a -m eighth &&
+	git tag -u"${GPGSSH_KEY_UNTRUSTED}" -m eighth eighth-signed-alt
+'
+
+test_expect_success GPGSSH 'verify and show ssh signatures' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	(
+		for tag in initial second merge fourth-signed sixth-signed seventh-signed
+		do
+			git verify-tag $tag 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $tag OK || exit 1
+		done
+	) &&
+	(
+		for tag in fourth-unsigned fifth-unsigned sixth-unsigned
+		do
+			test_must_fail git verify-tag $tag 2>actual &&
+			! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $tag OK || exit 1
+		done
+	) &&
+	(
+		for tag in eighth-signed-alt
+		do
+			test_must_fail git verify-tag $tag 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			grep "${GPGSSH_KEY_NOT_TRUSTED}" actual &&
+			echo $tag OK || exit 1
+		done
+	)
+'
+
+test_expect_success GPGSSH 'detect fudged ssh signature' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git cat-file tag seventh-signed >raw &&
+	sed -e "/^tag / s/seventh/7th forged/" raw >forged1 &&
+	git hash-object -w -t tag forged1 >forged1.tag &&
+	test_must_fail git verify-tag $(cat forged1.tag) 2>actual1 &&
+	grep "${GPGSSH_BAD_SIGNATURE}" actual1 &&
+	! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual1 &&
+	! grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual1
+'
+
+test_expect_success GPGSSH 'verify ssh signatures with --raw' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	(
+		for tag in initial second merge fourth-signed sixth-signed seventh-signed
+		do
+			git verify-tag --raw $tag 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $tag OK || exit 1
+		done
+	) &&
+	(
+		for tag in fourth-unsigned fifth-unsigned sixth-unsigned
+		do
+			test_must_fail git verify-tag --raw $tag 2>actual &&
+			! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $tag OK || exit 1
+		done
+	) &&
+	(
+		for tag in eighth-signed-alt
+		do
+			test_must_fail git verify-tag --raw $tag 2>actual &&
+			grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual &&
+			! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+			echo $tag OK || exit 1
+		done
+	)
+'
+
+test_expect_success GPGSSH 'verify signatures with --raw ssh' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	git verify-tag --raw sixth-signed 2>actual &&
+	grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual &&
+	! grep "${GPGSSH_BAD_SIGNATURE}" actual &&
+	echo sixth-signed OK
+'
+
+test_expect_success GPGSSH 'verify multiple tags ssh' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	tags="seventh-signed sixth-signed" &&
+	for i in $tags
+	do
+		git verify-tag -v --raw $i || return 1
+	done >expect.stdout 2>expect.stderr.1 &&
+	grep "^${GPGSSH_GOOD_SIGNATURE_TRUSTED}" <expect.stderr.1 >expect.stderr &&
+	git verify-tag -v --raw $tags >actual.stdout 2>actual.stderr.1 &&
+	grep "^${GPGSSH_GOOD_SIGNATURE_TRUSTED}" <actual.stderr.1 >actual.stderr &&
+	test_cmp expect.stdout actual.stdout &&
+	test_cmp expect.stderr actual.stderr
+'
+
+test_expect_success GPGSSH 'verifying tag with --format - ssh' '
+	test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+	cat >expect <<-\EOF &&
+	tagname : fourth-signed
+	EOF
+	git verify-tag --format="tagname : %(tag)" "fourth-signed" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success GPGSSH 'verifying a forged tag with --format should fail silently - ssh' '
+	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
+	test_must_be_empty actual-forged
+'
+
+test_done

From 4ff5911494cb29f9ce073368544193f9283b2617 Mon Sep 17 00:00:00 2001
From: Fabian Stelzer <fs@gigacodes.de>
Date: Tue, 3 Aug 2021 13:45:58 +0000
Subject: [PATCH 127/198] ssh signing: test that gpg fails for unkown keys

Test that verify-commit/tag will fail when a gpg key is completely
unknown. To do this we have to generate a key, use it for a signature
and delete it from our keyring aferwards completely.

Signed-off-by: Fabian Stelzer <fs@gigacodes.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7510-signed-commit.sh | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/t/t7510-signed-commit.sh b/t/t7510-signed-commit.sh
index 8df5a74f1db4ec..d65a0171f29c85 100755
--- a/t/t7510-signed-commit.sh
+++ b/t/t7510-signed-commit.sh
@@ -71,7 +71,25 @@ test_expect_success GPG 'create signed commits' '
 	git tag eleventh-signed $(cat oid) &&
 	echo 12 | git commit-tree --gpg-sign=B7227189 HEAD^{tree} >oid &&
 	test_line_count = 1 oid &&
-	git tag twelfth-signed-alt $(cat oid)
+	git tag twelfth-signed-alt $(cat oid) &&
+
+	cat >keydetails <<-\EOF &&
+	Key-Type: RSA
+	Key-Length: 2048
+	Subkey-Type: RSA
+	Subkey-Length: 2048
+	Name-Real: Unknown User
+	Name-Email: unknown@git.com
+	Expire-Date: 0
+	%no-ask-passphrase
+	%no-protection
+	EOF
+	gpg --batch --gen-key keydetails &&
+	echo 13 >file && git commit -a -S"unknown@git.com" -m thirteenth &&
+	git tag thirteenth-signed &&
+	DELETE_FINGERPRINT=$(gpg -K --with-colons --fingerprint --batch unknown@git.com | grep "^fpr" | head -n 1 | awk -F ":" "{print \$10;}") &&
+	gpg --batch --yes --delete-secret-keys $DELETE_FINGERPRINT &&
+	gpg --batch --yes --delete-keys unknown@git.com
 '
 
 test_expect_success GPG 'verify and show signatures' '
@@ -110,6 +128,13 @@ test_expect_success GPG 'verify and show signatures' '
 	)
 '
 
+test_expect_success GPG 'verify-commit exits failure on unknown signature' '
+	test_must_fail git verify-commit thirteenth-signed 2>actual &&
+	! grep "Good signature from" actual &&
+	! grep "BAD signature from" actual &&
+	grep -q -F -e "No public key" -e "public key not found" actual
+'
+
 test_expect_success GPG 'verify-commit exits success on untrusted signature' '
 	git verify-commit eighth-signed-alt 2>actual &&
 	grep "Good signature from" actual &&
@@ -338,6 +363,8 @@ test_expect_success GPG 'show double signature with custom format' '
 '
 
 
+# NEEDSWORK: This test relies on the test_tick commit/author dates from the first
+# 'create signed commits' test even though it creates its own
 test_expect_success GPG 'verify-commit verifies multiply signed commits' '
 	git init multiply-signed &&
 	cd multiply-signed &&

From 993b69e5baa4515f97c93e26956ff8618b0f0054 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:27 +0200
Subject: [PATCH 128/198] Makefile: mark "check" target as .PHONY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a bug in 44c9e8594e (Fix up header file dependencies and add
sparse checking rules, 2005-07-03), we never marked the phony "check"
target as such.

Perhaps we should just remove it, since as of a combination of
912f9980d2 (Makefile: help people who run 'make check' by mistake,
2008-11-11) 0bcd9ae85d (sparse: Fix errors due to missing
target-specific variables, 2011-04-21) we've been suggesting the user
run "make sparse" directly.

But under that mode it still does something, as well as directing the
user to run "make test" under non-sparse. So let's punt that and
narrowly fix the PHONY bug.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index c6f6246bf63ef2..2ff038069e8a56 100644
--- a/Makefile
+++ b/Makefile
@@ -2931,6 +2931,7 @@ hdr-check: $(HCO)
 style:
 	git clang-format --style file --diff --extensions c,h
 
+.PHONY: check
 check: config-list.h command-list.h
 	@if sparse; \
 	then \

From b0b9d4f3a2f68017c19b1b5261d9ec2926eae264 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:28 +0200
Subject: [PATCH 129/198] Makefile: stop hardcoding {command,config}-list.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change various places that hardcode the names of these two files to
refer to either $(GENERATED_H), or to a new generated-hdrs
target. That target is consistent with the *-objs targets I recently
added in 029bac01a8 (Makefile: add {program,xdiff,test,git,fuzz}-objs
& objects targets, 2021-02-23).

A subsequent commit will add a new generated hook-list.h. By doing
this refactoring we'll only need to add the new file to the
GENERATED_H variable, not EXCEPT_HDRS, the vcbuild/README etc.

I have not tested the Windows-specific change in config.mak.uname
being made here, but we use other variables from the Makefile in the
same block, and the GENERATED_H is fully defined before we include
config.mak.uname.

Hardcoding command-list.h there seems to have been a case of
copy/paste programming in 976aaedca0 (msvc: add a Makefile target to
pre-generate the Visual Studio solution, 2019-07-29). The
config-list.h was added later in 709df95b78 (help: move
list_config_help to builtin/help, 2020-04-16).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile              | 6 ++++--
 compat/vcbuild/README | 2 +-
 config.mak.uname      | 6 +++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 2ff038069e8a56..89bf0dd73325bd 100644
--- a/Makefile
+++ b/Makefile
@@ -823,6 +823,8 @@ XDIFF_LIB = xdiff/lib.a
 
 GENERATED_H += command-list.h
 GENERATED_H += config-list.h
+.PHONY: generated-hdrs
+generated-hdrs: $(GENERATED_H)
 
 LIB_H := $(sort $(patsubst ./%,%,$(shell git ls-files '*.h' ':!t/' ':!Documentation/' 2>/dev/null || \
 	$(FIND) . \
@@ -2909,7 +2911,7 @@ $(SP_OBJ): %.sp: %.c GIT-CFLAGS FORCE
 .PHONY: sparse $(SP_OBJ)
 sparse: $(SP_OBJ)
 
-EXCEPT_HDRS := command-list.h config-list.h unicode-width.h compat/% xdiff/%
+EXCEPT_HDRS := $(GENERATED_H) unicode-width.h compat/% xdiff/%
 ifndef GCRYPT_SHA256
 	EXCEPT_HDRS += sha256/gcrypt.h
 endif
@@ -2932,7 +2934,7 @@ style:
 	git clang-format --style file --diff --extensions c,h
 
 .PHONY: check
-check: config-list.h command-list.h
+check: $(GENERATED_H)
 	@if sparse; \
 	then \
 		echo >&2 "Use 'make sparse' instead"; \
diff --git a/compat/vcbuild/README b/compat/vcbuild/README
index 51fb083dbbe213..29ec1d0f104b80 100644
--- a/compat/vcbuild/README
+++ b/compat/vcbuild/README
@@ -92,7 +92,7 @@ The Steps of Build Git with VS2008
    the git operations.
 
 3. Inside Git's directory run the command:
-       make command-list.h config-list.h
+       make generated-hdrs
    to generate the header file needed to compile git.
 
 4. Then either build Git with the GNU Make Makefile in the Git projects
diff --git a/config.mak.uname b/config.mak.uname
index 69413fb3dc0ad8..9988378160b87c 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -732,9 +732,9 @@ vcxproj:
 	 echo '</Project>') >git-remote-http/LinkOrCopyRemoteHttp.targets
 	git add -f git/LinkOrCopyBuiltins.targets git-remote-http/LinkOrCopyRemoteHttp.targets
 
-	# Add command-list.h and config-list.h
-	$(MAKE) MSVC=1 SKIP_VCPKG=1 prefix=/mingw64 config-list.h command-list.h
-	git add -f config-list.h command-list.h
+	# Add generated headers
+	$(MAKE) MSVC=1 SKIP_VCPKG=1 prefix=/mingw64 $(GENERATED_H)
+	git add -f $(GENERATED_H)
 
 	# Add scripts
 	rm -f perl/perl.mak

From cfd9bd26a1171153dccedda2f612cef5e80c2da4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:29 +0200
Subject: [PATCH 130/198] Makefile: remove an out-of-date comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This comment added in dfea575017 (Makefile: lazily compute header
dependencies, 2010-01-26) has been out of date since
92b88eba9f (Makefile: use `git ls-files` to list header files, if
possible, 2019-03-04), when we did exactly what it tells us not to do
and added $(GENERATED_H) to $(OBJECTS) dependencies.

The rest of it was also somewhere between inaccurate and outdated,
since as of b8ba629264 (Makefile: fold MISC_H into LIB_H, 2012-06-20)
it's not followed by a list of header files, that got moved earlier in
the file into LIB_H in 60d24dd255 (Makefile: fold XDIFF_H and VCSSVN_H
into LIB_H, 2012-07-06).

Let's just remove it entirely, to the extent that we have anything
useful to say here the comment on the
"USE_COMPUTED_HEADER_DEPENDENCIES" variable a few lines above this
change does the job for us.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/Makefile b/Makefile
index 89bf0dd73325bd..0a540dcd34e6ac 100644
--- a/Makefile
+++ b/Makefile
@@ -2519,13 +2519,6 @@ ifneq ($(dep_files_present),)
 include $(dep_files_present)
 endif
 else
-# Dependencies on header files, for platforms that do not support
-# the gcc -MMD option.
-#
-# Dependencies on automatically generated headers such as command-list.h
-# should _not_ be included here, since they are necessary even when
-# building an object for the first time.
-
 $(OBJECTS): $(LIB_H) $(GENERATED_H)
 endif
 

From 4a832f0e4e6d619fef7e2933933350d8ab317788 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:30 +0200
Subject: [PATCH 131/198] hook.[ch]: move find_hook() to this new library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the find_hook() function from run-command.c to a new hook.c
library. This change establishes a stub library that's pretty
pointless right now, but will see much wider use with Emily Shaffer's
upcoming "configuration-based hooks" series.

Eventually all the hook related code will live in hook.[ch]. Let's
start that process by moving the simple find_hook() function over
as-is.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile               |  1 +
 builtin/am.c           |  1 +
 builtin/bugreport.c    |  2 +-
 builtin/commit.c       |  1 +
 builtin/merge.c        |  1 +
 builtin/receive-pack.c |  1 +
 builtin/worktree.c     |  1 +
 hook.c                 | 37 +++++++++++++++++++++++++++++++++++++
 hook.h                 | 11 +++++++++++
 refs.c                 |  1 +
 run-command.c          | 35 +----------------------------------
 run-command.h          |  7 -------
 sequencer.c            |  1 +
 transport.c            |  1 +
 14 files changed, 59 insertions(+), 42 deletions(-)
 create mode 100644 hook.c
 create mode 100644 hook.h

diff --git a/Makefile b/Makefile
index 0a540dcd34e6ac..bc258886904ba5 100644
--- a/Makefile
+++ b/Makefile
@@ -910,6 +910,7 @@ LIB_OBJS += hash-lookup.o
 LIB_OBJS += hashmap.o
 LIB_OBJS += help.o
 LIB_OBJS += hex.o
+LIB_OBJS += hook.o
 LIB_OBJS += ident.o
 LIB_OBJS += json-writer.o
 LIB_OBJS += kwset.o
diff --git a/builtin/am.c b/builtin/am.c
index 0c2ad96b70eef4..c603f3cebdf5a2 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -11,6 +11,7 @@
 #include "parse-options.h"
 #include "dir.h"
 #include "run-command.h"
+#include "hook.h"
 #include "quote.h"
 #include "tempfile.h"
 #include "lockfile.h"
diff --git a/builtin/bugreport.c b/builtin/bugreport.c
index 9915a5841def8c..596f079a7f98a0 100644
--- a/builtin/bugreport.c
+++ b/builtin/bugreport.c
@@ -3,7 +3,7 @@
 #include "strbuf.h"
 #include "help.h"
 #include "compat/compiler.h"
-#include "run-command.h"
+#include "hook.h"
 
 
 static void get_system_info(struct strbuf *sys_info)
diff --git a/builtin/commit.c b/builtin/commit.c
index 7436262aae21b5..51b07ee02eacf6 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -19,6 +19,7 @@
 #include "revision.h"
 #include "wt-status.h"
 #include "run-command.h"
+#include "hook.h"
 #include "refs.h"
 #include "log-tree.h"
 #include "strbuf.h"
diff --git a/builtin/merge.c b/builtin/merge.c
index a8a843b1f54113..be98d66b0a8734 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -13,6 +13,7 @@
 #include "builtin.h"
 #include "lockfile.h"
 #include "run-command.h"
+#include "hook.h"
 #include "diff.h"
 #include "diff-merges.h"
 #include "refs.h"
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 2d1f97e1ca7b53..97aebdc15bd4e8 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -7,6 +7,7 @@
 #include "pkt-line.h"
 #include "sideband.h"
 #include "run-command.h"
+#include "hook.h"
 #include "exec-cmd.h"
 #include "commit.h"
 #include "object.h"
diff --git a/builtin/worktree.c b/builtin/worktree.c
index 0d0a80da61f1ee..d22ece93e1a805 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -8,6 +8,7 @@
 #include "branch.h"
 #include "refs.h"
 #include "run-command.h"
+#include "hook.h"
 #include "sigchain.h"
 #include "submodule.h"
 #include "utf8.h"
diff --git a/hook.c b/hook.c
new file mode 100644
index 00000000000000..c4dbef1d0ef839
--- /dev/null
+++ b/hook.c
@@ -0,0 +1,37 @@
+#include "cache.h"
+#include "hook.h"
+#include "run-command.h"
+
+const char *find_hook(const char *name)
+{
+	static struct strbuf path = STRBUF_INIT;
+
+	strbuf_reset(&path);
+	strbuf_git_path(&path, "hooks/%s", name);
+	if (access(path.buf, X_OK) < 0) {
+		int err = errno;
+
+#ifdef STRIP_EXTENSION
+		strbuf_addstr(&path, STRIP_EXTENSION);
+		if (access(path.buf, X_OK) >= 0)
+			return path.buf;
+		if (errno == EACCES)
+			err = errno;
+#endif
+
+		if (err == EACCES && advice_ignored_hook) {
+			static struct string_list advise_given = STRING_LIST_INIT_DUP;
+
+			if (!string_list_lookup(&advise_given, name)) {
+				string_list_insert(&advise_given, name);
+				advise(_("The '%s' hook was ignored because "
+					 "it's not set as executable.\n"
+					 "You can disable this warning with "
+					 "`git config advice.ignoredHook false`."),
+				       path.buf);
+			}
+		}
+		return NULL;
+	}
+	return path.buf;
+}
diff --git a/hook.h b/hook.h
new file mode 100644
index 00000000000000..68624f16059dfb
--- /dev/null
+++ b/hook.h
@@ -0,0 +1,11 @@
+#ifndef HOOK_H
+#define HOOK_H
+
+/*
+ * Returns the path to the hook file, or NULL if the hook is missing
+ * or disabled. Note that this points to static storage that will be
+ * overwritten by further calls to find_hook and run_hook_*.
+ */
+const char *find_hook(const char *name);
+
+#endif
diff --git a/refs.c b/refs.c
index 8b9f7c3a80a0f6..6211692eaaea53 100644
--- a/refs.c
+++ b/refs.c
@@ -10,6 +10,7 @@
 #include "refs.h"
 #include "refs/refs-internal.h"
 #include "run-command.h"
+#include "hook.h"
 #include "object-store.h"
 #include "object.h"
 #include "tag.h"
diff --git a/run-command.c b/run-command.c
index f72e72cce73f1a..352f5be16465df 100644
--- a/run-command.c
+++ b/run-command.c
@@ -8,6 +8,7 @@
 #include "string-list.h"
 #include "quote.h"
 #include "config.h"
+#include "hook.h"
 
 void child_process_init(struct child_process *child)
 {
@@ -1319,40 +1320,6 @@ int async_with_fork(void)
 #endif
 }
 
-const char *find_hook(const char *name)
-{
-	static struct strbuf path = STRBUF_INIT;
-
-	strbuf_reset(&path);
-	strbuf_git_path(&path, "hooks/%s", name);
-	if (access(path.buf, X_OK) < 0) {
-		int err = errno;
-
-#ifdef STRIP_EXTENSION
-		strbuf_addstr(&path, STRIP_EXTENSION);
-		if (access(path.buf, X_OK) >= 0)
-			return path.buf;
-		if (errno == EACCES)
-			err = errno;
-#endif
-
-		if (err == EACCES && advice_ignored_hook) {
-			static struct string_list advise_given = STRING_LIST_INIT_DUP;
-
-			if (!string_list_lookup(&advise_given, name)) {
-				string_list_insert(&advise_given, name);
-				advise(_("The '%s' hook was ignored because "
-					 "it's not set as executable.\n"
-					 "You can disable this warning with "
-					 "`git config advice.ignoredHook false`."),
-				       path.buf);
-			}
-		}
-		return NULL;
-	}
-	return path.buf;
-}
-
 int run_hook_ve(const char *const *env, const char *name, va_list args)
 {
 	struct child_process hook = CHILD_PROCESS_INIT;
diff --git a/run-command.h b/run-command.h
index af1296769f9862..f76b740f927b3b 100644
--- a/run-command.h
+++ b/run-command.h
@@ -204,13 +204,6 @@ int finish_command_in_signal(struct child_process *);
  */
 int run_command(struct child_process *);
 
-/*
- * Returns the path to the hook file, or NULL if the hook is missing
- * or disabled. Note that this points to static storage that will be
- * overwritten by further calls to find_hook and run_hook_*.
- */
-const char *find_hook(const char *name);
-
 /**
  * Run a hook.
  * The first argument is a pathname to an index file, or NULL
diff --git a/sequencer.c b/sequencer.c
index 7f07cd00f3f20a..ea4199d65a4d7c 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -8,6 +8,7 @@
 #include "sequencer.h"
 #include "tag.h"
 #include "run-command.h"
+#include "hook.h"
 #include "exec-cmd.h"
 #include "utf8.h"
 #include "cache-tree.h"
diff --git a/transport.c b/transport.c
index 17e9629710a2f8..77e196f75f551d 100644
--- a/transport.c
+++ b/transport.c
@@ -2,6 +2,7 @@
 #include "config.h"
 #include "transport.h"
 #include "run-command.h"
+#include "hook.h"
 #include "pkt-line.h"
 #include "fetch-pack.h"
 #include "remote.h"

From bb7449213c1301424ebdcf791bb2d90fe48d7de6 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:31 +0200
Subject: [PATCH 132/198] hook.c: add a hook_exists() wrapper and use it in
 bugreport.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a boolean version of the find_hook() function for those callers
who are only interested in checking whether the hook exists, not what
the path to it is.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/bugreport.c | 2 +-
 hook.c              | 5 +++++
 hook.h              | 5 +++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/builtin/bugreport.c b/builtin/bugreport.c
index 596f079a7f98a0..941c8d5e27004c 100644
--- a/builtin/bugreport.c
+++ b/builtin/bugreport.c
@@ -82,7 +82,7 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hook); i++)
-		if (find_hook(hook[i]))
+		if (hook_exists(hook[i]))
 			strbuf_addf(hook_info, "%s\n", hook[i]);
 }
 
diff --git a/hook.c b/hook.c
index c4dbef1d0ef839..97cd799a320c67 100644
--- a/hook.c
+++ b/hook.c
@@ -35,3 +35,8 @@ const char *find_hook(const char *name)
 	}
 	return path.buf;
 }
+
+int hook_exists(const char *name)
+{
+	return !!find_hook(name);
+}
diff --git a/hook.h b/hook.h
index 68624f16059dfb..4c547ac15e5ba8 100644
--- a/hook.h
+++ b/hook.h
@@ -8,4 +8,9 @@
  */
 const char *find_hook(const char *name);
 
+/*
+ * A boolean version of find_hook()
+ */
+int hook_exists(const char *hookname);
+
 #endif

From f1f399d19632546ec71d50dc07ed558619efdf05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:32 +0200
Subject: [PATCH 133/198] hook.c users: use "hook_exists()" insted of
 "find_hook()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new hook_exists() function instead of find_hook() where the
latter was called in boolean contexts. This make subsequent changes in
a series where we further refactor the hook API clearer, as we won't
conflate wanting to get the path of the hook with checking for its
existence.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/commit.c       | 2 +-
 builtin/merge.c        | 2 +-
 builtin/receive-pack.c | 2 +-
 sequencer.c            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 51b07ee02eacf6..aa3c741efa9d37 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1052,7 +1052,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 		return 0;
 	}
 
-	if (!no_verify && find_hook("pre-commit")) {
+	if (!no_verify && hook_exists("pre-commit")) {
 		/*
 		 * Re-read the index as pre-commit hook could have updated it,
 		 * and write it out as a tree.  We must do this before we invoke
diff --git a/builtin/merge.c b/builtin/merge.c
index be98d66b0a8734..03f244dd5a09a1 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -849,7 +849,7 @@ static void prepare_to_commit(struct commit_list *remoteheads)
 	 * and write it out as a tree.  We must do this before we invoke
 	 * the editor and after we invoke run_status above.
 	 */
-	if (find_hook("pre-merge-commit"))
+	if (hook_exists("pre-merge-commit"))
 		discard_cache();
 	read_cache_from(index_file);
 	strbuf_addbuf(&msg, &merge_msg);
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 97aebdc15bd4e8..91fa799b66e407 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1464,7 +1464,7 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w
 
 	strvec_pushf(&env, "GIT_DIR=%s", absolute_path(git_dir));
 
-	if (!find_hook(push_to_checkout_hook))
+	if (!hook_exists(push_to_checkout_hook))
 		retval = push_to_deploy(sha1, &env, work_tree);
 	else
 		retval = push_to_checkout(sha1, &env, work_tree);
diff --git a/sequencer.c b/sequencer.c
index ea4199d65a4d7c..9aac08c15451a6 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1446,7 +1446,7 @@ static int try_to_commit(struct repository *r,
 		}
 	}
 
-	if (find_hook("prepare-commit-msg")) {
+	if (hook_exists("prepare-commit-msg")) {
 		res = run_prepare_commit_msg_hook(r, msg, hook_commit);
 		if (res)
 			goto out;

From 117a741a6ec6ffa613eb1713ce9be384b36da4da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:33 +0200
Subject: [PATCH 134/198] hook-list.h: add a generated list of hooks, like
 config-list.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make githooks(5) the source of truth for what hooks git supports, and
die hooks we don't know about in find_hook(). This ensures that the
documentation and the C code's idea about existing hooks doesn't
diverge.

We still have Perl and Python code running its own hooks, but that'll
be addressed by Emily Shaffer's upcoming "git hook run" command.

This resolves a long-standing TODO item in bugreport.c of there being
no centralized listing of hooks, and fixes a bug with the bugreport
listing only knowing about 1/4 of the p4 hooks. It didn't know about
the recent "reference-transaction" hook either.

I have not been able to directly test the CMake change being made
here. Since 4c2c38e800 (ci: modification of main.yml to use cmake for
vs-build job, 2020-06-26) some of the Windows CI has a hard dependency
on CMake, this change works there, and is to my eyes an obviously
correct use of a pattern established in previous CMake changes,
namely:

 - 061c2240b1 (Introduce CMake support for configuring Git,
    2020-06-12)
 - 709df95b78 (help: move list_config_help to builtin/help,
    2020-04-16)
 - 976aaedca0 (msvc: add a Makefile target to pre-generate the Visual
   Studio solution, 2019-07-29)

The LC_ALL=C is needed because at least in my locale the dash ("-") is
ignored for the purposes of sorting, which results in a different
order. I'm not aware of anything in git that has a hard dependency on
the order, but e.g. the bugreport output would end up using whatever
locale was in effect when git was compiled.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .gitignore                          |  1 +
 Makefile                            | 10 ++++++-
 builtin/bugreport.c                 | 44 ++++++-----------------------
 contrib/buildsystems/CMakeLists.txt |  7 +++++
 generate-hooklist.sh                | 18 ++++++++++++
 hook.c                              | 19 +++++++++++++
 6 files changed, 62 insertions(+), 37 deletions(-)
 create mode 100755 generate-hooklist.sh

diff --git a/.gitignore b/.gitignore
index 311841f9bed577..6be9de41ae83b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -190,6 +190,7 @@
 /gitweb/static/gitweb.min.*
 /config-list.h
 /command-list.h
+/hook-list.h
 *.tar.gz
 *.dsc
 *.deb
diff --git a/Makefile b/Makefile
index bc258886904ba5..1740b99db006b1 100644
--- a/Makefile
+++ b/Makefile
@@ -823,6 +823,8 @@ XDIFF_LIB = xdiff/lib.a
 
 GENERATED_H += command-list.h
 GENERATED_H += config-list.h
+GENERATED_H += hook-list.h
+
 .PHONY: generated-hdrs
 generated-hdrs: $(GENERATED_H)
 
@@ -2226,7 +2228,9 @@ git$X: git.o GIT-LDFLAGS $(BUILTIN_OBJS) $(GITLIBS)
 
 help.sp help.s help.o: command-list.h
 
-builtin/help.sp builtin/help.s builtin/help.o: config-list.h GIT-PREFIX
+hook.sp hook.s hook.o: hook-list.h
+
+builtin/help.sp builtin/help.s builtin/help.o: config-list.h hook-list.h GIT-PREFIX
 builtin/help.sp builtin/help.s builtin/help.o: EXTRA_CPPFLAGS = \
 	'-DGIT_HTML_PATH="$(htmldir_relative_SQ)"' \
 	'-DGIT_MAN_PATH="$(mandir_relative_SQ)"' \
@@ -2259,6 +2263,10 @@ command-list.h: $(wildcard Documentation/git*.txt)
 		$(patsubst %,--exclude-program %,$(EXCLUDED_PROGRAMS)) \
 		command-list.txt >$@+ && mv $@+ $@
 
+hook-list.h: generate-hooklist.sh Documentation/githooks.txt
+	$(QUIET_GEN)$(SHELL_PATH) ./generate-hooklist.sh \
+		>$@+ && mv $@+ $@
+
 SCRIPT_DEFINES = $(SHELL_PATH_SQ):$(DIFF_SQ):$(GIT_VERSION):\
 	$(localedir_SQ):$(NO_CURL):$(USE_GETTEXT_SCHEME):$(SANE_TOOL_PATH_SQ):\
 	$(gitwebdir_SQ):$(PERL_PATH_SQ):$(SANE_TEXT_GREP):$(PAGER_ENV):\
diff --git a/builtin/bugreport.c b/builtin/bugreport.c
index 941c8d5e27004c..a7a1fcb8a7ad02 100644
--- a/builtin/bugreport.c
+++ b/builtin/bugreport.c
@@ -4,6 +4,7 @@
 #include "help.h"
 #include "compat/compiler.h"
 #include "hook.h"
+#include "hook-list.h"
 
 
 static void get_system_info(struct strbuf *sys_info)
@@ -41,39 +42,7 @@ static void get_system_info(struct strbuf *sys_info)
 
 static void get_populated_hooks(struct strbuf *hook_info, int nongit)
 {
-	/*
-	 * NEEDSWORK: Doesn't look like there is a list of all possible hooks;
-	 * so below is a transcription of `git help hooks`. Later, this should
-	 * be replaced with some programmatically generated list (generated from
-	 * doc or else taken from some library which tells us about all the
-	 * hooks)
-	 */
-	static const char *hook[] = {
-		"applypatch-msg",
-		"pre-applypatch",
-		"post-applypatch",
-		"pre-commit",
-		"pre-merge-commit",
-		"prepare-commit-msg",
-		"commit-msg",
-		"post-commit",
-		"pre-rebase",
-		"post-checkout",
-		"post-merge",
-		"pre-push",
-		"pre-receive",
-		"update",
-		"post-receive",
-		"post-update",
-		"push-to-checkout",
-		"pre-auto-gc",
-		"post-rewrite",
-		"sendemail-validate",
-		"fsmonitor-watchman",
-		"p4-pre-submit",
-		"post-index-change",
-	};
-	int i;
+	const char **p;
 
 	if (nongit) {
 		strbuf_addstr(hook_info,
@@ -81,9 +50,12 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit)
 		return;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(hook); i++)
-		if (hook_exists(hook[i]))
-			strbuf_addf(hook_info, "%s\n", hook[i]);
+	for (p = hook_name_list; *p; p++) {
+		const char *hook = *p;
+
+		if (hook_exists(hook))
+			strbuf_addf(hook_info, "%s\n", hook);
+	}
 }
 
 static const char * const bugreport_usage[] = {
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 171b4124afef58..fd1399c440f84a 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -624,6 +624,13 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/config-list.h)
 			OUTPUT_FILE ${CMAKE_BINARY_DIR}/config-list.h)
 endif()
 
+if(NOT EXISTS ${CMAKE_BINARY_DIR}/hook-list.h)
+	message("Generating hook-list.h")
+	execute_process(COMMAND ${SH_EXE} ${CMAKE_SOURCE_DIR}/generate-hooklist.sh
+			WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+			OUTPUT_FILE ${CMAKE_BINARY_DIR}/hook-list.h)
+endif()
+
 include_directories(${CMAKE_BINARY_DIR})
 
 #build
diff --git a/generate-hooklist.sh b/generate-hooklist.sh
new file mode 100755
index 00000000000000..6d4e56d1a31ea3
--- /dev/null
+++ b/generate-hooklist.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+#
+# Usage: ./generate-hooklist.sh >hook-list.h
+
+cat <<EOF
+/* Automatically generated by generate-hooklist.sh */
+
+static const char *hook_name_list[] = {
+EOF
+
+sed -n -e '/^~~~~*$/ {x; s/^.*$/	"&",/; p;}; x' \
+	<Documentation/githooks.txt |
+	LC_ALL=C sort
+
+cat <<EOF
+	NULL,
+};
+EOF
diff --git a/hook.c b/hook.c
index 97cd799a320c67..1f1db1ec9bf4f8 100644
--- a/hook.c
+++ b/hook.c
@@ -1,11 +1,30 @@
 #include "cache.h"
 #include "hook.h"
 #include "run-command.h"
+#include "hook-list.h"
+
+static int known_hook(const char *name)
+{
+	const char **p;
+	size_t len = strlen(name);
+	for (p = hook_name_list; *p; p++) {
+		const char *hook = *p;
+
+		if (!strncmp(name, hook, len) && hook[len] == '\0')
+			return 1;
+	}
+
+	return 0;
+}
 
 const char *find_hook(const char *name)
 {
 	static struct strbuf path = STRBUF_INIT;
 
+	if (!known_hook(name))
+		die(_("the hook '%s' is not known to git, should be in hook-list.h via githooks(5)"),
+		    name);
+
 	strbuf_reset(&path);
 	strbuf_git_path(&path, "hooks/%s", name);
 	if (access(path.buf, X_OK) < 0) {

From 91ebe8e76feeff99f8a3d0701bbd928a1a3f3f97 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:34 +0200
Subject: [PATCH 135/198] hook: add 'run' subcommand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to enable hooks to be run as an external process, by a
standalone Git command, or by tools which wrap Git, provide an external
means to run all configured hook commands for a given hook event.

Most of our hooks require more complex functionality than this, but
let's start with the bare minimum required to support our simplest
hooks.

In terms of implementation the usage_with_options() and "goto usage"
pattern here mirrors that of
builtin/{commit-graph,multi-pack-index}.c.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .gitignore                 |   1 +
 Documentation/git-hook.txt |  38 +++++++++++
 Documentation/githooks.txt |   4 ++
 Makefile                   |   1 +
 builtin.h                  |   1 +
 builtin/hook.c             |  85 ++++++++++++++++++++++++
 command-list.txt           |   1 +
 git.c                      |   1 +
 hook.c                     | 111 +++++++++++++++++++++++++++++++
 hook.h                     |  41 ++++++++++++
 t/t1800-hook.sh            | 133 +++++++++++++++++++++++++++++++++++++
 11 files changed, 417 insertions(+)
 create mode 100644 Documentation/git-hook.txt
 create mode 100644 builtin/hook.c
 create mode 100755 t/t1800-hook.sh

diff --git a/.gitignore b/.gitignore
index 6be9de41ae83b3..66189ca3cdc9f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,6 +77,7 @@
 /git-grep
 /git-hash-object
 /git-help
+/git-hook
 /git-http-backend
 /git-http-fetch
 /git-http-push
diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt
new file mode 100644
index 00000000000000..660d6a992a0f8d
--- /dev/null
+++ b/Documentation/git-hook.txt
@@ -0,0 +1,38 @@
+git-hook(1)
+===========
+
+NAME
+----
+git-hook - run git hooks
+
+SYNOPSIS
+--------
+[verse]
+'git hook' run <hook-name> [-- <hook-args>]
+
+DESCRIPTION
+-----------
+
+This command is an interface to git hooks (see linkgit:githooks[5]).
+Currently it only provides a convenience wrapper for running hooks for
+use by git itself. In the future it might gain other functionality.
+
+SUBCOMMANDS
+-----------
+
+run::
+	Run the `<hook-name>` hook. See linkgit:githooks[5] for
+	the hook names we support.
++
+Any positional arguments to the hook should be passed after an
+optional `--` (or `--end-of-options`, see linkgit:gitcli[7]). The
+arguments (if any) differ by hook name, see linkgit:githooks[5] for
+what those are.
+
+SEE ALSO
+--------
+linkgit:githooks[5]
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt
index b51959ff9418fd..a16e62bc8c8ea7 100644
--- a/Documentation/githooks.txt
+++ b/Documentation/githooks.txt
@@ -698,6 +698,10 @@ and "0" meaning they were not.
 Only one parameter should be set to "1" when the hook runs.  The hook
 running passing "1", "1" should not be possible.
 
+SEE ALSO
+--------
+linkgit:git-hook[1]
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index 1740b99db006b1..e68ca41507f012 100644
--- a/Makefile
+++ b/Makefile
@@ -1114,6 +1114,7 @@ BUILTIN_OBJS += builtin/get-tar-commit-id.o
 BUILTIN_OBJS += builtin/grep.o
 BUILTIN_OBJS += builtin/hash-object.o
 BUILTIN_OBJS += builtin/help.o
+BUILTIN_OBJS += builtin/hook.o
 BUILTIN_OBJS += builtin/index-pack.o
 BUILTIN_OBJS += builtin/init-db.o
 BUILTIN_OBJS += builtin/interpret-trailers.o
diff --git a/builtin.h b/builtin.h
index 16ecd5586f0bee..91740c151494d4 100644
--- a/builtin.h
+++ b/builtin.h
@@ -164,6 +164,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix);
 int cmd_grep(int argc, const char **argv, const char *prefix);
 int cmd_hash_object(int argc, const char **argv, const char *prefix);
 int cmd_help(int argc, const char **argv, const char *prefix);
+int cmd_hook(int argc, const char **argv, const char *prefix);
 int cmd_index_pack(int argc, const char **argv, const char *prefix);
 int cmd_init_db(int argc, const char **argv, const char *prefix);
 int cmd_interpret_trailers(int argc, const char **argv, const char *prefix);
diff --git a/builtin/hook.c b/builtin/hook.c
new file mode 100644
index 00000000000000..41dd15550cf142
--- /dev/null
+++ b/builtin/hook.c
@@ -0,0 +1,85 @@
+#include "cache.h"
+#include "builtin.h"
+#include "config.h"
+#include "hook.h"
+#include "parse-options.h"
+#include "strbuf.h"
+#include "strvec.h"
+
+#define BUILTIN_HOOK_RUN_USAGE \
+	N_("git hook run <hook-name> [-- <hook-args>]")
+
+static const char * const builtin_hook_usage[] = {
+	BUILTIN_HOOK_RUN_USAGE,
+	NULL
+};
+
+static const char * const builtin_hook_run_usage[] = {
+	BUILTIN_HOOK_RUN_USAGE,
+	NULL
+};
+
+static int run(int argc, const char **argv, const char *prefix)
+{
+	int i;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	const char *hook_name;
+	const char *hook_path;
+	struct option run_options[] = {
+		OPT_END(),
+	};
+	int ret;
+
+	argc = parse_options(argc, argv, prefix, run_options,
+			     builtin_hook_run_usage,
+			     PARSE_OPT_KEEP_DASHDASH);
+
+	if (!argc)
+		goto usage;
+
+	/*
+	 * Having a -- for "run" when providing <hook-args> is
+	 * mandatory.
+	 */
+	if (argc > 1 && strcmp(argv[1], "--") &&
+	    strcmp(argv[1], "--end-of-options"))
+		goto usage;
+
+	/* Add our arguments, start after -- */
+	for (i = 2 ; i < argc; i++)
+		strvec_push(&opt.args, argv[i]);
+
+	/* Need to take into account core.hooksPath */
+	git_config(git_default_config, NULL);
+
+	hook_name = argv[0];
+	hook_path = find_hook(hook_name);
+	if (!hook_path) {
+		error("cannot find a hook named %s", hook_name);
+		return 1;
+	}
+
+	ret = run_hooks(hook_name, hook_path, &opt);
+	run_hooks_opt_clear(&opt);
+	return ret;
+usage:
+	usage_with_options(builtin_hook_run_usage, run_options);
+}
+
+int cmd_hook(int argc, const char **argv, const char *prefix)
+{
+	struct option builtin_hook_options[] = {
+		OPT_END(),
+	};
+
+	argc = parse_options(argc, argv, NULL, builtin_hook_options,
+			     builtin_hook_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc)
+		goto usage;
+
+	if (!strcmp(argv[0], "run"))
+		return run(argc, argv, prefix);
+
+usage:
+	usage_with_options(builtin_hook_usage, builtin_hook_options);
+}
diff --git a/command-list.txt b/command-list.txt
index a289f09ed6fbf9..9ccd8e5aebeb81 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -103,6 +103,7 @@ git-grep                                mainporcelain           info
 git-gui                                 mainporcelain
 git-hash-object                         plumbingmanipulators
 git-help                                ancillaryinterrogators          complete
+git-hook                                mainporcelain
 git-http-backend                        synchingrepositories
 git-http-fetch                          synchelpers
 git-http-push                           synchelpers
diff --git a/git.c b/git.c
index 18bed9a99647aa..540909c391ff89 100644
--- a/git.c
+++ b/git.c
@@ -538,6 +538,7 @@ static struct cmd_struct commands[] = {
 	{ "grep", cmd_grep, RUN_SETUP_GENTLY },
 	{ "hash-object", cmd_hash_object },
 	{ "help", cmd_help },
+	{ "hook", cmd_hook, RUN_SETUP },
 	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
 	{ "init", cmd_init_db },
 	{ "init-db", cmd_init_db },
diff --git a/hook.c b/hook.c
index 1f1db1ec9bf4f8..c8ff39750430c7 100644
--- a/hook.c
+++ b/hook.c
@@ -2,11 +2,14 @@
 #include "hook.h"
 #include "run-command.h"
 #include "hook-list.h"
+#include "config.h"
 
 static int known_hook(const char *name)
 {
 	const char **p;
 	size_t len = strlen(name);
+	static int test_hooks_ok = -1;
+
 	for (p = hook_name_list; *p; p++) {
 		const char *hook = *p;
 
@@ -14,6 +17,14 @@ static int known_hook(const char *name)
 			return 1;
 	}
 
+	if (test_hooks_ok == -1)
+		test_hooks_ok = git_env_bool("GIT_TEST_FAKE_HOOKS", 0);
+
+	if (test_hooks_ok &&
+	    (!strcmp(name, "test-hook") ||
+	     !strcmp(name, "does-not-exist")))
+		return 1;
+
 	return 0;
 }
 
@@ -59,3 +70,103 @@ int hook_exists(const char *name)
 {
 	return !!find_hook(name);
 }
+
+void run_hooks_opt_clear(struct run_hooks_opt *o)
+{
+	strvec_clear(&o->env);
+	strvec_clear(&o->args);
+}
+
+static int pick_next_hook(struct child_process *cp,
+			  struct strbuf *out,
+			  void *pp_cb,
+			  void **pp_task_cb)
+{
+	struct hook_cb_data *hook_cb = pp_cb;
+	struct hook *run_me = hook_cb->run_me;
+
+	if (!run_me)
+		return 0;
+
+	cp->no_stdin = 1;
+	cp->env = hook_cb->options->env.v;
+	cp->stdout_to_stderr = 1;
+	cp->trace2_hook_name = hook_cb->hook_name;
+
+	/* add command */
+	strvec_push(&cp->args, run_me->hook_path);
+
+	/*
+	 * add passed-in argv, without expanding - let the user get back
+	 * exactly what they put in
+	 */
+	strvec_pushv(&cp->args, hook_cb->options->args.v);
+
+	/* Provide context for errors if necessary */
+	*pp_task_cb = run_me;
+
+	/*
+	 * This pick_next_hook() will be called again, we're only
+	 * running one hook, so indicate that no more work will be
+	 * done.
+	 */
+	hook_cb->run_me = NULL;
+
+	return 1;
+}
+
+static int notify_start_failure(struct strbuf *out,
+				void *pp_cb,
+				void *pp_task_cp)
+{
+	struct hook_cb_data *hook_cb = pp_cb;
+	struct hook *attempted = pp_task_cp;
+
+	hook_cb->rc |= 1;
+
+	strbuf_addf(out, _("Couldn't start hook '%s'\n"),
+		    attempted->hook_path);
+
+	return 1;
+}
+
+static int notify_hook_finished(int result,
+				struct strbuf *out,
+				void *pp_cb,
+				void *pp_task_cb)
+{
+	struct hook_cb_data *hook_cb = pp_cb;
+
+	hook_cb->rc |= result;
+
+	return 0;
+}
+
+int run_hooks(const char *hook_name, const char *hook_path,
+	      struct run_hooks_opt *options)
+{
+	struct hook my_hook = {
+		.hook_path = hook_path,
+	};
+	struct hook_cb_data cb_data = {
+		.rc = 0,
+		.hook_name = hook_name,
+		.options = options,
+	};
+	int jobs = 1;
+
+	if (!options)
+		BUG("a struct run_hooks_opt must be provided to run_hooks");
+
+	cb_data.run_me = &my_hook;
+
+	run_processes_parallel_tr2(jobs,
+				   pick_next_hook,
+				   notify_start_failure,
+				   notify_hook_finished,
+				   &cb_data,
+				   "hook",
+				   hook_name);
+
+	return cb_data.rc;
+}
diff --git a/hook.h b/hook.h
index 4c547ac15e5ba8..361984c69d49ae 100644
--- a/hook.h
+++ b/hook.h
@@ -1,5 +1,8 @@
 #ifndef HOOK_H
 #define HOOK_H
+#include "strbuf.h"
+#include "strvec.h"
+#include "run-command.h"
 
 /*
  * Returns the path to the hook file, or NULL if the hook is missing
@@ -13,4 +16,42 @@ const char *find_hook(const char *name);
  */
 int hook_exists(const char *hookname);
 
+struct hook {
+	/* The path to the hook */
+	const char *hook_path;
+};
+
+struct run_hooks_opt
+{
+	/* Environment vars to be set for each hook */
+	struct strvec env;
+
+	/* Args to be passed to each hook */
+	struct strvec args;
+};
+
+#define RUN_HOOKS_OPT_INIT { \
+	.env = STRVEC_INIT, \
+	.args = STRVEC_INIT, \
+}
+
+/*
+ * Callback provided to feed_pipe_fn and consume_sideband_fn.
+ */
+struct hook_cb_data {
+	/* rc reflects the cumulative failure state */
+	int rc;
+	const char *hook_name;
+	struct hook *run_me;
+	struct run_hooks_opt *options;
+};
+
+void run_hooks_opt_clear(struct run_hooks_opt *o);
+
+/**
+ * Takes an already resolved hook found via find_hook() and runs
+ * it. Does not call run_hooks_opt_clear() for you.
+ */
+int run_hooks(const char *hookname, const char *hook_path,
+	      struct run_hooks_opt *options);
 #endif
diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh
new file mode 100755
index 00000000000000..644df0a583c836
--- /dev/null
+++ b/t/t1800-hook.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+
+test_description='git-hook command'
+
+. ./test-lib.sh
+
+test_expect_success 'git hook usage' '
+	test_expect_code 129 git hook &&
+	test_expect_code 129 git hook run &&
+	test_expect_code 129 git hook run -h &&
+	test_expect_code 129 git hook run --unknown 2>err &&
+	grep "unknown option" err
+'
+
+test_expect_success 'setup GIT_TEST_FAKE_HOOKS=true to permit "test-hook" and "does-not-exist" names"' '
+	GIT_TEST_FAKE_HOOKS=true &&
+	export GIT_TEST_FAKE_HOOKS
+'
+
+test_expect_success 'git hook run: nonexistent hook' '
+	cat >stderr.expect <<-\EOF &&
+	error: cannot find a hook named test-hook
+	EOF
+	test_expect_code 1 git hook run test-hook 2>stderr.actual &&
+	test_cmp stderr.expect stderr.actual
+'
+
+test_expect_success 'git hook run: basic' '
+	write_script .git/hooks/test-hook <<-EOF &&
+	echo Test hook
+	EOF
+
+	cat >expect <<-\EOF &&
+	Test hook
+	EOF
+	git hook run test-hook 2>actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git hook run: stdout and stderr both write to our stderr' '
+	write_script .git/hooks/test-hook <<-EOF &&
+	echo >&1 Will end up on stderr
+	echo >&2 Will end up on stderr
+	EOF
+
+	cat >stderr.expect <<-\EOF &&
+	Will end up on stderr
+	Will end up on stderr
+	EOF
+	git hook run test-hook >stdout.actual 2>stderr.actual &&
+	test_cmp stderr.expect stderr.actual &&
+	test_must_be_empty stdout.actual
+'
+
+test_expect_success 'git hook run: exit codes are passed along' '
+	write_script .git/hooks/test-hook <<-EOF &&
+	exit 1
+	EOF
+
+	test_expect_code 1 git hook run test-hook &&
+
+	write_script .git/hooks/test-hook <<-EOF &&
+	exit 2
+	EOF
+
+	test_expect_code 2 git hook run test-hook &&
+
+	write_script .git/hooks/test-hook <<-EOF &&
+	exit 128
+	EOF
+
+	test_expect_code 128 git hook run test-hook &&
+
+	write_script .git/hooks/test-hook <<-EOF &&
+	exit 129
+	EOF
+
+	test_expect_code 129 git hook run test-hook
+'
+
+test_expect_success 'git hook run arg u ments without -- is not allowed' '
+	test_expect_code 129 git hook run test-hook arg u ments
+'
+
+test_expect_success 'git hook run -- pass arguments' '
+	write_script .git/hooks/test-hook <<-\EOF &&
+	echo $1
+	echo $2
+	EOF
+
+	cat >expect <<-EOF &&
+	arg
+	u ments
+	EOF
+
+	git hook run test-hook -- arg "u ments" 2>actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git hook run -- out-of-repo runs excluded' '
+	write_script .git/hooks/test-hook <<-EOF &&
+	echo Test hook
+	EOF
+
+	nongit test_must_fail git hook run test-hook
+'
+
+test_expect_success 'git -c core.hooksPath=<PATH> hook run' '
+	mkdir my-hooks &&
+	write_script my-hooks/test-hook <<-\EOF &&
+	echo Hook ran $1 >>actual
+	EOF
+
+	cat >expect <<-\EOF &&
+	Test hook
+	Hook ran one
+	Hook ran two
+	Hook ran three
+	Hook ran four
+	EOF
+
+	# Test various ways of specifying the path. See also
+	# t1350-config-hooks-path.sh
+	>actual &&
+	git hook run test-hook -- ignored 2>>actual &&
+	git -c core.hooksPath=my-hooks hook run test-hook -- one 2>>actual &&
+	git -c core.hooksPath=my-hooks/ hook run test-hook -- two 2>>actual &&
+	git -c core.hooksPath="$PWD/my-hooks" hook run test-hook -- three 2>>actual &&
+	git -c core.hooksPath="$PWD/my-hooks/" hook run test-hook -- four 2>>actual &&
+	test_cmp expect actual
+'
+
+test_done

From 22f54200dc0918c97856e2f31c1139dd73e33029 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:35 +0200
Subject: [PATCH 136/198] gc: use hook library for pre-auto-gc hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the pre-auto-gc hook away from run-command.h to and over to the
new hook.h library.

To do this introduce a simple run_hooks_oneshot() wrapper, we'll be
using it extensively for these simple cases of wanting to run a single
hook under a given name, and having it free the memory we allocate for
us.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/gc.c |  3 ++-
 hook.c       | 21 +++++++++++++++++++++
 hook.h       | 13 +++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index f05d2f0a1ac9cd..2f74cf394d3ccb 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -32,6 +32,7 @@
 #include "remote.h"
 #include "object-store.h"
 #include "exec-cmd.h"
+#include "hook.h"
 
 #define FAILED_RUN "failed to run %s"
 
@@ -394,7 +395,7 @@ static int need_to_gc(void)
 	else
 		return 0;
 
-	if (run_hook_le(NULL, "pre-auto-gc", NULL))
+	if (run_hooks_oneshot("pre-auto-gc", NULL))
 		return 0;
 	return 1;
 }
diff --git a/hook.c b/hook.c
index c8ff39750430c7..981a9bf46e6893 100644
--- a/hook.c
+++ b/hook.c
@@ -170,3 +170,24 @@ int run_hooks(const char *hook_name, const char *hook_path,
 
 	return cb_data.rc;
 }
+
+int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options)
+{
+	const char *hook_path;
+	int ret;
+	struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT;
+
+	if (!options)
+		options = &hook_opt_scratch;
+
+	hook_path = find_hook(hook_name);
+	if (!hook_path) {
+		ret = 0;
+		goto cleanup;
+	}
+
+	ret = run_hooks(hook_name, hook_path, options);
+cleanup:
+	run_hooks_opt_clear(options);
+	return ret;
+}
diff --git a/hook.h b/hook.h
index 361984c69d49ae..2201ab9d5681a1 100644
--- a/hook.h
+++ b/hook.h
@@ -51,7 +51,20 @@ void run_hooks_opt_clear(struct run_hooks_opt *o);
 /**
  * Takes an already resolved hook found via find_hook() and runs
  * it. Does not call run_hooks_opt_clear() for you.
+ *
+ * See run_hooks_oneshot() for the simpler one-shot API.
  */
 int run_hooks(const char *hookname, const char *hook_path,
 	      struct run_hooks_opt *options);
+
+/**
+ * Calls find_hook() on your "hook_name" and runs the hooks (if any)
+ * with run_hooks().
+ *
+ * If "options" is provided calls run_hooks_opt_clear() on it for
+ * you. If "options" is NULL a scratch one will be provided for you
+ * before calling run_hooks().
+ */
+int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options);
+
 #endif

From aec5e6f509e7977213970ea59244a15b1b923e10 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:36 +0200
Subject: [PATCH 137/198] rebase: convert pre-rebase to use hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the pre-rebase hook away from run-command.h to and over to the
new hook.h library.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/rebase.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 12f093121d9ed3..e7c668c99b1caf 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -28,6 +28,7 @@
 #include "sequencer.h"
 #include "rebase-interactive.h"
 #include "reset.h"
+#include "hook.h"
 
 #define DEFAULT_REFLOG_ACTION "rebase"
 
@@ -1313,6 +1314,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
 	char *squash_onto_name = NULL;
 	int reschedule_failed_exec = -1;
 	int allow_preemptive_ff = 1;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
 	struct option builtin_rebase_options[] = {
 		OPT_STRING(0, "onto", &options.onto_name,
 			   N_("revision"),
@@ -2022,9 +2024,9 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
 	}
 
 	/* If a hook exists, give it a chance to interrupt*/
+	strvec_pushl(&hook_opt.args, options.upstream_arg, argc ? argv[0] : NULL, NULL);
 	if (!ok_to_skip_pre_rebase &&
-	    run_hook_le(NULL, "pre-rebase", options.upstream_arg,
-			argc ? argv[0] : NULL, NULL))
+	    run_hooks_oneshot("pre-rebase", &hook_opt))
 		die(_("The pre-rebase hook refused to rebase."));
 
 	if (options.flags & REBASE_DIFFSTAT) {

From 406b8fdfc86a4cc7de3ae86105f47f350e48548e Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:37 +0200
Subject: [PATCH 138/198] am: convert applypatch to use hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Teach pre-applypatch, post-applypatch, and applypatch-msg to use the
hook.h library instead of the run-command.h library.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/am.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index c603f3cebdf5a2..e444b18b64a34f 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -446,9 +446,11 @@ static void am_destroy(const struct am_state *state)
 static int run_applypatch_msg_hook(struct am_state *state)
 {
 	int ret;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 
 	assert(state->msg);
-	ret = run_hook_le(NULL, "applypatch-msg", am_path(state, "final-commit"), NULL);
+	strvec_push(&opt.args, am_path(state, "final-commit"));
+	ret = run_hooks_oneshot("applypatch-msg", &opt);
 
 	if (!ret) {
 		FREE_AND_NULL(state->msg);
@@ -1609,7 +1611,7 @@ static void do_commit(const struct am_state *state)
 	const char *reflog_msg, *author, *committer = NULL;
 	struct strbuf sb = STRBUF_INIT;
 
-	if (run_hook_le(NULL, "pre-applypatch", NULL))
+	if (run_hooks_oneshot("pre-applypatch", NULL))
 		exit(1);
 
 	if (write_cache_as_tree(&tree, 0, NULL))
@@ -1661,7 +1663,7 @@ static void do_commit(const struct am_state *state)
 		fclose(fp);
 	}
 
-	run_hook_le(NULL, "post-applypatch", NULL);
+	run_hooks_oneshot("post-applypatch", NULL);
 
 	strbuf_release(&sb);
 }

From 47ed741eafcfff13acf0835d7fdac011b173ef53 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:38 +0200
Subject: [PATCH 139/198] hooks: convert 'post-checkout' hook to hook library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the running of the 'post-checkout' hook away from run-command.h
to the new hook.h library. For "worktree" this requires a change to it
to run the hooks from a given directory.

We could strictly speaking skip the "absolute_path" flag and just
check if "dir" is specified, but let's split them up for clarity, as
well as for any future user who'd like to set "dir" but not implicitly
change the argument to an absolute path.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/checkout.c | 14 +++++++++-----
 builtin/clone.c    |  6 ++++--
 builtin/worktree.c | 28 ++++++++++++----------------
 hook.c             |  9 +++++++++
 hook.h             |  8 ++++++++
 read-cache.c       |  1 +
 reset.c            | 14 ++++++++++----
 7 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index f4cd7747d35dd1..6d69b4c0113fed 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -9,6 +9,7 @@
 #include "config.h"
 #include "diff.h"
 #include "dir.h"
+#include "hook.h"
 #include "ll-merge.h"
 #include "lockfile.h"
 #include "merge-recursive.h"
@@ -106,13 +107,16 @@ struct branch_info {
 static int post_checkout_hook(struct commit *old_commit, struct commit *new_commit,
 			      int changed)
 {
-	return run_hook_le(NULL, "post-checkout",
-			   oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()),
-			   oid_to_hex(new_commit ? &new_commit->object.oid : null_oid()),
-			   changed ? "1" : "0", NULL);
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+
 	/* "new_commit" can be NULL when checking out from the index before
 	   a commit exists. */
-
+	strvec_pushl(&opt.args,
+		     oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()),
+		     oid_to_hex(new_commit ? &new_commit->object.oid : null_oid()),
+		     changed ? "1" : "0",
+		     NULL);
+	return run_hooks_oneshot("post-checkout", &opt);
 }
 
 static int update_some(const struct object_id *oid, struct strbuf *base,
diff --git a/builtin/clone.c b/builtin/clone.c
index 66fe66679c8498..27fc05ee511ef5 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -32,6 +32,7 @@
 #include "connected.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "hook.h"
 
 /*
  * Overall FIXMEs:
@@ -775,6 +776,7 @@ static int checkout(int submodule_progress)
 	struct tree *tree;
 	struct tree_desc t;
 	int err = 0;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
 
 	if (option_no_checkout)
 		return 0;
@@ -820,8 +822,8 @@ static int checkout(int submodule_progress)
 	if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
 		die(_("unable to write new index file"));
 
-	err |= run_hook_le(NULL, "post-checkout", oid_to_hex(null_oid()),
-			   oid_to_hex(&oid), "1", NULL);
+	strvec_pushl(&hook_opt.args, oid_to_hex(null_oid()), oid_to_hex(&oid), "1", NULL);
+	err |= run_hooks_oneshot("post-checkout", &hook_opt);
 
 	if (!err && (option_recurse_submodules.nr > 0)) {
 		struct strvec args = STRVEC_INIT;
diff --git a/builtin/worktree.c b/builtin/worktree.c
index d22ece93e1a805..330867c19bf329 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -382,22 +382,18 @@ static int add_worktree(const char *path, const char *refname,
 	 * is_junk is cleared, but do return appropriate code when hook fails.
 	 */
 	if (!ret && opts->checkout) {
-		const char *hook = find_hook("post-checkout");
-		if (hook) {
-			const char *env[] = { "GIT_DIR", "GIT_WORK_TREE", NULL };
-			cp.git_cmd = 0;
-			cp.no_stdin = 1;
-			cp.stdout_to_stderr = 1;
-			cp.dir = path;
-			cp.env = env;
-			cp.argv = NULL;
-			cp.trace2_hook_name = "post-checkout";
-			strvec_pushl(&cp.args, absolute_path(hook),
-				     oid_to_hex(null_oid()),
-				     oid_to_hex(&commit->object.oid),
-				     "1", NULL);
-			ret = run_command(&cp);
-		}
+		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+
+		strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL);
+		strvec_pushl(&opt.args,
+			     oid_to_hex(null_oid()),
+			     oid_to_hex(&commit->object.oid),
+			     "1",
+			     NULL);
+		opt.dir = path;
+		opt.absolute_path = 1;
+
+		ret = run_hooks_oneshot("post-checkout", &opt);
 	}
 
 	strvec_clear(&child_env);
diff --git a/hook.c b/hook.c
index 981a9bf46e6893..5ecf79c83977a6 100644
--- a/hook.c
+++ b/hook.c
@@ -92,6 +92,7 @@ static int pick_next_hook(struct child_process *cp,
 	cp->env = hook_cb->options->env.v;
 	cp->stdout_to_stderr = 1;
 	cp->trace2_hook_name = hook_cb->hook_name;
+	cp->dir = hook_cb->options->dir;
 
 	/* add command */
 	strvec_push(&cp->args, run_me->hook_path);
@@ -145,6 +146,7 @@ static int notify_hook_finished(int result,
 int run_hooks(const char *hook_name, const char *hook_path,
 	      struct run_hooks_opt *options)
 {
+	struct strbuf abs_path = STRBUF_INIT;
 	struct hook my_hook = {
 		.hook_path = hook_path,
 	};
@@ -158,6 +160,10 @@ int run_hooks(const char *hook_name, const char *hook_path,
 	if (!options)
 		BUG("a struct run_hooks_opt must be provided to run_hooks");
 
+	if (options->absolute_path) {
+		strbuf_add_absolute_path(&abs_path, hook_path);
+		my_hook.hook_path = abs_path.buf;
+	}
 	cb_data.run_me = &my_hook;
 
 	run_processes_parallel_tr2(jobs,
@@ -168,6 +174,9 @@ int run_hooks(const char *hook_name, const char *hook_path,
 				   "hook",
 				   hook_name);
 
+	if (options->absolute_path)
+		strbuf_release(&abs_path);
+
 	return cb_data.rc;
 }
 
diff --git a/hook.h b/hook.h
index 2201ab9d5681a1..53ea3a9649efa2 100644
--- a/hook.h
+++ b/hook.h
@@ -28,6 +28,14 @@ struct run_hooks_opt
 
 	/* Args to be passed to each hook */
 	struct strvec args;
+
+	/* Resolve and run the "absolute_path(hook)" instead of
+	 * "hook". Used for "git worktree" hooks
+	 */
+	int absolute_path;
+
+	/* Path to initial working directory for subprocess */
+	const char *dir;
 };
 
 #define RUN_HOOKS_OPT_INIT { \
diff --git a/read-cache.c b/read-cache.c
index 99a174b91e65a3..c9e2b013972e17 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -28,6 +28,7 @@
 #include "sparse-index.h"
 #include "csum-file.h"
 #include "promisor-remote.h"
+#include "hook.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
diff --git a/reset.c b/reset.c
index 4bea758053bbea..6499bc5127d022 100644
--- a/reset.c
+++ b/reset.c
@@ -7,6 +7,7 @@
 #include "tree-walk.h"
 #include "tree.h"
 #include "unpack-trees.h"
+#include "hook.h"
 
 int reset_head(struct repository *r, struct object_id *oid, const char *action,
 	       const char *switch_to_branch, unsigned flags,
@@ -126,10 +127,15 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action,
 			ret = create_symref("HEAD", switch_to_branch,
 					    reflog_head);
 	}
-	if (run_hook)
-		run_hook_le(NULL, "post-checkout",
-			    oid_to_hex(orig ? orig : null_oid()),
-			    oid_to_hex(oid), "1", NULL);
+	if (run_hook) {
+		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+		strvec_pushl(&opt.args,
+			     oid_to_hex(orig ? orig : null_oid()),
+			     oid_to_hex(oid),
+			     "1",
+			     NULL);
+		run_hooks_oneshot("post-checkout", &opt);
+	}
 
 leave_reset_head:
 	strbuf_release(&msg);

From f91d94abc2b0dbd5018d1aee09c9e1c5a14ca9b9 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:39 +0200
Subject: [PATCH 140/198] merge: convert post-merge to use hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Teach post-merge to use the hook.h library instead of the
run-command.h library to run hooks.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/merge.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 03f244dd5a09a1..4965df2ac29f8d 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -448,6 +448,7 @@ static void finish(struct commit *head_commit,
 		   const struct object_id *new_head, const char *msg)
 {
 	struct strbuf reflog_message = STRBUF_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 	const struct object_id *head = &head_commit->object.oid;
 
 	if (!msg)
@@ -489,7 +490,8 @@ static void finish(struct commit *head_commit,
 	}
 
 	/* Run a post-merge hook */
-	run_hook_le(NULL, "post-merge", squash ? "1" : "0", NULL);
+	strvec_push(&opt.args, squash ? "1" : "0");
+	run_hooks_oneshot("post-merge", &opt);
 
 	apply_autostash(git_path_merge_autostash(the_repository));
 	strbuf_release(&reflog_message);

From 2a83a2fa5b556d859956da0f4e542b717200c0b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:40 +0200
Subject: [PATCH 141/198] git hook run: add an --ignore-missing flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For certain one-shot hooks we'd like to optimistically run them, and
not complain if they don't exist. This will be used by send-email in a
subsequent commit.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-hook.txt | 10 +++++++++-
 builtin/hook.c             |  7 ++++++-
 t/t1800-hook.sh            |  5 +++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt
index 660d6a992a0f8d..097fb9de63bdf7 100644
--- a/Documentation/git-hook.txt
+++ b/Documentation/git-hook.txt
@@ -8,7 +8,7 @@ git-hook - run git hooks
 SYNOPSIS
 --------
 [verse]
-'git hook' run <hook-name> [-- <hook-args>]
+'git hook' run [--ignore-missing] <hook-name> [-- <hook-args>]
 
 DESCRIPTION
 -----------
@@ -29,6 +29,14 @@ optional `--` (or `--end-of-options`, see linkgit:gitcli[7]). The
 arguments (if any) differ by hook name, see linkgit:githooks[5] for
 what those are.
 
+OPTIONS
+-------
+
+--ignore-missing::
+	Ignore any missing hook by quietly returning zero. Used for
+	tools that want to do a blind one-shot run of a hook that may
+	or may not be present.
+
 SEE ALSO
 --------
 linkgit:githooks[5]
diff --git a/builtin/hook.c b/builtin/hook.c
index 41dd15550cf142..f33db9953c72bd 100644
--- a/builtin/hook.c
+++ b/builtin/hook.c
@@ -7,7 +7,7 @@
 #include "strvec.h"
 
 #define BUILTIN_HOOK_RUN_USAGE \
-	N_("git hook run <hook-name> [-- <hook-args>]")
+	N_("git hook run [--ignore-missing] <hook-name> [-- <hook-args>]")
 
 static const char * const builtin_hook_usage[] = {
 	BUILTIN_HOOK_RUN_USAGE,
@@ -23,9 +23,12 @@ static int run(int argc, const char **argv, const char *prefix)
 {
 	int i;
 	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	int ignore_missing = 0;
 	const char *hook_name;
 	const char *hook_path;
 	struct option run_options[] = {
+		OPT_BOOL(0, "ignore-missing", &ignore_missing,
+			 N_("exit quietly with a zero exit code if the requested hook cannot be found")),
 		OPT_END(),
 	};
 	int ret;
@@ -53,6 +56,8 @@ static int run(int argc, const char **argv, const char *prefix)
 	git_config(git_default_config, NULL);
 
 	hook_name = argv[0];
+	if (ignore_missing)
+		return run_hooks_oneshot(hook_name, &opt);
 	hook_path = find_hook(hook_name);
 	if (!hook_path) {
 		error("cannot find a hook named %s", hook_name);
diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh
index 644df0a583c836..49df5a2cdfb278 100755
--- a/t/t1800-hook.sh
+++ b/t/t1800-hook.sh
@@ -25,6 +25,11 @@ test_expect_success 'git hook run: nonexistent hook' '
 	test_cmp stderr.expect stderr.actual
 '
 
+test_expect_success 'git hook run: nonexistent hook with --ignore-missing' '
+	git hook run --ignore-missing does-not-exist 2>stderr.actual &&
+	test_must_be_empty stderr.actual
+'
+
 test_expect_success 'git hook run: basic' '
 	write_script .git/hooks/test-hook <<-EOF &&
 	echo Test hook

From 7a0bcb5fb74c191fd36af11b61011570392bfe94 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:41 +0200
Subject: [PATCH 142/198] send-email: use 'git hook run' for
 'sendemail-validate'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the "sendmail-validate" hook to be run via the "git hook run"
wrapper instead of via a direct invocation.

This is the smallest possibly change to get "send-email" using "git
hook run". We still check the hook itself with "-x", and set a
"GIT_DIR" variable, both of which are asserted by our tests. We'll
need to get rid of this special behavior if we start running N hooks,
but for now let's be as close to bug-for-bug compatible as possible.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-send-email.perl   | 20 ++++++++++++--------
 t/t9001-send-email.sh |  4 ++--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/git-send-email.perl b/git-send-email.perl
index e65d969d0bb257..126850d974bbd4 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -195,13 +195,13 @@ sub format_2822_time {
 my $editor;
 
 sub system_or_msg {
-	my ($args, $msg) = @_;
+	my ($args, $msg, $cmd_name) = @_;
 	system(@$args);
 	my $signalled = $? & 127;
 	my $exit_code = $? >> 8;
 	return unless $signalled or $exit_code;
 
-	my @sprintf_args = ($args->[0], $exit_code);
+	my @sprintf_args = ($cmd_name ? $cmd_name : $args->[0], $exit_code);
 	if (defined $msg) {
 		# Quiet the 'redundant' warning category, except we
 		# need to support down to Perl 5.8, so we can't do a
@@ -2031,10 +2031,10 @@ sub validate_patch {
 	my ($fn, $xfer_encoding) = @_;
 
 	if ($repo) {
+		my $hook_name = 'sendemail-validate';
 		my $hooks_path = $repo->command_oneline('rev-parse', '--git-path', 'hooks');
 		require File::Spec;
-		my $validate_hook = File::Spec->catfile($hooks_path,
-					    'sendemail-validate');
+		my $validate_hook = File::Spec->catfile($hooks_path, $hook_name);
 		my $hook_error;
 		if (-x $validate_hook) {
 			require Cwd;
@@ -2044,13 +2044,17 @@ sub validate_patch {
 			chdir($repo->wc_path() or $repo->repo_path())
 				or die("chdir: $!");
 			local $ENV{"GIT_DIR"} = $repo->repo_path();
-			$hook_error = system_or_msg([$validate_hook, $target]);
+			my @validate_hook = ("git", "hook", "run", "--ignore-missing", $hook_name, "--", $target);
+			$hook_error = system_or_msg(\@validate_hook, undef,
+						       "git hook run $hook_name -- <patch>");
 			chdir($cwd_save) or die("chdir: $!");
 		}
 		if ($hook_error) {
-			die sprintf(__("fatal: %s: rejected by sendemail-validate hook\n" .
-				       "%s\n" .
-				       "warning: no patches were sent\n"), $fn, $hook_error);
+			$hook_error = sprintf(__("fatal: %s: rejected by %s hook\n" .
+						 $hook_error . "\n" .
+						 "warning: no patches were sent\n"),
+					      $fn, $hook_name);
+			die $hook_error;
 		}
 	}
 
diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh
index 57fc10e7f82186..9ec7d75f0ff67c 100755
--- a/t/t9001-send-email.sh
+++ b/t/t9001-send-email.sh
@@ -539,7 +539,7 @@ test_expect_success $PREREQ "--validate respects relative core.hooksPath path" '
 	test_path_is_file my-hooks.ran &&
 	cat >expect <<-EOF &&
 	fatal: longline.patch: rejected by sendemail-validate hook
-	fatal: command '"'"'my-hooks/sendemail-validate'"'"' died with exit code 1
+	fatal: command '"'"'git hook run sendemail-validate -- <patch>'"'"' died with exit code 1
 	warning: no patches were sent
 	EOF
 	test_cmp expect actual
@@ -558,7 +558,7 @@ test_expect_success $PREREQ "--validate respects absolute core.hooksPath path" '
 	test_path_is_file my-hooks.ran &&
 	cat >expect <<-EOF &&
 	fatal: longline.patch: rejected by sendemail-validate hook
-	fatal: command '"'"'$hooks_path/sendemail-validate'"'"' died with exit code 1
+	fatal: command '"'"'git hook run sendemail-validate -- <patch>'"'"' died with exit code 1
 	warning: no patches were sent
 	EOF
 	test_cmp expect actual

From c4642909e217e0423949a6b0bd84c0acd1a9ea8b Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:42 +0200
Subject: [PATCH 143/198] git-p4: use 'git hook' to run hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of duplicating the behavior of run-command.h:run_hook_le() in
Python, we can directly call 'git hook run'. We emulate the existence
check with the --ignore-missing flag.

As this is the last hook execution in git.git to not go through "git
hook run" or the hook.[ch] library we can now be absolutely sure that
our assertion in hook.c that only hooks known by the generated (from
githooks(5)) hook-list.h are permitted.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-p4.py | 72 ++++++-------------------------------------------------
 1 file changed, 7 insertions(+), 65 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index 2b4500226aa7a4..1f24cbf0bca213 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -207,71 +207,13 @@ def decode_path(path):
         return path
 
 def run_git_hook(cmd, param=[]):
-    """Execute a hook if the hook exists."""
-    if verbose:
-        sys.stderr.write("Looking for hook: %s\n" % cmd)
-        sys.stderr.flush()
-
-    hooks_path = gitConfig("core.hooksPath")
-    if len(hooks_path) <= 0:
-        hooks_path = os.path.join(os.environ["GIT_DIR"], "hooks")
-
-    if not isinstance(param, list):
-        param=[param]
-
-    # resolve hook file name, OS depdenent
-    hook_file = os.path.join(hooks_path, cmd)
-    if platform.system() == 'Windows':
-        if not os.path.isfile(hook_file):
-            # look for the file with an extension
-            files = glob.glob(hook_file + ".*")
-            if not files:
-                return True
-            files.sort()
-            hook_file = files.pop()
-            while hook_file.upper().endswith(".SAMPLE"):
-                # The file is a sample hook. We don't want it
-                if len(files) > 0:
-                    hook_file = files.pop()
-                else:
-                    return True
-
-    if not os.path.isfile(hook_file) or not os.access(hook_file, os.X_OK):
-        return True
-
-    return run_hook_command(hook_file, param) == 0
-
-def run_hook_command(cmd, param):
-    """Executes a git hook command
-       cmd = the command line file to be executed. This can be
-       a file that is run by OS association.
-
-       param = a list of parameters to pass to the cmd command
-
-       On windows, the extension is checked to see if it should
-       be run with the Git for Windows Bash shell.  If there
-       is no file extension, the file is deemed a bash shell
-       and will be handed off to sh.exe. Otherwise, Windows
-       will be called with the shell to handle the file assocation.
-
-       For non Windows operating systems, the file is called
-       as an executable.
-    """
-    cli = [cmd] + param
-    use_shell = False
-    if platform.system() == 'Windows':
-        (root,ext) = os.path.splitext(cmd)
-        if ext == "":
-            exe_path = os.environ.get("EXEPATH")
-            if exe_path is None:
-                exe_path = ""
-            else:
-                exe_path = os.path.join(exe_path, "bin")
-            cli = [os.path.join(exe_path, "SH.EXE")] + cli
-        else:
-            use_shell = True
-    return subprocess.call(cli, shell=use_shell)
-
+    """args are specified with -a <arg> -a <arg> -a <arg>"""
+    args = ['git', 'hook', 'run', '--ignore-missing', cmd]
+    if param:
+        args.append("--")
+        for p in param:
+            args.append(p)
+    return subprocess.call(args) == 0
 
 def write_pipe(c, stdin):
     if verbose:

From 66d2308d1f287362b00171c293a3e8b47671a1ec Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:43 +0200
Subject: [PATCH 144/198] commit: convert {pre-commit,prepare-commit-msg} hook
 to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move these hooks hook away from run-command.h to and over to the new
hook.h library.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 commit.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/commit.c b/commit.c
index 143f472c0f24bf..63d7943a86dd51 100644
--- a/commit.c
+++ b/commit.c
@@ -21,6 +21,7 @@
 #include "commit-reach.h"
 #include "run-command.h"
 #include "shallow.h"
+#include "hook.h"
 
 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
 
@@ -1698,22 +1699,22 @@ size_t ignore_non_trailer(const char *buf, size_t len)
 int run_commit_hook(int editor_is_used, const char *index_file,
 		    const char *name, ...)
 {
-	struct strvec hook_env = STRVEC_INIT;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 	va_list args;
-	int ret;
+	const char *arg;
 
-	strvec_pushf(&hook_env, "GIT_INDEX_FILE=%s", index_file);
+	strvec_pushf(&opt.env, "GIT_INDEX_FILE=%s", index_file);
 
 	/*
 	 * Let the hook know that no editor will be launched.
 	 */
 	if (!editor_is_used)
-		strvec_push(&hook_env, "GIT_EDITOR=:");
+		strvec_push(&opt.env, "GIT_EDITOR=:");
 
 	va_start(args, name);
-	ret = run_hook_ve(hook_env.v, name, args);
+	while ((arg = va_arg(args, const char *)))
+		strvec_push(&opt.args, arg);
 	va_end(args);
-	strvec_clear(&hook_env);
 
-	return ret;
+	return run_hooks_oneshot(name, &opt);
 }

From b33b42e68951be3dd8056c61d7016edd32b1904a Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:44 +0200
Subject: [PATCH 145/198] read-cache: convert post-index-change to use hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the post-index-change hook away from run-command.h to and over to
the new hook.h library.

This removes the last direct user of run_hook_ve(), so we can make the
function static now. It'll be removed entirely soon.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c  | 10 +++++++---
 run-command.c |  2 +-
 run-command.h |  1 -
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index c9e2b013972e17..90099ca14dfacd 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3068,6 +3068,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
 {
 	int ret;
 	int was_full = !istate->sparse_index;
+	struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
 
 	ret = convert_to_sparse(istate);
 
@@ -3096,9 +3097,12 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
 	else
 		ret = close_lock_file_gently(lock);
 
-	run_hook_le(NULL, "post-index-change",
-			istate->updated_workdir ? "1" : "0",
-			istate->updated_skipworktree ? "1" : "0", NULL);
+	strvec_pushl(&hook_opt.args,
+		     istate->updated_workdir ? "1" : "0",
+		     istate->updated_skipworktree ? "1" : "0",
+		     NULL);
+	run_hooks_oneshot("post-index-change", &hook_opt);
+
 	istate->updated_workdir = 0;
 	istate->updated_skipworktree = 0;
 
diff --git a/run-command.c b/run-command.c
index 352f5be16465df..b4341ba1c7bcdb 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1320,7 +1320,7 @@ int async_with_fork(void)
 #endif
 }
 
-int run_hook_ve(const char *const *env, const char *name, va_list args)
+static int run_hook_ve(const char *const *env, const char *name, va_list args)
 {
 	struct child_process hook = CHILD_PROCESS_INIT;
 	const char *p;
diff --git a/run-command.h b/run-command.h
index f76b740f927b3b..7a867d41217fea 100644
--- a/run-command.h
+++ b/run-command.h
@@ -219,7 +219,6 @@ int run_command(struct child_process *);
  */
 LAST_ARG_MUST_BE_NULL
 int run_hook_le(const char *const *env, const char *name, ...);
-int run_hook_ve(const char *const *env, const char *name, va_list args);
 
 /*
  * Trigger an auto-gc

From 6e9209ee93dbefe856e9181b6e8ecac45531eb61 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:45 +0200
Subject: [PATCH 146/198] receive-pack: convert push-to-checkout hook to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the push-to-checkout hook away from run-command.h to and over to
the new hook.h library.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/receive-pack.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 91fa799b66e407..a7d03bbc7d386d 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1435,9 +1435,12 @@ static const char *push_to_checkout(unsigned char *hash,
 				    struct strvec *env,
 				    const char *work_tree)
 {
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+
 	strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree));
-	if (run_hook_le(env->v, push_to_checkout_hook,
-			hash_to_hex(hash), NULL))
+	strvec_pushv(&opt.env, env->v);
+	strvec_push(&opt.args, hash_to_hex(hash));
+	if (run_hooks_oneshot(push_to_checkout_hook, &opt))
 		return "push-to-checkout hook declined";
 	else
 		return NULL;

From 3b819b8bd5f51494ea9782c5f28fa633bee5127e Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:46 +0200
Subject: [PATCH 147/198] run-command: remove old run_hook_{le,ve}() hook API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new hook.h library has replaced all run-command.h hook-related
functionality. So let's delete this dead code.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 run-command.c | 32 --------------------------------
 run-command.h | 16 ----------------
 2 files changed, 48 deletions(-)

diff --git a/run-command.c b/run-command.c
index b4341ba1c7bcdb..1399243de8a3b3 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1320,38 +1320,6 @@ int async_with_fork(void)
 #endif
 }
 
-static int run_hook_ve(const char *const *env, const char *name, va_list args)
-{
-	struct child_process hook = CHILD_PROCESS_INIT;
-	const char *p;
-
-	p = find_hook(name);
-	if (!p)
-		return 0;
-
-	strvec_push(&hook.args, p);
-	while ((p = va_arg(args, const char *)))
-		strvec_push(&hook.args, p);
-	hook.env = env;
-	hook.no_stdin = 1;
-	hook.stdout_to_stderr = 1;
-	hook.trace2_hook_name = name;
-
-	return run_command(&hook);
-}
-
-int run_hook_le(const char *const *env, const char *name, ...)
-{
-	va_list args;
-	int ret;
-
-	va_start(args, name);
-	ret = run_hook_ve(env, name, args);
-	va_end(args);
-
-	return ret;
-}
-
 struct io_pump {
 	/* initialized by caller */
 	int fd;
diff --git a/run-command.h b/run-command.h
index 7a867d41217fea..cfb6887e4ae3b1 100644
--- a/run-command.h
+++ b/run-command.h
@@ -204,22 +204,6 @@ int finish_command_in_signal(struct child_process *);
  */
 int run_command(struct child_process *);
 
-/**
- * Run a hook.
- * The first argument is a pathname to an index file, or NULL
- * if the hook uses the default index file or no index is needed.
- * The second argument is the name of the hook.
- * The further arguments correspond to the hook arguments.
- * The last argument has to be NULL to terminate the arguments list.
- * If the hook does not exist or is not executable, the return
- * value will be zero.
- * If it is executable, the hook will be executed and the exit
- * status of the hook is returned.
- * On execution, .stdout_to_stderr and .no_stdin will be set.
- */
-LAST_ARG_MUST_BE_NULL
-int run_hook_le(const char *const *env, const char *name, ...);
-
 /*
  * Trigger an auto-gc
  */

From fe4ce60e0dcf47a52d4cc5d593e0f5ee65b51b22 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:47 +0200
Subject: [PATCH 148/198] run-command: allow stdin for run_processes_parallel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While it makes sense not to inherit stdin from the parent process to
avoid deadlocking, it's not necessary to completely ban stdin to
children. An informed user should be able to configure stdin safely. By
setting `some_child.process.no_stdin=1` before calling `get_next_task()`
we provide a reasonable default behavior but enable users to set up
stdin streaming for themselves during the callback.

`some_child.process.stdout_to_stderr`, however, remains unmodifiable by
`get_next_task()` - the rest of the run_processes_parallel() API depends
on child output in stderr.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 run-command.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/run-command.c b/run-command.c
index 1399243de8a3b3..482ee2d76c69c4 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1627,6 +1627,14 @@ static int pp_start_one(struct parallel_processes *pp)
 	if (i == pp->max_processes)
 		BUG("bookkeeping is hard");
 
+	/*
+	 * By default, do not inherit stdin from the parent process - otherwise,
+	 * all children would share stdin! Users may overwrite this to provide
+	 * something to the child's stdin by having their 'get_next_task'
+	 * callback assign 0 to .no_stdin and an appropriate integer to .in.
+	 */
+	pp->children[i].process.no_stdin = 1;
+
 	code = pp->get_next_task(&pp->children[i].process,
 				 &pp->children[i].err,
 				 pp->data,
@@ -1638,7 +1646,6 @@ static int pp_start_one(struct parallel_processes *pp)
 	}
 	pp->children[i].process.err = -1;
 	pp->children[i].process.stdout_to_stderr = 1;
-	pp->children[i].process.no_stdin = 1;
 
 	if (start_command(&pp->children[i].process)) {
 		code = pp->start_failure(&pp->children[i].err,

From 4787177bdd8e4a6e7d59bc124917448539cf47bb Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:48 +0200
Subject: [PATCH 149/198] hook: support passing stdin to hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some hooks (such as post-rewrite) need to take input via stdin.
Previously, callers provided stdin to hooks by setting
run-command.h:child_process.in, which takes a FD. Callers would open the
file in question themselves before calling run-command(). However, since
we will now need to seek to the front of the file and read it again for
every hook which runs, hook.h:run_command() takes a path and handles FD
management itself. Since this file is opened for read only, it should
not prevent later parallel execution support.

On the frontend, this is supported by asking for a file path, rather
than by reading stdin. Reading directly from stdin would involve caching
the entire stdin (to memory or to disk) and reading it back from the
beginning to each hook. We'd want to support cases like insufficient
memory or storage for the file. While this may prove useful later, for
now the path of least resistance is to just ask the user to make this
interim file themselves.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-hook.txt |  7 ++++++-
 builtin/hook.c             |  4 +++-
 hook.c                     |  8 +++++++-
 hook.h                     |  3 +++
 t/t1800-hook.sh            | 18 ++++++++++++++++++
 5 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt
index 097fb9de63bdf7..fa68c1f3912389 100644
--- a/Documentation/git-hook.txt
+++ b/Documentation/git-hook.txt
@@ -8,7 +8,7 @@ git-hook - run git hooks
 SYNOPSIS
 --------
 [verse]
-'git hook' run [--ignore-missing] <hook-name> [-- <hook-args>]
+'git hook' run [--to-stdin=<path>] [--ignore-missing] <hook-name> [-- <hook-args>]
 
 DESCRIPTION
 -----------
@@ -32,6 +32,11 @@ what those are.
 OPTIONS
 -------
 
+--to-stdin::
+	For "run"; Specify a file which will be streamed into the
+	hook's stdin. The hook will receive the entire file from
+	beginning to EOF.
+
 --ignore-missing::
 	Ignore any missing hook by quietly returning zero. Used for
 	tools that want to do a blind one-shot run of a hook that may
diff --git a/builtin/hook.c b/builtin/hook.c
index f33db9953c72bd..27dce6a2f0ec3c 100644
--- a/builtin/hook.c
+++ b/builtin/hook.c
@@ -7,7 +7,7 @@
 #include "strvec.h"
 
 #define BUILTIN_HOOK_RUN_USAGE \
-	N_("git hook run [--ignore-missing] <hook-name> [-- <hook-args>]")
+	N_("git hook run [--ignore-missing] [--to-stdin=<path>] <hook-name> [-- <hook-args>]")
 
 static const char * const builtin_hook_usage[] = {
 	BUILTIN_HOOK_RUN_USAGE,
@@ -29,6 +29,8 @@ static int run(int argc, const char **argv, const char *prefix)
 	struct option run_options[] = {
 		OPT_BOOL(0, "ignore-missing", &ignore_missing,
 			 N_("exit quietly with a zero exit code if the requested hook cannot be found")),
+		OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"),
+			   N_("file to read into hooks' stdin")),
 		OPT_END(),
 	};
 	int ret;
diff --git a/hook.c b/hook.c
index 5ecf79c83977a6..63c9a60921f856 100644
--- a/hook.c
+++ b/hook.c
@@ -88,7 +88,13 @@ static int pick_next_hook(struct child_process *cp,
 	if (!run_me)
 		return 0;
 
-	cp->no_stdin = 1;
+	/* reopen the file for stdin; run_command closes it. */
+	if (hook_cb->options->path_to_stdin) {
+		cp->no_stdin = 0;
+		cp->in = xopen(hook_cb->options->path_to_stdin, O_RDONLY);
+	} else {
+		cp->no_stdin = 1;
+	}
 	cp->env = hook_cb->options->env.v;
 	cp->stdout_to_stderr = 1;
 	cp->trace2_hook_name = hook_cb->hook_name;
diff --git a/hook.h b/hook.h
index 53ea3a9649efa2..cd6a68a3b505bb 100644
--- a/hook.h
+++ b/hook.h
@@ -36,6 +36,9 @@ struct run_hooks_opt
 
 	/* Path to initial working directory for subprocess */
 	const char *dir;
+
+	/* Path to file which should be piped to stdin for each hook */
+	const char *path_to_stdin;
 };
 
 #define RUN_HOOKS_OPT_INIT { \
diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh
index 49df5a2cdfb278..217db848b3f22c 100755
--- a/t/t1800-hook.sh
+++ b/t/t1800-hook.sh
@@ -135,4 +135,22 @@ test_expect_success 'git -c core.hooksPath=<PATH> hook run' '
 	test_cmp expect actual
 '
 
+test_expect_success 'stdin to hooks' '
+	write_script .git/hooks/test-hook <<-\EOF &&
+	echo BEGIN stdin
+	cat
+	echo END stdin
+	EOF
+
+	cat >expect <<-EOF &&
+	BEGIN stdin
+	hello
+	END stdin
+	EOF
+
+	echo hello >input &&
+	git hook run --to-stdin=input test-hook 2>actual &&
+	test_cmp expect actual
+'
+
 test_done

From 580bc69cb6581a7f343e0f8264a9a113f6c1901a Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:49 +0200
Subject: [PATCH 150/198] am: convert 'post-rewrite' hook to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/am.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index e444b18b64a34f..9e3d4d9ab446a7 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -467,24 +467,12 @@ static int run_applypatch_msg_hook(struct am_state *state)
  */
 static int run_post_rewrite_hook(const struct am_state *state)
 {
-	struct child_process cp = CHILD_PROCESS_INIT;
-	const char *hook = find_hook("post-rewrite");
-	int ret;
-
-	if (!hook)
-		return 0;
-
-	strvec_push(&cp.args, hook);
-	strvec_push(&cp.args, "rebase");
-
-	cp.in = xopen(am_path(state, "rewritten"), O_RDONLY);
-	cp.stdout_to_stderr = 1;
-	cp.trace2_hook_name = "post-rewrite";
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 
-	ret = run_command(&cp);
+	strvec_push(&opt.args, "rebase");
+	opt.path_to_stdin = am_path(state, "rewritten");
 
-	close(cp.in);
-	return ret;
+	return run_hooks_oneshot("post-rewrite", &opt);
 }
 
 /**

From c623a9a2e6f6ab30c42bd0df35eca562e091883e Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:50 +0200
Subject: [PATCH 151/198] run-command: add stdin callback for parallelization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a user of the run_processes_parallel() API wants to pipe a large
amount of information to stdin of each parallel command, that
information could exceed the buffer of the pipe allocated for that
process's stdin.  Generally this is solved by repeatedly writing to
child_process.in between calls to start_command() and finish_command();
run_processes_parallel() did not provide users an opportunity to access
child_process at that time.

Because the data might be extremely large (for example, a list of all
refs received during a push from a client) simply taking a string_list
or strbuf is not as scalable as using a callback; the rest of the
run_processes_parallel() API also uses callbacks, so making this feature
match the rest of the API reduces mental load on the user.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fetch.c             |  1 +
 builtin/submodule--helper.c |  2 +-
 hook.c                      |  1 +
 run-command.c               | 54 +++++++++++++++++++++++++++++++++++--
 run-command.h               | 17 +++++++++++-
 submodule.c                 |  1 +
 t/helper/test-run-command.c | 31 ++++++++++++++++++---
 t/t0061-run-command.sh      | 30 +++++++++++++++++++++
 8 files changed, 129 insertions(+), 8 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 25740c13df1bf8..fef6e85d003d48 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1817,6 +1817,7 @@ static int fetch_multiple(struct string_list *list, int max_children)
 		result = run_processes_parallel_tr2(max_children,
 						    &fetch_next_remote,
 						    &fetch_failed_to_start,
+						    NULL,
 						    &fetch_finished,
 						    &state,
 						    "fetch", "parallel/fetch");
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f73963ad67da33..f42ded548bffb6 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2294,7 +2294,7 @@ static int update_submodules(struct submodule_update_clone *suc)
 	int i;
 
 	run_processes_parallel_tr2(suc->max_jobs, update_clone_get_next_task,
-				   update_clone_start_failure,
+				   update_clone_start_failure, NULL,
 				   update_clone_task_finished, suc, "submodule",
 				   "parallel/update");
 
diff --git a/hook.c b/hook.c
index 63c9a60921f856..a7462639d97b49 100644
--- a/hook.c
+++ b/hook.c
@@ -175,6 +175,7 @@ int run_hooks(const char *hook_name, const char *hook_path,
 	run_processes_parallel_tr2(jobs,
 				   pick_next_hook,
 				   notify_start_failure,
+				   NULL,
 				   notify_hook_finished,
 				   &cb_data,
 				   "hook",
diff --git a/run-command.c b/run-command.c
index 482ee2d76c69c4..f1616858d18eaa 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1492,6 +1492,7 @@ struct parallel_processes {
 
 	get_next_task_fn get_next_task;
 	start_failure_fn start_failure;
+	feed_pipe_fn feed_pipe;
 	task_finished_fn task_finished;
 
 	struct {
@@ -1519,6 +1520,13 @@ static int default_start_failure(struct strbuf *out,
 	return 0;
 }
 
+static int default_feed_pipe(struct strbuf *pipe,
+			     void *pp_cb,
+			     void *pp_task_cb)
+{
+	return 1;
+}
+
 static int default_task_finished(int result,
 				 struct strbuf *out,
 				 void *pp_cb,
@@ -1549,6 +1557,7 @@ static void pp_init(struct parallel_processes *pp,
 		    int n,
 		    get_next_task_fn get_next_task,
 		    start_failure_fn start_failure,
+		    feed_pipe_fn feed_pipe,
 		    task_finished_fn task_finished,
 		    void *data)
 {
@@ -1567,6 +1576,7 @@ static void pp_init(struct parallel_processes *pp,
 	pp->get_next_task = get_next_task;
 
 	pp->start_failure = start_failure ? start_failure : default_start_failure;
+	pp->feed_pipe = feed_pipe ? feed_pipe : default_feed_pipe;
 	pp->task_finished = task_finished ? task_finished : default_task_finished;
 
 	pp->nr_processes = 0;
@@ -1664,6 +1674,37 @@ static int pp_start_one(struct parallel_processes *pp)
 	return 0;
 }
 
+static void pp_buffer_stdin(struct parallel_processes *pp)
+{
+	int i;
+	struct strbuf sb = STRBUF_INIT;
+
+	/* Buffer stdin for each pipe. */
+	for (i = 0; i < pp->max_processes; i++) {
+		if (pp->children[i].state == GIT_CP_WORKING &&
+		    pp->children[i].process.in > 0) {
+			int done;
+			strbuf_reset(&sb);
+			done = pp->feed_pipe(&sb, pp->data,
+					      pp->children[i].data);
+			if (sb.len) {
+				if (write_in_full(pp->children[i].process.in,
+					      sb.buf, sb.len) < 0) {
+					if (errno != EPIPE)
+						die_errno("write");
+					done = 1;
+				}
+			}
+			if (done) {
+				close(pp->children[i].process.in);
+				pp->children[i].process.in = 0;
+			}
+		}
+	}
+
+	strbuf_release(&sb);
+}
+
 static void pp_buffer_stderr(struct parallel_processes *pp, int output_timeout)
 {
 	int i;
@@ -1728,6 +1769,7 @@ static int pp_collect_finished(struct parallel_processes *pp)
 		pp->nr_processes--;
 		pp->children[i].state = GIT_CP_FREE;
 		pp->pfd[i].fd = -1;
+		pp->children[i].process.in = 0;
 		child_process_init(&pp->children[i].process);
 
 		if (i != pp->output_owner) {
@@ -1761,6 +1803,7 @@ static int pp_collect_finished(struct parallel_processes *pp)
 int run_processes_parallel(int n,
 			   get_next_task_fn get_next_task,
 			   start_failure_fn start_failure,
+			   feed_pipe_fn feed_pipe,
 			   task_finished_fn task_finished,
 			   void *pp_cb)
 {
@@ -1769,7 +1812,9 @@ int run_processes_parallel(int n,
 	int spawn_cap = 4;
 	struct parallel_processes pp;
 
-	pp_init(&pp, n, get_next_task, start_failure, task_finished, pp_cb);
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	pp_init(&pp, n, get_next_task, start_failure, feed_pipe, task_finished, pp_cb);
 	while (1) {
 		for (i = 0;
 		    i < spawn_cap && !pp.shutdown &&
@@ -1786,6 +1831,7 @@ int run_processes_parallel(int n,
 		}
 		if (!pp.nr_processes)
 			break;
+		pp_buffer_stdin(&pp);
 		pp_buffer_stderr(&pp, output_timeout);
 		pp_output(&pp);
 		code = pp_collect_finished(&pp);
@@ -1797,11 +1843,15 @@ int run_processes_parallel(int n,
 	}
 
 	pp_cleanup(&pp);
+
+	sigchain_pop(SIGPIPE);
+
 	return 0;
 }
 
 int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task,
 			       start_failure_fn start_failure,
+			       feed_pipe_fn feed_pipe,
 			       task_finished_fn task_finished, void *pp_cb,
 			       const char *tr2_category, const char *tr2_label)
 {
@@ -1811,7 +1861,7 @@ int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task,
 				   ((n < 1) ? online_cpus() : n));
 
 	result = run_processes_parallel(n, get_next_task, start_failure,
-					task_finished, pp_cb);
+					feed_pipe, task_finished, pp_cb);
 
 	trace2_region_leave(tr2_category, tr2_label, NULL);
 
diff --git a/run-command.h b/run-command.h
index cfb6887e4ae3b1..80d394664aec2a 100644
--- a/run-command.h
+++ b/run-command.h
@@ -422,6 +422,20 @@ typedef int (*start_failure_fn)(struct strbuf *out,
 				void *pp_cb,
 				void *pp_task_cb);
 
+/**
+ * This callback is called repeatedly on every child process who requests
+ * start_command() to create a pipe by setting child_process.in < 0.
+ *
+ * pp_cb is the callback cookie as passed into run_processes_parallel, and
+ * pp_task_cb is the callback cookie as passed into get_next_task_fn.
+ * The contents of 'send' will be read into the pipe and passed to the pipe.
+ *
+ * Return nonzero to close the pipe.
+ */
+typedef int (*feed_pipe_fn)(struct strbuf *pipe,
+			    void *pp_cb,
+			    void *pp_task_cb);
+
 /**
  * This callback is called on every child process that finished processing.
  *
@@ -456,10 +470,11 @@ typedef int (*task_finished_fn)(int result,
 int run_processes_parallel(int n,
 			   get_next_task_fn,
 			   start_failure_fn,
+			   feed_pipe_fn,
 			   task_finished_fn,
 			   void *pp_cb);
 int run_processes_parallel_tr2(int n, get_next_task_fn, start_failure_fn,
-			       task_finished_fn, void *pp_cb,
+			       feed_pipe_fn, task_finished_fn, void *pp_cb,
 			       const char *tr2_category, const char *tr2_label);
 
 /**
diff --git a/submodule.c b/submodule.c
index 8e611fe1dbf1f7..db1700a502dcd2 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1632,6 +1632,7 @@ int fetch_populated_submodules(struct repository *r,
 	run_processes_parallel_tr2(max_parallel_jobs,
 				   get_next_submodule,
 				   fetch_start_failure,
+				   NULL,
 				   fetch_finish,
 				   &spf,
 				   "submodule", "parallel/fetch");
diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c
index 7ae03dc7123468..9348184d303eab 100644
--- a/t/helper/test-run-command.c
+++ b/t/helper/test-run-command.c
@@ -32,8 +32,13 @@ static int parallel_next(struct child_process *cp,
 		return 0;
 
 	strvec_pushv(&cp->args, d->argv);
+	cp->in = d->in;
+	cp->no_stdin = d->no_stdin;
 	strbuf_addstr(err, "preloaded output of a child\n");
 	number_callbacks++;
+
+	*task_cb = xmalloc(sizeof(int));
+	*(int*)(*task_cb) = 2;
 	return 1;
 }
 
@@ -55,6 +60,17 @@ static int task_finished(int result,
 	return 1;
 }
 
+static int test_stdin(struct strbuf *pipe, void *cb, void *task_cb)
+{
+	int *lines_remaining = task_cb;
+
+	if (*lines_remaining)
+		strbuf_addf(pipe, "sample stdin %d\n", --(*lines_remaining));
+
+	return !(*lines_remaining);
+}
+
+
 struct testsuite {
 	struct string_list tests, failed;
 	int next;
@@ -185,7 +201,7 @@ static int testsuite(int argc, const char **argv)
 		suite.tests.nr, max_jobs);
 
 	ret = run_processes_parallel(max_jobs, next_test, test_failed,
-				     test_finished, &suite);
+				     test_stdin, test_finished, &suite);
 
 	if (suite.failed.nr > 0) {
 		ret = 1;
@@ -413,15 +429,22 @@ int cmd__run_command(int argc, const char **argv)
 
 	if (!strcmp(argv[1], "run-command-parallel"))
 		exit(run_processes_parallel(jobs, parallel_next,
-					    NULL, NULL, &proc));
+					    NULL, NULL, NULL, &proc));
 
 	if (!strcmp(argv[1], "run-command-abort"))
 		exit(run_processes_parallel(jobs, parallel_next,
-					    NULL, task_finished, &proc));
+					    NULL, NULL, task_finished, &proc));
 
 	if (!strcmp(argv[1], "run-command-no-jobs"))
 		exit(run_processes_parallel(jobs, no_job,
-					    NULL, task_finished, &proc));
+					    NULL, NULL, task_finished, &proc));
+
+	if (!strcmp(argv[1], "run-command-stdin")) {
+		proc.in = -1;
+		proc.no_stdin = 0;
+		exit (run_processes_parallel(jobs, parallel_next, NULL,
+					     test_stdin, NULL, &proc));
+	}
 
 	fprintf(stderr, "check usage\n");
 	return 1;
diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh
index 7d599675e35a75..87759482ad111e 100755
--- a/t/t0061-run-command.sh
+++ b/t/t0061-run-command.sh
@@ -143,6 +143,36 @@ test_expect_success 'run_command runs in parallel with more tasks than jobs avai
 	test_cmp expect actual
 '
 
+cat >expect <<-EOF
+preloaded output of a child
+listening for stdin:
+sample stdin 1
+sample stdin 0
+preloaded output of a child
+listening for stdin:
+sample stdin 1
+sample stdin 0
+preloaded output of a child
+listening for stdin:
+sample stdin 1
+sample stdin 0
+preloaded output of a child
+listening for stdin:
+sample stdin 1
+sample stdin 0
+EOF
+
+test_expect_success 'run_command listens to stdin' '
+	write_script stdin-script <<-\EOF &&
+	echo "listening for stdin:"
+	while read line; do
+		echo "$line"
+	done
+	EOF
+	test-tool run-command run-command-stdin 2 ./stdin-script 2>actual &&
+	test_cmp expect actual
+'
+
 cat >expect <<-EOF
 preloaded output of a child
 asking for a quick stop

From c848767a58e71c35135f6683d87f083af49408d1 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:51 +0200
Subject: [PATCH 152/198] hook: provide stdin by string_list or callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In cases where a hook requires only a small amount of information via
stdin, it should be simple for users to provide a string_list alone. But
in more complicated cases where the stdin is too large to hold in
memory, let's instead provide a callback the users can populate line
after line.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 hook.c | 33 ++++++++++++++++++++++++++++++++-
 hook.h | 27 +++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/hook.c b/hook.c
index a7462639d97b49..44e73827800813 100644
--- a/hook.c
+++ b/hook.c
@@ -77,6 +77,29 @@ void run_hooks_opt_clear(struct run_hooks_opt *o)
 	strvec_clear(&o->args);
 }
 
+int pipe_from_string_list(struct strbuf *pipe, void *pp_cb, void *pp_task_cb)
+{
+	int *item_idx;
+	struct hook *ctx = pp_task_cb;
+	struct hook_cb_data *hook_cb = pp_cb;
+	struct string_list *to_pipe = hook_cb->options->feed_pipe_ctx;
+
+	/* Bootstrap the state manager if necessary. */
+	if (!ctx->feed_pipe_cb_data) {
+		ctx->feed_pipe_cb_data = xmalloc(sizeof(unsigned int));
+		*(int*)ctx->feed_pipe_cb_data = 0;
+	}
+
+	item_idx = ctx->feed_pipe_cb_data;
+
+	if (*item_idx < to_pipe->nr) {
+		strbuf_addf(pipe, "%s\n", to_pipe->items[*item_idx].string);
+		(*item_idx)++;
+		return 0;
+	}
+	return 1;
+}
+
 static int pick_next_hook(struct child_process *cp,
 			  struct strbuf *out,
 			  void *pp_cb,
@@ -92,6 +115,10 @@ static int pick_next_hook(struct child_process *cp,
 	if (hook_cb->options->path_to_stdin) {
 		cp->no_stdin = 0;
 		cp->in = xopen(hook_cb->options->path_to_stdin, O_RDONLY);
+	} else if (hook_cb->options->feed_pipe) {
+		/* ask for start_command() to make a pipe for us */
+		cp->in = -1;
+		cp->no_stdin = 0;
 	} else {
 		cp->no_stdin = 1;
 	}
@@ -175,7 +202,7 @@ int run_hooks(const char *hook_name, const char *hook_path,
 	run_processes_parallel_tr2(jobs,
 				   pick_next_hook,
 				   notify_start_failure,
-				   NULL,
+				   options->feed_pipe,
 				   notify_hook_finished,
 				   &cb_data,
 				   "hook",
@@ -183,6 +210,7 @@ int run_hooks(const char *hook_name, const char *hook_path,
 
 	if (options->absolute_path)
 		strbuf_release(&abs_path);
+	free(my_hook.feed_pipe_cb_data);
 
 	return cb_data.rc;
 }
@@ -196,6 +224,9 @@ int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options)
 	if (!options)
 		options = &hook_opt_scratch;
 
+	if (options->path_to_stdin && options->feed_pipe)
+		BUG("choose only one method to populate stdin");
+
 	hook_path = find_hook(hook_name);
 	if (!hook_path) {
 		ret = 0;
diff --git a/hook.h b/hook.h
index cd6a68a3b505bb..b55f283f90bce9 100644
--- a/hook.h
+++ b/hook.h
@@ -19,6 +19,12 @@ int hook_exists(const char *hookname);
 struct hook {
 	/* The path to the hook */
 	const char *hook_path;
+
+	/*
+	 * Use this to keep state for your feed_pipe_fn if you are using
+	 * run_hooks_opt.feed_pipe. Otherwise, do not touch it.
+	 */
+	void *feed_pipe_cb_data;
 };
 
 struct run_hooks_opt
@@ -39,6 +45,19 @@ struct run_hooks_opt
 
 	/* Path to file which should be piped to stdin for each hook */
 	const char *path_to_stdin;
+
+	/*
+	 * Callback and state pointer to ask for more content to pipe to stdin.
+	 * Will be called repeatedly, for each hook. See
+	 * hook.c:pipe_from_stdin() for an example. Keep per-hook state in
+	 * hook.feed_pipe_cb_data (per process). Keep initialization context in
+	 * feed_pipe_ctx (shared by all processes).
+	 *
+	 * See 'pipe_from_string_list()' for info about how to specify a
+	 * string_list as the stdin input instead of writing your own handler.
+	 */
+	feed_pipe_fn feed_pipe;
+	void *feed_pipe_ctx;
 };
 
 #define RUN_HOOKS_OPT_INIT { \
@@ -46,6 +65,14 @@ struct run_hooks_opt
 	.args = STRVEC_INIT, \
 }
 
+/*
+ * To specify a 'struct string_list', set 'run_hooks_opt.feed_pipe_ctx' to the
+ * string_list and set 'run_hooks_opt.feed_pipe' to 'pipe_from_string_list()'.
+ * This will pipe each string in the list to stdin, separated by newlines.  (Do
+ * not inject your own newlines.)
+ */
+int pipe_from_string_list(struct strbuf *pipe, void *pp_cb, void *pp_task_cb);
+
 /*
  * Callback provided to feed_pipe_fn and consume_sideband_fn.
  */

From 73edbd92757b086e5e184481618afe5e0969311e Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:52 +0200
Subject: [PATCH 153/198] hook: convert 'post-rewrite' hook in sequencer.c to
 hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By using 'hook.h' for 'post-rewrite', we simplify hook invocations by
not needing to put together our own 'struct child_process'.

The signal handling that's being removed by this commit now takes
place in run-command.h:run_processes_parallel(), so it is OK to remove
them here.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sequencer.c | 79 ++++++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 47 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 9aac08c15451a6..77f809c00e46e6 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -35,6 +35,7 @@
 #include "commit-reach.h"
 #include "rebase-interactive.h"
 #include "reset.h"
+#include "string-list.h"
 
 #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION"
 
@@ -1147,33 +1148,27 @@ int update_head_with_reflog(const struct commit *old_head,
 static int run_rewrite_hook(const struct object_id *oldoid,
 			    const struct object_id *newoid)
 {
-	struct child_process proc = CHILD_PROCESS_INIT;
-	const char *argv[3];
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct strbuf tmp = STRBUF_INIT;
+	struct string_list to_stdin = STRING_LIST_INIT_DUP;
 	int code;
-	struct strbuf sb = STRBUF_INIT;
 
-	argv[0] = find_hook("post-rewrite");
-	if (!argv[0])
-		return 0;
+	strvec_push(&opt.args, "amend");
 
-	argv[1] = "amend";
-	argv[2] = NULL;
-
-	proc.argv = argv;
-	proc.in = -1;
-	proc.stdout_to_stderr = 1;
-	proc.trace2_hook_name = "post-rewrite";
-
-	code = start_command(&proc);
-	if (code)
-		return code;
-	strbuf_addf(&sb, "%s %s\n", oid_to_hex(oldoid), oid_to_hex(newoid));
-	sigchain_push(SIGPIPE, SIG_IGN);
-	write_in_full(proc.in, sb.buf, sb.len);
-	close(proc.in);
-	strbuf_release(&sb);
-	sigchain_pop(SIGPIPE);
-	return finish_command(&proc);
+	strbuf_addf(&tmp,
+		    "%s %s",
+		    oid_to_hex(oldoid),
+		    oid_to_hex(newoid));
+	string_list_append(&to_stdin, tmp.buf);
+
+	opt.feed_pipe = pipe_from_string_list;
+	opt.feed_pipe_ctx = &to_stdin;
+
+	code = run_hooks_oneshot("post-rewrite", &opt);
+
+	strbuf_release(&tmp);
+	string_list_clear(&to_stdin, 0);
+	return code;
 }
 
 void commit_post_rewrite(struct repository *r,
@@ -4526,30 +4521,20 @@ static int pick_commits(struct repository *r,
 		flush_rewritten_pending();
 		if (!stat(rebase_path_rewritten_list(), &st) &&
 				st.st_size > 0) {
-			struct child_process child = CHILD_PROCESS_INIT;
-			const char *post_rewrite_hook =
-				find_hook("post-rewrite");
-
-			child.in = open(rebase_path_rewritten_list(), O_RDONLY);
-			child.git_cmd = 1;
-			strvec_push(&child.args, "notes");
-			strvec_push(&child.args, "copy");
-			strvec_push(&child.args, "--for-rewrite=rebase");
+			struct child_process notes_cp = CHILD_PROCESS_INIT;
+			struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT;
+
+			notes_cp.in = open(rebase_path_rewritten_list(), O_RDONLY);
+			notes_cp.git_cmd = 1;
+			strvec_push(&notes_cp.args, "notes");
+			strvec_push(&notes_cp.args, "copy");
+			strvec_push(&notes_cp.args, "--for-rewrite=rebase");
 			/* we don't care if this copying failed */
-			run_command(&child);
-
-			if (post_rewrite_hook) {
-				struct child_process hook = CHILD_PROCESS_INIT;
-
-				hook.in = open(rebase_path_rewritten_list(),
-					O_RDONLY);
-				hook.stdout_to_stderr = 1;
-				hook.trace2_hook_name = "post-rewrite";
-				strvec_push(&hook.args, post_rewrite_hook);
-				strvec_push(&hook.args, "rebase");
-				/* we don't care if this hook failed */
-				run_command(&hook);
-			}
+			run_command(&notes_cp);
+
+			hook_opt.path_to_stdin = rebase_path_rewritten_list();
+			strvec_push(&hook_opt.args, "rebase");
+			run_hooks_oneshot("post-rewrite", &hook_opt);
 		}
 		apply_autostash(rebase_path_autostash());
 

From c5f26cb3d5e4fce09d740c4d746164e2a1953b47 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:53 +0200
Subject: [PATCH 154/198] transport: convert pre-push hook to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the pre-push hook away from run-command.h to and over to the new
hook.h library.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 transport.c | 56 ++++++++++++++---------------------------------------
 1 file changed, 14 insertions(+), 42 deletions(-)

diff --git a/transport.c b/transport.c
index 77e196f75f551d..4ca8fc0391d274 100644
--- a/transport.c
+++ b/transport.c
@@ -1203,63 +1203,35 @@ static void die_with_unpushed_submodules(struct string_list *needs_pushing)
 static int run_pre_push_hook(struct transport *transport,
 			     struct ref *remote_refs)
 {
-	int ret = 0, x;
+	int ret = 0;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 	struct ref *r;
-	struct child_process proc = CHILD_PROCESS_INIT;
-	struct strbuf buf;
-	const char *argv[4];
-
-	if (!(argv[0] = find_hook("pre-push")))
-		return 0;
-
-	argv[1] = transport->remote->name;
-	argv[2] = transport->url;
-	argv[3] = NULL;
-
-	proc.argv = argv;
-	proc.in = -1;
-	proc.trace2_hook_name = "pre-push";
-
-	if (start_command(&proc)) {
-		finish_command(&proc);
-		return -1;
-	}
+	struct string_list to_stdin = STRING_LIST_INIT_NODUP;
 
-	sigchain_push(SIGPIPE, SIG_IGN);
-
-	strbuf_init(&buf, 256);
+	strvec_push(&opt.args, transport->remote->name);
+	strvec_push(&opt.args, transport->url);
 
 	for (r = remote_refs; r; r = r->next) {
+		struct strbuf buf = STRBUF_INIT;
+
 		if (!r->peer_ref) continue;
 		if (r->status == REF_STATUS_REJECT_NONFASTFORWARD) continue;
 		if (r->status == REF_STATUS_REJECT_STALE) continue;
 		if (r->status == REF_STATUS_REJECT_REMOTE_UPDATED) continue;
 		if (r->status == REF_STATUS_UPTODATE) continue;
 
-		strbuf_reset(&buf);
-		strbuf_addf( &buf, "%s %s %s %s\n",
+		strbuf_addf(&buf, "%s %s %s %s",
 			 r->peer_ref->name, oid_to_hex(&r->new_oid),
 			 r->name, oid_to_hex(&r->old_oid));
-
-		if (write_in_full(proc.in, buf.buf, buf.len) < 0) {
-			/* We do not mind if a hook does not read all refs. */
-			if (errno != EPIPE)
-				ret = -1;
-			break;
-		}
+		string_list_append(&to_stdin, strbuf_detach(&buf, NULL));
 	}
 
-	strbuf_release(&buf);
-
-	x = close(proc.in);
-	if (!ret)
-		ret = x;
-
-	sigchain_pop(SIGPIPE);
+	opt.feed_pipe = pipe_from_string_list;
+	opt.feed_pipe_ctx = &to_stdin;
 
-	x = finish_command(&proc);
-	if (!ret)
-		ret = x;
+	ret = run_hooks_oneshot("pre-push", &opt);
+	to_stdin.strdup_strings = 1;
+	string_list_clear(&to_stdin, 0);
 
 	return ret;
 }

From 09dfbb9151f3c2462fff6ac9b0166901cf3cbfbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:54 +0200
Subject: [PATCH 155/198] hook tests: test for exact "pre-push" hook input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the tests added in ec55559f937 (push: Add support for pre-push
hooks, 2013-01-13) to exhaustively test for the exact input we're
expecting. This helps a parallel series that's refactoring how the
hook is called, to e.g. make sure that we don't miss a trailing
newline.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5571-pre-push-hook.sh | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/t/t5571-pre-push-hook.sh b/t/t5571-pre-push-hook.sh
index ad8d5804f7b7df..d2857a6fbc07df 100755
--- a/t/t5571-pre-push-hook.sh
+++ b/t/t5571-pre-push-hook.sh
@@ -11,7 +11,7 @@ HOOKDIR="$(git rev-parse --git-dir)/hooks"
 HOOK="$HOOKDIR/pre-push"
 mkdir -p "$HOOKDIR"
 write_script "$HOOK" <<EOF
-cat >/dev/null
+cat >actual
 exit 0
 EOF
 
@@ -20,10 +20,16 @@ test_expect_success 'setup' '
 	git init --bare repo1 &&
 	git remote add parent1 repo1 &&
 	test_commit one &&
-	git push parent1 HEAD:foreign
+	cat >expect <<-EOF &&
+	HEAD $(git rev-parse HEAD) refs/heads/foreign $(test_oid zero)
+	EOF
+
+	test_when_finished "rm actual" &&
+	git push parent1 HEAD:foreign &&
+	test_cmp expect actual
 '
 write_script "$HOOK" <<EOF
-cat >/dev/null
+cat >actual
 exit 1
 EOF
 
@@ -32,11 +38,18 @@ export COMMIT1
 
 test_expect_success 'push with failing hook' '
 	test_commit two &&
-	test_must_fail git push parent1 HEAD
+	cat >expect <<-EOF &&
+	HEAD $(git rev-parse HEAD) refs/heads/main $(test_oid zero)
+	EOF
+
+	test_when_finished "rm actual" &&
+	test_must_fail git push parent1 HEAD &&
+	test_cmp expect actual
 '
 
 test_expect_success '--no-verify bypasses hook' '
-	git push --no-verify parent1 HEAD
+	git push --no-verify parent1 HEAD &&
+	test_path_is_missing actual
 '
 
 COMMIT2="$(git rev-parse HEAD)"

From 33f69adb1b8b7c8d4aa4ba2c67b6d99be68581d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:38:55 +0200
Subject: [PATCH 156/198] hook tests: use a modern style for "pre-push" tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Indent the here-docs and use "test_cmp" instead of "diff" in tests
added in ec55559f937 (push: Add support for pre-push hooks,
2013-01-13). Let's also use the more typical "expect" instead of
"expected" to be consistent with the rest of the test file.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5571-pre-push-hook.sh | 71 ++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/t/t5571-pre-push-hook.sh b/t/t5571-pre-push-hook.sh
index d2857a6fbc07df..6d0d5b854ea3bb 100755
--- a/t/t5571-pre-push-hook.sh
+++ b/t/t5571-pre-push-hook.sh
@@ -61,15 +61,15 @@ echo "$2" >>actual
 cat >>actual
 EOF
 
-cat >expected <<EOF
-parent1
-repo1
-refs/heads/main $COMMIT2 refs/heads/foreign $COMMIT1
-EOF
-
 test_expect_success 'push with hook' '
+	cat >expected <<-EOF &&
+	parent1
+	repo1
+	refs/heads/main $COMMIT2 refs/heads/foreign $COMMIT1
+	EOF
+
 	git push parent1 main:foreign &&
-	diff expected actual
+	test_cmp expected actual
 '
 
 test_expect_success 'add a branch' '
@@ -80,49 +80,48 @@ test_expect_success 'add a branch' '
 COMMIT3="$(git rev-parse HEAD)"
 export COMMIT3
 
-cat >expected <<EOF
-parent1
-repo1
-refs/heads/other $COMMIT3 refs/heads/foreign $COMMIT2
-EOF
-
 test_expect_success 'push to default' '
+	cat >expect <<-EOF &&
+	parent1
+	repo1
+	refs/heads/other $COMMIT3 refs/heads/foreign $COMMIT2
+	EOF
 	git push &&
-	diff expected actual
+	test_cmp expect actual
 '
 
-cat >expected <<EOF
-parent1
-repo1
-refs/tags/one $COMMIT1 refs/tags/tag1 $ZERO_OID
-HEAD~ $COMMIT2 refs/heads/prev $ZERO_OID
-EOF
-
 test_expect_success 'push non-branches' '
+	cat >expect <<-EOF &&
+	parent1
+	repo1
+	refs/tags/one $COMMIT1 refs/tags/tag1 $ZERO_OID
+	HEAD~ $COMMIT2 refs/heads/prev $ZERO_OID
+	EOF
+
 	git push parent1 one:tag1 HEAD~:refs/heads/prev &&
-	diff expected actual
+	test_cmp expect actual
 '
 
-cat >expected <<EOF
-parent1
-repo1
-(delete) $ZERO_OID refs/heads/prev $COMMIT2
-EOF
-
 test_expect_success 'push delete' '
+	cat >expect <<-EOF &&
+	parent1
+	repo1
+	(delete) $ZERO_OID refs/heads/prev $COMMIT2
+	EOF
+
 	git push parent1 :prev &&
-	diff expected actual
+	test_cmp expect actual
 '
 
-cat >expected <<EOF
-repo1
-repo1
-HEAD $COMMIT3 refs/heads/other $ZERO_OID
-EOF
-
 test_expect_success 'push to URL' '
+	cat >expect <<-EOF &&
+	repo1
+	repo1
+	HEAD $COMMIT3 refs/heads/other $ZERO_OID
+	EOF
+
 	git push repo1 HEAD &&
-	diff expected actual
+	test_cmp expect actual
 '
 
 test_expect_success 'set up many-ref tests' '

From 2832b22fbecc29c6eb1fa27acca0c3474382e8c0 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:56 +0200
Subject: [PATCH 157/198] reference-transaction: use hook.h to run hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c | 40 +++++++++++++---------------------------
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/refs.c b/refs.c
index 6211692eaaea53..73d4a9392673fa 100644
--- a/refs.c
+++ b/refs.c
@@ -2062,47 +2062,33 @@ int ref_update_reject_duplicates(struct string_list *refnames,
 static int run_transaction_hook(struct ref_transaction *transaction,
 				const char *state)
 {
-	struct child_process proc = CHILD_PROCESS_INIT;
-	struct strbuf buf = STRBUF_INIT;
-	const char *hook;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct string_list to_stdin = STRING_LIST_INIT_NODUP;
 	int ret = 0, i;
 
-	hook = find_hook("reference-transaction");
-	if (!hook)
-		return ret;
-
-	strvec_pushl(&proc.args, hook, state, NULL);
-	proc.in = -1;
-	proc.stdout_to_stderr = 1;
-	proc.trace2_hook_name = "reference-transaction";
-
-	ret = start_command(&proc);
-	if (ret)
+	if (!hook_exists("reference-transaction"))
 		return ret;
 
-	sigchain_push(SIGPIPE, SIG_IGN);
+	strvec_push(&opt.args, state);
 
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *update = transaction->updates[i];
+		struct strbuf buf = STRBUF_INIT;
 
-		strbuf_reset(&buf);
-		strbuf_addf(&buf, "%s %s %s\n",
+		strbuf_addf(&buf, "%s %s %s",
 			    oid_to_hex(&update->old_oid),
 			    oid_to_hex(&update->new_oid),
 			    update->refname);
-
-		if (write_in_full(proc.in, buf.buf, buf.len) < 0) {
-			if (errno != EPIPE)
-				ret = -1;
-			break;
-		}
+		string_list_append(&to_stdin, strbuf_detach(&buf, NULL));
 	}
 
-	close(proc.in);
-	sigchain_pop(SIGPIPE);
-	strbuf_release(&buf);
+	opt.feed_pipe = pipe_from_string_list;
+	opt.feed_pipe_ctx = &to_stdin;
+
+	ret = run_hooks_oneshot("reference-transaction", &opt);
+	to_stdin.strdup_strings = 1;
+	string_list_clear(&to_stdin, 0);
 
-	ret |= finish_command(&proc);
 	return ret;
 }
 

From 2a7f82cc5caf324a0a501ba3d1bfb25256204988 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:57 +0200
Subject: [PATCH 158/198] run-command: allow capturing of collated output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some callers, for example server-side hooks which wish to relay hook
output to clients across a transport, want to capture what would
normally print to stderr and do something else with it. Allow that via a
callback.

By calling the callback regardless of whether there's output available,
we allow clients to send e.g. a keepalive if necessary.

Because we expose a strbuf, not a fd or FILE*, there's no need to create
a temporary pipe or similar - we can just skip the print to stderr and
instead hand it to the caller.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fetch.c             |  2 +-
 builtin/submodule--helper.c |  2 +-
 hook.c                      |  1 +
 run-command.c               | 33 +++++++++++++++++++++++++--------
 run-command.h               | 18 +++++++++++++++++-
 submodule.c                 |  2 +-
 t/helper/test-run-command.c | 25 ++++++++++++++++++++-----
 t/t0061-run-command.sh      |  7 +++++++
 8 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index fef6e85d003d48..de14df5085c0cb 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1817,7 +1817,7 @@ static int fetch_multiple(struct string_list *list, int max_children)
 		result = run_processes_parallel_tr2(max_children,
 						    &fetch_next_remote,
 						    &fetch_failed_to_start,
-						    NULL,
+						    NULL, NULL,
 						    &fetch_finished,
 						    &state,
 						    "fetch", "parallel/fetch");
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f42ded548bffb6..e5e5a8bcfb58e3 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2294,7 +2294,7 @@ static int update_submodules(struct submodule_update_clone *suc)
 	int i;
 
 	run_processes_parallel_tr2(suc->max_jobs, update_clone_get_next_task,
-				   update_clone_start_failure, NULL,
+				   update_clone_start_failure, NULL, NULL,
 				   update_clone_task_finished, suc, "submodule",
 				   "parallel/update");
 
diff --git a/hook.c b/hook.c
index 44e73827800813..9ae3007cdd336e 100644
--- a/hook.c
+++ b/hook.c
@@ -203,6 +203,7 @@ int run_hooks(const char *hook_name, const char *hook_path,
 				   pick_next_hook,
 				   notify_start_failure,
 				   options->feed_pipe,
+				   NULL,
 				   notify_hook_finished,
 				   &cb_data,
 				   "hook",
diff --git a/run-command.c b/run-command.c
index f1616858d18eaa..aacc336f9519cd 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1493,6 +1493,7 @@ struct parallel_processes {
 	get_next_task_fn get_next_task;
 	start_failure_fn start_failure;
 	feed_pipe_fn feed_pipe;
+	consume_sideband_fn consume_sideband;
 	task_finished_fn task_finished;
 
 	struct {
@@ -1558,6 +1559,7 @@ static void pp_init(struct parallel_processes *pp,
 		    get_next_task_fn get_next_task,
 		    start_failure_fn start_failure,
 		    feed_pipe_fn feed_pipe,
+		    consume_sideband_fn consume_sideband,
 		    task_finished_fn task_finished,
 		    void *data)
 {
@@ -1578,6 +1580,7 @@ static void pp_init(struct parallel_processes *pp,
 	pp->start_failure = start_failure ? start_failure : default_start_failure;
 	pp->feed_pipe = feed_pipe ? feed_pipe : default_feed_pipe;
 	pp->task_finished = task_finished ? task_finished : default_task_finished;
+	pp->consume_sideband = consume_sideband;
 
 	pp->nr_processes = 0;
 	pp->output_owner = 0;
@@ -1614,7 +1617,10 @@ static void pp_cleanup(struct parallel_processes *pp)
 	 * When get_next_task added messages to the buffer in its last
 	 * iteration, the buffered output is non empty.
 	 */
-	strbuf_write(&pp->buffered_output, stderr);
+	if (pp->consume_sideband)
+		pp->consume_sideband(&pp->buffered_output, pp->data);
+	else
+		strbuf_write(&pp->buffered_output, stderr);
 	strbuf_release(&pp->buffered_output);
 
 	sigchain_pop_common();
@@ -1735,9 +1741,13 @@ static void pp_buffer_stderr(struct parallel_processes *pp, int output_timeout)
 static void pp_output(struct parallel_processes *pp)
 {
 	int i = pp->output_owner;
+
 	if (pp->children[i].state == GIT_CP_WORKING &&
 	    pp->children[i].err.len) {
-		strbuf_write(&pp->children[i].err, stderr);
+		if (pp->consume_sideband)
+			pp->consume_sideband(&pp->children[i].err, pp->data);
+		else
+			strbuf_write(&pp->children[i].err, stderr);
 		strbuf_reset(&pp->children[i].err);
 	}
 }
@@ -1776,11 +1786,15 @@ static int pp_collect_finished(struct parallel_processes *pp)
 			strbuf_addbuf(&pp->buffered_output, &pp->children[i].err);
 			strbuf_reset(&pp->children[i].err);
 		} else {
-			strbuf_write(&pp->children[i].err, stderr);
+			/* Output errors, then all other finished child processes */
+			if (pp->consume_sideband) {
+				pp->consume_sideband(&pp->children[i].err, pp->data);
+				pp->consume_sideband(&pp->buffered_output, pp->data);
+			} else {
+				strbuf_write(&pp->children[i].err, stderr);
+				strbuf_write(&pp->buffered_output, stderr);
+			}
 			strbuf_reset(&pp->children[i].err);
-
-			/* Output all other finished child processes */
-			strbuf_write(&pp->buffered_output, stderr);
 			strbuf_reset(&pp->buffered_output);
 
 			/*
@@ -1804,6 +1818,7 @@ int run_processes_parallel(int n,
 			   get_next_task_fn get_next_task,
 			   start_failure_fn start_failure,
 			   feed_pipe_fn feed_pipe,
+			   consume_sideband_fn consume_sideband,
 			   task_finished_fn task_finished,
 			   void *pp_cb)
 {
@@ -1814,7 +1829,7 @@ int run_processes_parallel(int n,
 
 	sigchain_push(SIGPIPE, SIG_IGN);
 
-	pp_init(&pp, n, get_next_task, start_failure, feed_pipe, task_finished, pp_cb);
+	pp_init(&pp, n, get_next_task, start_failure, feed_pipe, consume_sideband, task_finished, pp_cb);
 	while (1) {
 		for (i = 0;
 		    i < spawn_cap && !pp.shutdown &&
@@ -1852,6 +1867,7 @@ int run_processes_parallel(int n,
 int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task,
 			       start_failure_fn start_failure,
 			       feed_pipe_fn feed_pipe,
+			       consume_sideband_fn consume_sideband,
 			       task_finished_fn task_finished, void *pp_cb,
 			       const char *tr2_category, const char *tr2_label)
 {
@@ -1861,7 +1877,8 @@ int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task,
 				   ((n < 1) ? online_cpus() : n));
 
 	result = run_processes_parallel(n, get_next_task, start_failure,
-					feed_pipe, task_finished, pp_cb);
+					feed_pipe, consume_sideband,
+					task_finished, pp_cb);
 
 	trace2_region_leave(tr2_category, tr2_label, NULL);
 
diff --git a/run-command.h b/run-command.h
index 80d394664aec2a..e321d23bbd2d8b 100644
--- a/run-command.h
+++ b/run-command.h
@@ -436,6 +436,20 @@ typedef int (*feed_pipe_fn)(struct strbuf *pipe,
 			    void *pp_cb,
 			    void *pp_task_cb);
 
+/**
+ * If this callback is provided, instead of collating process output to stderr,
+ * they will be collated into a new pipe. consume_sideband_fn will be called
+ * repeatedly. When output is available on that pipe, it will be contained in
+ * 'output'. But it will be called with an empty 'output' too, to allow for
+ * keepalives or similar operations if necessary.
+ *
+ * pp_cb is the callback cookie as passed into run_processes_parallel.
+ *
+ * Since this callback is provided with the collated output, no task cookie is
+ * provided.
+ */
+typedef void (*consume_sideband_fn)(struct strbuf *output, void *pp_cb);
+
 /**
  * This callback is called on every child process that finished processing.
  *
@@ -471,10 +485,12 @@ int run_processes_parallel(int n,
 			   get_next_task_fn,
 			   start_failure_fn,
 			   feed_pipe_fn,
+			   consume_sideband_fn,
 			   task_finished_fn,
 			   void *pp_cb);
 int run_processes_parallel_tr2(int n, get_next_task_fn, start_failure_fn,
-			       feed_pipe_fn, task_finished_fn, void *pp_cb,
+			       feed_pipe_fn, consume_sideband_fn,
+			       task_finished_fn, void *pp_cb,
 			       const char *tr2_category, const char *tr2_label);
 
 /**
diff --git a/submodule.c b/submodule.c
index db1700a502dcd2..32364d8bd56312 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1632,7 +1632,7 @@ int fetch_populated_submodules(struct repository *r,
 	run_processes_parallel_tr2(max_parallel_jobs,
 				   get_next_submodule,
 				   fetch_start_failure,
-				   NULL,
+				   NULL, NULL,
 				   fetch_finish,
 				   &spf,
 				   "submodule", "parallel/fetch");
diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c
index 9348184d303eab..d53db6d11c4dad 100644
--- a/t/helper/test-run-command.c
+++ b/t/helper/test-run-command.c
@@ -51,6 +51,16 @@ static int no_job(struct child_process *cp,
 	return 0;
 }
 
+static void test_consume_sideband(struct strbuf *output, void *cb)
+{
+	FILE *sideband;
+
+	sideband = fopen("./sideband", "a");
+
+	strbuf_write(output, sideband);
+	fclose(sideband);
+}
+
 static int task_finished(int result,
 			 struct strbuf *err,
 			 void *pp_cb,
@@ -201,7 +211,7 @@ static int testsuite(int argc, const char **argv)
 		suite.tests.nr, max_jobs);
 
 	ret = run_processes_parallel(max_jobs, next_test, test_failed,
-				     test_stdin, test_finished, &suite);
+				     test_stdin, NULL, test_finished, &suite);
 
 	if (suite.failed.nr > 0) {
 		ret = 1;
@@ -429,23 +439,28 @@ int cmd__run_command(int argc, const char **argv)
 
 	if (!strcmp(argv[1], "run-command-parallel"))
 		exit(run_processes_parallel(jobs, parallel_next,
-					    NULL, NULL, NULL, &proc));
+					    NULL, NULL, NULL, NULL, &proc));
 
 	if (!strcmp(argv[1], "run-command-abort"))
 		exit(run_processes_parallel(jobs, parallel_next,
-					    NULL, NULL, task_finished, &proc));
+					    NULL, NULL, NULL, task_finished, &proc));
 
 	if (!strcmp(argv[1], "run-command-no-jobs"))
 		exit(run_processes_parallel(jobs, no_job,
-					    NULL, NULL, task_finished, &proc));
+					    NULL, NULL, NULL, task_finished, &proc));
 
 	if (!strcmp(argv[1], "run-command-stdin")) {
 		proc.in = -1;
 		proc.no_stdin = 0;
 		exit (run_processes_parallel(jobs, parallel_next, NULL,
-					     test_stdin, NULL, &proc));
+					     test_stdin, NULL, NULL, &proc));
 	}
 
+	if (!strcmp(argv[1], "run-command-sideband"))
+		exit(run_processes_parallel(jobs, parallel_next, NULL, NULL,
+					    test_consume_sideband, NULL,
+					    &proc));
+
 	fprintf(stderr, "check usage\n");
 	return 1;
 }
diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh
index 87759482ad111e..e99f6c7f445805 100755
--- a/t/t0061-run-command.sh
+++ b/t/t0061-run-command.sh
@@ -143,6 +143,13 @@ test_expect_success 'run_command runs in parallel with more tasks than jobs avai
 	test_cmp expect actual
 '
 
+test_expect_success 'run_command can divert output' '
+	test_when_finished rm sideband &&
+	test-tool run-command run-command-sideband 3 sh -c "printf \"%s\n%s\n\" Hello World" 2>actual &&
+	test_must_be_empty actual &&
+	test_cmp expect sideband
+'
+
 cat >expect <<-EOF
 preloaded output of a child
 listening for stdin:

From 9ca517b509914c504fb04741edb103b4b9186f1d Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:58 +0200
Subject: [PATCH 159/198] hooks: allow callers to capture output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some server-side hooks will require capturing output to send over
sideband instead of printing directly to stderr. Expose that capability.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 hook.c | 3 ++-
 hook.h | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/hook.c b/hook.c
index 9ae3007cdd336e..efed5f73b4d1fb 100644
--- a/hook.c
+++ b/hook.c
@@ -203,12 +203,13 @@ int run_hooks(const char *hook_name, const char *hook_path,
 				   pick_next_hook,
 				   notify_start_failure,
 				   options->feed_pipe,
-				   NULL,
+				   options->consume_sideband,
 				   notify_hook_finished,
 				   &cb_data,
 				   "hook",
 				   hook_name);
 
+
 	if (options->absolute_path)
 		strbuf_release(&abs_path);
 	free(my_hook.feed_pipe_cb_data);
diff --git a/hook.h b/hook.h
index b55f283f90bce9..37a9690c2ca368 100644
--- a/hook.h
+++ b/hook.h
@@ -58,6 +58,14 @@ struct run_hooks_opt
 	 */
 	feed_pipe_fn feed_pipe;
 	void *feed_pipe_ctx;
+
+	/*
+	 * Populate this to capture output and prevent it from being printed to
+	 * stderr. This will be passed directly through to
+	 * run_command:run_parallel_processes(). See t/helper/test-run-command.c
+	 * for an example.
+	 */
+	consume_sideband_fn consume_sideband;
 };
 
 #define RUN_HOOKS_OPT_INIT { \

From 6ca3e24b8e923b4484586051088350b9f3b67c2d Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:38:59 +0200
Subject: [PATCH 160/198] receive-pack: convert 'update' hook to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes use of the new sideband API in hook.h added in the
preceding commit.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/receive-pack.c | 62 ++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index a7d03bbc7d386d..31ce4ece4e76ba 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -937,33 +937,53 @@ static int run_receive_hook(struct command *commands,
 	return status;
 }
 
-static int run_update_hook(struct command *cmd)
+static void hook_output_to_sideband(struct strbuf *output, void *cb_data)
 {
-	const char *argv[5];
-	struct child_process proc = CHILD_PROCESS_INIT;
-	int code;
+	int keepalive_active = 0;
 
-	argv[0] = find_hook("update");
-	if (!argv[0])
-		return 0;
+	if (keepalive_in_sec <= 0)
+		use_keepalive = KEEPALIVE_NEVER;
+	if (use_keepalive == KEEPALIVE_ALWAYS)
+		keepalive_active = 1;
 
-	argv[1] = cmd->ref_name;
-	argv[2] = oid_to_hex(&cmd->old_oid);
-	argv[3] = oid_to_hex(&cmd->new_oid);
-	argv[4] = NULL;
+	/* send a keepalive if there is no data to write */
+	if (keepalive_active && !output->len) {
+		static const char buf[] = "0005\1";
+		write_or_die(1, buf, sizeof(buf) - 1);
+		return;
+	}
 
-	proc.no_stdin = 1;
-	proc.stdout_to_stderr = 1;
-	proc.err = use_sideband ? -1 : 0;
-	proc.argv = argv;
-	proc.trace2_hook_name = "update";
+	if (use_keepalive == KEEPALIVE_AFTER_NUL && !keepalive_active) {
+		const char *first_null = memchr(output->buf, '\0', output->len);
+		if (first_null) {
+			/* The null bit is excluded. */
+			size_t before_null = first_null - output->buf;
+			size_t after_null = output->len - (before_null + 1);
+			keepalive_active = 1;
+			send_sideband(1, 2, output->buf, before_null, use_sideband);
+			send_sideband(1, 2, first_null + 1, after_null, use_sideband);
+
+			return;
+		}
+	}
+
+	send_sideband(1, 2, output->buf, output->len, use_sideband);
+}
+
+static int run_update_hook(struct command *cmd)
+{
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+
+	strvec_pushl(&opt.args,
+		     cmd->ref_name,
+		     oid_to_hex(&cmd->old_oid),
+		     oid_to_hex(&cmd->new_oid),
+		     NULL);
 
-	code = start_command(&proc);
-	if (code)
-		return code;
 	if (use_sideband)
-		copy_to_sideband(proc.err, -1, NULL);
-	return finish_command(&proc);
+		opt.consume_sideband = hook_output_to_sideband;
+
+	return run_hooks_oneshot("update", &opt);
 }
 
 static struct command *find_command_by_refname(struct command *list,

From 908ed9a5db4da3ea01bae95e74fd0450915321e9 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:39:00 +0200
Subject: [PATCH 161/198] post-update: use hook.h library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/receive-pack.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 31ce4ece4e76ba..26e302aab85e37 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1650,33 +1650,20 @@ static const char *update(struct command *cmd, struct shallow_info *si)
 static void run_update_post_hook(struct command *commands)
 {
 	struct command *cmd;
-	struct child_process proc = CHILD_PROCESS_INIT;
-	const char *hook;
-
-	hook = find_hook("post-update");
-	if (!hook)
-		return;
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
 
 	for (cmd = commands; cmd; cmd = cmd->next) {
 		if (cmd->error_string || cmd->did_not_exist)
 			continue;
-		if (!proc.args.nr)
-			strvec_push(&proc.args, hook);
-		strvec_push(&proc.args, cmd->ref_name);
+		strvec_push(&opt.args, cmd->ref_name);
 	}
-	if (!proc.args.nr)
+	if (!opt.args.nr)
 		return;
 
-	proc.no_stdin = 1;
-	proc.stdout_to_stderr = 1;
-	proc.err = use_sideband ? -1 : 0;
-	proc.trace2_hook_name = "post-update";
+	if (use_sideband)
+		opt.consume_sideband = hook_output_to_sideband;
 
-	if (!start_command(&proc)) {
-		if (use_sideband)
-			copy_to_sideband(proc.err, -1, NULL);
-		finish_command(&proc);
-	}
+	run_hooks_oneshot("post-update", &opt);
 }
 
 static void check_aliased_update_internal(struct command *cmd,

From 4e5f31ba423c11ef17d8c51e3421684bd88666b8 Mon Sep 17 00:00:00 2001
From: Emily Shaffer <emilyshaffer@google.com>
Date: Tue, 3 Aug 2021 21:39:01 +0200
Subject: [PATCH 162/198] receive-pack: convert receive hooks to hook.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/receive-pack.c | 190 ++++++++++++++++++-----------------------
 1 file changed, 83 insertions(+), 107 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 26e302aab85e37..c3984680d7f2bd 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -747,7 +747,7 @@ static int check_cert_push_options(const struct string_list *push_options)
 	return retval;
 }
 
-static void prepare_push_cert_sha1(struct child_process *proc)
+static void prepare_push_cert_sha1(struct run_hooks_opt *opt)
 {
 	static int already_done;
 
@@ -771,110 +771,42 @@ static void prepare_push_cert_sha1(struct child_process *proc)
 		nonce_status = check_nonce(push_cert.buf, bogs);
 	}
 	if (!is_null_oid(&push_cert_oid)) {
-		strvec_pushf(&proc->env_array, "GIT_PUSH_CERT=%s",
+		strvec_pushf(&opt->env, "GIT_PUSH_CERT=%s",
 			     oid_to_hex(&push_cert_oid));
-		strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_SIGNER=%s",
+		strvec_pushf(&opt->env, "GIT_PUSH_CERT_SIGNER=%s",
 			     sigcheck.signer ? sigcheck.signer : "");
-		strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_KEY=%s",
+		strvec_pushf(&opt->env, "GIT_PUSH_CERT_KEY=%s",
 			     sigcheck.key ? sigcheck.key : "");
-		strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_STATUS=%c",
+		strvec_pushf(&opt->env, "GIT_PUSH_CERT_STATUS=%c",
 			     sigcheck.result);
 		if (push_cert_nonce) {
-			strvec_pushf(&proc->env_array,
+			strvec_pushf(&opt->env,
 				     "GIT_PUSH_CERT_NONCE=%s",
 				     push_cert_nonce);
-			strvec_pushf(&proc->env_array,
+			strvec_pushf(&opt->env,
 				     "GIT_PUSH_CERT_NONCE_STATUS=%s",
 				     nonce_status);
 			if (nonce_status == NONCE_SLOP)
-				strvec_pushf(&proc->env_array,
+				strvec_pushf(&opt->env,
 					     "GIT_PUSH_CERT_NONCE_SLOP=%ld",
 					     nonce_stamp_slop);
 		}
 	}
 }
 
+struct receive_hook_feed_context {
+	struct command *cmd;
+	int skip_broken;
+};
+
 struct receive_hook_feed_state {
 	struct command *cmd;
 	struct ref_push_report *report;
 	int skip_broken;
 	struct strbuf buf;
-	const struct string_list *push_options;
 };
 
-typedef int (*feed_fn)(void *, const char **, size_t *);
-static int run_and_feed_hook(const char *hook_name, feed_fn feed,
-			     struct receive_hook_feed_state *feed_state)
-{
-	struct child_process proc = CHILD_PROCESS_INIT;
-	struct async muxer;
-	const char *argv[2];
-	int code;
-
-	argv[0] = find_hook(hook_name);
-	if (!argv[0])
-		return 0;
-
-	argv[1] = NULL;
-
-	proc.argv = argv;
-	proc.in = -1;
-	proc.stdout_to_stderr = 1;
-	proc.trace2_hook_name = hook_name;
-
-	if (feed_state->push_options) {
-		int i;
-		for (i = 0; i < feed_state->push_options->nr; i++)
-			strvec_pushf(&proc.env_array,
-				     "GIT_PUSH_OPTION_%d=%s", i,
-				     feed_state->push_options->items[i].string);
-		strvec_pushf(&proc.env_array, "GIT_PUSH_OPTION_COUNT=%d",
-			     feed_state->push_options->nr);
-	} else
-		strvec_pushf(&proc.env_array, "GIT_PUSH_OPTION_COUNT");
-
-	if (tmp_objdir)
-		strvec_pushv(&proc.env_array, tmp_objdir_env(tmp_objdir));
-
-	if (use_sideband) {
-		memset(&muxer, 0, sizeof(muxer));
-		muxer.proc = copy_to_sideband;
-		muxer.in = -1;
-		code = start_async(&muxer);
-		if (code)
-			return code;
-		proc.err = muxer.in;
-	}
-
-	prepare_push_cert_sha1(&proc);
-
-	code = start_command(&proc);
-	if (code) {
-		if (use_sideband)
-			finish_async(&muxer);
-		return code;
-	}
-
-	sigchain_push(SIGPIPE, SIG_IGN);
-
-	while (1) {
-		const char *buf;
-		size_t n;
-		if (feed(feed_state, &buf, &n))
-			break;
-		if (write_in_full(proc.in, buf, n) < 0)
-			break;
-	}
-	close(proc.in);
-	if (use_sideband)
-		finish_async(&muxer);
-
-	sigchain_pop(SIGPIPE);
-
-	return finish_command(&proc);
-}
-
-static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep)
+static int feed_receive_hook(void *state_)
 {
 	struct receive_hook_feed_state *state = state_;
 	struct command *cmd = state->cmd;
@@ -883,9 +815,7 @@ static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep)
 	       state->skip_broken && (cmd->error_string || cmd->did_not_exist))
 		cmd = cmd->next;
 	if (!cmd)
-		return -1; /* EOF */
-	if (!bufp)
-		return 0; /* OK, can feed something. */
+		return 1; /* EOF - close the pipe*/
 	strbuf_reset(&state->buf);
 	if (!state->report)
 		state->report = cmd->report;
@@ -909,32 +839,36 @@ static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep)
 			    cmd->ref_name);
 		state->cmd = cmd->next;
 	}
-	if (bufp) {
-		*bufp = state->buf.buf;
-		*sizep = state->buf.len;
-	}
 	return 0;
 }
 
-static int run_receive_hook(struct command *commands,
-			    const char *hook_name,
-			    int skip_broken,
-			    const struct string_list *push_options)
+static int feed_receive_hook_cb(struct strbuf *pipe, void *pp_cb, void *pp_task_cb)
 {
-	struct receive_hook_feed_state state;
-	int status;
-
-	strbuf_init(&state.buf, 0);
-	state.cmd = commands;
-	state.skip_broken = skip_broken;
-	state.report = NULL;
-	if (feed_receive_hook(&state, NULL, NULL))
-		return 0;
-	state.cmd = commands;
-	state.push_options = push_options;
-	status = run_and_feed_hook(hook_name, feed_receive_hook, &state);
-	strbuf_release(&state.buf);
-	return status;
+	struct hook *hook = pp_task_cb;
+	struct receive_hook_feed_state *feed_state = hook->feed_pipe_cb_data;
+	int rc;
+
+	/* first-time setup */
+	if (!feed_state) {
+		struct hook_cb_data *hook_cb = pp_cb;
+		struct run_hooks_opt *opt = hook_cb->options;
+		struct receive_hook_feed_context *ctx = opt->feed_pipe_ctx;
+		if (!ctx)
+			BUG("run_hooks_opt.feed_pipe_ctx required for receive hook");
+
+		feed_state = xmalloc(sizeof(struct receive_hook_feed_state));
+		strbuf_init(&feed_state->buf, 0);
+		feed_state->cmd = ctx->cmd;
+		feed_state->skip_broken = ctx->skip_broken;
+		feed_state->report = NULL;
+
+		hook->feed_pipe_cb_data = feed_state;
+	}
+
+	rc = feed_receive_hook(feed_state);
+	if (!rc)
+		strbuf_addbuf(pipe, &feed_state->buf);
+	return rc;
 }
 
 static void hook_output_to_sideband(struct strbuf *output, void *cb_data)
@@ -970,6 +904,48 @@ static void hook_output_to_sideband(struct strbuf *output, void *cb_data)
 	send_sideband(1, 2, output->buf, output->len, use_sideband);
 }
 
+static int run_receive_hook(struct command *commands,
+			    const char *hook_name,
+			    int skip_broken,
+			    const struct string_list *push_options)
+{
+	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	struct receive_hook_feed_context ctx;
+	struct command *iter = commands;
+
+	/* if there are no valid commands, don't invoke the hook at all. */
+	while (iter && skip_broken && (iter->error_string || iter->did_not_exist))
+		iter = iter->next;
+	if (!iter)
+		return 0;
+
+	if (push_options) {
+		int i;
+		for (i = 0; i < push_options->nr; i++)
+			strvec_pushf(&opt.env, "GIT_PUSH_OPTION_%d=%s", i,
+				     push_options->items[i].string);
+		strvec_pushf(&opt.env, "GIT_PUSH_OPTION_COUNT=%d", push_options->nr);
+	} else
+		strvec_push(&opt.env, "GIT_PUSH_OPTION_COUNT");
+
+	if (tmp_objdir)
+		strvec_pushv(&opt.env, tmp_objdir_env(tmp_objdir));
+
+	prepare_push_cert_sha1(&opt);
+
+	/* set up sideband printer */
+	if (use_sideband)
+		opt.consume_sideband = hook_output_to_sideband;
+
+	/* set up stdin callback */
+	ctx.cmd = commands;
+	ctx.skip_broken = skip_broken;
+	opt.feed_pipe = feed_receive_hook_cb;
+	opt.feed_pipe_ctx = &ctx;
+
+	return run_hooks_oneshot(hook_name, &opt);
+}
+
 static int run_update_hook(struct command *cmd)
 {
 	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;

From 76f440d4f743433796e06a921ee68273edf17c17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Tue, 3 Aug 2021 21:39:02 +0200
Subject: [PATCH 163/198] hooks: fix a TOCTOU in "did we run a hook?" heuristic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a Time-of-check to time-of-use (TOCTOU) race in code added in
680ee550d72 (commit: skip discarding the index if there is no
pre-commit hook, 2017-08-14).

We can fix the race passing around information about whether or not we
ran the hook in question, instead of running hook_exists() after the
fact to check if the hook in question exists. This problem has been
noted on-list when 680ee550d72 was discussed[1], but had not been
fixed.

In addition to fixing this for the pre-commit hook as suggested there
I'm also fixing this for the pre-merge-commit hook. See
6098817fd7f (git-merge: honor pre-merge-commit hook, 2019-08-07) for
the introduction of its previous behavior.

Let's also change this for the push-to-checkout hook. Now instead of
checking if the hook exists and either doing a push to checkout or a
push to deploy we'll always attempt a push to checkout. If the hook
doesn't exist we'll fall back on push to deploy. The same behavior as
before, without the TOCTOU race. See 0855331941b (receive-pack:
support push-to-checkout hook, 2014-12-01) for the introduction of the
previous behavior.

This leaves uses of hook_exists() in two places that matter. The
"reference-transaction" check in refs.c, see 67541597670 (refs:
implement reference transaction hook, 2020-06-19), and the
prepare-commit-msg hook, see 66618a50f9c (sequencer: run
'prepare-commit-msg' hook, 2018-01-24).

In both of those cases we're saving ourselves CPU time by not
preparing data for the hook that we'll then do nothing with if we
don't have the hook. So using this "invoked_hook" pattern doesn't make
sense in those cases.

More importantly, in those cases the worst we'll do is miss that we
"should" run the hook because a new hook appeared, whereas in the
pre-commit and pre-merge-commit cases we'll skip an important
discard_cache() on the bases of our faulty guess.

I do think none of these races really matter in practice. It would be
some one-off issue as a hook was added or removed. I did think it was
stupid that we didn't pass a "did this run?" flag instead of doing
this guessing at a distance though, so now we're not guessing anymore.

1. https://lore.kernel.org/git/20170810191613.kpmhzg4seyxy3cpq@sigill.intra.peff.net/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/commit.c       | 18 +++++++++++-------
 builtin/merge.c        | 16 ++++++++++------
 builtin/receive-pack.c |  8 +++++---
 commit.c               |  1 +
 commit.h               |  3 ++-
 hook.c                 |  4 ++++
 hook.h                 | 10 ++++++++++
 sequencer.c            |  4 ++--
 8 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index aa3c741efa9d37..0b7642e8324c98 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -725,11 +725,13 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 	int clean_message_contents = (cleanup_mode != COMMIT_MSG_CLEANUP_NONE);
 	int old_display_comment_prefix;
 	int merge_contains_scissors = 0;
+	int invoked_hook = 0;
 
 	/* This checks and barfs if author is badly specified */
 	determine_author_info(author_ident);
 
-	if (!no_verify && run_commit_hook(use_editor, index_file, "pre-commit", NULL))
+	if (!no_verify && run_commit_hook(use_editor, index_file, &invoked_hook,
+					  "pre-commit", NULL))
 		return 0;
 
 	if (squash_message) {
@@ -1052,10 +1054,10 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 		return 0;
 	}
 
-	if (!no_verify && hook_exists("pre-commit")) {
+	if (!no_verify && invoked_hook) {
 		/*
-		 * Re-read the index as pre-commit hook could have updated it,
-		 * and write it out as a tree.  We must do this before we invoke
+		 * Re-read the index as the pre-commit-commit hook was invoked
+		 * and could have updated it. We must do this before we invoke
 		 * the editor and after we invoke run_status above.
 		 */
 		discard_cache();
@@ -1067,7 +1069,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 		return 0;
 	}
 
-	if (run_commit_hook(use_editor, index_file, "prepare-commit-msg",
+	if (run_commit_hook(use_editor, index_file, NULL, "prepare-commit-msg",
 			    git_path_commit_editmsg(), hook_arg1, hook_arg2, NULL))
 		return 0;
 
@@ -1084,7 +1086,8 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 	}
 
 	if (!no_verify &&
-	    run_commit_hook(use_editor, index_file, "commit-msg", git_path_commit_editmsg(), NULL)) {
+	    run_commit_hook(use_editor, index_file, NULL, "commit-msg",
+			    git_path_commit_editmsg(), NULL)) {
 		return 0;
 	}
 
@@ -1840,7 +1843,8 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
 
 	repo_rerere(the_repository, 0);
 	run_auto_maintenance(quiet);
-	run_commit_hook(use_editor, get_index_file(), "post-commit", NULL);
+	run_commit_hook(use_editor, get_index_file(), NULL, "post-commit",
+			NULL);
 	if (amend && !no_post_rewrite) {
 		commit_post_rewrite(the_repository, current_head, &oid);
 	}
diff --git a/builtin/merge.c b/builtin/merge.c
index 4965df2ac29f8d..9bd4a2532c315a 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -843,15 +843,18 @@ static void prepare_to_commit(struct commit_list *remoteheads)
 {
 	struct strbuf msg = STRBUF_INIT;
 	const char *index_file = get_index_file();
+	int invoked_hook = 0;
 
-	if (!no_verify && run_commit_hook(0 < option_edit, index_file, "pre-merge-commit", NULL))
+	if (!no_verify && run_commit_hook(0 < option_edit, index_file,
+					  &invoked_hook, "pre-merge-commit",
+					  NULL))
 		abort_commit(remoteheads, NULL);
 	/*
-	 * Re-read the index as pre-merge-commit hook could have updated it,
-	 * and write it out as a tree.  We must do this before we invoke
+	 * Re-read the index as the pre-merge-commit hook was invoked
+	 * and could have updated it. We must do this before we invoke
 	 * the editor and after we invoke run_status above.
 	 */
-	if (hook_exists("pre-merge-commit"))
+	if (invoked_hook)
 		discard_cache();
 	read_cache_from(index_file);
 	strbuf_addbuf(&msg, &merge_msg);
@@ -872,7 +875,8 @@ static void prepare_to_commit(struct commit_list *remoteheads)
 		append_signoff(&msg, ignore_non_trailer(msg.buf, msg.len), 0);
 	write_merge_heads(remoteheads);
 	write_file_buf(git_path_merge_msg(the_repository), msg.buf, msg.len);
-	if (run_commit_hook(0 < option_edit, get_index_file(), "prepare-commit-msg",
+	if (run_commit_hook(0 < option_edit, get_index_file(), NULL,
+			    "prepare-commit-msg",
 			    git_path_merge_msg(the_repository), "merge", NULL))
 		abort_commit(remoteheads, NULL);
 	if (0 < option_edit) {
@@ -881,7 +885,7 @@ static void prepare_to_commit(struct commit_list *remoteheads)
 	}
 
 	if (!no_verify && run_commit_hook(0 < option_edit, get_index_file(),
-					  "commit-msg",
+					  NULL, "commit-msg",
 					  git_path_merge_msg(the_repository), NULL))
 		abort_commit(remoteheads, NULL);
 
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index c3984680d7f2bd..ebec6f3bb10c05 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1428,10 +1428,12 @@ static const char *push_to_deploy(unsigned char *sha1,
 static const char *push_to_checkout_hook = "push-to-checkout";
 
 static const char *push_to_checkout(unsigned char *hash,
+				    int *invoked_hook,
 				    struct strvec *env,
 				    const char *work_tree)
 {
 	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+	opt.invoked_hook = invoked_hook;
 
 	strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree));
 	strvec_pushv(&opt.env, env->v);
@@ -1446,6 +1448,7 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w
 {
 	const char *retval, *work_tree, *git_dir = NULL;
 	struct strvec env = STRVEC_INIT;
+	int invoked_hook = 0;
 
 	if (worktree && worktree->path)
 		work_tree = worktree->path;
@@ -1463,10 +1466,9 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w
 
 	strvec_pushf(&env, "GIT_DIR=%s", absolute_path(git_dir));
 
-	if (!hook_exists(push_to_checkout_hook))
+	retval = push_to_checkout(sha1, &invoked_hook, &env, work_tree);
+	if (!invoked_hook)
 		retval = push_to_deploy(sha1, &env, work_tree);
-	else
-		retval = push_to_checkout(sha1, &env, work_tree);
 
 	strvec_clear(&env);
 	return retval;
diff --git a/commit.c b/commit.c
index 63d7943a86dd51..842e47beae214e 100644
--- a/commit.c
+++ b/commit.c
@@ -1697,6 +1697,7 @@ size_t ignore_non_trailer(const char *buf, size_t len)
 }
 
 int run_commit_hook(int editor_is_used, const char *index_file,
+		    int *invoked_hook,
 		    const char *name, ...)
 {
 	struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
diff --git a/commit.h b/commit.h
index df42eb434f314b..b5a542993c63af 100644
--- a/commit.h
+++ b/commit.h
@@ -363,7 +363,8 @@ int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
 int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused);
 
 LAST_ARG_MUST_BE_NULL
-int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...);
+int run_commit_hook(int editor_is_used, const char *index_file,
+		    int *invoked_hook, const char *name, ...);
 
 /* Sign a commit or tag buffer, storing the result in a header. */
 int sign_with_header(struct strbuf *buf, const char *keyid);
diff --git a/hook.c b/hook.c
index efed5f73b4d1fb..ee20b2e3658753 100644
--- a/hook.c
+++ b/hook.c
@@ -173,6 +173,9 @@ static int notify_hook_finished(int result,
 
 	hook_cb->rc |= result;
 
+	if (hook_cb->invoked_hook)
+		*hook_cb->invoked_hook = 1;
+
 	return 0;
 }
 
@@ -187,6 +190,7 @@ int run_hooks(const char *hook_name, const char *hook_path,
 		.rc = 0,
 		.hook_name = hook_name,
 		.options = options,
+		.invoked_hook = options->invoked_hook,
 	};
 	int jobs = 1;
 
diff --git a/hook.h b/hook.h
index 37a9690c2ca368..58dfbf474c9c1a 100644
--- a/hook.h
+++ b/hook.h
@@ -66,6 +66,15 @@ struct run_hooks_opt
 	 * for an example.
 	 */
 	consume_sideband_fn consume_sideband;
+
+	/*
+	 * A pointer which if provided will be set to 1 or 0 depending
+	 * on if a hook was invoked (i.e. existed), regardless of
+	 * whether or not that was successful. Used for avoiding
+	 * TOCTOU races in code that would otherwise call hook_exist()
+	 * after a "maybe hook run" to see if a hook was invoked.
+	 */
+	int *invoked_hook;
 };
 
 #define RUN_HOOKS_OPT_INIT { \
@@ -90,6 +99,7 @@ struct hook_cb_data {
 	const char *hook_name;
 	struct hook *run_me;
 	struct run_hooks_opt *options;
+	int *invoked_hook;
 };
 
 void run_hooks_opt_clear(struct run_hooks_opt *o);
diff --git a/sequencer.c b/sequencer.c
index 77f809c00e46e6..f451e23d0c120d 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1203,7 +1203,7 @@ static int run_prepare_commit_msg_hook(struct repository *r,
 	} else {
 		arg1 = "message";
 	}
-	if (run_commit_hook(0, r->index_file, "prepare-commit-msg", name,
+	if (run_commit_hook(0, r->index_file, NULL, "prepare-commit-msg", name,
 			    arg1, arg2, NULL))
 		ret = error(_("'prepare-commit-msg' hook failed"));
 
@@ -1533,7 +1533,7 @@ static int try_to_commit(struct repository *r,
 		goto out;
 	}
 
-	run_commit_hook(0, r->index_file, "post-commit", NULL);
+	run_commit_hook(0, r->index_file, NULL, "post-commit", NULL);
 	if (flags & AMEND_MSG)
 		commit_post_rewrite(r, current_head, oid);
 

From 74aafa37dd7f4636b187255703579662e88121ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98ystein=20Walle?= <oystwa@gmail.com>
Date: Mon, 2 Aug 2021 19:49:44 +0200
Subject: [PATCH 164/198] clone: Allow combining --bare and --origin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The constraint on passing both these options simultaneously has been
present since long before clone was ported to C. At the time no
configuration referencing the remote repository was written at all in
bare clones.

Since df61c88979 (clone: also configure url for bare clones, 2010-03-29)
the remote repository is mentioned in the configuration file also for
bare repos, so it makes sense to allow the user to rename it if they
wish.

Signed-off-by: Øystein Walle <oystwa@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c          | 3 ---
 t/t5606-clone-options.sh | 9 +++++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 66fe66679c8498..70ec72ea854508 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1014,9 +1014,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		option_bare = 1;
 
 	if (option_bare) {
-		if (option_origin)
-			die(_("--bare and --origin %s options are incompatible."),
-			    option_origin);
 		if (real_git_dir)
 			die(_("--bare and --separate-git-dir are incompatible."));
 		option_no_checkout = 1;
diff --git a/t/t5606-clone-options.sh b/t/t5606-clone-options.sh
index 3a595c0f82c704..fd35b6d06b3420 100755
--- a/t/t5606-clone-options.sh
+++ b/t/t5606-clone-options.sh
@@ -30,11 +30,12 @@ test_expect_success 'rejects invalid -o/--origin' '
 
 '
 
-test_expect_success 'disallows --bare with --origin' '
+test_expect_success '--bare works with -o/--origin' '
 
-	test_must_fail git clone -o foo --bare parent clone-bare-o 2>err &&
-	test_debug "cat err" &&
-	test_i18ngrep -e "--bare and --origin foo options are incompatible" err
+	git clone --bare --origin=somewhere parent clone-bare-o &&
+	url="$(git -C clone-bare-o config --local remote.somewhere.url)" &&
+	test -n "$url" &&
+	test_must_fail git -C clone-bare-o config --local remote.origin.url
 
 '
 

From 4849f5ddf19b0a4d0c9583444518258b9022f138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 12:37:24 +0200
Subject: [PATCH 165/198] test-lib tests: get rid of copy/pasted mock test code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we've split up the write_sub_test_lib_test*() and
run_sub_test_lib_test*() functions let's fix those tests in
t0000-basic.sh that were verbosely copy/pasting earlier tests.

In a subsequent commit we'll add an assertion to check whether we
caught all of the copy/pasting.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t0000-basic.sh | 228 +++++++++++++----------------------------------
 1 file changed, 60 insertions(+), 168 deletions(-)

diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index 6fdd5f43caecc3..a0b99d8334965d 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -226,18 +226,13 @@ test_expect_success 'subtest: --verbose option' '
 '
 
 test_expect_success 'subtest: --verbose-only option' '
-	write_and_run_sub_test_lib_test_err \
-		t2345-verbose-only-2 \
-		--verbose-only=2 <<-\EOF &&
-	test_expect_success "passing test" true
-	test_expect_success "test with output" "echo foo"
-	test_expect_success "failing test" false
-	test_done
-	EOF
-	check_sub_test_lib_test t2345-verbose-only-2 <<-\EOF
+	run_sub_test_lib_test_err \
+		t1234-verbose \
+		--verbose-only=2 &&
+	check_sub_test_lib_test t1234-verbose <<-\EOF
 	> ok 1 - passing test
 	> Z
-	> expecting success of 2345.2 '\''test with output'\'': echo foo
+	> expecting success of 1234.2 '\''test with output'\'': echo foo
 	> foo
 	> ok 2 - test with output
 	> Z
@@ -250,15 +245,9 @@ test_expect_success 'subtest: --verbose-only option' '
 
 test_expect_success 'subtest: skip one with GIT_SKIP_TESTS' '
 	(
-		write_and_run_sub_test_lib_test git-skip-tests-basic \
-			--skip="git.2" <<-\EOF &&
-		for i in 1 2 3
-		do
-			test_expect_success "passing test #$i" "true"
-		done
-		test_done
-		EOF
-		check_sub_test_lib_test git-skip-tests-basic <<-\EOF
+		run_sub_test_lib_test full-pass \
+			--skip="full.2" &&
+		check_sub_test_lib_test full-pass <<-\EOF
 		> ok 1 - passing test #1
 		> ok 2 # skip passing test #2 (GIT_SKIP_TESTS)
 		> ok 3 - passing test #3
@@ -293,15 +282,9 @@ test_expect_success 'subtest: skip several with GIT_SKIP_TESTS' '
 
 test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' '
 	(
-		write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \
-			--skip="git.[2-5]" <<-\EOF &&
-		for i in 1 2 3 4 5 6
-		do
-			test_expect_success "passing test #$i" "true"
-		done
-		test_done
-		EOF
-		check_sub_test_lib_test git-skip-tests-sh-pattern <<-\EOF
+		run_sub_test_lib_test git-skip-tests-several \
+			--skip="git.[2-5]" &&
+		check_sub_test_lib_test git-skip-tests-several <<-\EOF
 		> ok 1 - passing test #1
 		> ok 2 # skip passing test #2 (GIT_SKIP_TESTS)
 		> ok 3 # skip passing test #3 (GIT_SKIP_TESTS)
@@ -316,15 +299,10 @@ test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' '
 
 test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' '
 	(
-		write_and_run_sub_test_lib_test git-skip-tests-entire-suite \
-			--skip="git" <<-\EOF &&
-		for i in 1 2 3
-		do
-			test_expect_success "passing test #$i" "true"
-		done
-		test_done
-		EOF
-		check_sub_test_lib_test git-skip-tests-entire-suite <<-\EOF
+		GIT_SKIP_TESTS="git" && export GIT_SKIP_TESTS &&
+		run_sub_test_lib_test git-skip-tests-several \
+			--skip="git" &&
+		check_sub_test_lib_test git-skip-tests-several <<-\EOF
 		> 1..0 # SKIP skip all tests in git
 		EOF
 	)
@@ -332,15 +310,10 @@ test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' '
 
 test_expect_success 'subtest: GIT_SKIP_TESTS does not skip unmatched suite' '
 	(
-		write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \
-			--skip="notgit" <<-\EOF &&
-		for i in 1 2 3
-		do
-			test_expect_success "passing test #$i" "true"
-		done
-		test_done
-		EOF
-		check_sub_test_lib_test git-skip-tests-unmatched-suite <<-\EOF
+		GIT_SKIP_TESTS="notgit" && export GIT_SKIP_TESTS &&
+		run_sub_test_lib_test full-pass \
+			--skip="notfull" &&
+		check_sub_test_lib_test full-pass <<-\EOF
 		> ok 1 - passing test #1
 		> ok 2 - passing test #2
 		> ok 3 - passing test #3
@@ -351,14 +324,8 @@ test_expect_success 'subtest: GIT_SKIP_TESTS does not skip unmatched suite' '
 '
 
 test_expect_success 'subtest: --run basic' '
-	write_and_run_sub_test_lib_test run-basic --run="1,3,5" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-basic <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several --run="1,3,5" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 # skip passing test #2 (--run)
 	> ok 3 - passing test #3
@@ -371,15 +338,9 @@ test_expect_success 'subtest: --run basic' '
 '
 
 test_expect_success 'subtest: --run with a range' '
-	write_and_run_sub_test_lib_test run-range \
-		--run="1-3" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-range <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="1-3" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 - passing test #3
@@ -392,15 +353,9 @@ test_expect_success 'subtest: --run with a range' '
 '
 
 test_expect_success 'subtest: --run with two ranges' '
-	write_and_run_sub_test_lib_test run-two-ranges \
-		--run="1-2,5-6" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-two-ranges <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="1-2,5-6" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -413,15 +368,9 @@ test_expect_success 'subtest: --run with two ranges' '
 '
 
 test_expect_success 'subtest: --run with a left open range' '
-	write_and_run_sub_test_lib_test run-left-open-range \
-		--run="-3" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-left-open-range <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="-3" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 - passing test #3
@@ -434,15 +383,9 @@ test_expect_success 'subtest: --run with a left open range' '
 '
 
 test_expect_success 'subtest: --run with a right open range' '
-	write_and_run_sub_test_lib_test run-right-open-range \
-		--run="4-" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-right-open-range <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="4-" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 # skip passing test #1 (--run)
 	> ok 2 # skip passing test #2 (--run)
 	> ok 3 # skip passing test #3 (--run)
@@ -455,15 +398,9 @@ test_expect_success 'subtest: --run with a right open range' '
 '
 
 test_expect_success 'subtest: --run with basic negation' '
-	write_and_run_sub_test_lib_test run-basic-neg \
-		--run="!3" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-basic-neg <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="!3" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -476,15 +413,9 @@ test_expect_success 'subtest: --run with basic negation' '
 '
 
 test_expect_success 'subtest: --run with two negations' '
-	write_and_run_sub_test_lib_test run-two-neg \
-		--run="!3,!6" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-two-neg <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="!3,!6" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -497,15 +428,9 @@ test_expect_success 'subtest: --run with two negations' '
 '
 
 test_expect_success 'subtest: --run a range and negation' '
-	write_and_run_sub_test_lib_test run-range-and-neg \
-		--run="-4,!2" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-range-and-neg <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="-4,!2" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 # skip passing test #2 (--run)
 	> ok 3 - passing test #3
@@ -518,15 +443,9 @@ test_expect_success 'subtest: --run a range and negation' '
 '
 
 test_expect_success 'subtest: --run range negation' '
-	write_and_run_sub_test_lib_test run-range-neg \
-		--run="!1-3" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-range-neg <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="!1-3" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 # skip passing test #1 (--run)
 	> ok 2 # skip passing test #2 (--run)
 	> ok 3 # skip passing test #3 (--run)
@@ -539,15 +458,9 @@ test_expect_success 'subtest: --run range negation' '
 '
 
 test_expect_success 'subtest: --run include, exclude and include' '
-	write_and_run_sub_test_lib_test run-inc-neg-inc \
-		--run="1-5,!1-3,2" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-inc-neg-inc <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="1-5,!1-3,2" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 # skip passing test #1 (--run)
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -560,15 +473,9 @@ test_expect_success 'subtest: --run include, exclude and include' '
 '
 
 test_expect_success 'subtest: --run include, exclude and include, comma separated' '
-	write_and_run_sub_test_lib_test run-inc-neg-inc-comma \
-		--run=1-5,!1-3,2 <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-inc-neg-inc-comma <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run=1-5,!1-3,2 &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 # skip passing test #1 (--run)
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -581,15 +488,9 @@ test_expect_success 'subtest: --run include, exclude and include, comma separate
 '
 
 test_expect_success 'subtest: --run exclude and include' '
-	write_and_run_sub_test_lib_test run-neg-inc \
-		--run="!3-,5" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-neg-inc <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="!3-,5" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 - passing test #2
 	> ok 3 # skip passing test #3 (--run)
@@ -602,15 +503,9 @@ test_expect_success 'subtest: --run exclude and include' '
 '
 
 test_expect_success 'subtest: --run empty selectors' '
-	write_and_run_sub_test_lib_test run-empty-sel \
-		--run="1,,3,,,5" <<-\EOF &&
-	for i in 1 2 3 4 5 6
-	do
-		test_expect_success "passing test #$i" "true"
-	done
-	test_done
-	EOF
-	check_sub_test_lib_test run-empty-sel <<-\EOF
+	run_sub_test_lib_test git-skip-tests-several \
+		--run="1,,3,,,5" &&
+	check_sub_test_lib_test git-skip-tests-several <<-\EOF
 	> ok 1 - passing test #1
 	> ok 2 # skip passing test #2 (--run)
 	> ok 3 - passing test #3
@@ -660,12 +555,9 @@ test_expect_success 'subtest: --run keyword selection' '
 '
 
 test_expect_success 'subtest: --run invalid range end' '
-	write_and_run_sub_test_lib_test_err run-inv-range-end \
-		--run="1-z" <<-\EOF &&
-	test_expect_success "passing test #1" "true"
-	test_done
-	EOF
-	check_sub_test_lib_test_err run-inv-range-end \
+	run_sub_test_lib_test_err run-inv-range-start \
+		--run="1-z" &&
+	check_sub_test_lib_test_err run-inv-range-start \
 		<<-\EOF_OUT 3<<-EOF_ERR
 	> FATAL: Unexpected exit with code 1
 	EOF_OUT

From 96a4473d78a9cfa224abe986f8d3275e10228e59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 12:37:25 +0200
Subject: [PATCH 166/198] test-lib tests: assert no copy/pasted mock test code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the preceding commit we got rid of a bunch of copy/pasted test code
from t0000-basic.sh in favor of re-using earlier already set up tests.

Since copy/pasting the tests is likely to be something done by mistake
in the future, let's add an assertion that detects whether this has
happened.

Now that we don't provide a unique test_description="" to these tests
we can rely on the content written out being the same in this
case. Let's use the object store to hash it, and attempt to create a
"blob-<CONTENT_OID>" tag. If another test has already written out the
same content we'll fail, and the tag envelope will provide an error
message pointing us in the right direction..

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index cdbc2d933206bb..6857afdaa5d45b 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -1,3 +1,34 @@
+_assert_unique_sub_test () {
+	name=$1 &&
+
+	# Alert about the copy/paste programming
+	hash=$(git hash-object -w "$name") &&
+	cat >tag.sig <<-EOF &&
+	object $hash
+	type blob
+	tag $hash
+	tagger . <> 0 +0000
+
+	duplicate script detected!
+
+	This test script was already written as:
+
+	$name
+
+	You can just re-use its test code with your own
+	run_sub_test_lib_test*()
+	EOF
+
+	tag=$(git mktag <tag.sig) &&
+	if ! git update-ref refs/tags/blob-$hash $tag $(test_oid zero) 2>/dev/null
+	then
+		msg=$(git for-each-ref refs/tags/blob-$hash \
+			--format='%(contents)' refs/tags/blob-$hash)
+		error "on write of $name: $msg"
+		return 1
+	fi
+}
+
 write_sub_test_lib_test () {
 	name="$1" # stdin is the body of the test code
 	mkdir "$name" &&
@@ -7,7 +38,8 @@ write_sub_test_lib_test () {
 	# Point to the t/test-lib.sh, which isn't in ../ as usual
 	. "\$TEST_DIRECTORY"/test-lib.sh
 	EOF
-	cat >>"$name/$name.sh"
+	cat >>"$name/$name.sh" &&
+	_assert_unique_sub_test "$name/$name.sh"
 }
 
 _run_sub_test_lib_test_common () {

From e52e72f6208eaf1dbde94cb1843af65c513ea64e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 12:37:26 +0200
Subject: [PATCH 167/198] test-lib tests: avoid subshell for "test_cmp" for
 readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The use of a sub-shell for running the test_cmp of stdout/stderr for
the test author was introduced in this form in 565b6fa87bb (tests:
refactor mechanics of testing in a sub test-lib, 2012-12-16), but from
looking at the history that seemed to have diligently copied my
original ad-hoc implementation in 7b905119703 (t/t0000-basic.sh: Run
the passing TODO test inside its own test-lib, 2010-08-19).

There's no reason to use a subshell here, we try to avoid it in
general. It also improves readability, if the test fails we print out
the relative path in the trash directory that needs to be looked
at.

Before that was mostly obscured, since the "write_sub_test_lib_test"
will pick the directory for you from the test name.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 6857afdaa5d45b..529f3a4d08bc50 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -111,22 +111,16 @@ run_sub_test_lib_test_err () {
 
 check_sub_test_lib_test () {
 	name="$1" # stdin is the expected output from the test
-	(
-		cd "$name" &&
-		test_must_be_empty err &&
-		sed -e 's/^> //' -e 's/Z$//' >expect &&
-		test_cmp expect out
-	)
+	test_must_be_empty "$name"/err &&
+	sed -e 's/^> //' -e 's/Z$//' >"$name"/expect &&
+	test_cmp "$name/"expect "$name"/out
 }
 
 check_sub_test_lib_test_err () {
 	name="$1" # stdin is the expected output from the test
 	# expected error output is in descriptor 3
-	(
-		cd "$name" &&
-		sed -e 's/^> //' -e 's/Z$//' >expect.out &&
-		test_cmp expect.out out &&
-		sed -e 's/^> //' -e 's/Z$//' <&3 >expect.err &&
-		test_cmp expect.err err
-	)
+	sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out &&
+	test_cmp "$name"/expect.out "$name"/out &&
+	sed -e 's/^> //' -e 's/Z$//' <&3 >"$name"/expect.err &&
+	test_cmp "$name"/expect.err "$name"/err
 }

From dda14ce6f6a22f263ceab94fd6c3a2f59a667734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 12:37:27 +0200
Subject: [PATCH 168/198] test-lib tests: refactor common part of
 check_sub_test_lib_test*()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor the two check_sub_test_lib_test*() functions to avoid
duplicating the same comparison they did of stdout. This duplication
was initially added when check_sub_test_lib_test_err() was added in
0445e6f0a12 (test-lib: '--run' to run only specific tests,
2014-04-30).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 529f3a4d08bc50..0b9ecdb6dbf438 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -109,18 +109,22 @@ run_sub_test_lib_test_err () {
 	_run_sub_test_lib_test_common '!' "$@"
 }
 
+_check_sub_test_lib_test_common () {
+	name="$1" &&
+	sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out &&
+	test_cmp "$name"/expect.out "$name"/out
+}
+
 check_sub_test_lib_test () {
 	name="$1" # stdin is the expected output from the test
-	test_must_be_empty "$name"/err &&
-	sed -e 's/^> //' -e 's/Z$//' >"$name"/expect &&
-	test_cmp "$name/"expect "$name"/out
+	_check_sub_test_lib_test_common "$name" &&
+	test_must_be_empty "$name"/err
 }
 
 check_sub_test_lib_test_err () {
 	name="$1" # stdin is the expected output from the test
+	_check_sub_test_lib_test_common "$name" &&
 	# expected error output is in descriptor 3
-	sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out &&
-	test_cmp "$name"/expect.out "$name"/out &&
 	sed -e 's/^> //' -e 's/Z$//' <&3 >"$name"/expect.err &&
 	test_cmp "$name"/expect.err "$name"/err
 }

From 8161050e923bbeca8a47372064c6dffa9022b529 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 12:37:28 +0200
Subject: [PATCH 169/198] test-lib tests: assert 1 exit code, not non-zero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Improve the testing for test-lib.sh itself to assert that we have a
exit code of 1, not any non-zero. Improves code added in
0445e6f0a12 (test-lib: '--run' to run only specific tests,
2014-04-30).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-subtest.sh | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh
index 0b9ecdb6dbf438..d6b625d397fdac 100644
--- a/t/lib-subtest.sh
+++ b/t/lib-subtest.sh
@@ -43,8 +43,8 @@ write_sub_test_lib_test () {
 }
 
 _run_sub_test_lib_test_common () {
-	neg="$1" name="$2" # stdin is the body of the test code
-	shift 2
+	cmp_op="$1" want_code="$2" name="$3" # stdin is the body of the test code
+	shift 3
 
 	# intercept pseudo-options at the front of the argument list that we
 	# will not pass to child script
@@ -80,33 +80,30 @@ _run_sub_test_lib_test_common () {
 		GIT_SKIP_TESTS=$skip &&
 		export GIT_SKIP_TESTS &&
 		sane_unset GIT_TEST_FAIL_PREREQS &&
-		if test -z "$neg"
-		then
-			./"$name.sh" "$@" >out 2>err
-		else
-			! ./"$name.sh" "$@" >out 2>err
-		fi
+		./"$name.sh" "$@" >out 2>err;
+		ret=$? &&
+		test "$ret" "$cmp_op" "$want_code"
 	)
 }
 
 write_and_run_sub_test_lib_test () {
 	name="$1" descr="$2" # stdin is the body of the test code
 	write_sub_test_lib_test "$@" || return 1
-	_run_sub_test_lib_test_common '' "$@"
+	_run_sub_test_lib_test_common -eq 0 "$@"
 }
 
 write_and_run_sub_test_lib_test_err () {
 	name="$1" descr="$2" # stdin is the body of the test code
 	write_sub_test_lib_test "$@" || return 1
-	_run_sub_test_lib_test_common '!' "$@"
+	_run_sub_test_lib_test_common -eq 1 "$@"
 }
 
 run_sub_test_lib_test () {
-	_run_sub_test_lib_test_common '' "$@"
+	_run_sub_test_lib_test_common -eq 0 "$@"
 }
 
 run_sub_test_lib_test_err () {
-	_run_sub_test_lib_test_common '!' "$@"
+	_run_sub_test_lib_test_common -eq 1 "$@"
 }
 
 _check_sub_test_lib_test_common () {

From 45f2aeb194fd6425bb56f3d0ebf61bf39883a496 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
Date: Thu, 5 Aug 2021 13:01:11 +0200
Subject: [PATCH 170/198] commit-graph: fix bogus counter in "Scanning merged
 commits" progress line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The final value of the counter of the "Scanning merged commits"
progress line is always one less than its expected total, e.g.:

  Scanning merged commits:  83% (5/6), done.

This happens because while iterating over an array the loop variable
is passed to display_progress() as-is, but while C arrays (and thus
the loop variable) start at 0 and end at N-1, the progress counter
must end at N. Fix this by passing 'i + 1' to display_progress(), like
most other callsites do.

There's an RFC series to add a GIT_TEST_CHECK_PROGRESS=1 mode[1] which
catches this issue in the 'fetch.writeCommitGraph' and
'fetch.writeCommitGraph with submodules' tests in
't5510-fetch.sh'. The GIT_TEST_CHECK_PROGRESS=1 mode is not part of
this series, but future changes to progress.c may add it or similar
assertions to catch this and similar bugs elsewhere.

1. https://lore.kernel.org/git/20210620200303.2328957-1-szeder.dev@gmail.com/

Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 commit-graph.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commit-graph.c b/commit-graph.c
index 1a2602da618c9b..918061f207cf86 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -2096,7 +2096,7 @@ static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
 
 	ctx->num_extra_edges = 0;
 	for (i = 0; i < ctx->commits.nr; i++) {
-		display_progress(ctx->progress, i);
+		display_progress(ctx->progress, i + 1);
 
 		if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
 			  &ctx->commits.list[i]->object.oid)) {

From 2a943937dda6d49827f020ee79287a08f28afa64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 5 Aug 2021 13:01:12 +0200
Subject: [PATCH 171/198] midx: don't provide a total for QSORT() progress
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The quicksort algorithm can be anywhere between O(n) and O(n^2), so
providing a "num objects" as a total means that in some cases we're
going to go past 100%.

This fixes a logic error in 5ae18df9d8e (midx: during verify group
objects by packfile to speed verification, 2019-03-21), which in turn
seems to have been diligently copied from my own logic error in the
commit-graph.c code, see 890226ccb57 (commit-graph write: add
itermediate progress, 2019-01-19).

That commit-graph code of mine was removed in
1cbdbf3bef7 (commit-graph: drop count_distinct_commits() function,
2020-12-07), so we don't need to fix that too.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 midx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/midx.c b/midx.c
index 9a35b0255d5c04..eaae75ab195402 100644
--- a/midx.c
+++ b/midx.c
@@ -1291,7 +1291,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag
 
 	if (flags & MIDX_PROGRESS)
 		progress = start_sparse_progress(_("Sorting objects by packfile"),
-						 m->num_objects);
+						 0);
 	display_progress(progress, 0); /* TODO: Measure QSORT() progress */
 	QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
 	stop_progress(&progress);

From da660bd15ba6df415a26d9a3ff8df4e0cbfd5148 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
Date: Thu, 5 Aug 2021 13:01:13 +0200
Subject: [PATCH 172/198] entry: show finer-grained counter in "Filtering
 content" progress line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "Filtering content" progress in entry.c:finish_delayed_checkout()
is unusual because of how it calculates the progress count and because
it shows the progress of a nested loop.  It works basically like this:

  start_delayed_progress(p, nr_of_paths_to_filter)
  for_each_filter {
      display_progress(p, nr_of_paths_to_filter - nr_of_paths_still_left_to_filter)
      for_each_path_handled_by_the_current_filter {
          checkout_entry()
      }
  }
  stop_progress(p)

There are two issues with this approach:

  - The work done by the last filter (or the only filter if there is
    only one) is never counted, so if the last filter still has some
    paths to process, then the counter shown in the "done" progress
    line will not match the expected total.

    The partially-RFC series to add a GIT_TEST_CHECK_PROGRESS=1
    mode[1] helps spot this issue. Under it the 'missing file in
    delayed checkout' and 'invalid file in delayed checkout' tests in
    't0021-conversion.sh' fail, because both use only one
    filter.  (The test 'delayed checkout in process filter' uses two
    filters but the first one does all the work, so that test already
    happens to succeed even with GIT_TEST_CHECK_PROGRESS=1.)

  - The progress counter is updated only once per filter, not once per
    processed path, so if a filter has a lot of paths to process, then
    the counter might stay unchanged for a long while and then make a
    big jump (though the user still gets a sense of progress, because
    we call display_throughput() after each processed path to show the
    amount of processed data).

Move the display_progress() call to the inner loop, right next to that
checkout_entry() call that does the hard work for each path, and use a
dedicated counter variable that is incremented upon processing each
path.

After this change the 'invalid file in delayed checkout' in
't0021-conversion.sh' would succeed with the GIT_TEST_CHECK_PROGRESS=1
assertion discussed above, but the 'missing file in delayed checkout'
test would still fail.

It'll fail because its purposefully buggy filter doesn't process any
paths, so we won't execute that inner loop at all, see [2] for how to
spot that issue without GIT_TEST_CHECK_PROGRESS=1. It's not
straightforward to fix it with the current progress.c library (see [3]
for an attempt), so let's leave it for now.

1. https://lore.kernel.org/git/20210620200303.2328957-1-szeder.dev@gmail.com/
2. http://lore.kernel.org/git/20210802214827.GE23408@szeder.dev
3. https://lore.kernel.org/git/20210620200303.2328957-7-szeder.dev@gmail.com/

Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 entry.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/entry.c b/entry.c
index 125fabdbd52c4d..d92dd020b3dd94 100644
--- a/entry.c
+++ b/entry.c
@@ -162,7 +162,7 @@ static int remove_available_paths(struct string_list_item *item, void *cb_data)
 int finish_delayed_checkout(struct checkout *state, int *nr_checkouts)
 {
 	int errs = 0;
-	unsigned delayed_object_count;
+	unsigned processed_paths = 0;
 	off_t filtered_bytes = 0;
 	struct string_list_item *filter, *path;
 	struct progress *progress;
@@ -172,12 +172,10 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts)
 		return errs;
 
 	dco->state = CE_RETRY;
-	delayed_object_count = dco->paths.nr;
-	progress = start_delayed_progress(_("Filtering content"), delayed_object_count);
+	progress = start_delayed_progress(_("Filtering content"), dco->paths.nr);
 	while (dco->filters.nr > 0) {
 		for_each_string_list_item(filter, &dco->filters) {
 			struct string_list available_paths = STRING_LIST_INIT_NODUP;
-			display_progress(progress, delayed_object_count - dco->paths.nr);
 
 			if (!async_query_available_blobs(filter->string, &available_paths)) {
 				/* Filter reported an error */
@@ -224,6 +222,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts)
 				ce = index_file_exists(state->istate, path->string,
 						       strlen(path->string), 0);
 				if (ce) {
+					display_progress(progress, ++processed_paths);
 					errs |= checkout_entry(ce, state, NULL, nr_checkouts);
 					filtered_bytes += ce->ce_stat_data.sd_size;
 					display_throughput(progress, filtered_bytes);

From 232290273b8b817312fcd1f5a88e154343e62b92 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 5 Aug 2021 16:47:54 -0400
Subject: [PATCH 173/198] diff: drop unused options parameter from
 cmp_in_block_with_wsd()

When 8e809cbb2f (diff --color-moved-ws=allow-indentation-change:
simplify and optimize, 2021-07-20) stopped looking at o->emitted_symbols
and instead took the symbol as a parameter, we no longer need to look at
the diff_options struct at all.

Dropping the unused parameter makes it clear that the function is
independent of the diff options.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index 2956c8f7103c2a..164af13b4f428a 100644
--- a/diff.c
+++ b/diff.c
@@ -863,8 +863,7 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a,
 	return a_width - b_width;
 }
 
-static int cmp_in_block_with_wsd(const struct diff_options *o,
-				 const struct moved_entry *cur,
+static int cmp_in_block_with_wsd(const struct moved_entry *cur,
 				 const struct emitted_diff_symbol *l,
 				 struct moved_block *pmb)
 {
@@ -1016,7 +1015,7 @@ static void pmb_advance_or_null(struct diff_options *o,
 		if (o->color_moved_ws_handling &
 		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
 			match = cur &&
-				!cmp_in_block_with_wsd(o, cur, l, &pmb[i]);
+				!cmp_in_block_with_wsd(cur, l, &pmb[i]);
 		else
 			match = cur && cur->es->id == l->id;
 

From 8a41badcffff4479a1317bd668451d13741bf49e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 5 Aug 2021 20:46:51 -0400
Subject: [PATCH 174/198] refs: drop unused "flags" parameter to
 lock_ref_oid_basic()

Commit 1578215dab (refs/files: remove unused REF_DELETING in
lock_ref_oid_basic(), 2021-07-20), dropped the last use of this flag
parameter.

All of the callers do pass REF_NO_DEREF, but that has been ignored
completely since 7a418f3a17 (lock_ref_sha1_basic(): only handle
REF_NODEREF mode, 2016-04-22).

So we can simply get rid of the parameter entirely.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs/files-backend.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 5d12003471e586..9a20d2bfc6ef45 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -876,8 +876,7 @@ static int create_reflock(const char *path, void *cb)
  * On failure errno is set to something meaningful.
  */
 static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
-					   const char *refname,
-					   unsigned int flags, int *type,
+					   const char *refname, int *type,
 					   struct strbuf *err)
 {
 	struct strbuf ref_file = STRBUF_INIT;
@@ -1345,7 +1344,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 
 	logmoved = log;
 
-	lock = lock_ref_oid_basic(refs, newrefname, REF_NO_DEREF, NULL, &err);
+	lock = lock_ref_oid_basic(refs, newrefname, NULL, &err);
 	if (!lock) {
 		if (copy)
 			error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf);
@@ -1367,7 +1366,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store,
 	goto out;
 
  rollback:
-	lock = lock_ref_oid_basic(refs, oldrefname, REF_NO_DEREF, NULL, &err);
+	lock = lock_ref_oid_basic(refs, oldrefname, NULL, &err);
 	if (!lock) {
 		error("unable to lock %s for rollback: %s", oldrefname, err.buf);
 		strbuf_release(&err);
@@ -1774,7 +1773,7 @@ static int files_create_symref(struct ref_store *ref_store,
 	struct ref_lock *lock;
 	int ret;
 
-	lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, NULL, &err);
+	lock = lock_ref_oid_basic(refs, refname, NULL, &err);
 	if (!lock) {
 		error("%s", err.buf);
 		strbuf_release(&err);
@@ -2992,7 +2991,7 @@ static int files_reflog_expire(struct ref_store *ref_store,
 	 * reference itself, plus we might need to update the
 	 * reference if --updateref was specified:
 	 */
-	lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, &type, &err);
+	lock = lock_ref_oid_basic(refs, refname, &type, &err);
 	if (!lock) {
 		error("cannot lock ref '%s': %s", refname, err.buf);
 		strbuf_release(&err);

From d44ac73b5de38268a60ab91f9b483782b2454592 Mon Sep 17 00:00:00 2001
From: Noah Pendleton <noah.pendleton@gmail.com>
Date: Sun, 8 Aug 2021 13:48:47 -0400
Subject: [PATCH 175/198] blame: add config `blame.ignoreRevsFileIsOptional`

Setting the config option `blame.ignoreRevsFile` globally to eg
`.git-blame-ignore-revs` causes `git blame` to error when the file
doesn't exist in the current repository:

```
fatal: could not open object name list: .git-blame-ignore-revs
```

Add a new config option, `blame.ignoreRevsFileIsOptional`, that when set
to true, `git blame` will silently ignore any missing ignoreRevsFile's.

Signed-off-by: Noah Pendleton <noah.pendleton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/blame-options.txt |  3 ++-
 Documentation/config/blame.txt  |  5 +++++
 builtin/blame.c                 |  7 ++++++-
 t/t8013-blame-ignore-revs.sh    | 14 ++++++++++----
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
index 117f4cf80645e1..199a28ab79c47e 100644
--- a/Documentation/blame-options.txt
+++ b/Documentation/blame-options.txt
@@ -134,7 +134,8 @@ take effect.
 	`fsck.skipList`.  This option may be repeated, and these files will be
 	processed after any files specified with the `blame.ignoreRevsFile` config
 	option.  An empty file name, `""`, will clear the list of revs from
-	previously processed files.
+	previously processed files. If `blame.ignoreRevsFileIsOptional` is true,
+	missing files will be silently ignored.
 
 -h::
 	Show help message.
diff --git a/Documentation/config/blame.txt b/Documentation/config/blame.txt
index 4d047c17908cd6..2aae851e4b76e3 100644
--- a/Documentation/config/blame.txt
+++ b/Documentation/config/blame.txt
@@ -27,6 +27,11 @@ blame.ignoreRevsFile::
 	file names will reset the list of ignored revisions.  This option will
 	be handled before the command line option `--ignore-revs-file`.
 
+blame.ignoreRevsFileIsOptional::
+	Silently skip missing files specified by ignoreRevsFile or the command line
+	option `--ignore-revs-file`. If unset, or set to false, missing files will
+	cause a nonrecoverable error.
+
 blame.markUnblamableLines::
 	Mark lines that were changed by an ignored revision that we could not
 	attribute to another commit with a '*' in the output of
diff --git a/builtin/blame.c b/builtin/blame.c
index 641523ff9af693..df132b34cedc2b 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -56,6 +56,7 @@ static int coloring_mode;
 static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP;
 static int mark_unblamable_lines;
 static int mark_ignored_lines;
+static int ignorerevsfileisoptional;
 
 static struct date_mode blame_date_mode = { DATE_ISO8601 };
 static size_t blame_date_width;
@@ -715,6 +716,9 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 		string_list_insert(&ignore_revs_file_list, str);
 		return 0;
 	}
+	if (!strcmp(var, "blame.ignorerevsfileisoptional")) {
+		ignorerevsfileisoptional = git_config_bool(var, value);
+	}
 	if (!strcmp(var, "blame.markunblamablelines")) {
 		mark_unblamable_lines = git_config_bool(var, value);
 		return 0;
@@ -835,7 +839,8 @@ static void build_ignorelist(struct blame_scoreboard *sb,
 	for_each_string_list_item(i, ignore_revs_file_list) {
 		if (!strcmp(i->string, ""))
 			oidset_clear(&sb->ignore_list);
-		else
+		/* skip non-existent files if ignorerevsfileisoptional is set */
+		else if (!ignorerevsfileisoptional || file_exists(i->string))
 			oidset_parse_file_carefully(&sb->ignore_list, i->string,
 						    peel_to_commit_oid, sb);
 	}
diff --git a/t/t8013-blame-ignore-revs.sh b/t/t8013-blame-ignore-revs.sh
index b18633dee1bfb2..f789426cbf2aee 100755
--- a/t/t8013-blame-ignore-revs.sh
+++ b/t/t8013-blame-ignore-revs.sh
@@ -127,18 +127,24 @@ test_expect_success override_ignore_revs_file '
 	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
 	test_cmp expect actual
 	'
-test_expect_success bad_files_and_revs '
+test_expect_success bad_revs '
 	test_must_fail git blame file --ignore-rev NOREV 2>err &&
 	test_i18ngrep "cannot find revision NOREV to ignore" err &&
 
-	test_must_fail git blame file --ignore-revs-file NOFILE 2>err &&
-	test_i18ngrep "could not open.*: NOFILE" err &&
-
 	echo NOREV >ignore_norev &&
 	test_must_fail git blame file --ignore-revs-file ignore_norev 2>err &&
 	test_i18ngrep "invalid object name: NOREV" err
 '
 
+# Non-existent ignore-revs-file should fail unless
+# blame.ignoreRevsFileIsOptional is set
+test_expect_success bad_file '
+	test_must_fail git blame file --ignore-revs-file NOFILE &&
+
+	git config --add blame.ignorerevsfileisoptional true &&
+	git blame file --ignore-revs-file NOFILE
+'
+
 # For ignored revs that have added 'unblamable' lines, mark those lines with a
 # '*'
 # 	A--B--X--Y

From 7a672febd47e1c25c69732adda62fdfb481f8fb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Sun, 8 Aug 2021 18:38:33 -0700
Subject: [PATCH 176/198] ci: run a pedantic build as part of the GitHub
 workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

similar to the recently added sparse task, it is nice to know as early
as possible.

add a dockerized build using fedora (that usually has the latest gcc)
to be ahead of the curve and avoid older ISO C issues at the same time.

Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .github/workflows/main.yml        |  2 ++
 ci/install-docker-dependencies.sh |  4 ++++
 ci/run-build-and-tests.sh         | 10 +++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 73856bafc9debd..b93561978d3f4d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -324,6 +324,8 @@ jobs:
           image: alpine
         - jobname: Linux32
           image: daald/ubuntu32:xenial
+        - jobname: pedantic
+          image: fedora
     env:
       jobname: ${{matrix.vector.jobname}}
     runs-on: ubuntu-latest
diff --git a/ci/install-docker-dependencies.sh b/ci/install-docker-dependencies.sh
index 26a6689766d7f1..07a8c6b199d39c 100755
--- a/ci/install-docker-dependencies.sh
+++ b/ci/install-docker-dependencies.sh
@@ -15,4 +15,8 @@ linux-musl)
 	apk add --update build-base curl-dev openssl-dev expat-dev gettext \
 		pcre2-dev python3 musl-libintl perl-utils ncurses >/dev/null
 	;;
+pedantic)
+	dnf -yq update >/dev/null &&
+	dnf -yq install make gcc findutils diffutils perl python3 gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null
+	;;
 esac
diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh
index 3ce81ffee941b2..f3aba5d6cbb931 100755
--- a/ci/run-build-and-tests.sh
+++ b/ci/run-build-and-tests.sh
@@ -10,6 +10,11 @@ windows*) cmd //c mklink //j t\\.prove "$(cygpath -aw "$cache_dir/.prove")";;
 *) ln -s "$cache_dir/.prove" t/.prove;;
 esac
 
+if test "$jobname" = "pedantic"
+then
+	export DEVOPTS=pedantic
+fi
+
 make
 case "$jobname" in
 linux-gcc)
@@ -35,10 +40,9 @@ linux-clang)
 	export GIT_TEST_DEFAULT_HASH=sha256
 	make test
 	;;
-linux-gcc-4.8)
+linux-gcc-4.8|pedantic)
 	# Don't run the tests; we only care about whether Git can be
-	# built with GCC 4.8, as it errors out on some undesired (C99)
-	# constructs that newer compilers seem to quietly accept.
+	# built with GCC 4.8 or with pedantic
 	;;
 *)
 	make test

From 2d12b1d828e07a46db7ae4bef46a4726d6cad0dc Mon Sep 17 00:00:00 2001
From: Kim Altintop <kim@eagain.st>
Date: Mon, 9 Aug 2021 17:56:45 +0000
Subject: [PATCH 177/198] t5730: introduce fetch command helper

Assembling a "raw" fetch command to be fed directly to "test-tool serve-v2"
is extracted into a test helper.

Suggested-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Kim Altintop <kim@eagain.st>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5703-upload-pack-ref-in-want.sh | 107 ++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 33 deletions(-)

diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh
index e9e471621d59a6..cd4744b016944f 100755
--- a/t/t5703-upload-pack-ref-in-want.sh
+++ b/t/t5703-upload-pack-ref-in-want.sh
@@ -40,6 +40,54 @@ write_command () {
 	fi
 }
 
+# Write a complete fetch command to stdout, suitable for use with `test-tool
+# pkt-line`. "want-ref", "want", and "have" values can be given in this order,
+# with sections separated by "--".
+#
+# Examples:
+#
+# write_fetch_command refs/heads/main
+#
+# write_fetch_command \
+#	refs/heads/main \
+#	-- \
+#	-- \
+#	$(git rev-parse x)
+#
+# write_fetch_command \
+#	--
+#	$(git rev-parse a) \
+#	--
+#	$(git rev-parse b)
+write_fetch_command () {
+	write_command fetch &&
+	echo "0001" &&
+	echo "no-progress" || return
+    while :
+	do
+		case $# in 0) break ;; esac &&
+		case "$1" in --) shift; break ;; esac &&
+		echo "want-ref $1" &&
+		shift || return
+	done &&
+    while :
+	do
+		case $# in 0) break ;; esac &&
+		case "$1" in --) shift; break ;; esac &&
+		echo "want $1" &&
+		shift || return
+	done &&
+    while :
+	do
+		case $# in 0) break ;; esac &&
+		case "$1" in --) shift; break ;; esac &&
+		echo "have $1" &&
+		shift || return
+	done &&
+	echo "done" &&
+	echo "0000"
+}
+
 # c(o/foo) d(o/bar)
 #        \ /
 #         b   e(baz)  f(main)
@@ -97,15 +145,13 @@ test_expect_success 'basic want-ref' '
 	EOF
 	git rev-parse f >expected_commits &&
 
-	oid=$(git rev-parse a) &&
 	test-tool pkt-line pack >in <<-EOF &&
-	$(write_command fetch)
-	0001
-	no-progress
-	want-ref refs/heads/main
-	have $oid
-	done
-	0000
+	$(write_fetch_command \
+		refs/heads/main \
+		-- \
+		-- \
+		$(git rev-parse a) \
+	)
 	EOF
 
 	test-tool serve-v2 --stateless-rpc >out <in &&
@@ -121,16 +167,14 @@ test_expect_success 'multiple want-ref lines' '
 	EOF
 	git rev-parse c d >expected_commits &&
 
-	oid=$(git rev-parse b) &&
 	test-tool pkt-line pack >in <<-EOF &&
-	$(write_command fetch)
-	0001
-	no-progress
-	want-ref refs/heads/o/foo
-	want-ref refs/heads/o/bar
-	have $oid
-	done
-	0000
+	$(write_fetch_command \
+		refs/heads/o/foo \
+		refs/heads/o/bar \
+		-- \
+		-- \
+		$(git rev-parse b) \
+	)
 	EOF
 
 	test-tool serve-v2 --stateless-rpc >out <in &&
@@ -145,14 +189,13 @@ test_expect_success 'mix want and want-ref' '
 	git rev-parse e f >expected_commits &&
 
 	test-tool pkt-line pack >in <<-EOF &&
-	$(write_command fetch)
-	0001
-	no-progress
-	want-ref refs/heads/main
-	want $(git rev-parse e)
-	have $(git rev-parse a)
-	done
-	0000
+	$(write_fetch_command \
+		refs/heads/main \
+		-- \
+		$(git rev-parse e) \
+		-- \
+		$(git rev-parse a) \
+	)
 	EOF
 
 	test-tool serve-v2 --stateless-rpc >out <in &&
@@ -166,15 +209,13 @@ test_expect_success 'want-ref with ref we already have commit for' '
 	EOF
 	>expected_commits &&
 
-	oid=$(git rev-parse c) &&
 	test-tool pkt-line pack >in <<-EOF &&
-	$(write_command fetch)
-	0001
-	no-progress
-	want-ref refs/heads/o/foo
-	have $oid
-	done
-	0000
+	$(write_fetch_command \
+		refs/heads/o/foo \
+		-- \
+		-- \
+		$(git rev-parse c) \
+	)
 	EOF
 
 	test-tool serve-v2 --stateless-rpc >out <in &&

From f8121e671acb141acfb2e1770353b339942e9125 Mon Sep 17 00:00:00 2001
From: Kim Altintop <kim@eagain.st>
Date: Mon, 9 Aug 2021 17:57:09 +0000
Subject: [PATCH 178/198] upload-pack.c: treat want-ref relative to namespace

When 'upload-pack' runs within the context of a git namespace, treat any
'want-ref' lines the client sends as relative to that namespace.

Also check if the wanted ref is hidden via 'hideRefs'. If it is hidden,
respond with an error as if the ref didn't exist.

Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Kim Altintop <kim@eagain.st>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5703-upload-pack-ref-in-want.sh | 129 +++++++++++++++++++++++++++++
 upload-pack.c                      |  18 ++--
 2 files changed, 140 insertions(+), 7 deletions(-)

diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh
index cd4744b016944f..3e1e0b8fe17d97 100755
--- a/t/t5703-upload-pack-ref-in-want.sh
+++ b/t/t5703-upload-pack-ref-in-want.sh
@@ -339,6 +339,135 @@ test_expect_success 'fetching with wildcard that matches multiple refs' '
 	grep "want-ref refs/heads/o/bar" log
 '
 
+REPO="$(pwd)/repo-ns"
+
+test_expect_success 'setup namespaced repo' '
+	(
+		git init -b main "$REPO" &&
+		cd "$REPO" &&
+		test_commit a &&
+		test_commit b &&
+		git checkout a &&
+		test_commit c &&
+		git checkout a &&
+		test_commit d &&
+		git update-ref refs/heads/ns-no b &&
+		git update-ref refs/namespaces/ns/refs/heads/ns-yes c &&
+		git update-ref refs/namespaces/ns/refs/heads/hidden d
+	) &&
+	git -C "$REPO" config uploadpack.allowRefInWant true
+'
+
+test_expect_success 'with namespace: want-ref is considered relative to namespace' '
+	wanted_ref=refs/heads/ns-yes &&
+
+	oid=$(git -C "$REPO" rev-parse "refs/namespaces/ns/$wanted_ref") &&
+	cat >expected_refs <<-EOF &&
+	$oid $wanted_ref
+	EOF
+	cat >expected_commits <<-EOF &&
+	$oid
+	$(git -C "$REPO" rev-parse a)
+	EOF
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	GIT_NAMESPACE=ns test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	check_output
+'
+
+test_expect_success 'with namespace: want-ref outside namespace is unknown' '
+	wanted_ref=refs/heads/ns-no &&
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	test_must_fail env GIT_NAMESPACE=ns \
+		test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	grep "unknown ref" out
+'
+
+# Cross-check refs/heads/ns-no indeed exists
+test_expect_success 'without namespace: want-ref outside namespace succeeds' '
+	wanted_ref=refs/heads/ns-no &&
+
+	oid=$(git -C "$REPO" rev-parse $wanted_ref) &&
+	cat >expected_refs <<-EOF &&
+	$oid $wanted_ref
+	EOF
+	cat >expected_commits <<-EOF &&
+	$oid
+	$(git -C "$REPO" rev-parse a)
+	EOF
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	check_output
+'
+
+test_expect_success 'with namespace: hideRefs is matched, relative to namespace' '
+	wanted_ref=refs/heads/hidden &&
+	git -C "$REPO" config transfer.hideRefs $wanted_ref &&
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	test_must_fail env GIT_NAMESPACE=ns \
+		test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	grep "unknown ref" out
+'
+
+# Cross-check refs/heads/hidden indeed exists
+test_expect_success 'with namespace: want-ref succeeds if hideRefs is removed' '
+	wanted_ref=refs/heads/hidden &&
+	git -C "$REPO" config --unset transfer.hideRefs $wanted_ref &&
+
+	oid=$(git -C "$REPO" rev-parse "refs/namespaces/ns/$wanted_ref") &&
+	cat >expected_refs <<-EOF &&
+	$oid $wanted_ref
+	EOF
+	cat >expected_commits <<-EOF &&
+	$oid
+	$(git -C "$REPO" rev-parse a)
+	EOF
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	GIT_NAMESPACE=ns test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	check_output
+'
+
+test_expect_success 'without namespace: relative hideRefs does not match' '
+	wanted_ref=refs/namespaces/ns/refs/heads/hidden &&
+	git -C "$REPO" config transfer.hideRefs refs/heads/hidden &&
+
+	oid=$(git -C "$REPO" rev-parse $wanted_ref) &&
+	cat >expected_refs <<-EOF &&
+	$oid $wanted_ref
+	EOF
+	cat >expected_commits <<-EOF &&
+	$oid
+	$(git -C "$REPO" rev-parse a)
+	EOF
+
+	test-tool pkt-line pack >in <<-EOF &&
+	$(write_fetch_command $wanted_ref)
+	EOF
+
+	test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in &&
+	check_output
+'
+
+
 . "$TEST_DIRECTORY"/lib-httpd.sh
 start_httpd
 
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb43679..6ce07231d3dc34 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1417,21 +1417,25 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
 			  struct string_list *wanted_refs,
 			  struct object_array *want_obj)
 {
-	const char *arg;
-	if (skip_prefix(line, "want-ref ", &arg)) {
+	const char *refname_nons;
+	if (skip_prefix(line, "want-ref ", &refname_nons)) {
 		struct object_id oid;
 		struct string_list_item *item;
 		struct object *o;
+		struct strbuf refname = STRBUF_INIT;
 
-		if (read_ref(arg, &oid)) {
-			packet_writer_error(writer, "unknown ref %s", arg);
-			die("unknown ref %s", arg);
+		strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
+		if (ref_is_hidden(refname_nons, refname.buf) ||
+		    read_ref(refname.buf, &oid)) {
+			packet_writer_error(writer, "unknown ref %s", refname_nons);
+			die("unknown ref %s", refname_nons);
 		}
+		strbuf_release(&refname);
 
-		item = string_list_append(wanted_refs, arg);
+		item = string_list_append(wanted_refs, refname_nons);
 		item->util = oiddup(&oid);
 
-		o = parse_object_or_die(&oid, arg);
+		o = parse_object_or_die(&oid, refname_nons);
 		if (!(o->flags & WANTED)) {
 			o->flags |= WANTED;
 			add_object_array(o, NULL, want_obj);

From 24a6e5bc4e084e9e5e3b8981e98d2df8bc4c2e1a Mon Sep 17 00:00:00 2001
From: Kim Altintop <kim@eagain.st>
Date: Mon, 9 Aug 2021 17:57:24 +0000
Subject: [PATCH 179/198] docs: clarify the interaction of transfer.hideRefs
 and namespaces

Expand the section about namespaces in the documentation of
`transfer.hideRefs` to point out the subtle differences between
`upload-pack` and `receive-pack`.

9bedd82017 (upload-pack.c: treat want-ref relative to namespace,
2021-07-30) taught `upload-pack` to reject `want-ref`s for hidden refs,
which is now documented.

Signed-off-by: Kim Altintop <kim@eagain.st>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/transfer.txt | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt
index 505126a7802319..09ebb399ceb225 100644
--- a/Documentation/config/transfer.txt
+++ b/Documentation/config/transfer.txt
@@ -52,13 +52,16 @@ If you have multiple hideRefs values, later entries override earlier ones
 (and entries in more-specific config files override less-specific ones).
 +
 If a namespace is in use, the namespace prefix is stripped from each
-reference before it is matched against `transfer.hiderefs` patterns.
-For example, if `refs/heads/master` is specified in `transfer.hideRefs` and
-the current namespace is `foo`, then `refs/namespaces/foo/refs/heads/master`
-is omitted from the advertisements but `refs/heads/master` and
-`refs/namespaces/bar/refs/heads/master` are still advertised as so-called
-"have" lines. In order to match refs before stripping, add a `^` in front of
-the ref name. If you combine `!` and `^`, `!` must be specified first.
+reference before it is matched against `transfer.hiderefs` patterns. For
+example, if `refs/heads/master` is specified in `transfer.hideRefs` and the
+current namespace is `foo`, then `refs/namespaces/foo/refs/heads/master` is
+omitted from the advertisements. If `uploadpack.allowRefInWant` is set,
+`upload-pack` will treat `want-ref refs/heads/master` in a protocol v2
+`fetch` command as if `refs/heads/master` was unknown. Note, however, that
+`receive-pack` will still advertise the object id `refs/heads/master` is
+pointing to, but will conceil the name of the ref. In order to match refs
+before stripping, add a `^` in front of the ref name. If you combine `!` and
+`^`, `!` must be specified first.
 +
 Even if you hide refs, a client may still be able to steal the target
 objects via the techniques described in the "SECURITY" section of the

From 6a39a348cd4dae9935029d4fc86f25cdb86c68c3 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 9 Aug 2021 14:55:48 -0700
Subject: [PATCH 180/198] ### match next


From 86d6ca5886bdfaf92b4b5d1315a2664cc1387fbd Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:06 +0000
Subject: [PATCH 181/198] t7519: rewrite sparse index test

The sparse index is tested with the FS Monitor hook and extension since
f8fe49e (fsmonitor: integrate with sparse index, 2021-07-14). This test
was very fragile because it shared an index across sparse and non-sparse
behavior. Since that expansion and contraction could cause the index to
lose its FS Monitor bitmap and token, behavior is fragile to changes in
'git sparse-checkout set'.

Rewrite the test to use two clones of the original repo: full and
sparse. This allows us to also keep the test files (actual, expect,
trace2.txt) out of the repos we are testing with 'git status'.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7519-status-fsmonitor.sh | 38 ++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index deea88d4431d23..6f2cf306f66588 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -389,43 +389,47 @@ test_expect_success 'status succeeds after staging/unstaging' '
 # If "!" is supplied, then we verify that we do not call ensure_full_index
 # during a call to 'git status'. Otherwise, we verify that we _do_ call it.
 check_sparse_index_behavior () {
-	git status --porcelain=v2 >expect &&
-	git sparse-checkout init --cone --sparse-index &&
-	git sparse-checkout set dir1 dir2 &&
+	git -C full status --porcelain=v2 >expect &&
 	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
-		git status --porcelain=v2 >actual &&
+		git -C sparse status --porcelain=v2 >actual &&
 	test_region $1 index ensure_full_index trace2.txt &&
 	test_region fsm_hook query trace2.txt &&
 	test_cmp expect actual &&
-	rm trace2.txt &&
-	git sparse-checkout disable
+	rm trace2.txt
 }
 
 test_expect_success 'status succeeds with sparse index' '
-	git reset --hard &&
+	git clone . full &&
+	git clone . sparse &&
+	git -C sparse sparse-checkout init --cone --sparse-index &&
+	git -C sparse sparse-checkout set dir1 dir2 &&
 
-	test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
-	check_sparse_index_behavior ! &&
-
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 	EOF
-	git config core.fsmonitor .git/hooks/fsmonitor-test &&
+	git -C full config core.fsmonitor ../.git/hooks/fsmonitor-test &&
+	git -C sparse config core.fsmonitor ../.git/hooks/fsmonitor-test &&
 	check_sparse_index_behavior ! &&
 
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 		printf "dir1/modified\0"
 	EOF
 	check_sparse_index_behavior ! &&
 
-	cp -r dir1 dir1a &&
-	git add dir1a &&
-	git commit -m "add dir1a" &&
+	git -C sparse sparse-checkout add dir1a &&
+
+	for repo in full sparse
+	do
+		cp -r $repo/dir1 $repo/dir1a &&
+		git -C $repo add dir1a &&
+		git -C $repo commit -m "add dir1a" || return 1
+	done &&
+	git -C sparse sparse-checkout set dir1 dir2 &&
 
 	# This one modifies outside the sparse-checkout definition
 	# and hence we expect to expand the sparse-index.
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 		printf "dir1a/modified\0"
 	EOF

From 160eea76a2d34b343d189a4386cb667e177663fd Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:07 +0000
Subject: [PATCH 182/198] sparse-index: silently return when not using
 cone-mode patterns

While the sparse-index is only enabled when core.sparseCheckoutCone is
also enabled, it is possible for the user to modify the sparse-checkout
file manually in a way that does not match cone-mode patterns. In this
case, we should refuse to convert an index into a sparse index, since
the sparse_checkout_patterns will not be initialized with recursive and
parent path hashsets.

Also silently return if there are no cache entries, which is a simple
case: there are no paths to make sparse!

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sparse-index.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sparse-index.c b/sparse-index.c
index c6b4feec413a8f..bc5900eae35b45 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -130,7 +130,8 @@ static int index_has_unmerged_entries(struct index_state *istate)
 int convert_to_sparse(struct index_state *istate)
 {
 	int test_env;
-	if (istate->split_index || istate->sparse_index ||
+
+	if (istate->split_index || istate->sparse_index || !istate->cache_nr ||
 	    !core_apply_sparse_checkout || !core_sparse_checkout_cone)
 		return 0;
 
@@ -158,10 +159,16 @@ int convert_to_sparse(struct index_state *istate)
 			return 0;
 	}
 
-	if (!istate->sparse_checkout_patterns->use_cone_patterns) {
-		warning(_("attempting to use sparse-index without cone mode"));
-		return -1;
-	}
+	/*
+	 * We need cone-mode patterns to use sparse-index. If a user edits
+	 * their sparse-checkout file manually, then we can detect during
+	 * parsing that they are not actually using cone-mode patterns and
+	 * hence we need to abort this conversion _without error_. Warnings
+	 * already exist in the pattern parsing to inform the user of their
+	 * bad patterns.
+	 */
+	if (!istate->sparse_checkout_patterns->use_cone_patterns)
+		return 0;
 
 	/*
 	 * NEEDSWORK: If we have unmerged entries, then stay full.

From 3cfe2b83264a4fedfa266e58698322a806d8f16c Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:08 +0000
Subject: [PATCH 183/198] sparse-index: silently return when cache tree fails

If cache_tree_update() returns a non-zero value, then it could not
create the cache tree. This is likely due to a path having a merge
conflict. Since we are already returning early, let's return silently to
avoid making it seem like we failed to write the index at all.

If we remove our dependence on the cache tree within
convert_to_sparse(), then we could still recover from this scenario and
have a sparse index.

When constructing the cache-tree extension in convert_to_sparse(), it is
possible that we construct a tree object that is new to the object
database. Without the WRITE_TREE_MISSING_OK flag, this results in an
error that halts our conversion to a sparse index. Add this flag to
remove this limitation.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sparse-index.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/sparse-index.c b/sparse-index.c
index bc5900eae35b45..b6e90417556afc 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -179,10 +179,15 @@ int convert_to_sparse(struct index_state *istate)
 
 	/* Clear and recompute the cache-tree */
 	cache_tree_free(&istate->cache_tree);
-	if (cache_tree_update(istate, 0)) {
-		warning(_("unable to update cache-tree, staying full"));
-		return -1;
-	}
+	/*
+	 * Silently return if there is a problem with the cache tree update,
+	 * which might just be due to a conflict state in some entry.
+	 *
+	 * This might create new tree objects, so be sure to use
+	 * WRITE_TREE_MISSING_OK.
+	 */
+	if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
+		return 0;
 
 	remove_fsmonitor(istate);
 

From 8390e19b518cc8829ef623741937a43087f2b032 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:09 +0000
Subject: [PATCH 184/198] unpack-trees: fix nested sparse-dir search

The iterated search in find_cache_entry() was recently modified to
include a loop that searches backwards for a sparse directory entry that
matches the given traverse_info and name_entry. However, the string
comparison failed to actually concatenate those two strings, so this
failed to find a sparse directory when it was not a top-level directory.

This caused some errors in rare cases where a 'git checkout' spanned a
diff that modified files within the sparse directory entry, but we could
not correctly find the entry.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 unpack-trees.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 5786645f315d51..df1f44377232c0 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1255,9 +1255,10 @@ static int sparse_dir_matches_path(const struct cache_entry *ce,
 static struct cache_entry *find_cache_entry(struct traverse_info *info,
 					    const struct name_entry *p)
 {
-	struct cache_entry *ce;
+	struct cache_entry *ce = NULL;
 	int pos = find_cache_pos(info, p->path, p->pathlen);
 	struct unpack_trees_options *o = info->data;
+	struct strbuf full_path = STRBUF_INIT;
 
 	if (0 <= pos)
 		return o->src_index->cache[pos];
@@ -1273,6 +1274,10 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
 	if (pos < 0 || pos >= o->src_index->cache_nr)
 		return NULL;
 
+	strbuf_addstr(&full_path, info->traverse_path);
+	strbuf_add(&full_path, p->path, p->pathlen);
+	strbuf_addch(&full_path, '/');
+
 	/*
 	 * Due to lexicographic sorting and sparse directory
 	 * entries ending with a trailing slash, our path as a
@@ -1283,17 +1288,20 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
 	while (pos >= 0) {
 		ce = o->src_index->cache[pos];
 
-		if (strncmp(ce->name, p->path, p->pathlen))
-			return NULL;
+		if (strncmp(ce->name, full_path.buf, full_path.len)) {
+			ce = NULL;
+			break;
+		}
 
 		if (S_ISSPARSEDIR(ce->ce_mode) &&
 		    sparse_dir_matches_path(ce, info, p))
-			return ce;
+			break;
 
 		pos--;
 	}
 
-	return NULL;
+	strbuf_release(&full_path);
+	return ce;
 }
 
 static void debug_path(struct traverse_info *info)

From d55b4bcbccb0302a36898277d4735a9e2a0078f0 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:10 +0000
Subject: [PATCH 185/198] sparse-checkout: create helper methods

As we integrate the sparse index into more builtins, we occasionally
need to check the sparse-checkout patterns to see if a path is within
the sparse-checkout cone. Create some helper methods that help
initialize the patterns and check for pattern matching to make this
easier.

The existing callers of commands like get_sparse_checkout_patterns() use
a custom 'struct pattern_list' that is not necessarily the one in the
'struct index_state', so there are not many previous uses that could
adopt these helpers. There are just two in builtin/add.c and
sparse-index.c that can use path_in_sparse_checkout().

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/add.c  |  8 ++------
 dir.c          | 29 +++++++++++++++++++++++++++++
 dir.h          |  6 ++++++
 sparse-index.c | 12 +++---------
 4 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index 17528e8f922693..f675bdeae4a5f8 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -190,8 +190,6 @@ static int refresh(int verbose, const struct pathspec *pathspec)
 	struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP;
 	int flags = REFRESH_IGNORE_SKIP_WORKTREE |
 		    (verbose ? REFRESH_IN_PORCELAIN : REFRESH_QUIET);
-	struct pattern_list pl = { 0 };
-	int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl);
 
 	seen = xcalloc(pathspec->nr, 1);
 	refresh_index(&the_index, flags, pathspec, seen,
@@ -199,12 +197,10 @@ static int refresh(int verbose, const struct pathspec *pathspec)
 	for (i = 0; i < pathspec->nr; i++) {
 		if (!seen[i]) {
 			const char *path = pathspec->items[i].original;
-			int dtype = DT_REG;
 
 			if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) ||
-			    (sparse_checkout_enabled &&
-			     !path_matches_pattern_list(path, strlen(path), NULL,
-							&dtype, &pl, &the_index))) {
+			    (core_apply_sparse_checkout &&
+			     path_in_sparse_checkout(path, &the_index) == NOT_MATCHED)) {
 				string_list_append(&only_match_skip_worktree,
 						   pathspec->items[i].original);
 			} else {
diff --git a/dir.c b/dir.c
index 03c4d212672bc4..7027bdfa068266 100644
--- a/dir.c
+++ b/dir.c
@@ -1439,6 +1439,35 @@ enum pattern_match_result path_matches_pattern_list(
 	return result;
 }
 
+int init_sparse_checkout_patterns(struct index_state *istate)
+{
+	if (istate->sparse_checkout_patterns)
+		return 0;
+
+	CALLOC_ARRAY(istate->sparse_checkout_patterns, 1);
+
+	if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) {
+		FREE_AND_NULL(istate->sparse_checkout_patterns);
+		return -1;
+	}
+
+	return 0;
+}
+
+enum pattern_match_result path_in_sparse_checkout(const char *path,
+						  struct index_state *istate)
+{
+	int dtype = DT_REG;
+	init_sparse_checkout_patterns(istate);
+
+	if (!istate->sparse_checkout_patterns)
+		return MATCHED;
+
+	return path_matches_pattern_list(path, strlen(path), NULL, &dtype,
+					 istate->sparse_checkout_patterns,
+					 istate);
+}
+
 static struct path_pattern *last_matching_pattern_from_lists(
 		struct dir_struct *dir, struct index_state *istate,
 		const char *pathname, int pathlen,
diff --git a/dir.h b/dir.h
index b3e1a54a97145d..9af2e8c4ba4d45 100644
--- a/dir.h
+++ b/dir.h
@@ -394,6 +394,12 @@ enum pattern_match_result path_matches_pattern_list(const char *pathname,
 				const char *basename, int *dtype,
 				struct pattern_list *pl,
 				struct index_state *istate);
+
+int init_sparse_checkout_patterns(struct index_state *state);
+
+enum pattern_match_result path_in_sparse_checkout(const char *path,
+						  struct index_state *istate);
+
 struct dir_entry *dir_add_ignored(struct dir_struct *dir,
 				  struct index_state *istate,
 				  const char *pathname, int len);
diff --git a/sparse-index.c b/sparse-index.c
index b6e90417556afc..2efc9fd4910844 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -34,17 +34,14 @@ static int convert_to_sparse_rec(struct index_state *istate,
 	int i, can_convert = 1;
 	int start_converted = num_converted;
 	enum pattern_match_result match;
-	int dtype = DT_UNKNOWN;
 	struct strbuf child_path = STRBUF_INIT;
-	struct pattern_list *pl = istate->sparse_checkout_patterns;
 
 	/*
 	 * Is the current path outside of the sparse cone?
 	 * Then check if the region can be replaced by a sparse
 	 * directory entry (everything is sparse and merged).
 	 */
-	match = path_matches_pattern_list(ct_path, ct_pathlen,
-					  NULL, &dtype, pl, istate);
+	match = path_in_sparse_checkout(ct_path, istate);
 	if (match != NOT_MATCHED)
 		can_convert = 0;
 
@@ -153,11 +150,8 @@ int convert_to_sparse(struct index_state *istate)
 	if (!istate->repo->settings.sparse_index)
 		return 0;
 
-	if (!istate->sparse_checkout_patterns) {
-		istate->sparse_checkout_patterns = xcalloc(1, sizeof(struct pattern_list));
-		if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0)
-			return 0;
-	}
+	if (init_sparse_checkout_patterns(istate) < 0)
+		return 0;
 
 	/*
 	 * We need cone-mode patterns to use sparse-index. If a user edits

From e311574ccf9f6032203d570d6a3a3b3be8ba0404 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:11 +0000
Subject: [PATCH 186/198] attr: be careful about sparse directories

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 attr.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/attr.c b/attr.c
index d029e681f2880a..a1009f7802900b 100644
--- a/attr.c
+++ b/attr.c
@@ -14,6 +14,7 @@
 #include "utf8.h"
 #include "quote.h"
 #include "thread-utils.h"
+#include "dir.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -744,6 +745,19 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate,
 	if (!istate)
 		return NULL;
 
+	/*
+	 * In the case of cone-mode sparse-checkout, getting the
+	 * .gitattributes file from a directory is meaningless: all
+	 * contained paths will be sparse if the .gitattributes is also
+	 * sparse. In the case of a sparse index, it is critical that we
+	 * don't go looking for one as it will expand the index.
+	 */
+	init_sparse_checkout_patterns(istate);
+	if (istate->sparse_checkout_patterns &&
+	    istate->sparse_checkout_patterns->use_cone_patterns &&
+	    path_in_sparse_checkout(path, istate) == NOT_MATCHED)
+		return NULL;
+
 	buf = read_blob_data_from_index(istate, path, NULL);
 	if (!buf)
 		return NULL;

From 7aa0a9109b8df6fab5eb9ce818b13045d9db59a6 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:12 +0000
Subject: [PATCH 187/198] sparse-index: add SPARSE_INDEX_IGNORE_CONFIG flag

The convert_to_sparse() method checks for the GIT_TEST_SPARSE_INDEX
environment variable or the "index.sparse" config setting before
converting the index to a sparse one. This is for ease of use since all
current consumers are preparing to compress the index before writing it
to disk. If these settings are not enabled, then convert_to_sparse()
silently returns without doing anything.

We will add a consumer in the next change that wants to use the sparse
index as an in-memory data structure, regardless of whether the on-disk
format should be sparse.

To that end, create the SPARSE_INDEX_IGNORE_CONFIG flag that will skip
these config checks when enabled. All current consumers are modified to
pass '0' in the new 'flags' parameter.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c   |  4 ++--
 sparse-index.c | 30 ++++++++++++++++--------------
 sparse-index.h |  3 ++-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 9048ef9e905251..f5d4385c4080a1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3069,7 +3069,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
 	int ret;
 	int was_full = !istate->sparse_index;
 
-	ret = convert_to_sparse(istate);
+	ret = convert_to_sparse(istate, 0);
 
 	if (ret) {
 		warning(_("failed to convert to a sparse-index"));
@@ -3182,7 +3182,7 @@ static int write_shared_index(struct index_state *istate,
 	int ret, was_full = !istate->sparse_index;
 
 	move_cache_to_base_index(istate);
-	convert_to_sparse(istate);
+	convert_to_sparse(istate, 0);
 
 	trace2_region_enter_printf("index", "shared/do_write_index",
 				   the_repository, "%s", get_tempfile_path(*temp));
diff --git a/sparse-index.c b/sparse-index.c
index 2efc9fd4910844..532fd11787ea7a 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -124,7 +124,7 @@ static int index_has_unmerged_entries(struct index_state *istate)
 	return 0;
 }
 
-int convert_to_sparse(struct index_state *istate)
+int convert_to_sparse(struct index_state *istate, int flags)
 {
 	int test_env;
 
@@ -135,20 +135,22 @@ int convert_to_sparse(struct index_state *istate)
 	if (!istate->repo)
 		istate->repo = the_repository;
 
-	/*
-	 * The GIT_TEST_SPARSE_INDEX environment variable triggers the
-	 * index.sparse config variable to be on.
-	 */
-	test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1);
-	if (test_env >= 0)
-		set_sparse_index_config(istate->repo, test_env);
+	if (!(flags & SPARSE_INDEX_IGNORE_CONFIG)) {
+		/*
+		 * The GIT_TEST_SPARSE_INDEX environment variable triggers the
+		 * index.sparse config variable to be on.
+		 */
+		test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1);
+		if (test_env >= 0)
+			set_sparse_index_config(istate->repo, test_env);
 
-	/*
-	 * Only convert to sparse if index.sparse is set.
-	 */
-	prepare_repo_settings(istate->repo);
-	if (!istate->repo->settings.sparse_index)
-		return 0;
+		/*
+		 * Only convert to sparse if index.sparse is set.
+		 */
+		prepare_repo_settings(istate->repo);
+		if (!istate->repo->settings.sparse_index)
+			return 0;
+	}
 
 	if (init_sparse_checkout_patterns(istate) < 0)
 		return 0;
diff --git a/sparse-index.h b/sparse-index.h
index 1115a0d7dd984b..475f4f0f8dae73 100644
--- a/sparse-index.h
+++ b/sparse-index.h
@@ -2,7 +2,8 @@
 #define SPARSE_INDEX_H__
 
 struct index_state;
-int convert_to_sparse(struct index_state *istate);
+#define SPARSE_INDEX_IGNORE_CONFIG (1 << 0)
+int convert_to_sparse(struct index_state *istate, int flags);
 
 /*
  * Some places in the codebase expect to search for a specific path.

From 8f3a3a287fa8266b18a697398237d5f5fd7be5c1 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 10 Aug 2021 19:50:13 +0000
Subject: [PATCH 188/198] sparse-checkout: clear tracked sparse dirs

When changing the scope of a sparse-checkout using cone mode, we might
have some tracked directories go out of scope. The current logic removes
the tracked files from within those directories, but leaves the ignored
files within those directories. This is a bit unexpected to users who
have given input to Git saying they don't need those directories
anymore.

This is something that is new to the cone mode pattern type: the user
has explicitly said "I want these directories and _not_ those
directories." The typical sparse-checkout patterns more generally apply
to "I want files with with these patterns" so it is natural to leave
ignored files as they are. This focus on directories in cone mode
provides us an opportunity to change the behavior.

Leaving these ignored files in the sparse directories makes it
impossible to gain performance benefits in the sparse index. When we
track into these directories, we need to know if the files are ignored
or not, which might depend on the _tracked_ .gitignore file(s) within
the sparse directory. This depends on the indexed version of the file,
so the sparse directory must be expanded.

By deleting the sparse directories when changing scope (or running 'git
sparse-checkout reapply') we regain these performance benefits as if the
repository was in a clean state.

Since these ignored files are frequently build output or helper files
from IDEs, the users should not need the files now that the tracked
files are removed. If the tracked files reappear, then they will have
newer timestamps than the build artifacts, so the artifacts will need to
be regenerated anyway.

Use the sparse-index as a data structure in order to find the sparse
directories that can be safely deleted. Re-expand the index to a full
one if it was full before.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-sparse-checkout.txt |  8 +++
 builtin/sparse-checkout.c             | 95 +++++++++++++++++++++++++++
 t/t1091-sparse-checkout-builtin.sh    | 59 +++++++++++++++++
 3 files changed, 162 insertions(+)

diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
index fdcf43f87cb373..f9022b9d5554bd 100644
--- a/Documentation/git-sparse-checkout.txt
+++ b/Documentation/git-sparse-checkout.txt
@@ -210,6 +210,14 @@ case-insensitive check. This corrects for case mismatched filenames in the
 'git sparse-checkout set' command to reflect the expected cone in the working
 directory.
 
+The cone mode sparse-checkout patterns will also remove ignored files that
+are not within the sparse-checkout definition. This is important behavior
+to preserve the performance of the sparse index, but also matches that
+cone mode patterns care about directories, not files. If there exist files
+that are untracked and not ignored, then Git will not delete files within
+that directory other than the tracked files that are now out of scope.
+These files should be removed manually to ensure Git can behave optimally.
+
 
 SUBMODULES
 ----------
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 8ba9f13787b058..b06c8f885ac4ba 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -100,6 +100,99 @@ static int sparse_checkout_list(int argc, const char **argv)
 	return 0;
 }
 
+static void clean_tracked_sparse_directories(struct repository *r)
+{
+	int i, was_full = 0;
+	struct strbuf path = STRBUF_INIT;
+	size_t pathlen;
+	struct string_list_item *item;
+	struct string_list sparse_dirs = STRING_LIST_INIT_DUP;
+
+	/*
+	 * If we are not using cone mode patterns, then we cannot
+	 * delete directories outside of the sparse cone.
+	 */
+	if (!r || !r->index || !r->worktree)
+		return;
+	init_sparse_checkout_patterns(r->index);
+	if (!r->index->sparse_checkout_patterns ||
+	    !r->index->sparse_checkout_patterns->use_cone_patterns)
+		return;
+
+	/*
+	 * Use the sparse index as a data structure to assist finding
+	 * directories that are safe to delete. This conversion to a
+	 * sparse index will not delete directories that contain
+	 * conflicted entries or submodules.
+	 */
+	if (!r->index->sparse_index) {
+		/*
+		 * If something, such as a merge conflict or other concern,
+		 * prevents us from converting to a sparse index, then do
+		 * not try deleting files.
+		 */
+		if (convert_to_sparse(r->index, SPARSE_INDEX_IGNORE_CONFIG))
+			return;
+		was_full = 1;
+	}
+
+	strbuf_addstr(&path, r->worktree);
+	strbuf_complete(&path, '/');
+	pathlen = path.len;
+
+	/*
+	 * Collect directories that have gone out of scope but also
+	 * exist on disk, so there is some work to be done. We need to
+	 * store the entries in a list before exploring, since that might
+	 * expand the sparse-index again.
+	 */
+	for (i = 0; i < r->index->cache_nr; i++) {
+		struct cache_entry *ce = r->index->cache[i];
+
+		if (S_ISSPARSEDIR(ce->ce_mode) &&
+		    repo_file_exists(r, ce->name))
+			string_list_append(&sparse_dirs, ce->name);
+	}
+
+	for_each_string_list_item(item, &sparse_dirs) {
+		struct dir_struct dir = DIR_INIT;
+		struct pathspec p = { 0 };
+		struct strvec s = STRVEC_INIT;
+
+		strbuf_setlen(&path, pathlen);
+		strbuf_addstr(&path, item->string);
+
+		dir.flags |= DIR_SHOW_IGNORED_TOO;
+
+		setup_standard_excludes(&dir);
+		strvec_push(&s, path.buf);
+
+		parse_pathspec(&p, PATHSPEC_GLOB, 0, NULL, s.v);
+		fill_directory(&dir, r->index, &p);
+
+		if (dir.nr) {
+			warning(_("directory '%s' contains untracked files,"
+				  " but is not in the sparse-checkout cone"),
+				item->string);
+		} else if (remove_dir_recursively(&path, 0)) {
+			/*
+			 * Removal is "best effort". If something blocks
+			 * the deletion, then continue with a warning.
+			 */
+			warning(_("failed to remove directory '%s'"),
+				item->string);
+		}
+
+		dir_clear(&dir);
+	}
+
+	string_list_clear(&sparse_dirs, 0);
+	strbuf_release(&path);
+
+	if (was_full)
+		ensure_full_index(r->index);
+}
+
 static int update_working_directory(struct pattern_list *pl)
 {
 	enum update_sparsity_result result;
@@ -141,6 +234,8 @@ static int update_working_directory(struct pattern_list *pl)
 	else
 		rollback_lock_file(&lock_file);
 
+	clean_tracked_sparse_directories(r);
+
 	r->index->sparse_checkout_patterns = NULL;
 	return result;
 }
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 38fc8340f5c9b7..71236981e6484f 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -642,4 +642,63 @@ test_expect_success MINGW 'cone mode replaces backslashes with slashes' '
 	check_files repo/deep a deeper1
 '
 
+test_expect_success 'cone mode clears ignored subdirectories' '
+	rm repo/.git/info/sparse-checkout &&
+
+	git -C repo sparse-checkout init --cone &&
+	git -C repo sparse-checkout set deep/deeper1 &&
+
+	cat >repo/.gitignore <<-\EOF &&
+	obj/
+	*.o
+	EOF
+
+	git -C repo add .gitignore &&
+	git -C repo commit -m ".gitignore" &&
+
+	mkdir -p repo/obj repo/folder1/obj repo/deep/deeper2/obj &&
+	for file in folder1/obj/a obj/a folder1/file.o folder1.o \
+		    deep/deeper2/obj/a deep/deeper2/file.o file.o
+	do
+		echo ignored >repo/$file || return 1
+	done &&
+
+	git -C repo status --porcelain=v2 >out &&
+	test_must_be_empty out &&
+
+	git -C repo sparse-checkout reapply &&
+	test_path_is_missing repo/folder1 &&
+	test_path_is_missing repo/deep/deeper2 &&
+	test_path_is_dir repo/obj &&
+	test_path_is_file repo/file.o &&
+
+	git -C repo status --porcelain=v2 >out &&
+	test_must_be_empty out &&
+
+	git -C repo sparse-checkout set deep/deeper2 &&
+	test_path_is_missing repo/deep/deeper1 &&
+	test_path_is_dir repo/deep/deeper2 &&
+	test_path_is_dir repo/obj &&
+	test_path_is_file repo/file.o &&
+
+	>repo/deep/deeper2/ignored.o &&
+	>repo/deep/deeper2/untracked &&
+
+	# When an untracked file is in the way, all untracked files
+	# (even ignored files) are preserved.
+	git -C repo sparse-checkout set folder1 2>err &&
+	grep "contains untracked files" err &&
+	test_path_is_file repo/deep/deeper2/ignored.o &&
+	test_path_is_file repo/deep/deeper2/untracked &&
+
+	# The rest of the cone matches expectation
+	test_path_is_missing repo/deep/deeper1 &&
+	test_path_is_dir repo/obj &&
+	test_path_is_file repo/file.o &&
+
+	git -C repo status --porcelain=v2 >out &&
+	echo "? deep/deeper2/untracked" >expect &&
+	test_cmp expect out
+'
+
 test_done

From 65d8c5252d0ab231bef03cf21d312713f9261a0c Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:39 -0700
Subject: [PATCH 189/198] submodule: lazily add submodule ODBs as alternates

Teach Git to add submodule ODBs as alternates to the object store of
the_repository only upon the first access of an object not in
the_repository, and not when add_submodule_odb() is called.

This provides a means of gradually migrating from accessing a
submodule's object through alternates to accessing a submodule's object
by explicitly passing its repository object. Any Git command can declare
that it might access submodule objects by calling add_submodule_odb()
(as they do now), but the submodule ODBs themselves will not be added
until needed, so individual commands and/or combinations of arguments
can be migrated one by one.

[The advantage of explicit repository-object passing is code clarity (it
is clear which repository an object read is from), performance (there is
no need to linearly search through all submodule ODBs whenever an object
is accessed from any repository, whether superproject or submodule), and
the possibility of future features like partial clone submodules (which
right now is not possible because if an object is missing, we do not
know which repository to lazy-fetch into).]

This commit also introduces an environment variable that a test may set
to make the actual registration of alternates fatal, in order to
demonstrate that its codepaths do not need this registration.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c |  5 +++++
 submodule.c   | 20 +++++++++++++++++++-
 submodule.h   |  7 +++++++
 t/README      | 10 ++++++++++
 4 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/object-file.c b/object-file.c
index 3d27dc8dea3337..621b121bcb90e1 100644
--- a/object-file.c
+++ b/object-file.c
@@ -32,6 +32,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "submodule.h"
 
 /* The maximum size for an object header. */
 #define MAX_HEADER_LEN 32
@@ -1592,6 +1593,10 @@ static int do_oid_object_info_extended(struct repository *r,
 				break;
 		}
 
+		if (register_all_submodule_odb_as_alternates())
+			/* We added some alternates; retry */
+			continue;
+
 		/* Check if it is a missing object */
 		if (fetch_if_missing && repo_has_promisor_remote(r) &&
 		    !already_retried &&
diff --git a/submodule.c b/submodule.c
index 8e611fe1dbf1f7..8fde90e9063b3a 100644
--- a/submodule.c
+++ b/submodule.c
@@ -165,6 +165,8 @@ void stage_updated_gitmodules(struct index_state *istate)
 		die(_("staging updated .gitmodules failed"));
 }
 
+static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
+
 /* TODO: remove this function, use repo_submodule_init instead. */
 int add_submodule_odb(const char *path)
 {
@@ -178,12 +180,28 @@ int add_submodule_odb(const char *path)
 		ret = -1;
 		goto done;
 	}
-	add_to_alternates_memory(objects_directory.buf);
+	string_list_insert(&added_submodule_odb_paths,
+			   strbuf_detach(&objects_directory, NULL));
 done:
 	strbuf_release(&objects_directory);
 	return ret;
 }
 
+int register_all_submodule_odb_as_alternates(void)
+{
+	int i;
+	int ret = added_submodule_odb_paths.nr;
+
+	for (i = 0; i < added_submodule_odb_paths.nr; i++)
+		add_to_alternates_memory(added_submodule_odb_paths.items[i].string);
+	if (ret) {
+		string_list_clear(&added_submodule_odb_paths, 0);
+		if (git_env_bool("GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB", 0))
+			BUG("register_all_submodule_odb_as_alternates() called");
+	}
+	return ret;
+}
+
 void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
 					     const char *path)
 {
diff --git a/submodule.h b/submodule.h
index 84640c49c1149d..c252784bc2fabe 100644
--- a/submodule.h
+++ b/submodule.h
@@ -97,7 +97,14 @@ int submodule_uses_gitfile(const char *path);
 #define SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED (1<<2)
 int bad_to_remove_submodule(const char *path, unsigned flags);
 
+/*
+ * Call add_submodule_odb() to add the submodule at the given path to a list.
+ * When register_all_submodule_odb_as_alternates() is called, the object stores
+ * of all submodules in that list will be added as alternates in
+ * the_repository.
+ */
 int add_submodule_odb(const char *path);
+int register_all_submodule_odb_as_alternates(void);
 
 /*
  * Checks if there are submodule changes in a..b. If a is the null OID,
diff --git a/t/README b/t/README
index 9e701223020380..8b67b4f00b5a9e 100644
--- a/t/README
+++ b/t/README
@@ -448,6 +448,16 @@ GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting
 to <n> and 'checkout.thresholdForParallelism' to 0, forcing the
 execution of the parallel-checkout code.
 
+GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=<boolean>, when true, makes
+registering submodule ODBs as alternates a fatal action. Support for
+this environment variable can be removed once the migration to
+explicitly providing repositories when accessing submodule objects is
+complete (in which case we might want to replace this with a trace2
+call so that users can make it visible if accessing submodule objects
+without an explicit repository still happens) or needs to be abandoned
+for whatever reason (in which case the migrated codepaths still retain
+their performance benefits).
+
 Naming Tests
 ------------
 

From e37cfff2abd87c3e8c038be8ff3b2a7402a3b03c Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:40 -0700
Subject: [PATCH 190/198] grep: use submodule-ODB-as-alternate lazy-addition

In the parent commit, Git was taught to add submodule ODBs as alternates
lazily, but grep does not use this because it computes the path to add
directly, not going through add_submodule_odb(). Add an equivalent to
add_submodule_odb() that takes the exact ODB path and teach grep to use
it.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/grep.c | 2 +-
 submodule.c    | 5 +++++
 submodule.h    | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 7d2f8e5adb69c4..87bcb934a24c34 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -450,7 +450,7 @@ static int grep_submodule(struct grep_opt *opt,
 	 * store is no longer global and instead is a member of the repository
 	 * object.
 	 */
-	add_to_alternates_memory(subrepo.objects->odb->path);
+	add_submodule_odb_by_path(subrepo.objects->odb->path);
 	obj_read_unlock();
 
 	memcpy(&subopt, opt, sizeof(subopt));
diff --git a/submodule.c b/submodule.c
index 8fde90e9063b3a..8de1aecaeb0311 100644
--- a/submodule.c
+++ b/submodule.c
@@ -187,6 +187,11 @@ int add_submodule_odb(const char *path)
 	return ret;
 }
 
+void add_submodule_odb_by_path(const char *path)
+{
+	string_list_insert(&added_submodule_odb_paths, xstrdup(path));
+}
+
 int register_all_submodule_odb_as_alternates(void)
 {
 	int i;
diff --git a/submodule.h b/submodule.h
index c252784bc2fabe..17a06cc43bf3d9 100644
--- a/submodule.h
+++ b/submodule.h
@@ -104,6 +104,7 @@ int bad_to_remove_submodule(const char *path, unsigned flags);
  * the_repository.
  */
 int add_submodule_odb(const char *path);
+void add_submodule_odb_by_path(const char *path);
 int register_all_submodule_odb_as_alternates(void);
 
 /*

From 30a95d5f1339978f31ed53ed8b847ffa4f011173 Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:41 -0700
Subject: [PATCH 191/198] grep: typesafe versions of grep_source_init

grep_source_init() can create "struct grep_source" objects and,
depending on the value of the type passed, some void-pointer parameters have
different meanings. Because one of these types (GREP_SOURCE_OID) will
require an additional parameter in a subsequent patch, take the
opportunity to increase clarity and type safety by replacing this
function with individual functions for each type.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/grep.c |  4 ++--
 grep.c         | 43 +++++++++++++++++++++++++++----------------
 grep.h         |  8 +++++---
 3 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 87bcb934a24c34..e454335e9d8fee 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -333,7 +333,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
 	struct grep_source gs;
 
 	grep_source_name(opt, filename, tree_name_len, &pathbuf);
-	grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+	grep_source_init_oid(&gs, pathbuf.buf, path, oid);
 	strbuf_release(&pathbuf);
 
 	if (num_threads > 1) {
@@ -359,7 +359,7 @@ static int grep_file(struct grep_opt *opt, const char *filename)
 	struct grep_source gs;
 
 	grep_source_name(opt, filename, 0, &buf);
-	grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename);
+	grep_source_init_file(&gs, buf.buf, filename);
 	strbuf_release(&buf);
 
 	if (num_threads > 1) {
diff --git a/grep.c b/grep.c
index 424a39591b05e5..ba3711dc567d9b 100644
--- a/grep.c
+++ b/grep.c
@@ -1830,7 +1830,7 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
 	struct grep_source gs;
 	int r;
 
-	grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
+	grep_source_init_buf(&gs);
 	gs.buf = buf;
 	gs.size = size;
 
@@ -1840,28 +1840,39 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
 	return r;
 }
 
-void grep_source_init(struct grep_source *gs, enum grep_source_type type,
-		      const char *name, const char *path,
-		      const void *identifier)
+void grep_source_init_file(struct grep_source *gs, const char *name,
+			   const char *path)
 {
-	gs->type = type;
+	gs->type = GREP_SOURCE_FILE;
 	gs->name = xstrdup_or_null(name);
 	gs->path = xstrdup_or_null(path);
 	gs->buf = NULL;
 	gs->size = 0;
 	gs->driver = NULL;
+	gs->identifier = xstrdup(path);
+}
 
-	switch (type) {
-	case GREP_SOURCE_FILE:
-		gs->identifier = xstrdup(identifier);
-		break;
-	case GREP_SOURCE_OID:
-		gs->identifier = oiddup(identifier);
-		break;
-	case GREP_SOURCE_BUF:
-		gs->identifier = NULL;
-		break;
-	}
+void grep_source_init_oid(struct grep_source *gs, const char *name,
+			  const char *path, const struct object_id *oid)
+{
+	gs->type = GREP_SOURCE_OID;
+	gs->name = xstrdup_or_null(name);
+	gs->path = xstrdup_or_null(path);
+	gs->buf = NULL;
+	gs->size = 0;
+	gs->driver = NULL;
+	gs->identifier = oiddup(oid);
+}
+
+void grep_source_init_buf(struct grep_source *gs)
+{
+	gs->type = GREP_SOURCE_BUF;
+	gs->name = NULL;
+	gs->path = NULL;
+	gs->buf = NULL;
+	gs->size = 0;
+	gs->driver = NULL;
+	gs->identifier = NULL;
 }
 
 void grep_source_clear(struct grep_source *gs)
diff --git a/grep.h b/grep.h
index 72f82b1e302397..f4a3090f1c8f29 100644
--- a/grep.h
+++ b/grep.h
@@ -195,9 +195,11 @@ struct grep_source {
 	struct userdiff_driver *driver;
 };
 
-void grep_source_init(struct grep_source *gs, enum grep_source_type type,
-		      const char *name, const char *path,
-		      const void *identifier);
+void grep_source_init_file(struct grep_source *gs, const char *name,
+			   const char *path);
+void grep_source_init_oid(struct grep_source *gs, const char *name,
+			  const char *path, const struct object_id *oid);
+void grep_source_init_buf(struct grep_source *gs);
 void grep_source_clear_data(struct grep_source *gs);
 void grep_source_clear(struct grep_source *gs);
 void grep_source_load_driver(struct grep_source *gs,

From f53d26f101d12f9ab6456f4af99ab830023b1f95 Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:42 -0700
Subject: [PATCH 192/198] grep: read submodule entry with explicit repo

Replace an existing parse_object_or_die() call (which implicitly works
on the_repository) with a function call that allows a repository to be
passed in. There is no such direct equivalent to parse_object_or_die(),
but we only need the type of the object, so replace with
oid_object_info().

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/grep.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index e454335e9d8fee..9e61c7c9936c8a 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -457,27 +457,27 @@ static int grep_submodule(struct grep_opt *opt,
 	subopt.repo = &subrepo;
 
 	if (oid) {
-		struct object *object;
+		enum object_type object_type;
 		struct tree_desc tree;
 		void *data;
 		unsigned long size;
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
-		object = parse_object_or_die(oid, NULL);
+		object_type = oid_object_info(&subrepo, oid, NULL);
 		obj_read_unlock();
 		data = read_object_with_reference(&subrepo,
-						  &object->oid, tree_type,
+						  oid, tree_type,
 						  &size, NULL);
 		if (!data)
-			die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
+			die(_("unable to read tree (%s)"), oid_to_hex(oid));
 
 		strbuf_addstr(&base, filename);
 		strbuf_addch(&base, '/');
 
 		init_tree_desc(&tree, data, size);
 		hit = grep_tree(&subopt, pathspec, &tree, &base, base.len,
-				object->type == OBJ_COMMIT);
+				object_type == OBJ_COMMIT);
 		strbuf_release(&base);
 		free(data);
 	} else {

From f601ca4f06670fb57fa097b78eb3bfcd2a58183b Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:43 -0700
Subject: [PATCH 193/198] grep: allocate subrepos on heap

Currently, struct repository objects corresponding to submodules are
allocated on the stack in grep_submodule(). This currently works because
they will not be used once grep_submodule() exits, but a subsequent
patch will require these structs to be accessible for longer (perhaps
even in another thread). Allocate them on the heap and clear them only
at the very end.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/grep.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 9e61c7c9936c8a..5a40e18e477395 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -65,6 +65,9 @@ static int todo_done;
 /* Has all work items been added? */
 static int all_work_added;
 
+static struct repository **repos_to_free;
+static size_t repos_to_free_nr, repos_to_free_alloc;
+
 /* This lock protects all the variables above. */
 static pthread_mutex_t grep_mutex;
 
@@ -168,6 +171,17 @@ static void work_done(struct work_item *w)
 	grep_unlock();
 }
 
+static void free_repos(void)
+{
+	int i;
+
+	for (i = 0; i < repos_to_free_nr; i++) {
+		repo_clear(repos_to_free[i]);
+		free(repos_to_free[i]);
+	}
+	free(repos_to_free);
+}
+
 static void *run(void *arg)
 {
 	int hit = 0;
@@ -415,19 +429,24 @@ static int grep_submodule(struct grep_opt *opt,
 			  const struct object_id *oid,
 			  const char *filename, const char *path, int cached)
 {
-	struct repository subrepo;
+	struct repository *subrepo;
 	struct repository *superproject = opt->repo;
 	const struct submodule *sub;
 	struct grep_opt subopt;
-	int hit;
+	int hit = 0;
 
 	sub = submodule_from_path(superproject, null_oid(), path);
 
 	if (!is_submodule_active(superproject, path))
 		return 0;
 
-	if (repo_submodule_init(&subrepo, superproject, sub))
+	subrepo = xmalloc(sizeof(*subrepo));
+	if (repo_submodule_init(subrepo, superproject, sub)) {
+		free(subrepo);
 		return 0;
+	}
+	ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc);
+	repos_to_free[repos_to_free_nr++] = subrepo;
 
 	/*
 	 * NEEDSWORK: repo_read_gitmodules() might call
@@ -438,7 +457,7 @@ static int grep_submodule(struct grep_opt *opt,
 	 * subrepo's odbs to the in-memory alternates list.
 	 */
 	obj_read_lock();
-	repo_read_gitmodules(&subrepo, 0);
+	repo_read_gitmodules(subrepo, 0);
 
 	/*
 	 * NEEDSWORK: This adds the submodule's object directory to the list of
@@ -450,11 +469,11 @@ static int grep_submodule(struct grep_opt *opt,
 	 * store is no longer global and instead is a member of the repository
 	 * object.
 	 */
-	add_submodule_odb_by_path(subrepo.objects->odb->path);
+	add_submodule_odb_by_path(subrepo->objects->odb->path);
 	obj_read_unlock();
 
 	memcpy(&subopt, opt, sizeof(subopt));
-	subopt.repo = &subrepo;
+	subopt.repo = subrepo;
 
 	if (oid) {
 		enum object_type object_type;
@@ -464,9 +483,9 @@ static int grep_submodule(struct grep_opt *opt,
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
-		object_type = oid_object_info(&subrepo, oid, NULL);
+		object_type = oid_object_info(subrepo, oid, NULL);
 		obj_read_unlock();
-		data = read_object_with_reference(&subrepo,
+		data = read_object_with_reference(subrepo,
 						  oid, tree_type,
 						  &size, NULL);
 		if (!data)
@@ -484,7 +503,6 @@ static int grep_submodule(struct grep_opt *opt,
 		hit = grep_cache(&subopt, pathspec, cached);
 	}
 
-	repo_clear(&subrepo);
 	return hit;
 }
 
@@ -1182,5 +1200,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		run_pager(&opt, prefix);
 	clear_pathspec(&pathspec);
 	free_grep_patterns(&opt);
+	free_repos();
 	return !hit;
 }

From b9d5dec89df646df030aa6a95b3a70e034a0ba7a Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:44 -0700
Subject: [PATCH 194/198] grep: add repository to OID grep sources

Record the repository whenever an OID grep source is created, and teach
the worker threads to explicitly provide the repository when accessing
objects.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/grep.c | 2 +-
 grep.c         | 7 +++++--
 grep.h         | 4 +++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 5a40e18e477395..ea6df6dca4ccd9 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -347,7 +347,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
 	struct grep_source gs;
 
 	grep_source_name(opt, filename, tree_name_len, &pathbuf);
-	grep_source_init_oid(&gs, pathbuf.buf, path, oid);
+	grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo);
 	strbuf_release(&pathbuf);
 
 	if (num_threads > 1) {
diff --git a/grep.c b/grep.c
index ba3711dc567d9b..14c677b4ae1a77 100644
--- a/grep.c
+++ b/grep.c
@@ -1853,7 +1853,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name,
 }
 
 void grep_source_init_oid(struct grep_source *gs, const char *name,
-			  const char *path, const struct object_id *oid)
+			  const char *path, const struct object_id *oid,
+			  struct repository *repo)
 {
 	gs->type = GREP_SOURCE_OID;
 	gs->name = xstrdup_or_null(name);
@@ -1862,6 +1863,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name,
 	gs->size = 0;
 	gs->driver = NULL;
 	gs->identifier = oiddup(oid);
+	gs->repo = repo;
 }
 
 void grep_source_init_buf(struct grep_source *gs)
@@ -1901,7 +1903,8 @@ static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
 
-	gs->buf = read_object_file(gs->identifier, &type, &gs->size);
+	gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
+					&gs->size);
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
diff --git a/grep.h b/grep.h
index f4a3090f1c8f29..c5234f9b38fd08 100644
--- a/grep.h
+++ b/grep.h
@@ -187,6 +187,7 @@ struct grep_source {
 		GREP_SOURCE_BUF,
 	} type;
 	void *identifier;
+	struct repository *repo; /* if GREP_SOURCE_OID */
 
 	char *buf;
 	unsigned long size;
@@ -198,7 +199,8 @@ struct grep_source {
 void grep_source_init_file(struct grep_source *gs, const char *name,
 			   const char *path);
 void grep_source_init_oid(struct grep_source *gs, const char *name,
-			  const char *path, const struct object_id *oid);
+			  const char *path, const struct object_id *oid,
+			  struct repository *repo);
 void grep_source_init_buf(struct grep_source *gs);
 void grep_source_clear_data(struct grep_source *gs);
 void grep_source_clear(struct grep_source *gs);

From 8ac00fb3ce03bd8066bd5ae8257e8456355f8976 Mon Sep 17 00:00:00 2001
From: Jonathan Tan <jonathantanmy@google.com>
Date: Tue, 10 Aug 2021 11:28:45 -0700
Subject: [PATCH 195/198] t7814: show lack of alternate ODB-adding

The previous patches have made "git grep" no longer need to add
submodule ODBs as alternates, at least for the code paths tested in
t7814. Demonstrate this by making adding a submodule ODB as an alternate
fatal in this test.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7814-grep-recurse-submodules.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh
index 828cb3ba5818fd..3172f5b936489e 100755
--- a/t/t7814-grep-recurse-submodules.sh
+++ b/t/t7814-grep-recurse-submodules.sh
@@ -8,6 +8,9 @@ submodules.
 
 . ./test-lib.sh
 
+GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=1
+export GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB
+
 test_expect_success 'setup directory structure and submodule' '
 	echo "(1|2)d(3|4)" >a &&
 	mkdir b &&

From 96a547145144849918c64f7144ae090ec595dbcf Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Tue, 10 Aug 2021 15:12:01 -0700
Subject: [PATCH 196/198] userdiff: comment on the builtin patterns

Remind developers that they do not need to go overboard to implement
patterns to prepare for invalid constructs.  They only have to be
sufficiently permissive, assuming that the payload is syntactically
correct.

Text stolen mostly from Johannes Sixt.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 userdiff.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/userdiff.c b/userdiff.c
index 3c3bbe38b048db..29adef7d6dce3e 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -13,6 +13,16 @@ static int drivers_alloc;
 #define IPATTERN(name, pattern, word_regex)			\
 	{ name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, \
 	  word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" }
+
+/*
+ * Built-in drivers for various languages, sorted by their names
+ * (except that the "default" is left at the end).
+ *
+ * When writing or updating patterns, assume that the contents these
+ * patterns are applied to are syntactically correct.  You do not have
+ * to implement all syntactical corner cases---the patterns have to be
+ * sufficiently permissive.
+ */
 static struct userdiff_driver builtin_drivers[] = {
 IPATTERN("ada",
 	 "!^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n"

From d393e45f8cccb270eef82e34cc744c1428f77998 Mon Sep 17 00:00:00 2001
From: Josh Steadmon <steadmon@google.com>
Date: Tue, 10 Aug 2021 12:20:38 -0700
Subject: [PATCH 197/198] sequencer: advise if skipping cherry-picked commit

Silently skipping commits when rebasing with --no-reapply-cherry-picks
(currently the default behavior) can cause user confusion. Issue advice
in this case so that users are aware of what's happening.

Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/advice.txt |  3 +++
 advice.c                        |  3 +++
 advice.h                        |  1 +
 sequencer.c                     | 22 ++++++++++++++++++++--
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt
index 8b2849ff7b3f5c..063eec2511d37e 100644
--- a/Documentation/config/advice.txt
+++ b/Documentation/config/advice.txt
@@ -44,6 +44,9 @@ advice.*::
 		Shown when linkgit:git-push[1] rejects a forced update of
 		a branch when its remote-tracking ref has updates that we
 		do not have locally.
+	skippedCherryPicks::
+		Shown when linkgit:git-rebase[1] skips a commit that has already
+		been cherry-picked onto the upstream branch.
 	statusAheadBehind::
 		Shown when linkgit:git-status[1] computes the ahead/behind
 		counts for a local ref compared to its remote tracking ref,
diff --git a/advice.c b/advice.c
index 0b9c89c48ab996..629e594252eb57 100644
--- a/advice.c
+++ b/advice.c
@@ -34,6 +34,7 @@ int advice_checkout_ambiguous_remote_branch_name = 1;
 int advice_submodule_alternate_error_strategy_die = 1;
 int advice_add_ignored_file = 1;
 int advice_add_empty_pathspec = 1;
+int advice_skipped_cherry_picks = 1;
 
 static int advice_use_color = -1;
 static char advice_colors[][COLOR_MAXLEN] = {
@@ -96,6 +97,7 @@ static struct {
 	{ "submoduleAlternateErrorStrategyDie", &advice_submodule_alternate_error_strategy_die },
 	{ "addIgnoredFile", &advice_add_ignored_file },
 	{ "addEmptyPathspec", &advice_add_empty_pathspec },
+	{ "skippedCherryPicks", &advice_skipped_cherry_picks },
 
 	/* make this an alias for backward compatibility */
 	{ "pushNonFastForward", &advice_push_update_rejected }
@@ -139,6 +141,7 @@ static struct {
 	[ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE] = { "submoduleAlternateErrorStrategyDie", 1 },
 	[ADVICE_UPDATE_SPARSE_PATH]			= { "updateSparsePath", 1 },
 	[ADVICE_WAITING_FOR_EDITOR]			= { "waitingForEditor", 1 },
+	[ADVICE_SKIPPED_CHERRY_PICKS]			= { "skippedCherryPicks", 1 },
 };
 
 static const char turn_off_instructions[] =
diff --git a/advice.h b/advice.h
index 9f8ffc73546b39..d705bf164c36ca 100644
--- a/advice.h
+++ b/advice.h
@@ -75,6 +75,7 @@ extern int advice_add_empty_pathspec;
 	ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE,
 	ADVICE_UPDATE_SPARSE_PATH,
 	ADVICE_WAITING_FOR_EDITOR,
+	ADVICE_SKIPPED_CHERRY_PICKS,
 };
 
 int git_default_advice_config(const char *var, const char *value);
diff --git a/sequencer.c b/sequencer.c
index 7f07cd00f3f20a..1235f61c9d19dd 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -5099,6 +5099,7 @@ static int make_script_with_merges(struct pretty_print_context *pp,
 	int keep_empty = flags & TODO_LIST_KEEP_EMPTY;
 	int rebase_cousins = flags & TODO_LIST_REBASE_COUSINS;
 	int root_with_onto = flags & TODO_LIST_ROOT_WITH_ONTO;
+	int skipped_commit = 0;
 	struct strbuf buf = STRBUF_INIT, oneline = STRBUF_INIT;
 	struct strbuf label = STRBUF_INIT;
 	struct commit_list *commits = NULL, **tail = &commits, *iter;
@@ -5149,8 +5150,13 @@ static int make_script_with_merges(struct pretty_print_context *pp,
 		oidset_insert(&interesting, &commit->object.oid);
 
 		is_empty = is_original_commit_empty(commit);
-		if (!is_empty && (commit->object.flags & PATCHSAME))
+		if (!is_empty && (commit->object.flags & PATCHSAME)) {
+			advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS,
+					_("skipped previously applied commit %s"),
+					short_commit_name(commit));
+			skipped_commit = 1;
 			continue;
+		}
 		if (is_empty && !keep_empty)
 			continue;
 
@@ -5214,6 +5220,9 @@ static int make_script_with_merges(struct pretty_print_context *pp,
 		oidcpy(&entry->entry.oid, &commit->object.oid);
 		oidmap_put(&commit2todo, entry);
 	}
+	if (skipped_commit)
+		advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS,
+				  _("use --reapply-cherry-picks to include skipped commits"));
 
 	/*
 	 * Second phase:
@@ -5334,6 +5343,7 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc,
 	const char *insn = flags & TODO_LIST_ABBREVIATE_CMDS ? "p" : "pick";
 	int rebase_merges = flags & TODO_LIST_REBASE_MERGES;
 	int reapply_cherry_picks = flags & TODO_LIST_REAPPLY_CHERRY_PICKS;
+	int skipped_commit = 0;
 
 	repo_init_revisions(r, &revs, NULL);
 	revs.verbose_header = 1;
@@ -5369,8 +5379,13 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc,
 	while ((commit = get_revision(&revs))) {
 		int is_empty = is_original_commit_empty(commit);
 
-		if (!is_empty && (commit->object.flags & PATCHSAME))
+		if (!is_empty && (commit->object.flags & PATCHSAME)) {
+			advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS,
+					  _("skipped previously applied commit %s"),
+					  short_commit_name(commit));
+			skipped_commit = 1;
 			continue;
+		}
 		if (is_empty && !keep_empty)
 			continue;
 		strbuf_addf(out, "%s %s ", insn,
@@ -5380,6 +5395,9 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc,
 			strbuf_addf(out, " %c empty", comment_line_char);
 		strbuf_addch(out, '\n');
 	}
+	if (skipped_commit)
+		advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS,
+				  _("use --reapply-cherry-picks to include skipped commits"));
 	return 0;
 }
 

From 193625e6b26e0c4947e2165cac12e13d3d8c2fe8 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Tue, 10 Aug 2021 15:42:57 -0700
Subject: [PATCH 198/198] SQUASH???

---
 advice.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/advice.c b/advice.c
index 629e594252eb57..efc92d33af74d8 100644
--- a/advice.c
+++ b/advice.c
@@ -34,7 +34,7 @@ int advice_checkout_ambiguous_remote_branch_name = 1;
 int advice_submodule_alternate_error_strategy_die = 1;
 int advice_add_ignored_file = 1;
 int advice_add_empty_pathspec = 1;
-int advice_skipped_cherry_picks = 1;
+static int advice_skipped_cherry_picks = 1;
 
 static int advice_use_color = -1;
 static char advice_colors[][COLOR_MAXLEN] = {